Commit 4b874fc4 authored by Michael Niedermayer

Merge commit 'a65bdceb'

* commit 'a65bdceb':
  x86: mmx2 ---> mmxext in variable names

Conflicts:
	libswscale/utils.c
Merged-by: Michael Niedermayer <michaelni@gmx.at>
parents 78ec407d a65bdceb
...@@ -312,12 +312,12 @@ typedef struct SwsContext { ...@@ -312,12 +312,12 @@ typedef struct SwsContext {
int vChrFilterSize; ///< Vertical filter size for chroma pixels. int vChrFilterSize; ///< Vertical filter size for chroma pixels.
//@} //@}
int lumMmx2FilterCodeSize; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code size for luma/alpha planes. int lumMmxextFilterCodeSize; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code size for luma/alpha planes.
int chrMmx2FilterCodeSize; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code size for chroma planes. int chrMmxextFilterCodeSize; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code size for chroma planes.
uint8_t *lumMmx2FilterCode; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code for luma/alpha planes. uint8_t *lumMmxextFilterCode; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code for luma/alpha planes.
uint8_t *chrMmx2FilterCode; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code for chroma planes. uint8_t *chrMmxextFilterCode; ///< Runtime-generated MMXEXT horizontal fast bilinear scaler code for chroma planes.
int canMMX2BeUsed; int canMMXEXTBeUsed;
int dstY; ///< Last destination vertical line output from last slice. int dstY; ///< Last destination vertical line output from last slice.
int flags; ///< Flags passed by the user to select scaler algorithm, optimizations, subsampling, etc... int flags; ///< Flags passed by the user to select scaler algorithm, optimizations, subsampling, etc...
......
...@@ -1062,19 +1062,21 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, ...@@ -1062,19 +1062,21 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
c->srcBpc = 16; c->srcBpc = 16;
if (c->dstBpc == 16) if (c->dstBpc == 16)
dst_stride <<= 1; dst_stride <<= 1;
if (INLINE_MMXEXT(cpu_flags) && c->srcBpc == 8 && c->dstBpc <= 14) { if (INLINE_MMXEXT(cpu_flags) && c->srcBpc == 8 && c->dstBpc <= 14) {
c->canMMX2BeUsed = (dstW >= srcW && (dstW & 31) == 0 && c->canMMXEXTBeUsed = (dstW >= srcW && (dstW & 31) == 0 &&
(srcW & 15) == 0) ? 1 : 0; (srcW & 15) == 0) ? 1 : 0;
if (!c->canMMX2BeUsed && dstW >= srcW && (srcW & 15) == 0 if (!c->canMMXEXTBeUsed && dstW >= srcW && (srcW & 15) == 0
&& (flags & SWS_FAST_BILINEAR)) { && (flags & SWS_FAST_BILINEAR)) {
if (flags & SWS_PRINT_INFO) if (flags & SWS_PRINT_INFO)
av_log(c, AV_LOG_INFO, av_log(c, AV_LOG_INFO,
"output width is not a multiple of 32 -> no MMXEXT scaler\n"); "output width is not a multiple of 32 -> no MMXEXT scaler\n");
} }
if (usesHFilter || isNBPS(c->srcFormat) || is16BPS(c->srcFormat) || isAnyRGB(c->srcFormat)) if (usesHFilter || isNBPS(c->srcFormat) || is16BPS(c->srcFormat) || isAnyRGB(c->srcFormat))
c->canMMX2BeUsed=0; c->canMMXEXTBeUsed = 0;
} else } else
c->canMMX2BeUsed = 0; c->canMMXEXTBeUsed = 0;
c->chrXInc = (((int64_t)c->chrSrcW << 16) + (c->chrDstW >> 1)) / c->chrDstW; c->chrXInc = (((int64_t)c->chrSrcW << 16) + (c->chrDstW >> 1)) / c->chrDstW;
c->chrYInc = (((int64_t)c->chrSrcH << 16) + (c->chrDstH >> 1)) / c->chrDstH; c->chrYInc = (((int64_t)c->chrSrcH << 16) + (c->chrDstH >> 1)) / c->chrDstH;
...@@ -1087,7 +1089,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, ...@@ -1087,7 +1089,7 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
* correct variant would be like the vertical one, but that would require * correct variant would be like the vertical one, but that would require
* some special code for the first and last pixel */ * some special code for the first and last pixel */
if (flags & SWS_FAST_BILINEAR) { if (flags & SWS_FAST_BILINEAR) {
if (c->canMMX2BeUsed) { if (c->canMMXEXTBeUsed) {
c->lumXInc += 20; c->lumXInc += 20;
c->chrXInc += 20; c->chrXInc += 20;
} }
...@@ -1104,27 +1106,39 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, ...@@ -1104,27 +1106,39 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
{ {
#if HAVE_MMXEXT_INLINE #if HAVE_MMXEXT_INLINE
// can't downscale !!! // can't downscale !!!
if (c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR)) { if (c->canMMXEXTBeUsed && (flags & SWS_FAST_BILINEAR)) {
c->lumMmx2FilterCodeSize = initMMX2HScaler(dstW, c->lumXInc, NULL, c->lumMmxextFilterCodeSize = initMMX2HScaler(dstW, c->lumXInc, NULL,
NULL, NULL, 8); NULL, NULL, 8);
c->chrMmx2FilterCodeSize = initMMX2HScaler(c->chrDstW, c->chrXInc, c->chrMmxextFilterCodeSize = initMMX2HScaler(c->chrDstW, c->chrXInc,
NULL, NULL, NULL, 4); NULL, NULL, NULL, 4);
#if USE_MMAP #if USE_MMAP
c->lumMmx2FilterCode = mmap(NULL, c->lumMmx2FilterCodeSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); c->lumMmxextFilterCode = mmap(NULL, c->lumMmxextFilterCodeSize,
c->chrMmx2FilterCode = mmap(NULL, c->chrMmx2FilterCodeSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS,
-1, 0);
c->chrMmxextFilterCode = mmap(NULL, c->chrMmxextFilterCodeSize,
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS,
-1, 0);
#elif HAVE_VIRTUALALLOC #elif HAVE_VIRTUALALLOC
c->lumMmx2FilterCode = VirtualAlloc(NULL, c->lumMmx2FilterCodeSize, MEM_COMMIT, PAGE_EXECUTE_READWRITE); c->lumMmxextFilterCode = VirtualAlloc(NULL,
c->chrMmx2FilterCode = VirtualAlloc(NULL, c->chrMmx2FilterCodeSize, MEM_COMMIT, PAGE_EXECUTE_READWRITE); c->lumMmxextFilterCodeSize,
MEM_COMMIT,
PAGE_EXECUTE_READWRITE);
c->chrMmxextFilterCode = VirtualAlloc(NULL,
c->chrMmxextFilterCodeSize,
MEM_COMMIT,
PAGE_EXECUTE_READWRITE);
#else #else
c->lumMmx2FilterCode = av_malloc(c->lumMmx2FilterCodeSize); c->lumMmxextFilterCode = av_malloc(c->lumMmxextFilterCodeSize);
c->chrMmx2FilterCode = av_malloc(c->chrMmx2FilterCodeSize); c->chrMmxextFilterCode = av_malloc(c->chrMmxextFilterCodeSize);
#endif #endif
#ifdef MAP_ANONYMOUS #ifdef MAP_ANONYMOUS
if (c->lumMmx2FilterCode == MAP_FAILED || c->chrMmx2FilterCode == MAP_FAILED) if (c->lumMmxextFilterCode == MAP_FAILED || c->chrMmxextFilterCode == MAP_FAILED)
#else #else
if (!c->lumMmx2FilterCode || !c->chrMmx2FilterCode) if (!c->lumMmxextFilterCode || !c->chrMmxextFilterCode)
#endif #endif
{ {
av_log(c, AV_LOG_ERROR, "Failed to allocate MMX2FilterCode\n"); av_log(c, AV_LOG_ERROR, "Failed to allocate MMX2FilterCode\n");
...@@ -1136,14 +1150,14 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter, ...@@ -1136,14 +1150,14 @@ av_cold int sws_init_context(SwsContext *c, SwsFilter *srcFilter,
FF_ALLOCZ_OR_GOTO(c, c->hLumFilterPos, (dstW / 2 / 8 + 8) * sizeof(int32_t), fail); FF_ALLOCZ_OR_GOTO(c, c->hLumFilterPos, (dstW / 2 / 8 + 8) * sizeof(int32_t), fail);
FF_ALLOCZ_OR_GOTO(c, c->hChrFilterPos, (c->chrDstW / 2 / 4 + 8) * sizeof(int32_t), fail); FF_ALLOCZ_OR_GOTO(c, c->hChrFilterPos, (c->chrDstW / 2 / 4 + 8) * sizeof(int32_t), fail);
initMMX2HScaler( dstW, c->lumXInc, c->lumMmx2FilterCode, initMMX2HScaler( dstW, c->lumXInc, c->lumMmxextFilterCode,
c->hLumFilter, (uint32_t*)c->hLumFilterPos, 8); c->hLumFilter, (uint32_t*)c->hLumFilterPos, 8);
initMMX2HScaler(c->chrDstW, c->chrXInc, c->chrMmx2FilterCode, initMMX2HScaler(c->chrDstW, c->chrXInc, c->chrMmxextFilterCode,
c->hChrFilter, (uint32_t*)c->hChrFilterPos, 4); c->hChrFilter, (uint32_t*)c->hChrFilterPos, 4);
#if USE_MMAP #if USE_MMAP
mprotect(c->lumMmx2FilterCode, c->lumMmx2FilterCodeSize, PROT_EXEC | PROT_READ); mprotect(c->lumMmxextFilterCode, c->lumMmxextFilterCodeSize, PROT_EXEC | PROT_READ);
mprotect(c->chrMmx2FilterCode, c->chrMmx2FilterCodeSize, PROT_EXEC | PROT_READ); mprotect(c->chrMmxextFilterCode, c->chrMmxextFilterCodeSize, PROT_EXEC | PROT_READ);
#endif #endif
} else } else
#endif /* HAVE_MMXEXT_INLINE */ #endif /* HAVE_MMXEXT_INLINE */
...@@ -1739,21 +1753,21 @@ void sws_freeContext(SwsContext *c) ...@@ -1739,21 +1753,21 @@ void sws_freeContext(SwsContext *c)
#if HAVE_MMX_INLINE #if HAVE_MMX_INLINE
#if USE_MMAP #if USE_MMAP
if (c->lumMmx2FilterCode) if (c->lumMmxextFilterCode)
munmap(c->lumMmx2FilterCode, c->lumMmx2FilterCodeSize); munmap(c->lumMmxextFilterCode, c->lumMmxextFilterCodeSize);
if (c->chrMmx2FilterCode) if (c->chrMmxextFilterCode)
munmap(c->chrMmx2FilterCode, c->chrMmx2FilterCodeSize); munmap(c->chrMmxextFilterCode, c->chrMmxextFilterCodeSize);
#elif HAVE_VIRTUALALLOC #elif HAVE_VIRTUALALLOC
if (c->lumMmx2FilterCode) if (c->lumMmxextFilterCode)
VirtualFree(c->lumMmx2FilterCode, 0, MEM_RELEASE); VirtualFree(c->lumMmxextFilterCode, 0, MEM_RELEASE);
if (c->chrMmx2FilterCode) if (c->chrMmxextFilterCode)
VirtualFree(c->chrMmx2FilterCode, 0, MEM_RELEASE); VirtualFree(c->chrMmxextFilterCode, 0, MEM_RELEASE);
#else #else
av_free(c->lumMmx2FilterCode); av_free(c->lumMmxextFilterCode);
av_free(c->chrMmx2FilterCode); av_free(c->chrMmxextFilterCode);
#endif #endif
c->lumMmx2FilterCode = NULL; c->lumMmxextFilterCode = NULL;
c->chrMmx2FilterCode = NULL; c->chrMmxextFilterCode = NULL;
#endif /* HAVE_MMX_INLINE */ #endif /* HAVE_MMX_INLINE */
av_freep(&c->yuvTable); av_freep(&c->yuvTable);
......
...@@ -1452,7 +1452,7 @@ static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, ...@@ -1452,7 +1452,7 @@ static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
{ {
int32_t *filterPos = c->hLumFilterPos; int32_t *filterPos = c->hLumFilterPos;
int16_t *filter = c->hLumFilter; int16_t *filter = c->hLumFilter;
void *mmx2FilterCode= c->lumMmx2FilterCode; void *mmxextFilterCode = c->lumMmxextFilterCode;
int i; int i;
#if defined(PIC) #if defined(PIC)
uint64_t ebxsave; uint64_t ebxsave;
...@@ -1525,7 +1525,7 @@ static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, ...@@ -1525,7 +1525,7 @@ static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
#endif #endif
#endif #endif
:: "m" (src), "m" (dst), "m" (filter), "m" (filterPos), :: "m" (src), "m" (dst), "m" (filter), "m" (filterPos),
"m" (mmx2FilterCode) "m" (mmxextFilterCode)
#if defined(PIC) #if defined(PIC)
,"m" (ebxsave) ,"m" (ebxsave)
#endif #endif
...@@ -1548,7 +1548,7 @@ static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2, ...@@ -1548,7 +1548,7 @@ static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
{ {
int32_t *filterPos = c->hChrFilterPos; int32_t *filterPos = c->hChrFilterPos;
int16_t *filter = c->hChrFilter; int16_t *filter = c->hChrFilter;
void *mmx2FilterCode= c->chrMmx2FilterCode; void *mmxextFilterCode = c->chrMmxextFilterCode;
int i; int i;
#if defined(PIC) #if defined(PIC)
DECLARE_ALIGNED(8, uint64_t, ebxsave); DECLARE_ALIGNED(8, uint64_t, ebxsave);
...@@ -1609,7 +1609,7 @@ static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2, ...@@ -1609,7 +1609,7 @@ static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
#endif #endif
#endif #endif
:: "m" (src1), "m" (dst1), "m" (filter), "m" (filterPos), :: "m" (src1), "m" (dst1), "m" (filter), "m" (filterPos),
"m" (mmx2FilterCode), "m" (src2), "m"(dst2) "m" (mmxextFilterCode), "m" (src2), "m"(dst2)
#if defined(PIC) #if defined(PIC)
,"m" (ebxsave) ,"m" (ebxsave)
#endif #endif
...@@ -1692,8 +1692,7 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c) ...@@ -1692,8 +1692,7 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
if (c->srcBpc == 8 && c->dstBpc <= 14) { if (c->srcBpc == 8 && c->dstBpc <= 14) {
// Use the new MMX scaler if the MMXEXT one can't be used (it is faster than the x86 ASM one). // Use the new MMX scaler if the MMXEXT one can't be used (it is faster than the x86 ASM one).
#if COMPILE_TEMPLATE_MMXEXT #if COMPILE_TEMPLATE_MMXEXT
if (c->flags & SWS_FAST_BILINEAR && c->canMMX2BeUsed) if (c->flags & SWS_FAST_BILINEAR && c->canMMXEXTBeUsed) {
{
c->hyscale_fast = RENAME(hyscale_fast); c->hyscale_fast = RENAME(hyscale_fast);
c->hcscale_fast = RENAME(hcscale_fast); c->hcscale_fast = RENAME(hcscale_fast);
} else { } else {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment