Commit 1e9c5bf4 authored by Diego Biurrun

asm: FF_-prefix internal macros used in inline assembly

These macros conflict with system macros on Solaris, producing
truckloads of warnings about macro redefinition.
parent dc40a70c
...@@ -33,18 +33,18 @@ ...@@ -33,18 +33,18 @@
static void name(int16_t *blocks) \ static void name(int16_t *blocks) \
{ \ { \
__asm__ volatile ( \ __asm__ volatile ( \
"pxor %%mm7, %%mm7 \n\t" \ "pxor %%mm7, %%mm7 \n\t" \
"mov %1, %%"REG_a" \n\t" \ "mov %1, %%"FF_REG_a" \n\t" \
"1: \n\t" \ "1: \n\t" \
"movq %%mm7, (%0, %%"REG_a") \n\t" \ "movq %%mm7, (%0, %%"FF_REG_a") \n\t" \
"movq %%mm7, 8(%0, %%"REG_a") \n\t" \ "movq %%mm7, 8(%0, %%"FF_REG_a") \n\t" \
"movq %%mm7, 16(%0, %%"REG_a") \n\t" \ "movq %%mm7, 16(%0, %%"FF_REG_a") \n\t" \
"movq %%mm7, 24(%0, %%"REG_a") \n\t" \ "movq %%mm7, 24(%0, %%"FF_REG_a") \n\t" \
"add $32, %%"REG_a" \n\t" \ "add $32, %%"FF_REG_a" \n\t" \
"js 1b \n\t" \ "js 1b \n\t" \
:: "r"(((uint8_t *) blocks) + 128 * n), \ :: "r"(((uint8_t *) blocks) + 128 * n), \
"i"(-128 * n) \ "i"(-128 * n) \
: "%"REG_a); \ : "%"FF_REG_a); \
} }
CLEAR_BLOCKS(clear_blocks_mmx, 6) CLEAR_BLOCKS(clear_blocks_mmx, 6)
CLEAR_BLOCKS(clear_block_mmx, 1) CLEAR_BLOCKS(clear_block_mmx, 1)
...@@ -68,21 +68,21 @@ static void clear_block_sse(int16_t *block) ...@@ -68,21 +68,21 @@ static void clear_block_sse(int16_t *block)
static void clear_blocks_sse(int16_t *blocks) static void clear_blocks_sse(int16_t *blocks)
{ {
__asm__ volatile ( __asm__ volatile (
"xorps %%xmm0, %%xmm0 \n" "xorps %%xmm0, %%xmm0 \n"
"mov %1, %%"REG_a" \n" "mov %1, %%"FF_REG_a" \n"
"1: \n" "1: \n"
"movaps %%xmm0, (%0, %%"REG_a") \n" "movaps %%xmm0, (%0, %%"FF_REG_a") \n"
"movaps %%xmm0, 16(%0, %%"REG_a") \n" "movaps %%xmm0, 16(%0, %%"FF_REG_a") \n"
"movaps %%xmm0, 32(%0, %%"REG_a") \n" "movaps %%xmm0, 32(%0, %%"FF_REG_a") \n"
"movaps %%xmm0, 48(%0, %%"REG_a") \n" "movaps %%xmm0, 48(%0, %%"FF_REG_a") \n"
"movaps %%xmm0, 64(%0, %%"REG_a") \n" "movaps %%xmm0, 64(%0, %%"FF_REG_a") \n"
"movaps %%xmm0, 80(%0, %%"REG_a") \n" "movaps %%xmm0, 80(%0, %%"FF_REG_a") \n"
"movaps %%xmm0, 96(%0, %%"REG_a") \n" "movaps %%xmm0, 96(%0, %%"FF_REG_a") \n"
"movaps %%xmm0, 112(%0, %%"REG_a") \n" "movaps %%xmm0, 112(%0, %%"FF_REG_a") \n"
"add $128, %%"REG_a" \n" "add $128, %%"FF_REG_a" \n"
"js 1b \n" "js 1b \n"
:: "r"(((uint8_t *) blocks) + 128 * 6), "i"(-128 * 6) :: "r"(((uint8_t *) blocks) + 128 * 6), "i"(-128 * 6)
: "%"REG_a); : "%"FF_REG_a);
} }
#endif /* HAVE_INLINE_ASM */ #endif /* HAVE_INLINE_ASM */
......
...@@ -72,12 +72,12 @@ ...@@ -72,12 +72,12 @@
"mov "tmpbyte" , "statep" \n\t"\ "mov "tmpbyte" , "statep" \n\t"\
"test "lowword" , "lowword" \n\t"\ "test "lowword" , "lowword" \n\t"\
"jnz 2f \n\t"\ "jnz 2f \n\t"\
"mov "byte" , %%"REG_c" \n\t"\ "mov "byte" , %%"FF_REG_c" \n\t"\
"cmp "end" , %%"REG_c" \n\t"\ "cmp "end" , %%"FF_REG_c" \n\t"\
"jge 1f \n\t"\ "jge 1f \n\t"\
"add"OPSIZE" $2 , "byte" \n\t"\ "add"FF_OPSIZE" $2 , "byte" \n\t"\
"1: \n\t"\ "1: \n\t"\
"movzwl (%%"REG_c") , "tmp" \n\t"\ "movzwl (%%"FF_REG_c"), "tmp" \n\t"\
"lea -1("low") , %%ecx \n\t"\ "lea -1("low") , %%ecx \n\t"\
"xor "low" , %%ecx \n\t"\ "xor "low" , %%ecx \n\t"\
"shr $15 , %%ecx \n\t"\ "shr $15 , %%ecx \n\t"\
...@@ -133,12 +133,12 @@ ...@@ -133,12 +133,12 @@
"mov "tmpbyte" , "statep" \n\t"\ "mov "tmpbyte" , "statep" \n\t"\
"test "lowword" , "lowword" \n\t"\ "test "lowword" , "lowword" \n\t"\
" jnz 2f \n\t"\ " jnz 2f \n\t"\
"mov "byte" , %%"REG_c" \n\t"\ "mov "byte" , %%"FF_REG_c" \n\t"\
"cmp "end" , %%"REG_c" \n\t"\ "cmp "end" , %%"FF_REG_c" \n\t"\
"jge 1f \n\t"\ "jge 1f \n\t"\
"add"OPSIZE" $2 , "byte" \n\t"\ "add"FF_OPSIZE" $2 , "byte" \n\t"\
"1: \n\t"\ "1: \n\t"\
"movzwl (%%"REG_c") , "tmp" \n\t"\ "movzwl (%%"FF_REG_c") , "tmp" \n\t"\
"lea -1("low") , %%ecx \n\t"\ "lea -1("low") , %%ecx \n\t"\
"xor "low" , %%ecx \n\t"\ "xor "low" , %%ecx \n\t"\
"shr $15 , %%ecx \n\t"\ "shr $15 , %%ecx \n\t"\
...@@ -183,7 +183,7 @@ static av_always_inline int get_cabac_inline_x86(CABACContext *c, ...@@ -183,7 +183,7 @@ static av_always_inline int get_cabac_inline_x86(CABACContext *c,
"i"(offsetof(CABACContext, bytestream)), "i"(offsetof(CABACContext, bytestream)),
"i"(offsetof(CABACContext, bytestream_end)) "i"(offsetof(CABACContext, bytestream_end))
TABLES_ARG TABLES_ARG
: "%"REG_c, "memory" : "%"FF_REG_c, "memory"
); );
return bit & 1; return bit & 1;
} }
...@@ -214,7 +214,7 @@ static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val) ...@@ -214,7 +214,7 @@ static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val)
"addl %%edx, %%eax \n\t" "addl %%edx, %%eax \n\t"
"cmp %c5(%2), %1 \n\t" "cmp %c5(%2), %1 \n\t"
"jge 1f \n\t" "jge 1f \n\t"
"add"OPSIZE" $2, %c4(%2) \n\t" "add"FF_OPSIZE" $2, %c4(%2) \n\t"
"1: \n\t" "1: \n\t"
"movl %%eax, %c3(%2) \n\t" "movl %%eax, %c3(%2) \n\t"
...@@ -254,7 +254,7 @@ static av_always_inline int get_cabac_bypass_x86(CABACContext *c) ...@@ -254,7 +254,7 @@ static av_always_inline int get_cabac_bypass_x86(CABACContext *c)
"addl %%ecx, %%eax \n\t" "addl %%ecx, %%eax \n\t"
"cmp %c5(%2), %1 \n\t" "cmp %c5(%2), %1 \n\t"
"jge 1f \n\t" "jge 1f \n\t"
"add"OPSIZE" $2, %c4(%2) \n\t" "add"FF_OPSIZE" $2, %c4(%2) \n\t"
"1: \n\t" "1: \n\t"
"movl %%eax, %c3(%2) \n\t" "movl %%eax, %c3(%2) \n\t"
......
...@@ -79,26 +79,26 @@ void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels, ...@@ -79,26 +79,26 @@ void ff_put_pixels8_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h) ptrdiff_t line_size, int h)
{ {
__asm__ volatile ( __asm__ volatile (
"lea (%3, %3), %%"REG_a" \n\t" "lea (%3, %3), %%"FF_REG_a" \n\t"
".p2align 3 \n\t" ".p2align 3 \n\t"
"1: \n\t" "1: \n\t"
"movq (%1 ), %%mm0 \n\t" "movq (%1 ), %%mm0 \n\t"
"movq (%1, %3), %%mm1 \n\t" "movq (%1, %3), %%mm1 \n\t"
"movq %%mm0, (%2) \n\t" "movq %%mm0, (%2) \n\t"
"movq %%mm1, (%2, %3) \n\t" "movq %%mm1, (%2, %3) \n\t"
"add %%"REG_a", %1 \n\t" "add %%"FF_REG_a", %1 \n\t"
"add %%"REG_a", %2 \n\t" "add %%"FF_REG_a", %2 \n\t"
"movq (%1 ), %%mm0 \n\t" "movq (%1 ), %%mm0 \n\t"
"movq (%1, %3), %%mm1 \n\t" "movq (%1, %3), %%mm1 \n\t"
"movq %%mm0, (%2) \n\t" "movq %%mm0, (%2) \n\t"
"movq %%mm1, (%2, %3) \n\t" "movq %%mm1, (%2, %3) \n\t"
"add %%"REG_a", %1 \n\t" "add %%"FF_REG_a", %1 \n\t"
"add %%"REG_a", %2 \n\t" "add %%"FF_REG_a", %2 \n\t"
"subl $4, %0 \n\t" "subl $4, %0 \n\t"
"jnz 1b \n\t" "jnz 1b \n\t"
: "+g"(h), "+r"(pixels), "+r"(block) : "+g"(h), "+r"(pixels), "+r"(block)
: "r"((x86_reg)line_size) : "r"((x86_reg)line_size)
: "%"REG_a, "memory" : "%"FF_REG_a, "memory"
); );
} }
...@@ -106,7 +106,7 @@ void ff_put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, ...@@ -106,7 +106,7 @@ void ff_put_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h) ptrdiff_t line_size, int h)
{ {
__asm__ volatile ( __asm__ volatile (
"lea (%3, %3), %%"REG_a" \n\t" "lea (%3, %3), %%"FF_REG_a" \n\t"
".p2align 3 \n\t" ".p2align 3 \n\t"
"1: \n\t" "1: \n\t"
"movq (%1 ), %%mm0 \n\t" "movq (%1 ), %%mm0 \n\t"
...@@ -117,8 +117,8 @@ void ff_put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, ...@@ -117,8 +117,8 @@ void ff_put_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
"movq %%mm4, 8(%2) \n\t" "movq %%mm4, 8(%2) \n\t"
"movq %%mm1, (%2, %3) \n\t" "movq %%mm1, (%2, %3) \n\t"
"movq %%mm5, 8(%2, %3) \n\t" "movq %%mm5, 8(%2, %3) \n\t"
"add %%"REG_a", %1 \n\t" "add %%"FF_REG_a", %1 \n\t"
"add %%"REG_a", %2 \n\t" "add %%"FF_REG_a", %2 \n\t"
"movq (%1 ), %%mm0 \n\t" "movq (%1 ), %%mm0 \n\t"
"movq 8(%1 ), %%mm4 \n\t" "movq 8(%1 ), %%mm4 \n\t"
"movq (%1, %3), %%mm1 \n\t" "movq (%1, %3), %%mm1 \n\t"
...@@ -127,13 +127,13 @@ void ff_put_pixels16_mmx(uint8_t *block, const uint8_t *pixels, ...@@ -127,13 +127,13 @@ void ff_put_pixels16_mmx(uint8_t *block, const uint8_t *pixels,
"movq %%mm4, 8(%2) \n\t" "movq %%mm4, 8(%2) \n\t"
"movq %%mm1, (%2, %3) \n\t" "movq %%mm1, (%2, %3) \n\t"
"movq %%mm5, 8(%2, %3) \n\t" "movq %%mm5, 8(%2, %3) \n\t"
"add %%"REG_a", %1 \n\t" "add %%"FF_REG_a", %1 \n\t"
"add %%"REG_a", %2 \n\t" "add %%"FF_REG_a", %2 \n\t"
"subl $4, %0 \n\t" "subl $4, %0 \n\t"
"jnz 1b \n\t" "jnz 1b \n\t"
: "+g"(h), "+r"(pixels), "+r"(block) : "+g"(h), "+r"(pixels), "+r"(block)
: "r"((x86_reg)line_size) : "r"((x86_reg)line_size)
: "%"REG_a, "memory" : "%"FF_REG_a, "memory"
); );
} }
......
...@@ -84,13 +84,13 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, ...@@ -84,13 +84,13 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
"sub %10, %1 \n\t" "sub %10, %1 \n\t"
"mov %2, %0 \n\t" "mov %2, %0 \n\t"
"movl %7, %%ecx \n\t" "movl %7, %%ecx \n\t"
"add %1, %%"REG_c" \n\t" "add %1, %%"FF_REG_c" \n\t"
"movl %%ecx, (%0) \n\t" "movl %%ecx, (%0) \n\t"
"test $1, %4 \n\t" "test $1, %4 \n\t"
" jnz 5f \n\t" " jnz 5f \n\t"
"add"OPSIZE" $4, %2 \n\t" "add"FF_OPSIZE" $4, %2 \n\t"
"4: \n\t" "4: \n\t"
"add $1, %1 \n\t" "add $1, %1 \n\t"
...@@ -98,7 +98,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, ...@@ -98,7 +98,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
" jb 3b \n\t" " jb 3b \n\t"
"mov %2, %0 \n\t" "mov %2, %0 \n\t"
"movl %7, %%ecx \n\t" "movl %7, %%ecx \n\t"
"add %1, %%"REG_c" \n\t" "add %1, %%"FF_REG_c" \n\t"
"movl %%ecx, (%0) \n\t" "movl %%ecx, (%0) \n\t"
"5: \n\t" "5: \n\t"
"add %9, %k0 \n\t" "add %9, %k0 \n\t"
...@@ -109,7 +109,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff, ...@@ -109,7 +109,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
"i"(offsetof(CABACContext, bytestream)), "i"(offsetof(CABACContext, bytestream)),
"i"(offsetof(CABACContext, bytestream_end)) "i"(offsetof(CABACContext, bytestream_end))
TABLES_ARG TABLES_ARG
: "%"REG_c, "memory" : "%"FF_REG_c, "memory"
); );
return coeff_count; return coeff_count;
} }
...@@ -175,7 +175,7 @@ static int decode_significance_8x8_x86(CABACContext *c, ...@@ -175,7 +175,7 @@ static int decode_significance_8x8_x86(CABACContext *c,
"test $1, %4 \n\t" "test $1, %4 \n\t"
" jnz 5f \n\t" " jnz 5f \n\t"
"add"OPSIZE" $4, %2 \n\t" "add"FF_OPSIZE" $4, %2 \n\t"
"4: \n\t" "4: \n\t"
"addl $1, %k6 \n\t" "addl $1, %k6 \n\t"
...@@ -194,7 +194,7 @@ static int decode_significance_8x8_x86(CABACContext *c, ...@@ -194,7 +194,7 @@ static int decode_significance_8x8_x86(CABACContext *c,
"i"(offsetof(CABACContext, bytestream)), "i"(offsetof(CABACContext, bytestream)),
"i"(offsetof(CABACContext, bytestream_end)), "i"(offsetof(CABACContext, bytestream_end)),
"i"(H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET) TABLES_ARG "i"(H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET) TABLES_ARG
: "%"REG_c, "memory" : "%"FF_REG_c, "memory"
); );
return coeff_count; return coeff_count;
} }
......
...@@ -32,7 +32,7 @@ static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_ ...@@ -32,7 +32,7 @@ static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_
{ {
MOVQ_BFE(mm6); MOVQ_BFE(mm6);
__asm__ volatile( __asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t" "lea (%3, %3), %%"FF_REG_a" \n\t"
".p2align 3 \n\t" ".p2align 3 \n\t"
"1: \n\t" "1: \n\t"
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
...@@ -42,8 +42,8 @@ static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_ ...@@ -42,8 +42,8 @@ static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
"movq %%mm4, (%2) \n\t" "movq %%mm4, (%2) \n\t"
"movq %%mm5, (%2, %3) \n\t" "movq %%mm5, (%2, %3) \n\t"
"add %%"REG_a", %1 \n\t" "add %%"FF_REG_a", %1 \n\t"
"add %%"REG_a", %2 \n\t" "add %%"FF_REG_a", %2 \n\t"
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
"movq 1(%1), %%mm1 \n\t" "movq 1(%1), %%mm1 \n\t"
"movq (%1, %3), %%mm2 \n\t" "movq (%1, %3), %%mm2 \n\t"
...@@ -51,20 +51,20 @@ static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_ ...@@ -51,20 +51,20 @@ static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
"movq %%mm4, (%2) \n\t" "movq %%mm4, (%2) \n\t"
"movq %%mm5, (%2, %3) \n\t" "movq %%mm5, (%2, %3) \n\t"
"add %%"REG_a", %1 \n\t" "add %%"FF_REG_a", %1 \n\t"
"add %%"REG_a", %2 \n\t" "add %%"FF_REG_a", %2 \n\t"
"subl $4, %0 \n\t" "subl $4, %0 \n\t"
"jnz 1b \n\t" "jnz 1b \n\t"
:"+g"(h), "+S"(pixels), "+D"(block) :"+g"(h), "+S"(pixels), "+D"(block)
:"r"((x86_reg)line_size) :"r"((x86_reg)line_size)
:REG_a, "memory"); :FF_REG_a, "memory");
} }
static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{ {
MOVQ_BFE(mm6); MOVQ_BFE(mm6);
__asm__ volatile( __asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t" "lea (%3, %3), %%"FF_REG_a" \n\t"
".p2align 3 \n\t" ".p2align 3 \n\t"
"1: \n\t" "1: \n\t"
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
...@@ -81,8 +81,8 @@ static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff ...@@ -81,8 +81,8 @@ static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
"movq %%mm4, 8(%2) \n\t" "movq %%mm4, 8(%2) \n\t"
"movq %%mm5, 8(%2, %3) \n\t" "movq %%mm5, 8(%2, %3) \n\t"
"add %%"REG_a", %1 \n\t" "add %%"FF_REG_a", %1 \n\t"
"add %%"REG_a", %2 \n\t" "add %%"FF_REG_a", %2 \n\t"
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
"movq 1(%1), %%mm1 \n\t" "movq 1(%1), %%mm1 \n\t"
"movq (%1, %3), %%mm2 \n\t" "movq (%1, %3), %%mm2 \n\t"
...@@ -97,42 +97,42 @@ static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff ...@@ -97,42 +97,42 @@ static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff
PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5) PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
"movq %%mm4, 8(%2) \n\t" "movq %%mm4, 8(%2) \n\t"
"movq %%mm5, 8(%2, %3) \n\t" "movq %%mm5, 8(%2, %3) \n\t"
"add %%"REG_a", %1 \n\t" "add %%"FF_REG_a", %1 \n\t"
"add %%"REG_a", %2 \n\t" "add %%"FF_REG_a", %2 \n\t"
"subl $4, %0 \n\t" "subl $4, %0 \n\t"
"jnz 1b \n\t" "jnz 1b \n\t"
:"+g"(h), "+S"(pixels), "+D"(block) :"+g"(h), "+S"(pixels), "+D"(block)
:"r"((x86_reg)line_size) :"r"((x86_reg)line_size)
:REG_a, "memory"); :FF_REG_a, "memory");
} }
static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{ {
MOVQ_BFE(mm6); MOVQ_BFE(mm6);
__asm__ volatile( __asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t" "lea (%3, %3), %%"FF_REG_a" \n\t"
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
".p2align 3 \n\t" ".p2align 3 \n\t"
"1: \n\t" "1: \n\t"
"movq (%1, %3), %%mm1 \n\t" "movq (%1, %3), %%mm1 \n\t"
"movq (%1, %%"REG_a"),%%mm2 \n\t" "movq (%1, %%"FF_REG_a"),%%mm2 \n\t"
PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5) PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5)
"movq %%mm4, (%2) \n\t" "movq %%mm4, (%2) \n\t"
"movq %%mm5, (%2, %3) \n\t" "movq %%mm5, (%2, %3) \n\t"
"add %%"REG_a", %1 \n\t" "add %%"FF_REG_a", %1 \n\t"
"add %%"REG_a", %2 \n\t" "add %%"FF_REG_a", %2 \n\t"
"movq (%1, %3), %%mm1 \n\t" "movq (%1, %3), %%mm1 \n\t"
"movq (%1, %%"REG_a"),%%mm0 \n\t" "movq (%1, %%"FF_REG_a"),%%mm0 \n\t"
PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5) PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5)
"movq %%mm4, (%2) \n\t" "movq %%mm4, (%2) \n\t"
"movq %%mm5, (%2, %3) \n\t" "movq %%mm5, (%2, %3) \n\t"
"add %%"REG_a", %1 \n\t" "add %%"FF_REG_a", %1 \n\t"
"add %%"REG_a", %2 \n\t" "add %%"FF_REG_a", %2 \n\t"
"subl $4, %0 \n\t" "subl $4, %0 \n\t"
"jnz 1b \n\t" "jnz 1b \n\t"
:"+g"(h), "+S"(pixels), "+D"(block) :"+g"(h), "+S"(pixels), "+D"(block)
:"r"((x86_reg)line_size) :"r"((x86_reg)line_size)
:REG_a, "memory"); :FF_REG_a, "memory");
} }
static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
...@@ -165,12 +165,12 @@ static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_ ...@@ -165,12 +165,12 @@ static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_
{ {
MOVQ_BFE(mm6); MOVQ_BFE(mm6);
__asm__ volatile( __asm__ volatile(
"lea (%3, %3), %%"REG_a" \n\t" "lea (%3, %3), %%"FF_REG_a" \n\t"
"movq (%1), %%mm0 \n\t" "movq (%1), %%mm0 \n\t"
".p2align 3 \n\t" ".p2align 3 \n\t"
"1: \n\t" "1: \n\t"
"movq (%1, %3), %%mm1 \n\t" "movq (%1, %3), %%mm1 \n\t"
"movq (%1, %%"REG_a"), %%mm2 \n\t" "movq (%1, %%"FF_REG_a"), %%mm2 \n\t"
PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5) PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5)
"movq (%2), %%mm3 \n\t" "movq (%2), %%mm3 \n\t"
PAVGB_MMX(%%mm3, %%mm4, %%mm0, %%mm6) PAVGB_MMX(%%mm3, %%mm4, %%mm0, %%mm6)
...@@ -178,11 +178,11 @@ static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_ ...@@ -178,11 +178,11 @@ static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_
PAVGB_MMX(%%mm3, %%mm5, %%mm1, %%mm6) PAVGB_MMX(%%mm3, %%mm5, %%mm1, %%mm6)
"movq %%mm0, (%2) \n\t" "movq %%mm0, (%2) \n\t"
"movq %%mm1, (%2, %3) \n\t" "movq %%mm1, (%2, %3) \n\t"
"add %%"REG_a", %1 \n\t" "add %%"FF_REG_a", %1 \n\t"
"add %%"REG_a", %2 \n\t" "add %%"FF_REG_a", %2 \n\t"
"movq (%1, %3), %%mm1 \n\t" "movq (%1, %3), %%mm1 \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t" "movq (%1, %%"FF_REG_a"), %%mm0 \n\t"
PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5) PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5)
"movq (%2), %%mm3 \n\t" "movq (%2), %%mm3 \n\t"
PAVGB_MMX(%%mm3, %%mm4, %%mm2, %%mm6) PAVGB_MMX(%%mm3, %%mm4, %%mm2, %%mm6)
...@@ -190,12 +190,12 @@ static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_ ...@@ -190,12 +190,12 @@ static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_
PAVGB_MMX(%%mm3, %%mm5, %%mm1, %%mm6) PAVGB_MMX(%%mm3, %%mm5, %%mm1, %%mm6)
"movq %%mm2, (%2) \n\t" "movq %%mm2, (%2) \n\t"
"movq %%mm1, (%2, %3) \n\t" "movq %%mm1, (%2, %3) \n\t"
"add %%"REG_a", %1 \n\t" "add %%"FF_REG_a", %1 \n\t"
"add %%"REG_a", %2 \n\t" "add %%"FF_REG_a", %2 \n\t"
"subl $4, %0 \n\t" "subl $4, %0 \n\t"
"jnz 1b \n\t" "jnz 1b \n\t"
:"+g"(h), "+S"(pixels), "+D"(block) :"+g"(h), "+S"(pixels), "+D"(block)
:"r"((x86_reg)line_size) :"r"((x86_reg)line_size)
:REG_a, "memory"); :FF_REG_a, "memory");
} }
...@@ -812,15 +812,15 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, ...@@ -812,15 +812,15 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2,
__asm__ volatile ( __asm__ volatile (
".p2align 4 \n\t" ".p2align 4 \n\t"
"1: \n\t" "1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t" "movq (%1, %%"FF_REG_a"), %%mm0 \n\t"
"movq (%2, %%"REG_a"), %%mm2 \n\t" "movq (%2, %%"FF_REG_a"), %%mm2 \n\t"
"movq (%2, %%"REG_a"), %%mm4 \n\t" "movq (%2, %%"FF_REG_a"), %%mm4 \n\t"
"add %3, %%"REG_a" \n\t" "add %3, %%"FF_REG_a" \n\t"
"psubusb %%mm0, %%mm2 \n\t" "psubusb %%mm0, %%mm2 \n\t"
"psubusb %%mm4, %%mm0 \n\t" "psubusb %%mm4, %%mm0 \n\t"
"movq (%1, %%"REG_a"), %%mm1 \n\t" "movq (%1, %%"FF_REG_a"), %%mm1 \n\t"
"movq (%2, %%"REG_a"), %%mm3 \n\t" "movq (%2, %%"FF_REG_a"), %%mm3 \n\t"
"movq (%2, %%"REG_a"), %%mm5 \n\t" "movq (%2, %%"FF_REG_a"), %%mm5 \n\t"
"psubusb %%mm1, %%mm3 \n\t" "psubusb %%mm1, %%mm3 \n\t"
"psubusb %%mm5, %%mm1 \n\t" "psubusb %%mm5, %%mm1 \n\t"
"por %%mm2, %%mm0 \n\t" "por %%mm2, %%mm0 \n\t"
...@@ -835,7 +835,7 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, ...@@ -835,7 +835,7 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2,
"paddw %%mm3, %%mm2 \n\t" "paddw %%mm3, %%mm2 \n\t"
"paddw %%mm2, %%mm0 \n\t" "paddw %%mm2, %%mm0 \n\t"
"paddw %%mm0, %%mm6 \n\t" "paddw %%mm0, %%mm6 \n\t"
"add %3, %%"REG_a" \n\t" "add %3, %%"FF_REG_a" \n\t"
" js 1b \n\t" " js 1b \n\t"
: "+a" (len) : "+a" (len)
: "r" (blk1 - len), "r" (blk2 - len), "r" (stride)); : "r" (blk1 - len), "r" (blk2 - len), "r" (stride));
...@@ -971,18 +971,18 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, ...@@ -971,18 +971,18 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2,
__asm__ volatile ( __asm__ volatile (
".p2align 4 \n\t" ".p2align 4 \n\t"
"1: \n\t" "1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t" "movq (%1, %%"FF_REG_a"), %%mm0 \n\t"
"movq (%2, %%"REG_a"), %%mm1 \n\t" "movq (%2, %%"FF_REG_a"), %%mm1 \n\t"
"movq (%1, %%"REG_a"), %%mm2 \n\t" "movq (%1, %%"FF_REG_a"), %%mm2 \n\t"
"movq (%2, %%"REG_a"), %%mm3 \n\t" "movq (%2, %%"FF_REG_a"), %%mm3 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t" "punpcklbw %%mm7, %%mm0 \n\t"
"punpcklbw %%mm7, %%mm1 \n\t" "punpcklbw %%mm7, %%mm1 \n\t"
"punpckhbw %%mm7, %%mm2 \n\t" "punpckhbw %%mm7, %%mm2 \n\t"
"punpckhbw %%mm7, %%mm3 \n\t" "punpckhbw %%mm7, %%mm3 \n\t"
"paddw %%mm0, %%mm1 \n\t" "paddw %%mm0, %%mm1 \n\t"
"paddw %%mm2, %%mm3 \n\t" "paddw %%mm2, %%mm3 \n\t"
"movq (%3, %%"REG_a"), %%mm4 \n\t" "movq (%3, %%"FF_REG_a"), %%mm4 \n\t"
"movq (%3, %%"REG_a"), %%mm2 \n\t" "movq (%3, %%"FF_REG_a"), %%mm2 \n\t"
"paddw %%mm5, %%mm1 \n\t" "paddw %%mm5, %%mm1 \n\t"
"paddw %%mm5, %%mm3 \n\t" "paddw %%mm5, %%mm3 \n\t"
"psrlw $1, %%mm1 \n\t" "psrlw $1, %%mm1 \n\t"
...@@ -996,7 +996,7 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, ...@@ -996,7 +996,7 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2,
"punpckhbw %%mm7, %%mm1 \n\t" "punpckhbw %%mm7, %%mm1 \n\t"
"paddw %%mm1, %%mm0 \n\t" "paddw %%mm1, %%mm0 \n\t"
"paddw %%mm0, %%mm6 \n\t" "paddw %%mm0, %%mm6 \n\t"
"add %4, %%"REG_a" \n\t" "add %4, %%"FF_REG_a" \n\t"
" js 1b \n\t" " js 1b \n\t"
: "+a" (len) : "+a" (len)
: "r" (blk1a - len), "r" (blk1b - len), "r" (blk2 - len), : "r" (blk1a - len), "r" (blk1b - len), "r" (blk2 - len),
...@@ -1008,8 +1008,8 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, ...@@ -1008,8 +1008,8 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2,
{ {
x86_reg len = -(stride * h); x86_reg len = -(stride * h);
__asm__ volatile ( __asm__ volatile (
"movq (%1, %%"REG_a"), %%mm0 \n\t" "movq (%1, %%"FF_REG_a"), %%mm0\n\t"
"movq 1(%1, %%"REG_a"), %%mm2 \n\t" "movq 1(%1, %%"FF_REG_a"), %%mm2\n\t"
"movq %%mm0, %%mm1 \n\t" "movq %%mm0, %%mm1 \n\t"
"movq %%mm2, %%mm3 \n\t" "movq %%mm2, %%mm3 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t" "punpcklbw %%mm7, %%mm0 \n\t"
...@@ -1020,8 +1020,8 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, ...@@ -1020,8 +1020,8 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2,
"paddw %%mm3, %%mm1 \n\t" "paddw %%mm3, %%mm1 \n\t"
".p2align 4 \n\t" ".p2align 4 \n\t"
"1: \n\t" "1: \n\t"
"movq (%2, %%"REG_a"), %%mm2 \n\t" "movq (%2, %%"FF_REG_a"), %%mm2\n\t"
"movq 1(%2, %%"REG_a"), %%mm4 \n\t" "movq 1(%2, %%"FF_REG_a"), %%mm4\n\t"
"movq %%mm2, %%mm3 \n\t" "movq %%mm2, %%mm3 \n\t"
"movq %%mm4, %%mm5 \n\t" "movq %%mm4, %%mm5 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t" "punpcklbw %%mm7, %%mm2 \n\t"
...@@ -1035,8 +1035,8 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, ...@@ -1035,8 +1035,8 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2,
"paddw %%mm3, %%mm1 \n\t" "paddw %%mm3, %%mm1 \n\t"
"paddw %%mm5, %%mm0 \n\t" "paddw %%mm5, %%mm0 \n\t"
"paddw %%mm5, %%mm1 \n\t" "paddw %%mm5, %%mm1 \n\t"
"movq (%3, %%"REG_a"), %%mm4 \n\t" "movq (%3, %%"FF_REG_a"), %%mm4 \n\t"
"movq (%3, %%"REG_a"), %%mm5 \n\t" "movq (%3, %%"FF_REG_a"), %%mm5 \n\t"
"psrlw $2, %%mm0 \n\t" "psrlw $2, %%mm0 \n\t"
"psrlw $2, %%mm1 \n\t" "psrlw $2, %%mm1 \n\t"
"packuswb %%mm1, %%mm0 \n\t" "packuswb %%mm1, %%mm0 \n\t"
...@@ -1050,7 +1050,7 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, ...@@ -1050,7 +1050,7 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2,
"paddw %%mm4, %%mm6 \n\t" "paddw %%mm4, %%mm6 \n\t"
"movq %%mm2, %%mm0 \n\t" "movq %%mm2, %%mm0 \n\t"
"movq %%mm3, %%mm1 \n\t" "movq %%mm3, %%mm1 \n\t"
"add %4, %%"REG_a" \n\t" "add %4, %%"FF_REG_a" \n\t"
" js 1b \n\t" " js 1b \n\t"
: "+a" (len) : "+a" (len)
: "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len), : "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len),
......
...@@ -187,13 +187,13 @@ __asm__ volatile( ...@@ -187,13 +187,13 @@ __asm__ volatile(
"movd %2, %%mm6 \n\t" "movd %2, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t"
"mov %3, %%"REG_a" \n\t" "mov %3, %%"FF_REG_a" \n\t"
".p2align 4 \n\t" ".p2align 4 \n\t"
"1: \n\t" "1: \n\t"
"movq (%0, %%"REG_a"), %%mm0 \n\t" "movq (%0, %%"FF_REG_a"), %%mm0\n\t"
"movq 8(%0, %%"REG_a"), %%mm1 \n\t" "movq 8(%0, %%"FF_REG_a"), %%mm1\n\t"
"movq (%1, %%"REG_a"), %%mm4 \n\t" "movq (%1, %%"FF_REG_a"), %%mm4\n\t"
"movq 8(%1, %%"REG_a"), %%mm5 \n\t" "movq 8(%1, %%"FF_REG_a"), %%mm5\n\t"
"pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i] "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
"pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i] "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
"pxor %%mm2, %%mm2 \n\t" "pxor %%mm2, %%mm2 \n\t"
...@@ -208,8 +208,8 @@ __asm__ volatile( ...@@ -208,8 +208,8 @@ __asm__ volatile(
"pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q
"pxor %%mm4, %%mm4 \n\t" "pxor %%mm4, %%mm4 \n\t"
"pxor %%mm5, %%mm5 \n\t" // FIXME slow "pxor %%mm5, %%mm5 \n\t" // FIXME slow
"pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 "pcmpeqw (%0, %%"FF_REG_a"), %%mm4\n\t" // block[i] == 0 ? -1 : 0
"pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 "pcmpeqw 8(%0, %%"FF_REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
"psraw $3, %%mm0 \n\t" "psraw $3, %%mm0 \n\t"
"psraw $3, %%mm1 \n\t" "psraw $3, %%mm1 \n\t"
"psubw %%mm7, %%mm0 \n\t" "psubw %%mm7, %%mm0 \n\t"
...@@ -222,13 +222,13 @@ __asm__ volatile( ...@@ -222,13 +222,13 @@ __asm__ volatile(
"psubw %%mm3, %%mm1 \n\t" "psubw %%mm3, %%mm1 \n\t"
"pandn %%mm0, %%mm4 \n\t" "pandn %%mm0, %%mm4 \n\t"
"pandn %%mm1, %%mm5 \n\t" "pandn %%mm1, %%mm5 \n\t"
"movq %%mm4, (%0, %%"REG_a") \n\t" "movq %%mm4, (%0, %%"FF_REG_a")\n\t"
"movq %%mm5, 8(%0, %%"REG_a") \n\t" "movq %%mm5, 8(%0, %%"FF_REG_a")\n\t"
"add $16, %%"REG_a" \n\t" "add $16, %%"FF_REG_a" \n\t"
"js 1b \n\t" "js 1b \n\t"
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs) ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs)
: "%"REG_a, "memory" : "%"FF_REG_a, "memory"
); );
block[0]= block0; block[0]= block0;
} }
...@@ -250,13 +250,13 @@ __asm__ volatile( ...@@ -250,13 +250,13 @@ __asm__ volatile(
"movd %2, %%mm6 \n\t" "movd %2, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t"
"mov %3, %%"REG_a" \n\t" "mov %3, %%"FF_REG_a" \n\t"
".p2align 4 \n\t" ".p2align 4 \n\t"
"1: \n\t" "1: \n\t"
"movq (%0, %%"REG_a"), %%mm0 \n\t" "movq (%0, %%"FF_REG_a"), %%mm0\n\t"
"movq 8(%0, %%"REG_a"), %%mm1 \n\t" "movq 8(%0, %%"FF_REG_a"), %%mm1\n\t"
"movq (%1, %%"REG_a"), %%mm4 \n\t" "movq (%1, %%"FF_REG_a"), %%mm4\n\t"
"movq 8(%1, %%"REG_a"), %%mm5 \n\t" "movq 8(%1, %%"FF_REG_a"), %%mm5\n\t"
"pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i] "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
"pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i] "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
"pxor %%mm2, %%mm2 \n\t" "pxor %%mm2, %%mm2 \n\t"
...@@ -275,8 +275,8 @@ __asm__ volatile( ...@@ -275,8 +275,8 @@ __asm__ volatile(
"pmullw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q "pmullw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q
"pxor %%mm4, %%mm4 \n\t" "pxor %%mm4, %%mm4 \n\t"
"pxor %%mm5, %%mm5 \n\t" // FIXME slow "pxor %%mm5, %%mm5 \n\t" // FIXME slow
"pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 "pcmpeqw (%0, %%"FF_REG_a"), %%mm4\n\t" // block[i] == 0 ? -1 : 0
"pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 "pcmpeqw 8(%0, %%"FF_REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
"psraw $4, %%mm0 \n\t" "psraw $4, %%mm0 \n\t"
"psraw $4, %%mm1 \n\t" "psraw $4, %%mm1 \n\t"
"psubw %%mm7, %%mm0 \n\t" "psubw %%mm7, %%mm0 \n\t"
...@@ -289,13 +289,13 @@ __asm__ volatile( ...@@ -289,13 +289,13 @@ __asm__ volatile(
"psubw %%mm3, %%mm1 \n\t" "psubw %%mm3, %%mm1 \n\t"
"pandn %%mm0, %%mm4 \n\t" "pandn %%mm0, %%mm4 \n\t"
"pandn %%mm1, %%mm5 \n\t" "pandn %%mm1, %%mm5 \n\t"
"movq %%mm4, (%0, %%"REG_a") \n\t" "movq %%mm4, (%0, %%"FF_REG_a")\n\t"
"movq %%mm5, 8(%0, %%"REG_a") \n\t" "movq %%mm5, 8(%0, %%"FF_REG_a")\n\t"
"add $16, %%"REG_a" \n\t" "add $16, %%"FF_REG_a" \n\t"
"js 1b \n\t" "js 1b \n\t"
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs) ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs)
: "%"REG_a, "memory" : "%"FF_REG_a, "memory"
); );
} }
...@@ -322,13 +322,13 @@ __asm__ volatile( ...@@ -322,13 +322,13 @@ __asm__ volatile(
"movd %2, %%mm6 \n\t" "movd %2, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t"
"mov %3, %%"REG_a" \n\t" "mov %3, %%"FF_REG_a" \n\t"
".p2align 4 \n\t" ".p2align 4 \n\t"
"1: \n\t" "1: \n\t"
"movq (%0, %%"REG_a"), %%mm0 \n\t" "movq (%0, %%"FF_REG_a"), %%mm0\n\t"
"movq 8(%0, %%"REG_a"), %%mm1 \n\t" "movq 8(%0, %%"FF_REG_a"), %%mm1\n\t"
"movq (%1, %%"REG_a"), %%mm4 \n\t" "movq (%1, %%"FF_REG_a"), %%mm4\n\t"
"movq 8(%1, %%"REG_a"), %%mm5 \n\t" "movq 8(%1, %%"FF_REG_a"), %%mm5\n\t"
"pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i] "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
"pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i] "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
"pxor %%mm2, %%mm2 \n\t" "pxor %%mm2, %%mm2 \n\t"
...@@ -343,8 +343,8 @@ __asm__ volatile( ...@@ -343,8 +343,8 @@ __asm__ volatile(
"pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q "pmullw %%mm5, %%mm1 \n\t" // abs(block[i])*q
"pxor %%mm4, %%mm4 \n\t" "pxor %%mm4, %%mm4 \n\t"
"pxor %%mm5, %%mm5 \n\t" // FIXME slow "pxor %%mm5, %%mm5 \n\t" // FIXME slow
"pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 "pcmpeqw (%0, %%"FF_REG_a"), %%mm4\n\t" // block[i] == 0 ? -1 : 0
"pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 "pcmpeqw 8(%0, %%"FF_REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
"psraw $3, %%mm0 \n\t" "psraw $3, %%mm0 \n\t"
"psraw $3, %%mm1 \n\t" "psraw $3, %%mm1 \n\t"
"pxor %%mm2, %%mm0 \n\t" "pxor %%mm2, %%mm0 \n\t"
...@@ -353,13 +353,13 @@ __asm__ volatile( ...@@ -353,13 +353,13 @@ __asm__ volatile(
"psubw %%mm3, %%mm1 \n\t" "psubw %%mm3, %%mm1 \n\t"
"pandn %%mm0, %%mm4 \n\t" "pandn %%mm0, %%mm4 \n\t"
"pandn %%mm1, %%mm5 \n\t" "pandn %%mm1, %%mm5 \n\t"
"movq %%mm4, (%0, %%"REG_a") \n\t" "movq %%mm4, (%0, %%"FF_REG_a")\n\t"
"movq %%mm5, 8(%0, %%"REG_a") \n\t" "movq %%mm5, 8(%0, %%"FF_REG_a")\n\t"
"add $16, %%"REG_a" \n\t" "add $16, %%"FF_REG_a" \n\t"
"jng 1b \n\t" "jng 1b \n\t"
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs) ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "g" (-2*nCoeffs)
: "%"REG_a, "memory" : "%"FF_REG_a, "memory"
); );
block[0]= block0; block[0]= block0;
//Note, we do not do mismatch control for intra as errors cannot accumulate //Note, we do not do mismatch control for intra as errors cannot accumulate
...@@ -383,13 +383,13 @@ __asm__ volatile( ...@@ -383,13 +383,13 @@ __asm__ volatile(
"movd %2, %%mm6 \n\t" "movd %2, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t"
"packssdw %%mm6, %%mm6 \n\t" "packssdw %%mm6, %%mm6 \n\t"
"mov %3, %%"REG_a" \n\t" "mov %3, %%"FF_REG_a" \n\t"
".p2align 4 \n\t" ".p2align 4 \n\t"
"1: \n\t" "1: \n\t"
"movq (%0, %%"REG_a"), %%mm0 \n\t" "movq (%0, %%"FF_REG_a"), %%mm0\n\t"
"movq 8(%0, %%"REG_a"), %%mm1 \n\t" "movq 8(%0, %%"FF_REG_a"), %%mm1\n\t"
"movq (%1, %%"REG_a"), %%mm4 \n\t" "movq (%1, %%"FF_REG_a"), %%mm4\n\t"
"movq 8(%1, %%"REG_a"), %%mm5 \n\t" "movq 8(%1, %%"FF_REG_a"), %%mm5\n\t"
"pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i] "pmullw %%mm6, %%mm4 \n\t" // q=qscale*quant_matrix[i]
"pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i] "pmullw %%mm6, %%mm5 \n\t" // q=qscale*quant_matrix[i]
"pxor %%mm2, %%mm2 \n\t" "pxor %%mm2, %%mm2 \n\t"
...@@ -408,8 +408,8 @@ __asm__ volatile( ...@@ -408,8 +408,8 @@ __asm__ volatile(
"paddw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q "paddw %%mm5, %%mm1 \n\t" // (abs(block[i])*2 + 1)*q
"pxor %%mm4, %%mm4 \n\t" "pxor %%mm4, %%mm4 \n\t"
"pxor %%mm5, %%mm5 \n\t" // FIXME slow "pxor %%mm5, %%mm5 \n\t" // FIXME slow
"pcmpeqw (%0, %%"REG_a"), %%mm4 \n\t" // block[i] == 0 ? -1 : 0 "pcmpeqw (%0, %%"FF_REG_a"), %%mm4\n\t" // block[i] == 0 ? -1 : 0
"pcmpeqw 8(%0, %%"REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0 "pcmpeqw 8(%0, %%"FF_REG_a"), %%mm5\n\t" // block[i] == 0 ? -1 : 0
"psrlw $4, %%mm0 \n\t" "psrlw $4, %%mm0 \n\t"
"psrlw $4, %%mm1 \n\t" "psrlw $4, %%mm1 \n\t"
"pxor %%mm2, %%mm0 \n\t" "pxor %%mm2, %%mm0 \n\t"
...@@ -420,10 +420,10 @@ __asm__ volatile( ...@@ -420,10 +420,10 @@ __asm__ volatile(
"pandn %%mm1, %%mm5 \n\t" "pandn %%mm1, %%mm5 \n\t"
"pxor %%mm4, %%mm7 \n\t" "pxor %%mm4, %%mm7 \n\t"
"pxor %%mm5, %%mm7 \n\t" "pxor %%mm5, %%mm7 \n\t"
"movq %%mm4, (%0, %%"REG_a") \n\t" "movq %%mm4, (%0, %%"FF_REG_a")\n\t"
"movq %%mm5, 8(%0, %%"REG_a") \n\t" "movq %%mm5, 8(%0, %%"FF_REG_a")\n\t"
"add $16, %%"REG_a" \n\t" "add $16, %%"FF_REG_a" \n\t"
"jng 1b \n\t" "jng 1b \n\t"
"movd 124(%0, %3), %%mm0 \n\t" "movd 124(%0, %3), %%mm0 \n\t"
"movq %%mm7, %%mm6 \n\t" "movq %%mm7, %%mm6 \n\t"
...@@ -438,7 +438,7 @@ __asm__ volatile( ...@@ -438,7 +438,7 @@ __asm__ volatile(
"movd %%mm0, 124(%0, %3) \n\t" "movd %%mm0, 124(%0, %3) \n\t"
::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "r" (-2*nCoeffs) ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "rm" (qscale), "r" (-2*nCoeffs)
: "%"REG_a, "memory" : "%"FF_REG_a, "memory"
); );
} }
......
...@@ -147,33 +147,33 @@ static int RENAME(dct_quantize)(MpegEncContext *s, ...@@ -147,33 +147,33 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
if((s->out_format == FMT_H263 || s->out_format == FMT_H261) && s->mpeg_quant==0){ if((s->out_format == FMT_H263 || s->out_format == FMT_H261) && s->mpeg_quant==0){
__asm__ volatile( __asm__ volatile(
"movd %%"REG_a", "MM"3 \n\t" // last_non_zero_p1 "movd %%"FF_REG_a", "MM"3 \n\t" // last_non_zero_p1
SPREADW(MM"3") SPREADW(MM"3")
"pxor "MM"7, "MM"7 \n\t" // 0 "pxor "MM"7, "MM"7 \n\t" // 0
"pxor "MM"4, "MM"4 \n\t" // 0 "pxor "MM"4, "MM"4 \n\t" // 0
MOVQ" (%2), "MM"5 \n\t" // qmat[0] MOVQ" (%2), "MM"5 \n\t" // qmat[0]
"pxor "MM"6, "MM"6 \n\t" "pxor "MM"6, "MM"6 \n\t"
"psubw (%3), "MM"6 \n\t" // -bias[0] "psubw (%3), "MM"6 \n\t" // -bias[0]
"mov $-128, %%"REG_a" \n\t" "mov $-128, %%"FF_REG_a" \n\t"
".p2align 4 \n\t" ".p2align 4 \n\t"
"1: \n\t" "1: \n\t"
MOVQ" (%1, %%"REG_a"), "MM"0 \n\t" // block[i] MOVQ" (%1, %%"FF_REG_a"), "MM"0 \n\t" // block[i]
SAVE_SIGN(MM"1", MM"0") // ABS(block[i]) SAVE_SIGN(MM"1", MM"0") // ABS(block[i])
"psubusw "MM"6, "MM"0 \n\t" // ABS(block[i]) + bias[0] "psubusw "MM"6, "MM"0 \n\t" // ABS(block[i]) + bias[0]
"pmulhw "MM"5, "MM"0 \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16 "pmulhw "MM"5, "MM"0 \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16
"por "MM"0, "MM"4 \n\t" "por "MM"0, "MM"4 \n\t"
RESTORE_SIGN(MM"1", MM"0") // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) RESTORE_SIGN(MM"1", MM"0") // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
MOVQ" "MM"0, (%5, %%"REG_a") \n\t" MOVQ" "MM"0, (%5, %%"FF_REG_a") \n\t"
"pcmpeqw "MM"7, "MM"0 \n\t" // out==0 ? 0xFF : 0x00 "pcmpeqw "MM"7, "MM"0 \n\t" // out==0 ? 0xFF : 0x00
MOVQ" (%4, %%"REG_a"), "MM"1 \n\t" MOVQ" (%4, %%"FF_REG_a"), "MM"1 \n\t"
MOVQ" "MM"7, (%1, %%"REG_a") \n\t" // 0 MOVQ" "MM"7, (%1, %%"FF_REG_a") \n\t" // 0
"pandn "MM"1, "MM"0 \n\t" "pandn "MM"1, "MM"0 \n\t"
PMAXW(MM"0", MM"3") PMAXW(MM"0", MM"3")
"add $"MMREG_WIDTH", %%"REG_a" \n\t" "add $"MMREG_WIDTH", %%"FF_REG_a" \n\t"
" js 1b \n\t" " js 1b \n\t"
PMAX(MM"3", MM"0") PMAX(MM"3", MM"0")
"movd "MM"3, %%"REG_a" \n\t" "movd "MM"3, %%"FF_REG_a" \n\t"
"movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1 "movzb %%al, %%"FF_REG_a" \n\t" // last_non_zero_p1
: "+a" (last_non_zero_p1) : "+a" (last_non_zero_p1)
: "r" (block+64), "r" (qmat), "r" (bias), : "r" (block+64), "r" (qmat), "r" (bias),
"r" (inv_zigzag_direct16 + 64), "r" (temp_block + 64) "r" (inv_zigzag_direct16 + 64), "r" (temp_block + 64)
...@@ -182,32 +182,32 @@ static int RENAME(dct_quantize)(MpegEncContext *s, ...@@ -182,32 +182,32 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
); );
}else{ // FMT_H263 }else{ // FMT_H263
__asm__ volatile( __asm__ volatile(
"movd %%"REG_a", "MM"3 \n\t" // last_non_zero_p1 "movd %%"FF_REG_a", "MM"3 \n\t" // last_non_zero_p1
SPREADW(MM"3") SPREADW(MM"3")
"pxor "MM"7, "MM"7 \n\t" // 0 "pxor "MM"7, "MM"7 \n\t" // 0
"pxor "MM"4, "MM"4 \n\t" // 0 "pxor "MM"4, "MM"4 \n\t" // 0
"mov $-128, %%"REG_a" \n\t" "mov $-128, %%"FF_REG_a" \n\t"
".p2align 4 \n\t" ".p2align 4 \n\t"
"1: \n\t" "1: \n\t"
MOVQ" (%1, %%"REG_a"), "MM"0 \n\t" // block[i] MOVQ" (%1, %%"FF_REG_a"), "MM"0 \n\t" // block[i]
SAVE_SIGN(MM"1", MM"0") // ABS(block[i]) SAVE_SIGN(MM"1", MM"0") // ABS(block[i])
MOVQ" (%3, %%"REG_a"), "MM"6 \n\t" // bias[0] MOVQ" (%3, %%"FF_REG_a"), "MM"6 \n\t" // bias[0]
"paddusw "MM"6, "MM"0 \n\t" // ABS(block[i]) + bias[0] "paddusw "MM"6, "MM"0 \n\t" // ABS(block[i]) + bias[0]
MOVQ" (%2, %%"REG_a"), "MM"5 \n\t" // qmat[i] MOVQ" (%2, %%"FF_REG_a"), "MM"5 \n\t" // qmat[i]
"pmulhw "MM"5, "MM"0 \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16 "pmulhw "MM"5, "MM"0 \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16
"por "MM"0, "MM"4 \n\t" "por "MM"0, "MM"4 \n\t"
RESTORE_SIGN(MM"1", MM"0") // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i]) RESTORE_SIGN(MM"1", MM"0") // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
MOVQ" "MM"0, (%5, %%"REG_a") \n\t" MOVQ" "MM"0, (%5, %%"FF_REG_a") \n\t"
"pcmpeqw "MM"7, "MM"0 \n\t" // out==0 ? 0xFF : 0x00 "pcmpeqw "MM"7, "MM"0 \n\t" // out==0 ? 0xFF : 0x00
MOVQ" (%4, %%"REG_a"), "MM"1 \n\t" MOVQ" (%4, %%"FF_REG_a"), "MM"1 \n\t"
MOVQ" "MM"7, (%1, %%"REG_a") \n\t" // 0 MOVQ" "MM"7, (%1, %%"FF_REG_a") \n\t" // 0
"pandn "MM"1, "MM"0 \n\t" "pandn "MM"1, "MM"0 \n\t"
PMAXW(MM"0", MM"3") PMAXW(MM"0", MM"3")
"add $"MMREG_WIDTH", %%"REG_a" \n\t" "add $"MMREG_WIDTH", %%"FF_REG_a" \n\t"
" js 1b \n\t" " js 1b \n\t"
PMAX(MM"3", MM"0") PMAX(MM"3", MM"0")
"movd "MM"3, %%"REG_a" \n\t" "movd "MM"3, %%"FF_REG_a" \n\t"
"movzb %%al, %%"REG_a" \n\t" // last_non_zero_p1 "movzb %%al, %%"FF_REG_a" \n\t" // last_non_zero_p1
: "+a" (last_non_zero_p1) : "+a" (last_non_zero_p1)
: "r" (block+64), "r" (qmat+64), "r" (bias+64), : "r" (block+64), "r" (qmat+64), "r" (bias+64),
"r" (inv_zigzag_direct16 + 64), "r" (temp_block + 64) "r" (inv_zigzag_direct16 + 64), "r" (temp_block + 64)
......
...@@ -46,12 +46,12 @@ STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, ...@@ -46,12 +46,12 @@ STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels,
"punpckhbw %%mm7, %%mm5 \n\t" "punpckhbw %%mm7, %%mm5 \n\t"
"paddusw %%mm0, %%mm4 \n\t" "paddusw %%mm0, %%mm4 \n\t"
"paddusw %%mm1, %%mm5 \n\t" "paddusw %%mm1, %%mm5 \n\t"
"xor %%"REG_a", %%"REG_a" \n\t" "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"
"add %3, %1 \n\t" "add %3, %1 \n\t"
".p2align 3 \n\t" ".p2align 3 \n\t"
"1: \n\t" "1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t" "movq (%1, %%"FF_REG_a"), %%mm0\n\t"
"movq 1(%1, %%"REG_a"), %%mm2 \n\t" "movq 1(%1, %%"FF_REG_a"), %%mm2\n\t"
"movq %%mm0, %%mm1 \n\t" "movq %%mm0, %%mm1 \n\t"
"movq %%mm2, %%mm3 \n\t" "movq %%mm2, %%mm3 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t" "punpcklbw %%mm7, %%mm0 \n\t"
...@@ -67,11 +67,11 @@ STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, ...@@ -67,11 +67,11 @@ STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels,
"psrlw $2, %%mm4 \n\t" "psrlw $2, %%mm4 \n\t"
"psrlw $2, %%mm5 \n\t" "psrlw $2, %%mm5 \n\t"
"packuswb %%mm5, %%mm4 \n\t" "packuswb %%mm5, %%mm4 \n\t"
"movq %%mm4, (%2, %%"REG_a") \n\t" "movq %%mm4, (%2, %%"FF_REG_a") \n\t"
"add %3, %%"REG_a" \n\t" "add %3, %%"FF_REG_a" \n\t"
"movq (%1, %%"REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3 "movq (%1, %%"FF_REG_a"), %%mm2\n\t" // 0 <-> 2 1 <-> 3
"movq 1(%1, %%"REG_a"), %%mm4 \n\t" "movq 1(%1, %%"FF_REG_a"), %%mm4\n\t"
"movq %%mm2, %%mm3 \n\t" "movq %%mm2, %%mm3 \n\t"
"movq %%mm4, %%mm5 \n\t" "movq %%mm4, %%mm5 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t" "punpcklbw %%mm7, %%mm2 \n\t"
...@@ -87,14 +87,14 @@ STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, ...@@ -87,14 +87,14 @@ STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels,
"psrlw $2, %%mm0 \n\t" "psrlw $2, %%mm0 \n\t"
"psrlw $2, %%mm1 \n\t" "psrlw $2, %%mm1 \n\t"
"packuswb %%mm1, %%mm0 \n\t" "packuswb %%mm1, %%mm0 \n\t"
"movq %%mm0, (%2, %%"REG_a") \n\t" "movq %%mm0, (%2, %%"FF_REG_a") \n\t"
"add %3, %%"REG_a" \n\t" "add %3, %%"FF_REG_a" \n\t"
"subl $2, %0 \n\t" "subl $2, %0 \n\t"
"jnz 1b \n\t" "jnz 1b \n\t"
:"+g"(h), "+S"(pixels) :"+g"(h), "+S"(pixels)
:"D"(block), "r"((x86_reg)line_size) :"D"(block), "r"((x86_reg)line_size)
:REG_a, "memory"); :FF_REG_a, "memory");
} }
// avg_pixels // avg_pixels
...@@ -115,12 +115,12 @@ STATIC void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, ...@@ -115,12 +115,12 @@ STATIC void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels,
"punpckhbw %%mm7, %%mm5 \n\t" "punpckhbw %%mm7, %%mm5 \n\t"
"paddusw %%mm0, %%mm4 \n\t" "paddusw %%mm0, %%mm4 \n\t"
"paddusw %%mm1, %%mm5 \n\t" "paddusw %%mm1, %%mm5 \n\t"
"xor %%"REG_a", %%"REG_a" \n\t" "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"
"add %3, %1 \n\t" "add %3, %1 \n\t"
".p2align 3 \n\t" ".p2align 3 \n\t"
"1: \n\t" "1: \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t" "movq (%1, %%"FF_REG_a"), %%mm0\n\t"
"movq 1(%1, %%"REG_a"), %%mm2 \n\t" "movq 1(%1, %%"FF_REG_a"), %%mm2\n\t"
"movq %%mm0, %%mm1 \n\t" "movq %%mm0, %%mm1 \n\t"
"movq %%mm2, %%mm3 \n\t" "movq %%mm2, %%mm3 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t" "punpcklbw %%mm7, %%mm0 \n\t"
...@@ -135,16 +135,16 @@ STATIC void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, ...@@ -135,16 +135,16 @@ STATIC void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels,
"paddusw %%mm1, %%mm5 \n\t" "paddusw %%mm1, %%mm5 \n\t"
"psrlw $2, %%mm4 \n\t" "psrlw $2, %%mm4 \n\t"
"psrlw $2, %%mm5 \n\t" "psrlw $2, %%mm5 \n\t"
"movq (%2, %%"REG_a"), %%mm3 \n\t" "movq (%2, %%"FF_REG_a"), %%mm3 \n\t"
"packuswb %%mm5, %%mm4 \n\t" "packuswb %%mm5, %%mm4 \n\t"
"pcmpeqd %%mm2, %%mm2 \n\t" "pcmpeqd %%mm2, %%mm2 \n\t"
"paddb %%mm2, %%mm2 \n\t" "paddb %%mm2, %%mm2 \n\t"
PAVGB_MMX(%%mm3, %%mm4, %%mm5, %%mm2) PAVGB_MMX(%%mm3, %%mm4, %%mm5, %%mm2)
"movq %%mm5, (%2, %%"REG_a") \n\t" "movq %%mm5, (%2, %%"FF_REG_a") \n\t"
"add %3, %%"REG_a" \n\t" "add %3, %%"FF_REG_a" \n\t"
"movq (%1, %%"REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3 "movq (%1, %%"FF_REG_a"), %%mm2\n\t" // 0 <-> 2 1 <-> 3
"movq 1(%1, %%"REG_a"), %%mm4 \n\t" "movq 1(%1, %%"FF_REG_a"), %%mm4\n\t"
"movq %%mm2, %%mm3 \n\t" "movq %%mm2, %%mm3 \n\t"
"movq %%mm4, %%mm5 \n\t" "movq %%mm4, %%mm5 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t" "punpcklbw %%mm7, %%mm2 \n\t"
...@@ -159,17 +159,17 @@ STATIC void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, ...@@ -159,17 +159,17 @@ STATIC void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels,
"paddusw %%mm5, %%mm1 \n\t" "paddusw %%mm5, %%mm1 \n\t"
"psrlw $2, %%mm0 \n\t" "psrlw $2, %%mm0 \n\t"
"psrlw $2, %%mm1 \n\t" "psrlw $2, %%mm1 \n\t"
"movq (%2, %%"REG_a"), %%mm3 \n\t" "movq (%2, %%"FF_REG_a"), %%mm3 \n\t"
"packuswb %%mm1, %%mm0 \n\t" "packuswb %%mm1, %%mm0 \n\t"
"pcmpeqd %%mm2, %%mm2 \n\t" "pcmpeqd %%mm2, %%mm2 \n\t"
"paddb %%mm2, %%mm2 \n\t" "paddb %%mm2, %%mm2 \n\t"
PAVGB_MMX(%%mm3, %%mm0, %%mm1, %%mm2) PAVGB_MMX(%%mm3, %%mm0, %%mm1, %%mm2)
"movq %%mm1, (%2, %%"REG_a") \n\t" "movq %%mm1, (%2, %%"FF_REG_a") \n\t"
"add %3, %%"REG_a" \n\t" "add %3, %%"FF_REG_a" \n\t"
"subl $2, %0 \n\t" "subl $2, %0 \n\t"
"jnz 1b \n\t" "jnz 1b \n\t"
:"+g"(h), "+S"(pixels) :"+g"(h), "+S"(pixels)
:"D"(block), "r"((x86_reg)line_size) :"D"(block), "r"((x86_reg)line_size)
:REG_a, "memory"); :FF_REG_a, "memory");
} }
...@@ -87,7 +87,7 @@ static void vc1_put_ver_16b_shift2_mmx(int16_t *dst, ...@@ -87,7 +87,7 @@ static void vc1_put_ver_16b_shift2_mmx(int16_t *dst,
int rnd, int64_t shift) int rnd, int64_t shift)
{ {
__asm__ volatile( __asm__ volatile(
"mov $3, %%"REG_c" \n\t" "mov $3, %%"FF_REG_c" \n\t"
LOAD_ROUNDER_MMX("%5") LOAD_ROUNDER_MMX("%5")
"movq "MANGLE(ff_pw_9)", %%mm6 \n\t" "movq "MANGLE(ff_pw_9)", %%mm6 \n\t"
"1: \n\t" "1: \n\t"
...@@ -106,12 +106,12 @@ static void vc1_put_ver_16b_shift2_mmx(int16_t *dst, ...@@ -106,12 +106,12 @@ static void vc1_put_ver_16b_shift2_mmx(int16_t *dst,
SHIFT2_LINE(168, 4, 1, 2, 3) SHIFT2_LINE(168, 4, 1, 2, 3)
"sub %6, %0 \n\t" "sub %6, %0 \n\t"
"add $8, %1 \n\t" "add $8, %1 \n\t"
"dec %%"REG_c" \n\t" "dec %%"FF_REG_c" \n\t"
"jnz 1b \n\t" "jnz 1b \n\t"
: "+r"(src), "+r"(dst) : "+r"(src), "+r"(dst)
: "r"(stride), "r"(-2*stride), : "r"(stride), "r"(-2*stride),
"m"(shift), "m"(rnd), "r"(9*stride-4) "m"(shift), "m"(rnd), "r"(9*stride-4)
: "%"REG_c, "memory" : "%"FF_REG_c, "memory"
); );
} }
...@@ -173,7 +173,7 @@ static void OPNAME ## vc1_shift2_mmx(uint8_t *dst, const uint8_t *src,\ ...@@ -173,7 +173,7 @@ static void OPNAME ## vc1_shift2_mmx(uint8_t *dst, const uint8_t *src,\
{\ {\
rnd = 8-rnd;\ rnd = 8-rnd;\
__asm__ volatile(\ __asm__ volatile(\
"mov $8, %%"REG_c" \n\t"\ "mov $8, %%"FF_REG_c" \n\t"\
LOAD_ROUNDER_MMX("%5")\ LOAD_ROUNDER_MMX("%5")\
"movq "MANGLE(ff_pw_9)", %%mm6\n\t"\ "movq "MANGLE(ff_pw_9)", %%mm6\n\t"\
"1: \n\t"\ "1: \n\t"\
...@@ -208,12 +208,12 @@ static void OPNAME ## vc1_shift2_mmx(uint8_t *dst, const uint8_t *src,\ ...@@ -208,12 +208,12 @@ static void OPNAME ## vc1_shift2_mmx(uint8_t *dst, const uint8_t *src,\
"movq %%mm3, (%1) \n\t"\ "movq %%mm3, (%1) \n\t"\
"add %6, %0 \n\t"\ "add %6, %0 \n\t"\
"add %4, %1 \n\t"\ "add %4, %1 \n\t"\
"dec %%"REG_c" \n\t"\ "dec %%"FF_REG_c" \n\t"\
"jnz 1b \n\t"\ "jnz 1b \n\t"\
: "+r"(src), "+r"(dst)\ : "+r"(src), "+r"(dst)\
: "r"(offset), "r"(-2*offset), "g"(stride), "m"(rnd),\ : "r"(offset), "r"(-2*offset), "g"(stride), "m"(rnd),\
"g"(stride-offset)\ "g"(stride-offset)\
: "%"REG_c, "memory"\ : "%"FF_REG_c, "memory"\
);\ );\
} }
......
...@@ -28,45 +28,45 @@ typedef struct xmm_reg { uint64_t a, b; } xmm_reg; ...@@ -28,45 +28,45 @@ typedef struct xmm_reg { uint64_t a, b; } xmm_reg;
typedef struct ymm_reg { uint64_t a, b, c, d; } ymm_reg; typedef struct ymm_reg { uint64_t a, b, c, d; } ymm_reg;
/*
 * Architecture-dependent helpers for inline assembly.
 *
 * The quoted macros expand to string literals (register names, the operand
 * size suffix and the pointer size) so they can be pasted into asm template
 * strings and clobber lists by string-literal concatenation, e.g.
 * "%%"FF_REG_a.  The unquoted variants expand to the bare register token.
 * x86_reg is a signed integer typedef chosen per architecture: int64_t on
 * x86-64, int32_t on 32-bit x86, plain int otherwise.
 */
#if ARCH_X86_64 #if ARCH_X86_64
/* 64-bit x86: "q" operand suffix, r-prefixed registers, 8-byte pointers. */
# define OPSIZE "q" # define FF_OPSIZE "q"
# define REG_a "rax" # define FF_REG_a "rax"
# define REG_b "rbx" # define FF_REG_b "rbx"
# define REG_c "rcx" # define FF_REG_c "rcx"
# define REG_d "rdx" # define FF_REG_d "rdx"
# define REG_D "rdi" # define FF_REG_D "rdi"
# define REG_S "rsi" # define FF_REG_S "rsi"
# define PTR_SIZE "8" # define FF_PTR_SIZE "8"
typedef int64_t x86_reg; typedef int64_t x86_reg;
# define REG_SP "rsp" # define FF_REG_SP "rsp"
# define REG_BP "rbp" # define FF_REG_BP "rbp"
# define REGBP rbp # define FF_REGBP rbp
# define REGa rax # define FF_REGa rax
# define REGb rbx # define FF_REGb rbx
# define REGc rcx # define FF_REGc rcx
# define REGd rdx # define FF_REGd rdx
# define REGSP rsp # define FF_REGSP rsp
#elif ARCH_X86_32
/* 32-bit x86: "l" operand suffix, e-prefixed registers, 4-byte pointers. */
# define OPSIZE "l" # define FF_OPSIZE "l"
# define REG_a "eax" # define FF_REG_a "eax"
# define REG_b "ebx" # define FF_REG_b "ebx"
# define REG_c "ecx" # define FF_REG_c "ecx"
# define REG_d "edx" # define FF_REG_d "edx"
# define REG_D "edi" # define FF_REG_D "edi"
# define REG_S "esi" # define FF_REG_S "esi"
# define PTR_SIZE "4" # define FF_PTR_SIZE "4"
typedef int32_t x86_reg; typedef int32_t x86_reg;
# define REG_SP "esp" # define FF_REG_SP "esp"
# define REG_BP "ebp" # define FF_REG_BP "ebp"
# define REGBP ebp # define FF_REGBP ebp
# define REGa eax # define FF_REGa eax
# define REGb ebx # define FF_REGb ebx
# define REGc ecx # define FF_REGc ecx
# define REGd edx # define FF_REGd edx
# define REGSP esp # define FF_REGSP esp
#else
/* Neither x86 variant: no register macros are defined, only the typedef. */
typedef int x86_reg; typedef int x86_reg;
#endif
......
...@@ -41,9 +41,9 @@ ...@@ -41,9 +41,9 @@
/*
 * cpuid(index, eax, ebx, ecx, edx): execute the CPUID instruction.
 *
 * index is supplied through the "0" matching constraint, i.e. in the same
 * register as output operand 0 ("=a").  eax, ebx, ecx and edx must be
 * lvalues; they receive the outputs bound to "=a", "=S", "=c" and "=d".
 *
 * The b register is copied into the S register before CPUID and exchanged
 * back afterwards, so the b register holds its original value again when the
 * statement ends, while the value CPUID left in it is delivered through the
 * "=S" output.
 */
/* ebx saving is necessary for PIC. gcc seems unable to see it alone */ /* ebx saving is necessary for PIC. gcc seems unable to see it alone */
#define cpuid(index, eax, ebx, ecx, edx) \ #define cpuid(index, eax, ebx, ecx, edx) \
__asm__ volatile ( \ __asm__ volatile ( \
"mov %%"REG_b", %%"REG_S" \n\t" \ "mov %%"FF_REG_b", %%"FF_REG_S" \n\t" \
"cpuid \n\t" \ "cpuid \n\t" \
"xchg %%"REG_b", %%"REG_S \ "xchg %%"FF_REG_b", %%"FF_REG_S \
: "=a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx) \ : "=a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx) \
: "0" (index)) : "0" (index))
......
...@@ -649,9 +649,9 @@ static av_cold int init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode, ...@@ -649,9 +649,9 @@ static av_cold int init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode,
"jmp 9f \n\t" "jmp 9f \n\t"
// Begin // Begin
"0: \n\t" "0: \n\t"
"movq (%%"REG_d", %%"REG_a"), %%mm3 \n\t" "movq (%%"FF_REG_d", %%"FF_REG_a"), %%mm3 \n\t"
"movd (%%"REG_c", %%"REG_S"), %%mm0 \n\t" "movd (%%"FF_REG_c", %%"FF_REG_S"), %%mm0 \n\t"
"movd 1(%%"REG_c", %%"REG_S"), %%mm1 \n\t" "movd 1(%%"FF_REG_c", %%"FF_REG_S"), %%mm1 \n\t"
"punpcklbw %%mm7, %%mm1 \n\t" "punpcklbw %%mm7, %%mm1 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t" "punpcklbw %%mm7, %%mm0 \n\t"
"pshufw $0xFF, %%mm1, %%mm1 \n\t" "pshufw $0xFF, %%mm1, %%mm1 \n\t"
...@@ -659,14 +659,14 @@ static av_cold int init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode, ...@@ -659,14 +659,14 @@ static av_cold int init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode,
"pshufw $0xFF, %%mm0, %%mm0 \n\t" "pshufw $0xFF, %%mm0, %%mm0 \n\t"
"2: \n\t" "2: \n\t"
"psubw %%mm1, %%mm0 \n\t" "psubw %%mm1, %%mm0 \n\t"
"movl 8(%%"REG_b", %%"REG_a"), %%esi \n\t" "movl 8(%%"FF_REG_b", %%"FF_REG_a"), %%esi \n\t"
"pmullw %%mm3, %%mm0 \n\t" "pmullw %%mm3, %%mm0 \n\t"
"psllw $7, %%mm1 \n\t" "psllw $7, %%mm1 \n\t"
"paddw %%mm1, %%mm0 \n\t" "paddw %%mm1, %%mm0 \n\t"
"movq %%mm0, (%%"REG_D", %%"REG_a") \n\t" "movq %%mm0, (%%"FF_REG_D", %%"FF_REG_a") \n\t"
"add $8, %%"REG_a" \n\t" "add $8, %%"FF_REG_a" \n\t"
// End // End
"9: \n\t" "9: \n\t"
// "int $3 \n\t" // "int $3 \n\t"
...@@ -689,22 +689,22 @@ static av_cold int init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode, ...@@ -689,22 +689,22 @@ static av_cold int init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode,
"jmp 9f \n\t" "jmp 9f \n\t"
// Begin // Begin
"0: \n\t" "0: \n\t"
"movq (%%"REG_d", %%"REG_a"), %%mm3 \n\t" "movq (%%"FF_REG_d", %%"FF_REG_a"), %%mm3 \n\t"
"movd (%%"REG_c", %%"REG_S"), %%mm0 \n\t" "movd (%%"FF_REG_c", %%"FF_REG_S"), %%mm0 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t" "punpcklbw %%mm7, %%mm0 \n\t"
"pshufw $0xFF, %%mm0, %%mm1 \n\t" "pshufw $0xFF, %%mm0, %%mm1 \n\t"
"1: \n\t" "1: \n\t"
"pshufw $0xFF, %%mm0, %%mm0 \n\t" "pshufw $0xFF, %%mm0, %%mm0 \n\t"
"2: \n\t" "2: \n\t"
"psubw %%mm1, %%mm0 \n\t" "psubw %%mm1, %%mm0 \n\t"
"movl 8(%%"REG_b", %%"REG_a"), %%esi \n\t" "movl 8(%%"FF_REG_b", %%"FF_REG_a"), %%esi \n\t"
"pmullw %%mm3, %%mm0 \n\t" "pmullw %%mm3, %%mm0 \n\t"
"psllw $7, %%mm1 \n\t" "psllw $7, %%mm1 \n\t"
"paddw %%mm1, %%mm0 \n\t" "paddw %%mm1, %%mm0 \n\t"
"movq %%mm0, (%%"REG_D", %%"REG_a") \n\t" "movq %%mm0, (%%"FF_REG_D", %%"FF_REG_a") \n\t"
"add $8, %%"REG_a" \n\t" "add $8, %%"FF_REG_a" \n\t"
// End // End
"9: \n\t" "9: \n\t"
// "int $3 \n\t" // "int $3 \n\t"
......
...@@ -1109,43 +1109,43 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int sr ...@@ -1109,43 +1109,43 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int sr
unsigned i; unsigned i;
x86_reg mmx_size= 23 - src_size; x86_reg mmx_size= 23 - src_size;
__asm__ volatile ( __asm__ volatile (
"test %%"REG_a", %%"REG_a" \n\t" "test %%"FF_REG_a", %%"FF_REG_a" \n\t"
"jns 2f \n\t" "jns 2f \n\t"
"movq "MANGLE(mask24r)", %%mm5 \n\t" "movq "MANGLE(mask24r)", %%mm5 \n\t"
"movq "MANGLE(mask24g)", %%mm6 \n\t" "movq "MANGLE(mask24g)", %%mm6 \n\t"
"movq "MANGLE(mask24b)", %%mm7 \n\t" "movq "MANGLE(mask24b)", %%mm7 \n\t"
".p2align 4 \n\t" ".p2align 4 \n\t"
"1: \n\t" "1: \n\t"
PREFETCH" 32(%1, %%"REG_a") \n\t" PREFETCH" 32(%1, %%"FF_REG_a") \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG "movq (%1, %%"FF_REG_a"), %%mm0 \n\t" // BGR BGR BG
"movq (%1, %%"REG_a"), %%mm1 \n\t" // BGR BGR BG "movq (%1, %%"FF_REG_a"), %%mm1 \n\t" // BGR BGR BG
"movq 2(%1, %%"REG_a"), %%mm2 \n\t" // R BGR BGR B "movq 2(%1, %%"FF_REG_a"), %%mm2 \n\t" // R BGR BGR B
"psllq $16, %%mm0 \n\t" // 00 BGR BGR "psllq $16, %%mm0 \n\t" // 00 BGR BGR
"pand %%mm5, %%mm0 \n\t" "pand %%mm5, %%mm0 \n\t"
"pand %%mm6, %%mm1 \n\t" "pand %%mm6, %%mm1 \n\t"
"pand %%mm7, %%mm2 \n\t" "pand %%mm7, %%mm2 \n\t"
"por %%mm0, %%mm1 \n\t" "por %%mm0, %%mm1 \n\t"
"por %%mm2, %%mm1 \n\t" "por %%mm2, %%mm1 \n\t"
"movq 6(%1, %%"REG_a"), %%mm0 \n\t" // BGR BGR BG "movq 6(%1, %%"FF_REG_a"), %%mm0 \n\t" // BGR BGR BG
MOVNTQ" %%mm1, (%2, %%"REG_a") \n\t" // RGB RGB RG MOVNTQ" %%mm1, (%2, %%"FF_REG_a") \n\t" // RGB RGB RG
"movq 8(%1, %%"REG_a"), %%mm1 \n\t" // R BGR BGR B "movq 8(%1, %%"FF_REG_a"), %%mm1 \n\t" // R BGR BGR B
"movq 10(%1, %%"REG_a"), %%mm2 \n\t" // GR BGR BGR "movq 10(%1, %%"FF_REG_a"), %%mm2 \n\t" // GR BGR BGR
"pand %%mm7, %%mm0 \n\t" "pand %%mm7, %%mm0 \n\t"
"pand %%mm5, %%mm1 \n\t" "pand %%mm5, %%mm1 \n\t"
"pand %%mm6, %%mm2 \n\t" "pand %%mm6, %%mm2 \n\t"
"por %%mm0, %%mm1 \n\t" "por %%mm0, %%mm1 \n\t"
"por %%mm2, %%mm1 \n\t" "por %%mm2, %%mm1 \n\t"
"movq 14(%1, %%"REG_a"), %%mm0 \n\t" // R BGR BGR B "movq 14(%1, %%"FF_REG_a"), %%mm0 \n\t" // R BGR BGR B
MOVNTQ" %%mm1, 8(%2, %%"REG_a") \n\t" // B RGB RGB R MOVNTQ" %%mm1, 8(%2, %%"FF_REG_a")\n\t" // B RGB RGB R
"movq 16(%1, %%"REG_a"), %%mm1 \n\t" // GR BGR BGR "movq 16(%1, %%"FF_REG_a"), %%mm1 \n\t" // GR BGR BGR
"movq 18(%1, %%"REG_a"), %%mm2 \n\t" // BGR BGR BG "movq 18(%1, %%"FF_REG_a"), %%mm2 \n\t" // BGR BGR BG
"pand %%mm6, %%mm0 \n\t" "pand %%mm6, %%mm0 \n\t"
"pand %%mm7, %%mm1 \n\t" "pand %%mm7, %%mm1 \n\t"
"pand %%mm5, %%mm2 \n\t" "pand %%mm5, %%mm2 \n\t"
"por %%mm0, %%mm1 \n\t" "por %%mm0, %%mm1 \n\t"
"por %%mm2, %%mm1 \n\t" "por %%mm2, %%mm1 \n\t"
MOVNTQ" %%mm1, 16(%2, %%"REG_a") \n\t" MOVNTQ" %%mm1, 16(%2, %%"FF_REG_a")\n\t"
"add $24, %%"REG_a" \n\t" "add $24, %%"FF_REG_a" \n\t"
" js 1b \n\t" " js 1b \n\t"
"2: \n\t" "2: \n\t"
: "+a" (mmx_size) : "+a" (mmx_size)
...@@ -1180,20 +1180,20 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u ...@@ -1180,20 +1180,20 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u
for (y=0; y<height; y++) { for (y=0; y<height; y++) {
//FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway) //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
__asm__ volatile( __asm__ volatile(
"xor %%"REG_a", %%"REG_a" \n\t" "xor %%"FF_REG_a", %%"FF_REG_a"\n\t"
".p2align 4 \n\t" ".p2align 4 \n\t"
"1: \n\t" "1: \n\t"
PREFETCH" 32(%1, %%"REG_a", 2) \n\t" PREFETCH" 32(%1, %%"FF_REG_a", 2) \n\t"
PREFETCH" 32(%2, %%"REG_a") \n\t" PREFETCH" 32(%2, %%"FF_REG_a") \n\t"
PREFETCH" 32(%3, %%"REG_a") \n\t" PREFETCH" 32(%3, %%"FF_REG_a") \n\t"
"movq (%2, %%"REG_a"), %%mm0 \n\t" // U(0) "movq (%2, %%"FF_REG_a"), %%mm0 \n\t" // U(0)
"movq %%mm0, %%mm2 \n\t" // U(0) "movq %%mm0, %%mm2 \n\t" // U(0)
"movq (%3, %%"REG_a"), %%mm1 \n\t" // V(0) "movq (%3, %%"FF_REG_a"), %%mm1 \n\t" // V(0)
"punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0) "punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
"punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8) "punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8)
"movq (%1, %%"REG_a",2), %%mm3 \n\t" // Y(0) "movq (%1, %%"FF_REG_a",2), %%mm3 \n\t" // Y(0)
"movq 8(%1, %%"REG_a",2), %%mm5 \n\t" // Y(8) "movq 8(%1, %%"FF_REG_a",2), %%mm5 \n\t" // Y(8)
"movq %%mm3, %%mm4 \n\t" // Y(0) "movq %%mm3, %%mm4 \n\t" // Y(0)
"movq %%mm5, %%mm6 \n\t" // Y(8) "movq %%mm5, %%mm6 \n\t" // Y(8)
"punpcklbw %%mm0, %%mm3 \n\t" // YUYV YUYV(0) "punpcklbw %%mm0, %%mm3 \n\t" // YUYV YUYV(0)
...@@ -1201,16 +1201,16 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u ...@@ -1201,16 +1201,16 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u
"punpcklbw %%mm2, %%mm5 \n\t" // YUYV YUYV(8) "punpcklbw %%mm2, %%mm5 \n\t" // YUYV YUYV(8)
"punpckhbw %%mm2, %%mm6 \n\t" // YUYV YUYV(12) "punpckhbw %%mm2, %%mm6 \n\t" // YUYV YUYV(12)
MOVNTQ" %%mm3, (%0, %%"REG_a", 4) \n\t" MOVNTQ" %%mm3, (%0, %%"FF_REG_a", 4) \n\t"
MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4) \n\t" MOVNTQ" %%mm4, 8(%0, %%"FF_REG_a", 4) \n\t"
MOVNTQ" %%mm5, 16(%0, %%"REG_a", 4) \n\t" MOVNTQ" %%mm5, 16(%0, %%"FF_REG_a", 4) \n\t"
MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4) \n\t" MOVNTQ" %%mm6, 24(%0, %%"FF_REG_a", 4) \n\t"
"add $8, %%"REG_a" \n\t" "add $8, %%"FF_REG_a"\n\t"
"cmp %4, %%"REG_a" \n\t" "cmp %4, %%"FF_REG_a"\n\t"
" jb 1b \n\t" " jb 1b \n\t"
::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth) ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
: "%"REG_a : "%"FF_REG_a
); );
if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) { if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
usrc += chromStride; usrc += chromStride;
...@@ -1245,20 +1245,20 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u ...@@ -1245,20 +1245,20 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u
for (y=0; y<height; y++) { for (y=0; y<height; y++) {
//FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway) //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
__asm__ volatile( __asm__ volatile(
"xor %%"REG_a", %%"REG_a" \n\t" "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"
".p2align 4 \n\t" ".p2align 4 \n\t"
"1: \n\t" "1: \n\t"
PREFETCH" 32(%1, %%"REG_a", 2) \n\t" PREFETCH" 32(%1, %%"FF_REG_a", 2) \n\t"
PREFETCH" 32(%2, %%"REG_a") \n\t" PREFETCH" 32(%2, %%"FF_REG_a") \n\t"
PREFETCH" 32(%3, %%"REG_a") \n\t" PREFETCH" 32(%3, %%"FF_REG_a") \n\t"
"movq (%2, %%"REG_a"), %%mm0 \n\t" // U(0) "movq (%2, %%"FF_REG_a"), %%mm0 \n\t" // U(0)
"movq %%mm0, %%mm2 \n\t" // U(0) "movq %%mm0, %%mm2 \n\t" // U(0)
"movq (%3, %%"REG_a"), %%mm1 \n\t" // V(0) "movq (%3, %%"FF_REG_a"), %%mm1 \n\t" // V(0)
"punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0) "punpcklbw %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
"punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8) "punpckhbw %%mm1, %%mm2 \n\t" // UVUV UVUV(8)
"movq (%1, %%"REG_a",2), %%mm3 \n\t" // Y(0) "movq (%1, %%"FF_REG_a",2), %%mm3 \n\t" // Y(0)
"movq 8(%1, %%"REG_a",2), %%mm5 \n\t" // Y(8) "movq 8(%1, %%"FF_REG_a",2), %%mm5 \n\t" // Y(8)
"movq %%mm0, %%mm4 \n\t" // Y(0) "movq %%mm0, %%mm4 \n\t" // Y(0)
"movq %%mm2, %%mm6 \n\t" // Y(8) "movq %%mm2, %%mm6 \n\t" // Y(8)
"punpcklbw %%mm3, %%mm0 \n\t" // YUYV YUYV(0) "punpcklbw %%mm3, %%mm0 \n\t" // YUYV YUYV(0)
...@@ -1266,16 +1266,16 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u ...@@ -1266,16 +1266,16 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u
"punpcklbw %%mm5, %%mm2 \n\t" // YUYV YUYV(8) "punpcklbw %%mm5, %%mm2 \n\t" // YUYV YUYV(8)
"punpckhbw %%mm5, %%mm6 \n\t" // YUYV YUYV(12) "punpckhbw %%mm5, %%mm6 \n\t" // YUYV YUYV(12)
MOVNTQ" %%mm0, (%0, %%"REG_a", 4) \n\t" MOVNTQ" %%mm0, (%0, %%"FF_REG_a", 4) \n\t"
MOVNTQ" %%mm4, 8(%0, %%"REG_a", 4) \n\t" MOVNTQ" %%mm4, 8(%0, %%"FF_REG_a", 4) \n\t"
MOVNTQ" %%mm2, 16(%0, %%"REG_a", 4) \n\t" MOVNTQ" %%mm2, 16(%0, %%"FF_REG_a", 4) \n\t"
MOVNTQ" %%mm6, 24(%0, %%"REG_a", 4) \n\t" MOVNTQ" %%mm6, 24(%0, %%"FF_REG_a", 4) \n\t"
"add $8, %%"REG_a" \n\t" "add $8, %%"FF_REG_a" \n\t"
"cmp %4, %%"REG_a" \n\t" "cmp %4, %%"FF_REG_a" \n\t"
" jb 1b \n\t" " jb 1b \n\t"
::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth) ::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
: "%"REG_a : "%"FF_REG_a
); );
if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) { if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
usrc += chromStride; usrc += chromStride;
...@@ -1333,14 +1333,14 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t ...@@ -1333,14 +1333,14 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
const x86_reg chromWidth= width>>1; const x86_reg chromWidth= width>>1;
for (y=0; y<height; y+=2) { for (y=0; y<height; y+=2) {
__asm__ volatile( __asm__ volatile(
"xor %%"REG_a", %%"REG_a" \n\t" "xor %%"FF_REG_a", %%"FF_REG_a"\n\t"
"pcmpeqw %%mm7, %%mm7 \n\t" "pcmpeqw %%mm7, %%mm7 \n\t"
"psrlw $8, %%mm7 \n\t" // FF,00,FF,00... "psrlw $8, %%mm7 \n\t" // FF,00,FF,00...
".p2align 4 \n\t" ".p2align 4 \n\t"
"1: \n\t" "1: \n\t"
PREFETCH" 64(%0, %%"REG_a", 4) \n\t" PREFETCH" 64(%0, %%"FF_REG_a", 4) \n\t"
"movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) "movq (%0, %%"FF_REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0)
"movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4) "movq 8(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4)
"movq %%mm0, %%mm2 \n\t" // YUYV YUYV(0) "movq %%mm0, %%mm2 \n\t" // YUYV YUYV(0)
"movq %%mm1, %%mm3 \n\t" // YUYV YUYV(4) "movq %%mm1, %%mm3 \n\t" // YUYV YUYV(4)
"psrlw $8, %%mm0 \n\t" // U0V0 U0V0(0) "psrlw $8, %%mm0 \n\t" // U0V0 U0V0(0)
...@@ -1350,10 +1350,10 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t ...@@ -1350,10 +1350,10 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
"packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0) "packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
"packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0) "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0)
MOVNTQ" %%mm2, (%1, %%"REG_a", 2) \n\t" MOVNTQ" %%mm2, (%1, %%"FF_REG_a", 2)\n\t"
"movq 16(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(8) "movq 16(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // YUYV YUYV(8)
"movq 24(%0, %%"REG_a", 4), %%mm2 \n\t" // YUYV YUYV(12) "movq 24(%0, %%"FF_REG_a", 4), %%mm2 \n\t" // YUYV YUYV(12)
"movq %%mm1, %%mm3 \n\t" // YUYV YUYV(8) "movq %%mm1, %%mm3 \n\t" // YUYV YUYV(8)
"movq %%mm2, %%mm4 \n\t" // YUYV YUYV(12) "movq %%mm2, %%mm4 \n\t" // YUYV YUYV(12)
"psrlw $8, %%mm1 \n\t" // U0V0 U0V0(8) "psrlw $8, %%mm1 \n\t" // U0V0 U0V0(8)
...@@ -1363,7 +1363,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t ...@@ -1363,7 +1363,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
"packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8) "packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8)
"packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8) "packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8)
MOVNTQ" %%mm3, 8(%1, %%"REG_a", 2) \n\t" MOVNTQ" %%mm3, 8(%1, %%"FF_REG_a", 2) \n\t"
"movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0) "movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0)
"movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8) "movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8)
...@@ -1374,28 +1374,28 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t ...@@ -1374,28 +1374,28 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
"packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0) "packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0)
"packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0) "packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0)
MOVNTQ" %%mm0, (%3, %%"REG_a") \n\t" MOVNTQ" %%mm0, (%3, %%"FF_REG_a") \n\t"
MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t" MOVNTQ" %%mm2, (%2, %%"FF_REG_a") \n\t"
"add $8, %%"REG_a" \n\t" "add $8, %%"FF_REG_a"\n\t"
"cmp %4, %%"REG_a" \n\t" "cmp %4, %%"FF_REG_a"\n\t"
" jb 1b \n\t" " jb 1b \n\t"
::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
: "memory", "%"REG_a : "memory", "%"FF_REG_a
); );
ydst += lumStride; ydst += lumStride;
src += srcStride; src += srcStride;
__asm__ volatile( __asm__ volatile(
"xor %%"REG_a", %%"REG_a" \n\t" "xor %%"FF_REG_a", %%"FF_REG_a"\n\t"
".p2align 4 \n\t" ".p2align 4 \n\t"
"1: \n\t" "1: \n\t"
PREFETCH" 64(%0, %%"REG_a", 4) \n\t" PREFETCH" 64(%0, %%"FF_REG_a", 4) \n\t"
"movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) "movq (%0, %%"FF_REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0)
"movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4) "movq 8(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4)
"movq 16(%0, %%"REG_a", 4), %%mm2 \n\t" // YUYV YUYV(8) "movq 16(%0, %%"FF_REG_a", 4), %%mm2 \n\t" // YUYV YUYV(8)
"movq 24(%0, %%"REG_a", 4), %%mm3 \n\t" // YUYV YUYV(12) "movq 24(%0, %%"FF_REG_a", 4), %%mm3 \n\t" // YUYV YUYV(12)
"pand %%mm7, %%mm0 \n\t" // Y0Y0 Y0Y0(0) "pand %%mm7, %%mm0 \n\t" // Y0Y0 Y0Y0(0)
"pand %%mm7, %%mm1 \n\t" // Y0Y0 Y0Y0(4) "pand %%mm7, %%mm1 \n\t" // Y0Y0 Y0Y0(4)
"pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(8) "pand %%mm7, %%mm2 \n\t" // Y0Y0 Y0Y0(8)
...@@ -1403,15 +1403,15 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t ...@@ -1403,15 +1403,15 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
"packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0) "packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0)
"packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8) "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8)
MOVNTQ" %%mm0, (%1, %%"REG_a", 2) \n\t" MOVNTQ" %%mm0, (%1, %%"FF_REG_a", 2) \n\t"
MOVNTQ" %%mm2, 8(%1, %%"REG_a", 2) \n\t" MOVNTQ" %%mm2, 8(%1, %%"FF_REG_a", 2) \n\t"
"add $8, %%"REG_a" \n\t" "add $8, %%"FF_REG_a"\n\t"
"cmp %4, %%"REG_a" \n\t" "cmp %4, %%"FF_REG_a"\n\t"
" jb 1b \n\t" " jb 1b \n\t"
::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
: "memory", "%"REG_a : "memory", "%"FF_REG_a
); );
udst += chromStride; udst += chromStride;
vdst += chromStride; vdst += chromStride;
...@@ -1443,23 +1443,23 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWid ...@@ -1443,23 +1443,23 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWid
for (y=1; y<srcHeight; y++) { for (y=1; y<srcHeight; y++) {
const x86_reg mmxSize= srcWidth&~15; const x86_reg mmxSize= srcWidth&~15;
__asm__ volatile( __asm__ volatile(
"mov %4, %%"REG_a" \n\t" "mov %4, %%"FF_REG_a" \n\t"
"movq "MANGLE(mmx_ff)", %%mm0 \n\t" "movq "MANGLE(mmx_ff)", %%mm0 \n\t"
"movq (%0, %%"REG_a"), %%mm4 \n\t" "movq (%0, %%"FF_REG_a"), %%mm4 \n\t"
"movq %%mm4, %%mm2 \n\t" "movq %%mm4, %%mm2 \n\t"
"psllq $8, %%mm4 \n\t" "psllq $8, %%mm4 \n\t"
"pand %%mm0, %%mm2 \n\t" "pand %%mm0, %%mm2 \n\t"
"por %%mm2, %%mm4 \n\t" "por %%mm2, %%mm4 \n\t"
"movq (%1, %%"REG_a"), %%mm5 \n\t" "movq (%1, %%"FF_REG_a"), %%mm5 \n\t"
"movq %%mm5, %%mm3 \n\t" "movq %%mm5, %%mm3 \n\t"
"psllq $8, %%mm5 \n\t" "psllq $8, %%mm5 \n\t"
"pand %%mm0, %%mm3 \n\t" "pand %%mm0, %%mm3 \n\t"
"por %%mm3, %%mm5 \n\t" "por %%mm3, %%mm5 \n\t"
"1: \n\t" "1: \n\t"
"movq (%0, %%"REG_a"), %%mm0 \n\t" "movq (%0, %%"FF_REG_a"), %%mm0 \n\t"
"movq (%1, %%"REG_a"), %%mm1 \n\t" "movq (%1, %%"FF_REG_a"), %%mm1 \n\t"
"movq 1(%0, %%"REG_a"), %%mm2 \n\t" "movq 1(%0, %%"FF_REG_a"), %%mm2 \n\t"
"movq 1(%1, %%"REG_a"), %%mm3 \n\t" "movq 1(%1, %%"FF_REG_a"), %%mm3 \n\t"
PAVGB" %%mm0, %%mm5 \n\t" PAVGB" %%mm0, %%mm5 \n\t"
PAVGB" %%mm0, %%mm3 \n\t" PAVGB" %%mm0, %%mm3 \n\t"
PAVGB" %%mm0, %%mm5 \n\t" PAVGB" %%mm0, %%mm5 \n\t"
...@@ -1474,18 +1474,18 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWid ...@@ -1474,18 +1474,18 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWid
"punpckhbw %%mm3, %%mm7 \n\t" "punpckhbw %%mm3, %%mm7 \n\t"
"punpcklbw %%mm2, %%mm4 \n\t" "punpcklbw %%mm2, %%mm4 \n\t"
"punpckhbw %%mm2, %%mm6 \n\t" "punpckhbw %%mm2, %%mm6 \n\t"
MOVNTQ" %%mm5, (%2, %%"REG_a", 2) \n\t" MOVNTQ" %%mm5, (%2, %%"FF_REG_a", 2) \n\t"
MOVNTQ" %%mm7, 8(%2, %%"REG_a", 2) \n\t" MOVNTQ" %%mm7, 8(%2, %%"FF_REG_a", 2) \n\t"
MOVNTQ" %%mm4, (%3, %%"REG_a", 2) \n\t" MOVNTQ" %%mm4, (%3, %%"FF_REG_a", 2) \n\t"
MOVNTQ" %%mm6, 8(%3, %%"REG_a", 2) \n\t" MOVNTQ" %%mm6, 8(%3, %%"FF_REG_a", 2) \n\t"
"add $8, %%"REG_a" \n\t" "add $8, %%"FF_REG_a" \n\t"
"movq -1(%0, %%"REG_a"), %%mm4 \n\t" "movq -1(%0, %%"FF_REG_a"), %%mm4 \n\t"
"movq -1(%1, %%"REG_a"), %%mm5 \n\t" "movq -1(%1, %%"FF_REG_a"), %%mm5 \n\t"
" js 1b \n\t" " js 1b \n\t"
:: "r" (src + mmxSize ), "r" (src + srcStride + mmxSize ), :: "r" (src + mmxSize ), "r" (src + srcStride + mmxSize ),
"r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2), "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2),
"g" (-mmxSize) "g" (-mmxSize)
: "%"REG_a : "%"FF_REG_a
); );
for (x=mmxSize-1; x<srcWidth-1; x++) { for (x=mmxSize-1; x<srcWidth-1; x++) {
...@@ -1531,14 +1531,14 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t ...@@ -1531,14 +1531,14 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
const x86_reg chromWidth= width>>1; const x86_reg chromWidth= width>>1;
for (y=0; y<height; y+=2) { for (y=0; y<height; y+=2) {
__asm__ volatile( __asm__ volatile(
"xor %%"REG_a", %%"REG_a" \n\t" "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"
"pcmpeqw %%mm7, %%mm7 \n\t" "pcmpeqw %%mm7, %%mm7 \n\t"
"psrlw $8, %%mm7 \n\t" // FF,00,FF,00... "psrlw $8, %%mm7 \n\t" // FF,00,FF,00...
".p2align 4 \n\t" ".p2align 4 \n\t"
"1: \n\t" "1: \n\t"
PREFETCH" 64(%0, %%"REG_a", 4) \n\t" PREFETCH" 64(%0, %%"FF_REG_a", 4) \n\t"
"movq (%0, %%"REG_a", 4), %%mm0 \n\t" // UYVY UYVY(0) "movq (%0, %%"FF_REG_a", 4), %%mm0 \n\t" // UYVY UYVY(0)
"movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // UYVY UYVY(4) "movq 8(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // UYVY UYVY(4)
"movq %%mm0, %%mm2 \n\t" // UYVY UYVY(0) "movq %%mm0, %%mm2 \n\t" // UYVY UYVY(0)
"movq %%mm1, %%mm3 \n\t" // UYVY UYVY(4) "movq %%mm1, %%mm3 \n\t" // UYVY UYVY(4)
"pand %%mm7, %%mm0 \n\t" // U0V0 U0V0(0) "pand %%mm7, %%mm0 \n\t" // U0V0 U0V0(0)
...@@ -1548,10 +1548,10 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t ...@@ -1548,10 +1548,10 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
"packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0) "packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
"packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0) "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0)
MOVNTQ" %%mm2, (%1, %%"REG_a", 2) \n\t" MOVNTQ" %%mm2, (%1, %%"FF_REG_a", 2) \n\t"
"movq 16(%0, %%"REG_a", 4), %%mm1 \n\t" // UYVY UYVY(8) "movq 16(%0, %%"FF_REG_a", 4), %%mm1\n\t" // UYVY UYVY(8)
"movq 24(%0, %%"REG_a", 4), %%mm2 \n\t" // UYVY UYVY(12) "movq 24(%0, %%"FF_REG_a", 4), %%mm2\n\t" // UYVY UYVY(12)
"movq %%mm1, %%mm3 \n\t" // UYVY UYVY(8) "movq %%mm1, %%mm3 \n\t" // UYVY UYVY(8)
"movq %%mm2, %%mm4 \n\t" // UYVY UYVY(12) "movq %%mm2, %%mm4 \n\t" // UYVY UYVY(12)
"pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(8) "pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(8)
...@@ -1561,7 +1561,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t ...@@ -1561,7 +1561,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
"packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8) "packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8)
"packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8) "packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8)
MOVNTQ" %%mm3, 8(%1, %%"REG_a", 2) \n\t" MOVNTQ" %%mm3, 8(%1, %%"FF_REG_a", 2) \n\t"
"movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0) "movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0)
"movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8) "movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8)
...@@ -1572,28 +1572,28 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t ...@@ -1572,28 +1572,28 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
"packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0) "packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0)
"packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0) "packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0)
MOVNTQ" %%mm0, (%3, %%"REG_a") \n\t" MOVNTQ" %%mm0, (%3, %%"FF_REG_a") \n\t"
MOVNTQ" %%mm2, (%2, %%"REG_a") \n\t" MOVNTQ" %%mm2, (%2, %%"FF_REG_a") \n\t"
"add $8, %%"REG_a" \n\t" "add $8, %%"FF_REG_a" \n\t"
"cmp %4, %%"REG_a" \n\t" "cmp %4, %%"FF_REG_a" \n\t"
" jb 1b \n\t" " jb 1b \n\t"
::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
: "memory", "%"REG_a : "memory", "%"FF_REG_a
); );
ydst += lumStride; ydst += lumStride;
src += srcStride; src += srcStride;
__asm__ volatile( __asm__ volatile(
"xor %%"REG_a", %%"REG_a" \n\t" "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"
".p2align 4 \n\t" ".p2align 4 \n\t"
"1: \n\t" "1: \n\t"
PREFETCH" 64(%0, %%"REG_a", 4) \n\t" PREFETCH" 64(%0, %%"FF_REG_a", 4) \n\t"
"movq (%0, %%"REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0) "movq (%0, %%"FF_REG_a", 4), %%mm0 \n\t" // YUYV YUYV(0)
"movq 8(%0, %%"REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4) "movq 8(%0, %%"FF_REG_a", 4), %%mm1 \n\t" // YUYV YUYV(4)
"movq 16(%0, %%"REG_a", 4), %%mm2 \n\t" // YUYV YUYV(8) "movq 16(%0, %%"FF_REG_a", 4), %%mm2 \n\t" // YUYV YUYV(8)
"movq 24(%0, %%"REG_a", 4), %%mm3 \n\t" // YUYV YUYV(12) "movq 24(%0, %%"FF_REG_a", 4), %%mm3 \n\t" // YUYV YUYV(12)
"psrlw $8, %%mm0 \n\t" // Y0Y0 Y0Y0(0) "psrlw $8, %%mm0 \n\t" // Y0Y0 Y0Y0(0)
"psrlw $8, %%mm1 \n\t" // Y0Y0 Y0Y0(4) "psrlw $8, %%mm1 \n\t" // Y0Y0 Y0Y0(4)
"psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(8) "psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(8)
...@@ -1601,15 +1601,15 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t ...@@ -1601,15 +1601,15 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
"packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0) "packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0)
"packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8) "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8)
MOVNTQ" %%mm0, (%1, %%"REG_a", 2) \n\t" MOVNTQ" %%mm0, (%1, %%"FF_REG_a", 2) \n\t"
MOVNTQ" %%mm2, 8(%1, %%"REG_a", 2) \n\t" MOVNTQ" %%mm2, 8(%1, %%"FF_REG_a", 2) \n\t"
"add $8, %%"REG_a" \n\t" "add $8, %%"FF_REG_a" \n\t"
"cmp %4, %%"REG_a" \n\t" "cmp %4, %%"FF_REG_a" \n\t"
" jb 1b \n\t" " jb 1b \n\t"
::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
: "memory", "%"REG_a : "memory", "%"FF_REG_a
); );
udst += chromStride; udst += chromStride;
vdst += chromStride; vdst += chromStride;
...@@ -1639,20 +1639,20 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ ...@@ -1639,20 +1639,20 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
int i; int i;
for (i=0; i<2; i++) { for (i=0; i<2; i++) {
__asm__ volatile( __asm__ volatile(
"mov %2, %%"REG_a" \n\t" "mov %2, %%"FF_REG_a"\n\t"
"movq "MANGLE(ff_bgr2YCoeff)", %%mm6 \n\t" "movq "MANGLE(ff_bgr2YCoeff)", %%mm6 \n\t"
"movq "MANGLE(ff_w1111)", %%mm5 \n\t" "movq "MANGLE(ff_w1111)", %%mm5 \n\t"
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
"lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" "lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_d"\n\t"
".p2align 4 \n\t" ".p2align 4 \n\t"
"1: \n\t" "1: \n\t"
PREFETCH" 64(%0, %%"REG_d") \n\t" PREFETCH" 64(%0, %%"FF_REG_d") \n\t"
"movd (%0, %%"REG_d"), %%mm0 \n\t" "movd (%0, %%"FF_REG_d"), %%mm0 \n\t"
"movd 3(%0, %%"REG_d"), %%mm1 \n\t" "movd 3(%0, %%"FF_REG_d"), %%mm1 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t" "punpcklbw %%mm7, %%mm0 \n\t"
"punpcklbw %%mm7, %%mm1 \n\t" "punpcklbw %%mm7, %%mm1 \n\t"
"movd 6(%0, %%"REG_d"), %%mm2 \n\t" "movd 6(%0, %%"FF_REG_d"), %%mm2 \n\t"
"movd 9(%0, %%"REG_d"), %%mm3 \n\t" "movd 9(%0, %%"FF_REG_d"), %%mm3 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t" "punpcklbw %%mm7, %%mm2 \n\t"
"punpcklbw %%mm7, %%mm3 \n\t" "punpcklbw %%mm7, %%mm3 \n\t"
"pmaddwd %%mm6, %%mm0 \n\t" "pmaddwd %%mm6, %%mm0 \n\t"
...@@ -1672,12 +1672,12 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ ...@@ -1672,12 +1672,12 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"packssdw %%mm2, %%mm0 \n\t" "packssdw %%mm2, %%mm0 \n\t"
"psraw $7, %%mm0 \n\t" "psraw $7, %%mm0 \n\t"
"movd 12(%0, %%"REG_d"), %%mm4 \n\t" "movd 12(%0, %%"FF_REG_d"), %%mm4 \n\t"
"movd 15(%0, %%"REG_d"), %%mm1 \n\t" "movd 15(%0, %%"FF_REG_d"), %%mm1 \n\t"
"punpcklbw %%mm7, %%mm4 \n\t" "punpcklbw %%mm7, %%mm4 \n\t"
"punpcklbw %%mm7, %%mm1 \n\t" "punpcklbw %%mm7, %%mm1 \n\t"
"movd 18(%0, %%"REG_d"), %%mm2 \n\t" "movd 18(%0, %%"FF_REG_d"), %%mm2 \n\t"
"movd 21(%0, %%"REG_d"), %%mm3 \n\t" "movd 21(%0, %%"FF_REG_d"), %%mm3 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t" "punpcklbw %%mm7, %%mm2 \n\t"
"punpcklbw %%mm7, %%mm3 \n\t" "punpcklbw %%mm7, %%mm3 \n\t"
"pmaddwd %%mm6, %%mm4 \n\t" "pmaddwd %%mm6, %%mm4 \n\t"
...@@ -1694,39 +1694,39 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ ...@@ -1694,39 +1694,39 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"packssdw %%mm3, %%mm2 \n\t" "packssdw %%mm3, %%mm2 \n\t"
"pmaddwd %%mm5, %%mm4 \n\t" "pmaddwd %%mm5, %%mm4 \n\t"
"pmaddwd %%mm5, %%mm2 \n\t" "pmaddwd %%mm5, %%mm2 \n\t"
"add $24, %%"REG_d" \n\t" "add $24, %%"FF_REG_d"\n\t"
"packssdw %%mm2, %%mm4 \n\t" "packssdw %%mm2, %%mm4 \n\t"
"psraw $7, %%mm4 \n\t" "psraw $7, %%mm4 \n\t"
"packuswb %%mm4, %%mm0 \n\t" "packuswb %%mm4, %%mm0 \n\t"
"paddusb "MANGLE(ff_bgr2YOffset)", %%mm0 \n\t" "paddusb "MANGLE(ff_bgr2YOffset)", %%mm0 \n\t"
MOVNTQ" %%mm0, (%1, %%"REG_a") \n\t" MOVNTQ" %%mm0, (%1, %%"FF_REG_a") \n\t"
"add $8, %%"REG_a" \n\t" "add $8, %%"FF_REG_a" \n\t"
" js 1b \n\t" " js 1b \n\t"
: : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width) : : "r" (src+width*3), "r" (ydst+width), "g" ((x86_reg)-width)
: "%"REG_a, "%"REG_d : "%"FF_REG_a, "%"FF_REG_d
); );
ydst += lumStride; ydst += lumStride;
src += srcStride; src += srcStride;
} }
src -= srcStride*2; src -= srcStride*2;
__asm__ volatile( __asm__ volatile(
"mov %4, %%"REG_a" \n\t" "mov %4, %%"FF_REG_a"\n\t"
"movq "MANGLE(ff_w1111)", %%mm5 \n\t" "movq "MANGLE(ff_w1111)", %%mm5 \n\t"
"movq "MANGLE(ff_bgr2UCoeff)", %%mm6 \n\t" "movq "MANGLE(ff_bgr2UCoeff)", %%mm6 \n\t"
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
"lea (%%"REG_a", %%"REG_a", 2), %%"REG_d" \n\t" "lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_d" \n\t"
"add %%"REG_d", %%"REG_d" \n\t" "add %%"FF_REG_d", %%"FF_REG_d"\n\t"
".p2align 4 \n\t" ".p2align 4 \n\t"
"1: \n\t" "1: \n\t"
PREFETCH" 64(%0, %%"REG_d") \n\t" PREFETCH" 64(%0, %%"FF_REG_d") \n\t"
PREFETCH" 64(%1, %%"REG_d") \n\t" PREFETCH" 64(%1, %%"FF_REG_d") \n\t"
#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW #if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
"movq (%0, %%"REG_d"), %%mm0 \n\t" "movq (%0, %%"FF_REG_d"), %%mm0 \n\t"
"movq (%1, %%"REG_d"), %%mm1 \n\t" "movq (%1, %%"FF_REG_d"), %%mm1 \n\t"
"movq 6(%0, %%"REG_d"), %%mm2 \n\t" "movq 6(%0, %%"FF_REG_d"), %%mm2 \n\t"
"movq 6(%1, %%"REG_d"), %%mm3 \n\t" "movq 6(%1, %%"FF_REG_d"), %%mm3 \n\t"
PAVGB" %%mm1, %%mm0 \n\t" PAVGB" %%mm1, %%mm0 \n\t"
PAVGB" %%mm3, %%mm2 \n\t" PAVGB" %%mm3, %%mm2 \n\t"
"movq %%mm0, %%mm1 \n\t" "movq %%mm0, %%mm1 \n\t"
...@@ -1738,10 +1738,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ ...@@ -1738,10 +1738,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"punpcklbw %%mm7, %%mm0 \n\t" "punpcklbw %%mm7, %%mm0 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t" "punpcklbw %%mm7, %%mm2 \n\t"
#else #else
"movd (%0, %%"REG_d"), %%mm0 \n\t" "movd (%0, %%"FF_REG_d"), %%mm0 \n\t"
"movd (%1, %%"REG_d"), %%mm1 \n\t" "movd (%1, %%"FF_REG_d"), %%mm1 \n\t"
"movd 3(%0, %%"REG_d"), %%mm2 \n\t" "movd 3(%0, %%"FF_REG_d"), %%mm2 \n\t"
"movd 3(%1, %%"REG_d"), %%mm3 \n\t" "movd 3(%1, %%"FF_REG_d"), %%mm3 \n\t"
"punpcklbw %%mm7, %%mm0 \n\t" "punpcklbw %%mm7, %%mm0 \n\t"
"punpcklbw %%mm7, %%mm1 \n\t" "punpcklbw %%mm7, %%mm1 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t" "punpcklbw %%mm7, %%mm2 \n\t"
...@@ -1749,10 +1749,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ ...@@ -1749,10 +1749,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"paddw %%mm1, %%mm0 \n\t" "paddw %%mm1, %%mm0 \n\t"
"paddw %%mm3, %%mm2 \n\t" "paddw %%mm3, %%mm2 \n\t"
"paddw %%mm2, %%mm0 \n\t" "paddw %%mm2, %%mm0 \n\t"
"movd 6(%0, %%"REG_d"), %%mm4 \n\t" "movd 6(%0, %%"FF_REG_d"), %%mm4 \n\t"
"movd 6(%1, %%"REG_d"), %%mm1 \n\t" "movd 6(%1, %%"FF_REG_d"), %%mm1 \n\t"
"movd 9(%0, %%"REG_d"), %%mm2 \n\t" "movd 9(%0, %%"FF_REG_d"), %%mm2 \n\t"
"movd 9(%1, %%"REG_d"), %%mm3 \n\t" "movd 9(%1, %%"FF_REG_d"), %%mm3 \n\t"
"punpcklbw %%mm7, %%mm4 \n\t" "punpcklbw %%mm7, %%mm4 \n\t"
"punpcklbw %%mm7, %%mm1 \n\t" "punpcklbw %%mm7, %%mm1 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t" "punpcklbw %%mm7, %%mm2 \n\t"
...@@ -1784,10 +1784,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ ...@@ -1784,10 +1784,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"psraw $7, %%mm0 \n\t" "psraw $7, %%mm0 \n\t"
#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW #if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
"movq 12(%0, %%"REG_d"), %%mm4 \n\t" "movq 12(%0, %%"FF_REG_d"), %%mm4 \n\t"
"movq 12(%1, %%"REG_d"), %%mm1 \n\t" "movq 12(%1, %%"FF_REG_d"), %%mm1 \n\t"
"movq 18(%0, %%"REG_d"), %%mm2 \n\t" "movq 18(%0, %%"FF_REG_d"), %%mm2 \n\t"
"movq 18(%1, %%"REG_d"), %%mm3 \n\t" "movq 18(%1, %%"FF_REG_d"), %%mm3 \n\t"
PAVGB" %%mm1, %%mm4 \n\t" PAVGB" %%mm1, %%mm4 \n\t"
PAVGB" %%mm3, %%mm2 \n\t" PAVGB" %%mm3, %%mm2 \n\t"
"movq %%mm4, %%mm1 \n\t" "movq %%mm4, %%mm1 \n\t"
...@@ -1799,10 +1799,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ ...@@ -1799,10 +1799,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"punpcklbw %%mm7, %%mm4 \n\t" "punpcklbw %%mm7, %%mm4 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t" "punpcklbw %%mm7, %%mm2 \n\t"
#else #else
"movd 12(%0, %%"REG_d"), %%mm4 \n\t" "movd 12(%0, %%"FF_REG_d"), %%mm4 \n\t"
"movd 12(%1, %%"REG_d"), %%mm1 \n\t" "movd 12(%1, %%"FF_REG_d"), %%mm1 \n\t"
"movd 15(%0, %%"REG_d"), %%mm2 \n\t" "movd 15(%0, %%"FF_REG_d"), %%mm2 \n\t"
"movd 15(%1, %%"REG_d"), %%mm3 \n\t" "movd 15(%1, %%"FF_REG_d"), %%mm3 \n\t"
"punpcklbw %%mm7, %%mm4 \n\t" "punpcklbw %%mm7, %%mm4 \n\t"
"punpcklbw %%mm7, %%mm1 \n\t" "punpcklbw %%mm7, %%mm1 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t" "punpcklbw %%mm7, %%mm2 \n\t"
...@@ -1810,10 +1810,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ ...@@ -1810,10 +1810,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"paddw %%mm1, %%mm4 \n\t" "paddw %%mm1, %%mm4 \n\t"
"paddw %%mm3, %%mm2 \n\t" "paddw %%mm3, %%mm2 \n\t"
"paddw %%mm2, %%mm4 \n\t" "paddw %%mm2, %%mm4 \n\t"
"movd 18(%0, %%"REG_d"), %%mm5 \n\t" "movd 18(%0, %%"FF_REG_d"), %%mm5 \n\t"
"movd 18(%1, %%"REG_d"), %%mm1 \n\t" "movd 18(%1, %%"FF_REG_d"), %%mm1 \n\t"
"movd 21(%0, %%"REG_d"), %%mm2 \n\t" "movd 21(%0, %%"FF_REG_d"), %%mm2 \n\t"
"movd 21(%1, %%"REG_d"), %%mm3 \n\t" "movd 21(%1, %%"FF_REG_d"), %%mm3 \n\t"
"punpcklbw %%mm7, %%mm5 \n\t" "punpcklbw %%mm7, %%mm5 \n\t"
"punpcklbw %%mm7, %%mm1 \n\t" "punpcklbw %%mm7, %%mm1 \n\t"
"punpcklbw %%mm7, %%mm2 \n\t" "punpcklbw %%mm7, %%mm2 \n\t"
...@@ -1842,7 +1842,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ ...@@ -1842,7 +1842,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"packssdw %%mm3, %%mm1 \n\t" "packssdw %%mm3, %%mm1 \n\t"
"pmaddwd %%mm5, %%mm4 \n\t" "pmaddwd %%mm5, %%mm4 \n\t"
"pmaddwd %%mm5, %%mm1 \n\t" "pmaddwd %%mm5, %%mm1 \n\t"
"add $24, %%"REG_d" \n\t" "add $24, %%"FF_REG_d"\n\t"
"packssdw %%mm1, %%mm4 \n\t" // V3 V2 U3 U2 "packssdw %%mm1, %%mm4 \n\t" // V3 V2 U3 U2
"psraw $7, %%mm4 \n\t" "psraw $7, %%mm4 \n\t"
...@@ -1851,13 +1851,13 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ ...@@ -1851,13 +1851,13 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"punpckhdq %%mm4, %%mm1 \n\t" "punpckhdq %%mm4, %%mm1 \n\t"
"packsswb %%mm1, %%mm0 \n\t" "packsswb %%mm1, %%mm0 \n\t"
"paddb "MANGLE(ff_bgr2UVOffset)", %%mm0 \n\t" "paddb "MANGLE(ff_bgr2UVOffset)", %%mm0 \n\t"
"movd %%mm0, (%2, %%"REG_a") \n\t" "movd %%mm0, (%2, %%"FF_REG_a") \n\t"
"punpckhdq %%mm0, %%mm0 \n\t" "punpckhdq %%mm0, %%mm0 \n\t"
"movd %%mm0, (%3, %%"REG_a") \n\t" "movd %%mm0, (%3, %%"FF_REG_a") \n\t"
"add $4, %%"REG_a" \n\t" "add $4, %%"FF_REG_a" \n\t"
" js 1b \n\t" " js 1b \n\t"
: : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth) : : "r" (src+chromWidth*6), "r" (src+srcStride+chromWidth*6), "r" (udst+chromWidth), "r" (vdst+chromWidth), "g" (-chromWidth)
: "%"REG_a, "%"REG_d : "%"FF_REG_a, "%"FF_REG_d
); );
udst += chromStride; udst += chromStride;
...@@ -1885,48 +1885,48 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui ...@@ -1885,48 +1885,48 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui
#if COMPILE_TEMPLATE_SSE2 #if COMPILE_TEMPLATE_SSE2
__asm__( __asm__(
"xor %%"REG_a", %%"REG_a" \n\t" "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"
"1: \n\t" "1: \n\t"
PREFETCH" 64(%1, %%"REG_a") \n\t" PREFETCH" 64(%1, %%"FF_REG_a") \n\t"
PREFETCH" 64(%2, %%"REG_a") \n\t" PREFETCH" 64(%2, %%"FF_REG_a") \n\t"
"movdqa (%1, %%"REG_a"), %%xmm0 \n\t" "movdqa (%1, %%"FF_REG_a"), %%xmm0 \n\t"
"movdqa (%1, %%"REG_a"), %%xmm1 \n\t" "movdqa (%1, %%"FF_REG_a"), %%xmm1 \n\t"
"movdqa (%2, %%"REG_a"), %%xmm2 \n\t" "movdqa (%2, %%"FF_REG_a"), %%xmm2 \n\t"
"punpcklbw %%xmm2, %%xmm0 \n\t" "punpcklbw %%xmm2, %%xmm0 \n\t"
"punpckhbw %%xmm2, %%xmm1 \n\t" "punpckhbw %%xmm2, %%xmm1 \n\t"
"movntdq %%xmm0, (%0, %%"REG_a", 2) \n\t" "movntdq %%xmm0, (%0, %%"FF_REG_a", 2) \n\t"
"movntdq %%xmm1, 16(%0, %%"REG_a", 2) \n\t" "movntdq %%xmm1, 16(%0, %%"FF_REG_a", 2) \n\t"
"add $16, %%"REG_a" \n\t" "add $16, %%"FF_REG_a" \n\t"
"cmp %3, %%"REG_a" \n\t" "cmp %3, %%"FF_REG_a" \n\t"
" jb 1b \n\t" " jb 1b \n\t"
::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15) ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15)
: "memory", "%"REG_a"" : "memory", "%"FF_REG_a""
); );
#else #else
__asm__( __asm__(
"xor %%"REG_a", %%"REG_a" \n\t" "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"
"1: \n\t" "1: \n\t"
PREFETCH" 64(%1, %%"REG_a") \n\t" PREFETCH" 64(%1, %%"FF_REG_a") \n\t"
PREFETCH" 64(%2, %%"REG_a") \n\t" PREFETCH" 64(%2, %%"FF_REG_a") \n\t"
"movq (%1, %%"REG_a"), %%mm0 \n\t" "movq (%1, %%"FF_REG_a"), %%mm0 \n\t"
"movq 8(%1, %%"REG_a"), %%mm2 \n\t" "movq 8(%1, %%"FF_REG_a"), %%mm2 \n\t"
"movq %%mm0, %%mm1 \n\t" "movq %%mm0, %%mm1 \n\t"
"movq %%mm2, %%mm3 \n\t" "movq %%mm2, %%mm3 \n\t"
"movq (%2, %%"REG_a"), %%mm4 \n\t" "movq (%2, %%"FF_REG_a"), %%mm4 \n\t"
"movq 8(%2, %%"REG_a"), %%mm5 \n\t" "movq 8(%2, %%"FF_REG_a"), %%mm5 \n\t"
"punpcklbw %%mm4, %%mm0 \n\t" "punpcklbw %%mm4, %%mm0 \n\t"
"punpckhbw %%mm4, %%mm1 \n\t" "punpckhbw %%mm4, %%mm1 \n\t"
"punpcklbw %%mm5, %%mm2 \n\t" "punpcklbw %%mm5, %%mm2 \n\t"
"punpckhbw %%mm5, %%mm3 \n\t" "punpckhbw %%mm5, %%mm3 \n\t"
MOVNTQ" %%mm0, (%0, %%"REG_a", 2) \n\t" MOVNTQ" %%mm0, (%0, %%"FF_REG_a", 2) \n\t"
MOVNTQ" %%mm1, 8(%0, %%"REG_a", 2) \n\t" MOVNTQ" %%mm1, 8(%0, %%"FF_REG_a", 2) \n\t"
MOVNTQ" %%mm2, 16(%0, %%"REG_a", 2) \n\t" MOVNTQ" %%mm2, 16(%0, %%"FF_REG_a", 2) \n\t"
MOVNTQ" %%mm3, 24(%0, %%"REG_a", 2) \n\t" MOVNTQ" %%mm3, 24(%0, %%"FF_REG_a", 2) \n\t"
"add $16, %%"REG_a" \n\t" "add $16, %%"FF_REG_a" \n\t"
"cmp %3, %%"REG_a" \n\t" "cmp %3, %%"FF_REG_a" \n\t"
" jb 1b \n\t" " jb 1b \n\t"
::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15) ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15)
: "memory", "%"REG_a : "memory", "%"FF_REG_a
); );
#endif #endif
for (w= (width&(~15)); w < width; w++) { for (w= (width&(~15)); w < width; w++) {
......
...@@ -42,46 +42,46 @@ ...@@ -42,46 +42,46 @@
#define YSCALEYUV2PACKEDX_UV \ #define YSCALEYUV2PACKEDX_UV \
__asm__ volatile(\ __asm__ volatile(\
"xor %%"REG_a", %%"REG_a" \n\t"\ "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"\
".p2align 4 \n\t"\ ".p2align 4 \n\t"\
"nop \n\t"\ "nop \n\t"\
"1: \n\t"\ "1: \n\t"\
"lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\ "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"FF_REG_d" \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\ "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
"movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\ "movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
"movq %%mm3, %%mm4 \n\t"\ "movq %%mm3, %%mm4 \n\t"\
".p2align 4 \n\t"\ ".p2align 4 \n\t"\
"2: \n\t"\ "2: \n\t"\
"movq 8(%%"REG_d"), %%mm0 \n\t" /* filterCoeff */\ "movq 8(%%"FF_REG_d"), %%mm0 \n\t" /* filterCoeff */\
"movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* UsrcData */\ "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm2 \n\t" /* UsrcData */\
"add %6, %%"REG_S" \n\t" \ "add %6, %%"FF_REG_S" \n\t" \
"movq (%%"REG_S", %%"REG_a"), %%mm5 \n\t" /* VsrcData */\ "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm5 \n\t" /* VsrcData */\
"add $16, %%"REG_d" \n\t"\ "add $16, %%"FF_REG_d" \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\ "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
"pmulhw %%mm0, %%mm2 \n\t"\ "pmulhw %%mm0, %%mm2 \n\t"\
"pmulhw %%mm0, %%mm5 \n\t"\ "pmulhw %%mm0, %%mm5 \n\t"\
"paddw %%mm2, %%mm3 \n\t"\ "paddw %%mm2, %%mm3 \n\t"\
"paddw %%mm5, %%mm4 \n\t"\ "paddw %%mm5, %%mm4 \n\t"\
"test %%"REG_S", %%"REG_S" \n\t"\ "test %%"FF_REG_S", %%"FF_REG_S" \n\t"\
" jnz 2b \n\t"\ " jnz 2b \n\t"\
#define YSCALEYUV2PACKEDX_YA(offset,coeff,src1,src2,dst1,dst2) \ #define YSCALEYUV2PACKEDX_YA(offset,coeff,src1,src2,dst1,dst2) \
"lea "offset"(%0), %%"REG_d" \n\t"\ "lea "offset"(%0), %%"FF_REG_d" \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\ "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
"movq "VROUNDER_OFFSET"(%0), "#dst1" \n\t"\ "movq "VROUNDER_OFFSET"(%0), "#dst1" \n\t"\
"movq "#dst1", "#dst2" \n\t"\ "movq "#dst1", "#dst2" \n\t"\
".p2align 4 \n\t"\ ".p2align 4 \n\t"\
"2: \n\t"\ "2: \n\t"\
"movq 8(%%"REG_d"), "#coeff" \n\t" /* filterCoeff */\ "movq 8(%%"FF_REG_d"), "#coeff" \n\t" /* filterCoeff */\
"movq (%%"REG_S", %%"REG_a", 2), "#src1" \n\t" /* Y1srcData */\ "movq (%%"FF_REG_S", %%"FF_REG_a", 2), "#src1" \n\t" /* Y1srcData */\
"movq 8(%%"REG_S", %%"REG_a", 2), "#src2" \n\t" /* Y2srcData */\ "movq 8(%%"FF_REG_S", %%"FF_REG_a", 2), "#src2" \n\t" /* Y2srcData */\
"add $16, %%"REG_d" \n\t"\ "add $16, %%"FF_REG_d" \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\ "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
"pmulhw "#coeff", "#src1" \n\t"\ "pmulhw "#coeff", "#src1" \n\t"\
"pmulhw "#coeff", "#src2" \n\t"\ "pmulhw "#coeff", "#src2" \n\t"\
"paddw "#src1", "#dst1" \n\t"\ "paddw "#src1", "#dst1" \n\t"\
"paddw "#src2", "#dst2" \n\t"\ "paddw "#src2", "#dst2" \n\t"\
"test %%"REG_S", %%"REG_S" \n\t"\ "test %%"FF_REG_S", %%"FF_REG_S" \n\t"\
" jnz 2b \n\t"\ " jnz 2b \n\t"\
#define YSCALEYUV2PACKEDX \ #define YSCALEYUV2PACKEDX \
...@@ -92,41 +92,41 @@ ...@@ -92,41 +92,41 @@
:: "r" (&c->redDither), \ :: "r" (&c->redDither), \
"m" (dummy), "m" (dummy), "m" (dummy),\ "m" (dummy), "m" (dummy), "m" (dummy),\
"r" (dest), "m" (dstW_reg), "m"(uv_off) \ "r" (dest), "m" (dstW_reg), "m"(uv_off) \
: "%"REG_a, "%"REG_d, "%"REG_S \ : "%"FF_REG_a, "%"FF_REG_d, "%"FF_REG_S \
); );
#define YSCALEYUV2PACKEDX_ACCURATE_UV \ #define YSCALEYUV2PACKEDX_ACCURATE_UV \
__asm__ volatile(\ __asm__ volatile(\
"xor %%"REG_a", %%"REG_a" \n\t"\ "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"\
".p2align 4 \n\t"\ ".p2align 4 \n\t"\
"nop \n\t"\ "nop \n\t"\
"1: \n\t"\ "1: \n\t"\
"lea "CHR_MMX_FILTER_OFFSET"(%0), %%"REG_d" \n\t"\ "lea "CHR_MMX_FILTER_OFFSET"(%0), %%"FF_REG_d" \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\ "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
"pxor %%mm4, %%mm4 \n\t"\ "pxor %%mm4, %%mm4 \n\t"\
"pxor %%mm5, %%mm5 \n\t"\ "pxor %%mm5, %%mm5 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\ "pxor %%mm6, %%mm6 \n\t"\
"pxor %%mm7, %%mm7 \n\t"\ "pxor %%mm7, %%mm7 \n\t"\
".p2align 4 \n\t"\ ".p2align 4 \n\t"\
"2: \n\t"\ "2: \n\t"\
"movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t" /* UsrcData */\ "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm0 \n\t" /* UsrcData */\
"add %6, %%"REG_S" \n\t" \ "add %6, %%"FF_REG_S" \n\t" \
"movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t" /* VsrcData */\ "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm2 \n\t" /* VsrcData */\
"mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\ "mov "STR(APCK_PTR2)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
"movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t" /* UsrcData */\ "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm1 \n\t" /* UsrcData */\
"movq %%mm0, %%mm3 \n\t"\ "movq %%mm0, %%mm3 \n\t"\
"punpcklwd %%mm1, %%mm0 \n\t"\ "punpcklwd %%mm1, %%mm0 \n\t"\
"punpckhwd %%mm1, %%mm3 \n\t"\ "punpckhwd %%mm1, %%mm3 \n\t"\
"movq "STR(APCK_COEF)"(%%"REG_d"),%%mm1 \n\t" /* filterCoeff */\ "movq "STR(APCK_COEF)"(%%"FF_REG_d"),%%mm1 \n\t" /* filterCoeff */\
"pmaddwd %%mm1, %%mm0 \n\t"\ "pmaddwd %%mm1, %%mm0 \n\t"\
"pmaddwd %%mm1, %%mm3 \n\t"\ "pmaddwd %%mm1, %%mm3 \n\t"\
"paddd %%mm0, %%mm4 \n\t"\ "paddd %%mm0, %%mm4 \n\t"\
"paddd %%mm3, %%mm5 \n\t"\ "paddd %%mm3, %%mm5 \n\t"\
"add %6, %%"REG_S" \n\t" \ "add %6, %%"FF_REG_S" \n\t"\
"movq (%%"REG_S", %%"REG_a"), %%mm3 \n\t" /* VsrcData */\ "movq (%%"FF_REG_S", %%"FF_REG_a"), %%mm3 \n\t" /* VsrcData */\
"mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\ "mov "STR(APCK_SIZE)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
"add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\ "add $"STR(APCK_SIZE)", %%"FF_REG_d" \n\t"\
"test %%"REG_S", %%"REG_S" \n\t"\ "test %%"FF_REG_S", %%"FF_REG_S" \n\t"\
"movq %%mm2, %%mm0 \n\t"\ "movq %%mm2, %%mm0 \n\t"\
"punpcklwd %%mm3, %%mm2 \n\t"\ "punpcklwd %%mm3, %%mm2 \n\t"\
"punpckhwd %%mm3, %%mm0 \n\t"\ "punpckhwd %%mm3, %%mm0 \n\t"\
...@@ -148,30 +148,30 @@ ...@@ -148,30 +148,30 @@
"movq %%mm6, "V_TEMP"(%0) \n\t"\ "movq %%mm6, "V_TEMP"(%0) \n\t"\
#define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) \ #define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) \
"lea "offset"(%0), %%"REG_d" \n\t"\ "lea "offset"(%0), %%"FF_REG_d" \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\ "mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
"pxor %%mm1, %%mm1 \n\t"\ "pxor %%mm1, %%mm1 \n\t"\
"pxor %%mm5, %%mm5 \n\t"\ "pxor %%mm5, %%mm5 \n\t"\
"pxor %%mm7, %%mm7 \n\t"\ "pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\ "pxor %%mm6, %%mm6 \n\t"\
".p2align 4 \n\t"\ ".p2align 4 \n\t"\
"2: \n\t"\ "2: \n\t"\
"movq (%%"REG_S", %%"REG_a", 2), %%mm0 \n\t" /* Y1srcData */\ "movq (%%"FF_REG_S", %%"FF_REG_a", 2), %%mm0 \n\t" /* Y1srcData */\
"movq 8(%%"REG_S", %%"REG_a", 2), %%mm2 \n\t" /* Y2srcData */\ "movq 8(%%"FF_REG_S", %%"FF_REG_a", 2), %%mm2 \n\t" /* Y2srcData */\
"mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\ "mov "STR(APCK_PTR2)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
"movq (%%"REG_S", %%"REG_a", 2), %%mm4 \n\t" /* Y1srcData */\ "movq (%%"FF_REG_S", %%"FF_REG_a", 2), %%mm4 \n\t" /* Y1srcData */\
"movq %%mm0, %%mm3 \n\t"\ "movq %%mm0, %%mm3 \n\t"\
"punpcklwd %%mm4, %%mm0 \n\t"\ "punpcklwd %%mm4, %%mm0 \n\t"\
"punpckhwd %%mm4, %%mm3 \n\t"\ "punpckhwd %%mm4, %%mm3 \n\t"\
"movq "STR(APCK_COEF)"(%%"REG_d"), %%mm4 \n\t" /* filterCoeff */\ "movq "STR(APCK_COEF)"(%%"FF_REG_d"), %%mm4 \n\t" /* filterCoeff */\
"pmaddwd %%mm4, %%mm0 \n\t"\ "pmaddwd %%mm4, %%mm0 \n\t"\
"pmaddwd %%mm4, %%mm3 \n\t"\ "pmaddwd %%mm4, %%mm3 \n\t"\
"paddd %%mm0, %%mm1 \n\t"\ "paddd %%mm0, %%mm1 \n\t"\
"paddd %%mm3, %%mm5 \n\t"\ "paddd %%mm3, %%mm5 \n\t"\
"movq 8(%%"REG_S", %%"REG_a", 2), %%mm3 \n\t" /* Y2srcData */\ "movq 8(%%"FF_REG_S", %%"FF_REG_a", 2), %%mm3 \n\t" /* Y2srcData */\
"mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\ "mov "STR(APCK_SIZE)"(%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
"add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\ "add $"STR(APCK_SIZE)", %%"FF_REG_d" \n\t"\
"test %%"REG_S", %%"REG_S" \n\t"\ "test %%"FF_REG_S", %%"FF_REG_S" \n\t"\
"movq %%mm2, %%mm0 \n\t"\ "movq %%mm2, %%mm0 \n\t"\
"punpcklwd %%mm3, %%mm2 \n\t"\ "punpcklwd %%mm3, %%mm2 \n\t"\
"punpckhwd %%mm3, %%mm0 \n\t"\ "punpckhwd %%mm3, %%mm0 \n\t"\
...@@ -278,13 +278,13 @@ static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter, ...@@ -278,13 +278,13 @@ static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter,
"psraw $3, %%mm1 \n\t" "psraw $3, %%mm1 \n\t"
"psraw $3, %%mm7 \n\t" "psraw $3, %%mm7 \n\t"
"packuswb %%mm7, %%mm1 \n\t" "packuswb %%mm7, %%mm1 \n\t"
WRITEBGR32(%4, %5, %%REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6) WRITEBGR32(%4, %5, %%FF_REGa, %%mm3, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm2, %%mm6)
YSCALEYUV2PACKEDX_END YSCALEYUV2PACKEDX_END
} else { } else {
YSCALEYUV2PACKEDX_ACCURATE YSCALEYUV2PACKEDX_ACCURATE
YSCALEYUV2RGBX YSCALEYUV2RGBX
"pcmpeqd %%mm7, %%mm7 \n\t" "pcmpeqd %%mm7, %%mm7 \n\t"
WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) WRITEBGR32(%4, %5, %%FF_REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
YSCALEYUV2PACKEDX_END YSCALEYUV2PACKEDX_END
} }
} }
...@@ -307,13 +307,13 @@ static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter, ...@@ -307,13 +307,13 @@ static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
"psraw $3, %%mm1 \n\t" "psraw $3, %%mm1 \n\t"
"psraw $3, %%mm7 \n\t" "psraw $3, %%mm7 \n\t"
"packuswb %%mm7, %%mm1 \n\t" "packuswb %%mm7, %%mm1 \n\t"
WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6) WRITEBGR32(%4, %5, %%FF_REGa, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
YSCALEYUV2PACKEDX_END YSCALEYUV2PACKEDX_END
} else { } else {
YSCALEYUV2PACKEDX YSCALEYUV2PACKEDX
YSCALEYUV2RGBX YSCALEYUV2RGBX
"pcmpeqd %%mm7, %%mm7 \n\t" "pcmpeqd %%mm7, %%mm7 \n\t"
WRITEBGR32(%4, %5, %%REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) WRITEBGR32(%4, %5, %%FF_REGa, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
YSCALEYUV2PACKEDX_END YSCALEYUV2PACKEDX_END
} }
} }
...@@ -366,7 +366,7 @@ static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter, ...@@ -366,7 +366,7 @@ static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter,
"paddusb "GREEN_DITHER"(%0), %%mm4\n\t" "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
"paddusb "RED_DITHER"(%0), %%mm5\n\t" "paddusb "RED_DITHER"(%0), %%mm5\n\t"
#endif #endif
WRITERGB16(%4, %5, %%REGa) WRITERGB16(%4, %5, %%FF_REGa)
YSCALEYUV2PACKEDX_END YSCALEYUV2PACKEDX_END
} }
...@@ -390,7 +390,7 @@ static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter, ...@@ -390,7 +390,7 @@ static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
"paddusb "GREEN_DITHER"(%0), %%mm4 \n\t" "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
"paddusb "RED_DITHER"(%0), %%mm5 \n\t" "paddusb "RED_DITHER"(%0), %%mm5 \n\t"
#endif #endif
WRITERGB16(%4, %5, %%REGa) WRITERGB16(%4, %5, %%FF_REGa)
YSCALEYUV2PACKEDX_END YSCALEYUV2PACKEDX_END
} }
...@@ -443,7 +443,7 @@ static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter, ...@@ -443,7 +443,7 @@ static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter,
"paddusb "GREEN_DITHER"(%0), %%mm4\n\t" "paddusb "GREEN_DITHER"(%0), %%mm4\n\t"
"paddusb "RED_DITHER"(%0), %%mm5\n\t" "paddusb "RED_DITHER"(%0), %%mm5\n\t"
#endif #endif
WRITERGB15(%4, %5, %%REGa) WRITERGB15(%4, %5, %%FF_REGa)
YSCALEYUV2PACKEDX_END YSCALEYUV2PACKEDX_END
} }
...@@ -467,7 +467,7 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter, ...@@ -467,7 +467,7 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
"paddusb "GREEN_DITHER"(%0), %%mm4 \n\t" "paddusb "GREEN_DITHER"(%0), %%mm4 \n\t"
"paddusb "RED_DITHER"(%0), %%mm5 \n\t" "paddusb "RED_DITHER"(%0), %%mm5 \n\t"
#endif #endif
WRITERGB15(%4, %5, %%REGa) WRITERGB15(%4, %5, %%FF_REGa)
YSCALEYUV2PACKEDX_END YSCALEYUV2PACKEDX_END
} }
...@@ -593,14 +593,14 @@ static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter, ...@@ -593,14 +593,14 @@ static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
YSCALEYUV2PACKEDX_ACCURATE YSCALEYUV2PACKEDX_ACCURATE
YSCALEYUV2RGBX YSCALEYUV2RGBX
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
"lea (%%"REG_a", %%"REG_a", 2), %%"REG_c"\n\t" //FIXME optimize "lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_c" \n\t" // FIXME optimize
"add %4, %%"REG_c" \n\t" "add %4, %%"FF_REG_c" \n\t"
WRITEBGR24(%%REGc, %5, %%REGa) WRITEBGR24(%%FF_REGc, %5, %%FF_REGa)
:: "r" (&c->redDither), :: "r" (&c->redDither),
"m" (dummy), "m" (dummy), "m" (dummy), "m" (dummy), "m" (dummy), "m" (dummy),
"r" (dest), "m" (dstW_reg), "m"(uv_off) "r" (dest), "m" (dstW_reg), "m"(uv_off)
: "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S : "%"FF_REG_a, "%"FF_REG_c, "%"FF_REG_d, "%"FF_REG_S
); );
} }
...@@ -617,14 +617,14 @@ static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter, ...@@ -617,14 +617,14 @@ static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
YSCALEYUV2PACKEDX YSCALEYUV2PACKEDX
YSCALEYUV2RGBX YSCALEYUV2RGBX
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
"lea (%%"REG_a", %%"REG_a", 2), %%"REG_c" \n\t" //FIXME optimize "lea (%%"FF_REG_a", %%"FF_REG_a", 2), %%"FF_REG_c" \n\t" // FIXME optimize
"add %4, %%"REG_c" \n\t" "add %4, %%"FF_REG_c" \n\t"
WRITEBGR24(%%REGc, %5, %%REGa) WRITEBGR24(%%FF_REGc, %5, %%FF_REGa)
:: "r" (&c->redDither), :: "r" (&c->redDither),
"m" (dummy), "m" (dummy), "m" (dummy), "m" (dummy), "m" (dummy), "m" (dummy),
"r" (dest), "m" (dstW_reg), "m"(uv_off) "r" (dest), "m" (dstW_reg), "m"(uv_off)
: "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S : "%"FF_REG_a, "%"FF_REG_c, "%"FF_REG_d, "%"FF_REG_S
); );
} }
...@@ -662,7 +662,7 @@ static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter, ...@@ -662,7 +662,7 @@ static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter,
"psraw $3, %%mm4 \n\t" "psraw $3, %%mm4 \n\t"
"psraw $3, %%mm1 \n\t" "psraw $3, %%mm1 \n\t"
"psraw $3, %%mm7 \n\t" "psraw $3, %%mm7 \n\t"
WRITEYUY2(%4, %5, %%REGa) WRITEYUY2(%4, %5, %%FF_REGa)
YSCALEYUV2PACKEDX_END YSCALEYUV2PACKEDX_END
} }
...@@ -683,7 +683,7 @@ static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter, ...@@ -683,7 +683,7 @@ static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
"psraw $3, %%mm4 \n\t" "psraw $3, %%mm4 \n\t"
"psraw $3, %%mm1 \n\t" "psraw $3, %%mm1 \n\t"
"psraw $3, %%mm7 \n\t" "psraw $3, %%mm7 \n\t"
WRITEYUY2(%4, %5, %%REGa) WRITEYUY2(%4, %5, %%FF_REGa)
YSCALEYUV2PACKEDX_END YSCALEYUV2PACKEDX_END
} }
...@@ -794,37 +794,37 @@ static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2], ...@@ -794,37 +794,37 @@ static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2],
*(const uint16_t **)(&c->u_temp)=abuf0; *(const uint16_t **)(&c->u_temp)=abuf0;
*(const uint16_t **)(&c->v_temp)=abuf1; *(const uint16_t **)(&c->v_temp)=abuf1;
__asm__ volatile( __asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t" "mov %4, %%"FF_REG_b" \n\t"
"push %%"REG_BP" \n\t" "push %%"FF_REG_BP" \n\t"
YSCALEYUV2RGB(%%REGBP, %5) YSCALEYUV2RGB(%%FF_REGBP, %5)
"push %0 \n\t" "push %0 \n\t"
"push %1 \n\t" "push %1 \n\t"
"mov "U_TEMP"(%5), %0 \n\t" "mov "U_TEMP"(%5), %0 \n\t"
"mov "V_TEMP"(%5), %1 \n\t" "mov "V_TEMP"(%5), %1 \n\t"
YSCALEYUV2RGB_YA(%%REGBP, %5, %0, %1) YSCALEYUV2RGB_YA(%%FF_REGBP, %5, %0, %1)
"psraw $3, %%mm1 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/ "psraw $3, %%mm1 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
"psraw $3, %%mm7 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/ "psraw $3, %%mm7 \n\t" /* abuf0[eax] - abuf1[eax] >>7*/
"packuswb %%mm7, %%mm1 \n\t" "packuswb %%mm7, %%mm1 \n\t"
"pop %1 \n\t" "pop %1 \n\t"
"pop %0 \n\t" "pop %0 \n\t"
WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6) WRITEBGR32(%%FF_REGb, 8280(%5), %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm1, %%mm0, %%mm7, %%mm3, %%mm6)
"pop %%"REG_BP" \n\t" "pop %%"FF_REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither) "a" (&c->redDither)
); );
#endif #endif
} else { } else {
__asm__ volatile( __asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t" "mov %4, %%"FF_REG_b" \n\t"
"push %%"REG_BP" \n\t" "push %%"FF_REG_BP" \n\t"
YSCALEYUV2RGB(%%REGBP, %5) YSCALEYUV2RGB(%%FF_REGBP, %5)
"pcmpeqd %%mm7, %%mm7 \n\t" "pcmpeqd %%mm7, %%mm7 \n\t"
WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) WRITEBGR32(%%FF_REGb, 8280(%5), %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
"pop %%"REG_BP" \n\t" "pop %%"FF_REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither) "a" (&c->redDither)
); );
...@@ -841,14 +841,14 @@ static void RENAME(yuv2bgr24_2)(SwsContext *c, const int16_t *buf[2], ...@@ -841,14 +841,14 @@ static void RENAME(yuv2bgr24_2)(SwsContext *c, const int16_t *buf[2],
//Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :( //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
__asm__ volatile( __asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t" "mov %4, %%"FF_REG_b" \n\t"
"push %%"REG_BP" \n\t" "push %%"FF_REG_BP" \n\t"
YSCALEYUV2RGB(%%REGBP, %5) YSCALEYUV2RGB(%%FF_REGBP, %5)
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
WRITEBGR24(%%REGb, 8280(%5), %%REGBP) WRITEBGR24(%%FF_REGb, 8280(%5), %%FF_REGBP)
"pop %%"REG_BP" \n\t" "pop %%"FF_REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither) "a" (&c->redDither)
); );
...@@ -864,10 +864,10 @@ static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2], ...@@ -864,10 +864,10 @@ static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2],
//Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :( //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
__asm__ volatile( __asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t" "mov %4, %%"FF_REG_b" \n\t"
"push %%"REG_BP" \n\t" "push %%"FF_REG_BP" \n\t"
YSCALEYUV2RGB(%%REGBP, %5) YSCALEYUV2RGB(%%FF_REGBP, %5)
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP #ifdef DITHER1XBPP
...@@ -875,9 +875,9 @@ static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2], ...@@ -875,9 +875,9 @@ static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2],
"paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
"paddusb "RED_DITHER"(%5), %%mm5 \n\t" "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
#endif #endif
WRITERGB15(%%REGb, 8280(%5), %%REGBP) WRITERGB15(%%FF_REGb, 8280(%5), %%FF_REGBP)
"pop %%"REG_BP" \n\t" "pop %%"FF_REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither) "a" (&c->redDither)
); );
...@@ -893,10 +893,10 @@ static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2], ...@@ -893,10 +893,10 @@ static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2],
//Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :( //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
__asm__ volatile( __asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t" "mov %4, %%"FF_REG_b" \n\t"
"push %%"REG_BP" \n\t" "push %%"FF_REG_BP" \n\t"
YSCALEYUV2RGB(%%REGBP, %5) YSCALEYUV2RGB(%%FF_REGBP, %5)
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP #ifdef DITHER1XBPP
...@@ -904,9 +904,9 @@ static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2], ...@@ -904,9 +904,9 @@ static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2],
"paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
"paddusb "RED_DITHER"(%5), %%mm5 \n\t" "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
#endif #endif
WRITERGB16(%%REGb, 8280(%5), %%REGBP) WRITERGB16(%%FF_REGb, 8280(%5), %%FF_REGBP)
"pop %%"REG_BP" \n\t" "pop %%"FF_REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither) "a" (&c->redDither)
); );
...@@ -962,13 +962,13 @@ static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2], ...@@ -962,13 +962,13 @@ static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2],
//Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :( //Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
__asm__ volatile( __asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t" "mov %4, %%"FF_REG_b" \n\t"
"push %%"REG_BP" \n\t" "push %%"FF_REG_BP" \n\t"
YSCALEYUV2PACKED(%%REGBP, %5) YSCALEYUV2PACKED(%%FF_REGBP, %5)
WRITEYUY2(%%REGb, 8280(%5), %%REGBP) WRITEYUY2(%%FF_REGb, 8280(%5), %%FF_REGBP)
"pop %%"REG_BP" \n\t" "pop %%"FF_REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither) "a" (&c->redDither)
); );
...@@ -1104,27 +1104,27 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0, ...@@ -1104,27 +1104,27 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
const int16_t *ubuf1 = ubuf[0]; const int16_t *ubuf1 = ubuf[0];
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
__asm__ volatile( __asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t" "mov %4, %%"FF_REG_b" \n\t"
"push %%"REG_BP" \n\t" "push %%"FF_REG_BP" \n\t"
YSCALEYUV2RGB1(%%REGBP, %5) YSCALEYUV2RGB1(%%FF_REGBP, %5)
YSCALEYUV2RGB1_ALPHA(%%REGBP) YSCALEYUV2RGB1_ALPHA(%%FF_REGBP)
WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) WRITEBGR32(%%FF_REGb, 8280(%5), %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
"pop %%"REG_BP" \n\t" "pop %%"FF_REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
:: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest), :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither) "a" (&c->redDither)
); );
} else { } else {
__asm__ volatile( __asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t" "mov %4, %%"FF_REG_b" \n\t"
"push %%"REG_BP" \n\t" "push %%"FF_REG_BP" \n\t"
YSCALEYUV2RGB1(%%REGBP, %5) YSCALEYUV2RGB1(%%FF_REGBP, %5)
"pcmpeqd %%mm7, %%mm7 \n\t" "pcmpeqd %%mm7, %%mm7 \n\t"
WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) WRITEBGR32(%%FF_REGb, 8280(%5), %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
"pop %%"REG_BP" \n\t" "pop %%"FF_REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither) "a" (&c->redDither)
); );
...@@ -1133,27 +1133,27 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0, ...@@ -1133,27 +1133,27 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
const int16_t *ubuf1 = ubuf[1]; const int16_t *ubuf1 = ubuf[1];
if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) { if (CONFIG_SWSCALE_ALPHA && c->alpPixBuf) {
__asm__ volatile( __asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t" "mov %4, %%"FF_REG_b" \n\t"
"push %%"REG_BP" \n\t" "push %%"FF_REG_BP" \n\t"
YSCALEYUV2RGB1b(%%REGBP, %5) YSCALEYUV2RGB1b(%%FF_REGBP, %5)
YSCALEYUV2RGB1_ALPHA(%%REGBP) YSCALEYUV2RGB1_ALPHA(%%FF_REGBP)
WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) WRITEBGR32(%%FF_REGb, 8280(%5), %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
"pop %%"REG_BP" \n\t" "pop %%"FF_REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
:: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest), :: "c" (buf0), "d" (abuf0), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither) "a" (&c->redDither)
); );
} else { } else {
__asm__ volatile( __asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t" "mov %4, %%"FF_REG_b" \n\t"
"push %%"REG_BP" \n\t" "push %%"FF_REG_BP" \n\t"
YSCALEYUV2RGB1b(%%REGBP, %5) YSCALEYUV2RGB1b(%%FF_REGBP, %5)
"pcmpeqd %%mm7, %%mm7 \n\t" "pcmpeqd %%mm7, %%mm7 \n\t"
WRITEBGR32(%%REGb, 8280(%5), %%REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6) WRITEBGR32(%%FF_REGb, 8280(%5), %%FF_REGBP, %%mm2, %%mm4, %%mm5, %%mm7, %%mm0, %%mm1, %%mm3, %%mm6)
"pop %%"REG_BP" \n\t" "pop %%"FF_REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither) "a" (&c->redDither)
); );
...@@ -1172,28 +1172,28 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0, ...@@ -1172,28 +1172,28 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
const int16_t *ubuf1 = ubuf[0]; const int16_t *ubuf1 = ubuf[0];
__asm__ volatile( __asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t" "mov %4, %%"FF_REG_b" \n\t"
"push %%"REG_BP" \n\t" "push %%"FF_REG_BP" \n\t"
YSCALEYUV2RGB1(%%REGBP, %5) YSCALEYUV2RGB1(%%FF_REGBP, %5)
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
WRITEBGR24(%%REGb, 8280(%5), %%REGBP) WRITEBGR24(%%FF_REGb, 8280(%5), %%FF_REGBP)
"pop %%"REG_BP" \n\t" "pop %%"FF_REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither) "a" (&c->redDither)
); );
} else { } else {
const int16_t *ubuf1 = ubuf[1]; const int16_t *ubuf1 = ubuf[1];
__asm__ volatile( __asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t" "mov %4, %%"FF_REG_b" \n\t"
"push %%"REG_BP" \n\t" "push %%"FF_REG_BP" \n\t"
YSCALEYUV2RGB1b(%%REGBP, %5) YSCALEYUV2RGB1b(%%FF_REGBP, %5)
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
WRITEBGR24(%%REGb, 8280(%5), %%REGBP) WRITEBGR24(%%FF_REGb, 8280(%5), %%FF_REGBP)
"pop %%"REG_BP" \n\t" "pop %%"FF_REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither) "a" (&c->redDither)
); );
...@@ -1211,10 +1211,10 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0, ...@@ -1211,10 +1211,10 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
const int16_t *ubuf1 = ubuf[0]; const int16_t *ubuf1 = ubuf[0];
__asm__ volatile( __asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t" "mov %4, %%"FF_REG_b" \n\t"
"push %%"REG_BP" \n\t" "push %%"FF_REG_BP" \n\t"
YSCALEYUV2RGB1(%%REGBP, %5) YSCALEYUV2RGB1(%%FF_REGBP, %5)
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP #ifdef DITHER1XBPP
...@@ -1222,19 +1222,19 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0, ...@@ -1222,19 +1222,19 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
"paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
"paddusb "RED_DITHER"(%5), %%mm5 \n\t" "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
#endif #endif
WRITERGB15(%%REGb, 8280(%5), %%REGBP) WRITERGB15(%%FF_REGb, 8280(%5), %%FF_REGBP)
"pop %%"REG_BP" \n\t" "pop %%"FF_REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither) "a" (&c->redDither)
); );
} else { } else {
const int16_t *ubuf1 = ubuf[1]; const int16_t *ubuf1 = ubuf[1];
__asm__ volatile( __asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t" "mov %4, %%"FF_REG_b" \n\t"
"push %%"REG_BP" \n\t" "push %%"FF_REG_BP" \n\t"
YSCALEYUV2RGB1b(%%REGBP, %5) YSCALEYUV2RGB1b(%%FF_REGBP, %5)
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP #ifdef DITHER1XBPP
...@@ -1242,9 +1242,9 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0, ...@@ -1242,9 +1242,9 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
"paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
"paddusb "RED_DITHER"(%5), %%mm5 \n\t" "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
#endif #endif
WRITERGB15(%%REGb, 8280(%5), %%REGBP) WRITERGB15(%%FF_REGb, 8280(%5), %%FF_REGBP)
"pop %%"REG_BP" \n\t" "pop %%"FF_REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither) "a" (&c->redDither)
); );
...@@ -1262,10 +1262,10 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0, ...@@ -1262,10 +1262,10 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
const int16_t *ubuf1 = ubuf[0]; const int16_t *ubuf1 = ubuf[0];
__asm__ volatile( __asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t" "mov %4, %%"FF_REG_b" \n\t"
"push %%"REG_BP" \n\t" "push %%"FF_REG_BP" \n\t"
YSCALEYUV2RGB1(%%REGBP, %5) YSCALEYUV2RGB1(%%FF_REGBP, %5)
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP #ifdef DITHER1XBPP
...@@ -1273,19 +1273,19 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0, ...@@ -1273,19 +1273,19 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
"paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
"paddusb "RED_DITHER"(%5), %%mm5 \n\t" "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
#endif #endif
WRITERGB16(%%REGb, 8280(%5), %%REGBP) WRITERGB16(%%FF_REGb, 8280(%5), %%FF_REGBP)
"pop %%"REG_BP" \n\t" "pop %%"FF_REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither) "a" (&c->redDither)
); );
} else { } else {
const int16_t *ubuf1 = ubuf[1]; const int16_t *ubuf1 = ubuf[1];
__asm__ volatile( __asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t" "mov %4, %%"FF_REG_b" \n\t"
"push %%"REG_BP" \n\t" "push %%"FF_REG_BP" \n\t"
YSCALEYUV2RGB1b(%%REGBP, %5) YSCALEYUV2RGB1b(%%FF_REGBP, %5)
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP #ifdef DITHER1XBPP
...@@ -1293,9 +1293,9 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0, ...@@ -1293,9 +1293,9 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
"paddusb "GREEN_DITHER"(%5), %%mm4 \n\t" "paddusb "GREEN_DITHER"(%5), %%mm4 \n\t"
"paddusb "RED_DITHER"(%5), %%mm5 \n\t" "paddusb "RED_DITHER"(%5), %%mm5 \n\t"
#endif #endif
WRITERGB16(%%REGb, 8280(%5), %%REGBP) WRITERGB16(%%FF_REGb, 8280(%5), %%FF_REGBP)
"pop %%"REG_BP" \n\t" "pop %%"FF_REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither) "a" (&c->redDither)
); );
...@@ -1350,26 +1350,26 @@ static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0, ...@@ -1350,26 +1350,26 @@ static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster if (uvalpha < 2048) { // note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
const int16_t *ubuf1 = ubuf[0]; const int16_t *ubuf1 = ubuf[0];
__asm__ volatile( __asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t" "mov %4, %%"FF_REG_b" \n\t"
"push %%"REG_BP" \n\t" "push %%"FF_REG_BP" \n\t"
YSCALEYUV2PACKED1(%%REGBP, %5) YSCALEYUV2PACKED1(%%FF_REGBP, %5)
WRITEYUY2(%%REGb, 8280(%5), %%REGBP) WRITEYUY2(%%FF_REGb, 8280(%5), %%FF_REGBP)
"pop %%"REG_BP" \n\t" "pop %%"FF_REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither) "a" (&c->redDither)
); );
} else { } else {
const int16_t *ubuf1 = ubuf[1]; const int16_t *ubuf1 = ubuf[1];
__asm__ volatile( __asm__ volatile(
"mov %%"REG_b", "ESP_OFFSET"(%5) \n\t" "mov %%"FF_REG_b", "ESP_OFFSET"(%5) \n\t"
"mov %4, %%"REG_b" \n\t" "mov %4, %%"FF_REG_b" \n\t"
"push %%"REG_BP" \n\t" "push %%"FF_REG_BP" \n\t"
YSCALEYUV2PACKED1b(%%REGBP, %5) YSCALEYUV2PACKED1b(%%FF_REGBP, %5)
WRITEYUY2(%%REGb, 8280(%5), %%REGBP) WRITEYUY2(%%FF_REGb, 8280(%5), %%FF_REGBP)
"pop %%"REG_BP" \n\t" "pop %%"FF_REG_BP" \n\t"
"mov "ESP_OFFSET"(%5), %%"REG_b" \n\t" "mov "ESP_OFFSET"(%5), %%"FF_REG_b" \n\t"
:: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest), :: "c" (buf0), "d" (buf1), "S" (ubuf0), "D" (ubuf1), "m" (dest),
"a" (&c->redDither) "a" (&c->redDither)
); );
...@@ -1394,43 +1394,43 @@ static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, ...@@ -1394,43 +1394,43 @@ static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
__asm__ volatile( __asm__ volatile(
#if defined(PIC) #if defined(PIC)
"mov %%"REG_b", %5 \n\t" "mov %%"FF_REG_b", %5 \n\t"
#if ARCH_X86_64 #if ARCH_X86_64
"mov -8(%%rsp), %%"REG_a" \n\t" "mov -8(%%rsp), %%"FF_REG_a" \n\t"
"mov %%"REG_a", %6 \n\t" "mov %%"FF_REG_a", %6 \n\t"
#endif #endif
#else #else
#if ARCH_X86_64 #if ARCH_X86_64
"mov -8(%%rsp), %%"REG_a" \n\t" "mov -8(%%rsp), %%"FF_REG_a" \n\t"
"mov %%"REG_a", %5 \n\t" "mov %%"FF_REG_a", %5 \n\t"
#endif #endif
#endif #endif
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
"mov %0, %%"REG_c" \n\t" "mov %0, %%"FF_REG_c" \n\t"
"mov %1, %%"REG_D" \n\t" "mov %1, %%"FF_REG_D" \n\t"
"mov %2, %%"REG_d" \n\t" "mov %2, %%"FF_REG_d" \n\t"
"mov %3, %%"REG_b" \n\t" "mov %3, %%"FF_REG_b" \n\t"
"xor %%"REG_a", %%"REG_a" \n\t" // i "xor %%"FF_REG_a", %%"FF_REG_a" \n\t" // i
PREFETCH" (%%"REG_c") \n\t" PREFETCH" (%%"FF_REG_c") \n\t"
PREFETCH" 32(%%"REG_c") \n\t" PREFETCH" 32(%%"FF_REG_c") \n\t"
PREFETCH" 64(%%"REG_c") \n\t" PREFETCH" 64(%%"FF_REG_c") \n\t"
#if ARCH_X86_64 #if ARCH_X86_64
#define CALL_MMXEXT_FILTER_CODE \ #define CALL_MMXEXT_FILTER_CODE \
"movl (%%"REG_b"), %%esi \n\t"\ "movl (%%"FF_REG_b"), %%esi \n\t"\
"call *%4 \n\t"\ "call *%4 \n\t"\
"movl (%%"REG_b", %%"REG_a"), %%esi \n\t"\ "movl (%%"FF_REG_b", %%"FF_REG_a"), %%esi \n\t"\
"add %%"REG_S", %%"REG_c" \n\t"\ "add %%"FF_REG_S", %%"FF_REG_c" \n\t"\
"add %%"REG_a", %%"REG_D" \n\t"\ "add %%"FF_REG_a", %%"FF_REG_D" \n\t"\
"xor %%"REG_a", %%"REG_a" \n\t"\ "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"\
#else #else
#define CALL_MMXEXT_FILTER_CODE \ #define CALL_MMXEXT_FILTER_CODE \
"movl (%%"REG_b"), %%esi \n\t"\ "movl (%%"FF_REG_b"), %%esi \n\t"\
"call *%4 \n\t"\ "call *%4 \n\t"\
"addl (%%"REG_b", %%"REG_a"), %%"REG_c" \n\t"\ "addl (%%"FF_REG_b", %%"FF_REG_a"), %%"FF_REG_c" \n\t"\
"add %%"REG_a", %%"REG_D" \n\t"\ "add %%"FF_REG_a", %%"FF_REG_D" \n\t"\
"xor %%"REG_a", %%"REG_a" \n\t"\ "xor %%"FF_REG_a", %%"FF_REG_a" \n\t"\
#endif /* ARCH_X86_64 */ #endif /* ARCH_X86_64 */
...@@ -1444,15 +1444,15 @@ static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, ...@@ -1444,15 +1444,15 @@ static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
CALL_MMXEXT_FILTER_CODE CALL_MMXEXT_FILTER_CODE
#if defined(PIC) #if defined(PIC)
"mov %5, %%"REG_b" \n\t" "mov %5, %%"FF_REG_b" \n\t"
#if ARCH_X86_64 #if ARCH_X86_64
"mov %6, %%"REG_a" \n\t" "mov %6, %%"FF_REG_a" \n\t"
"mov %%"REG_a", -8(%%rsp) \n\t" "mov %%"FF_REG_a", -8(%%rsp) \n\t"
#endif #endif
#else #else
#if ARCH_X86_64 #if ARCH_X86_64
"mov %5, %%"REG_a" \n\t" "mov %5, %%"FF_REG_a" \n\t"
"mov %%"REG_a", -8(%%rsp) \n\t" "mov %%"FF_REG_a", -8(%%rsp) \n\t"
#endif #endif
#endif #endif
:: "m" (src), "m" (dst), "m" (filter), "m" (filterPos), :: "m" (src), "m" (dst), "m" (filter), "m" (filterPos),
...@@ -1463,9 +1463,9 @@ static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst, ...@@ -1463,9 +1463,9 @@ static void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
#if ARCH_X86_64 #if ARCH_X86_64
,"m"(retsave) ,"m"(retsave)
#endif #endif
: "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D : "%"FF_REG_a, "%"FF_REG_c, "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_D
#if !defined(PIC) #if !defined(PIC)
,"%"REG_b ,"%"FF_REG_b
#endif #endif
); );
...@@ -1490,37 +1490,37 @@ static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2, ...@@ -1490,37 +1490,37 @@ static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
__asm__ volatile( __asm__ volatile(
#if defined(PIC) #if defined(PIC)
"mov %%"REG_b", %7 \n\t" "mov %%"FF_REG_b", %7 \n\t"
#if ARCH_X86_64 #if ARCH_X86_64
"mov -8(%%rsp), %%"REG_a" \n\t" "mov -8(%%rsp), %%"FF_REG_a" \n\t"
"mov %%"REG_a", %8 \n\t" "mov %%"FF_REG_a", %8 \n\t"
#endif #endif
#else #else
#if ARCH_X86_64 #if ARCH_X86_64
"mov -8(%%rsp), %%"REG_a" \n\t" "mov -8(%%rsp), %%"FF_REG_a" \n\t"
"mov %%"REG_a", %7 \n\t" "mov %%"FF_REG_a", %7 \n\t"
#endif #endif
#endif #endif
"pxor %%mm7, %%mm7 \n\t" "pxor %%mm7, %%mm7 \n\t"
"mov %0, %%"REG_c" \n\t" "mov %0, %%"FF_REG_c" \n\t"
"mov %1, %%"REG_D" \n\t" "mov %1, %%"FF_REG_D" \n\t"
"mov %2, %%"REG_d" \n\t" "mov %2, %%"FF_REG_d" \n\t"
"mov %3, %%"REG_b" \n\t" "mov %3, %%"FF_REG_b" \n\t"
"xor %%"REG_a", %%"REG_a" \n\t" // i "xor %%"FF_REG_a", %%"FF_REG_a" \n\t" // i
PREFETCH" (%%"REG_c") \n\t" PREFETCH" (%%"FF_REG_c") \n\t"
PREFETCH" 32(%%"REG_c") \n\t" PREFETCH" 32(%%"FF_REG_c") \n\t"
PREFETCH" 64(%%"REG_c") \n\t" PREFETCH" 64(%%"FF_REG_c") \n\t"
CALL_MMXEXT_FILTER_CODE CALL_MMXEXT_FILTER_CODE
CALL_MMXEXT_FILTER_CODE CALL_MMXEXT_FILTER_CODE
CALL_MMXEXT_FILTER_CODE CALL_MMXEXT_FILTER_CODE
CALL_MMXEXT_FILTER_CODE CALL_MMXEXT_FILTER_CODE
"xor %%"REG_a", %%"REG_a" \n\t" // i "xor %%"FF_REG_a", %%"FF_REG_a" \n\t" // i
"mov %5, %%"REG_c" \n\t" // src "mov %5, %%"FF_REG_c" \n\t" // src
"mov %6, %%"REG_D" \n\t" // buf2 "mov %6, %%"FF_REG_D" \n\t" // buf2
PREFETCH" (%%"REG_c") \n\t" PREFETCH" (%%"FF_REG_c") \n\t"
PREFETCH" 32(%%"REG_c") \n\t" PREFETCH" 32(%%"FF_REG_c") \n\t"
PREFETCH" 64(%%"REG_c") \n\t" PREFETCH" 64(%%"FF_REG_c") \n\t"
CALL_MMXEXT_FILTER_CODE CALL_MMXEXT_FILTER_CODE
CALL_MMXEXT_FILTER_CODE CALL_MMXEXT_FILTER_CODE
...@@ -1528,15 +1528,15 @@ static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2, ...@@ -1528,15 +1528,15 @@ static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
CALL_MMXEXT_FILTER_CODE CALL_MMXEXT_FILTER_CODE
#if defined(PIC) #if defined(PIC)
"mov %7, %%"REG_b" \n\t" "mov %7, %%"FF_REG_b" \n\t"
#if ARCH_X86_64 #if ARCH_X86_64
"mov %8, %%"REG_a" \n\t" "mov %8, %%"FF_REG_a" \n\t"
"mov %%"REG_a", -8(%%rsp) \n\t" "mov %%"FF_REG_a", -8(%%rsp) \n\t"
#endif #endif
#else #else
#if ARCH_X86_64 #if ARCH_X86_64
"mov %7, %%"REG_a" \n\t" "mov %7, %%"FF_REG_a" \n\t"
"mov %%"REG_a", -8(%%rsp) \n\t" "mov %%"FF_REG_a", -8(%%rsp) \n\t"
#endif #endif
#endif #endif
:: "m" (src1), "m" (dst1), "m" (filter), "m" (filterPos), :: "m" (src1), "m" (dst1), "m" (filter), "m" (filterPos),
...@@ -1547,9 +1547,9 @@ static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2, ...@@ -1547,9 +1547,9 @@ static void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst1, int16_t *dst2,
#if ARCH_X86_64 #if ARCH_X86_64
,"m"(retsave) ,"m"(retsave)
#endif #endif
: "%"REG_a, "%"REG_c, "%"REG_d, "%"REG_S, "%"REG_D : "%"FF_REG_a, "%"FF_REG_c, "%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_D
#if !defined(PIC) #if !defined(PIC)
,"%"REG_b ,"%"FF_REG_b
#endif #endif
); );
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment