Commit 9eb3da2f authored by Matthieu Bouron

asm: FF_-prefix internal macros used in inline assembly

See merge commit '39d6d361'.
parent 39d6d361
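
The hunks below mechanically rename the x86 inline-assembly helper macros (REG_a, REG_c, OPSIZE, PTR_SIZE and friends) to FF_-prefixed names so they cannot collide with identically named macros from system headers (see the Solaris note in the asm.h hunk further down). As a hedged illustration of the pattern being renamed, not code taken from this commit, the sketch below shows how such a macro is spliced into an asm template through C string concatenation; END_CHECK_DEMO is a hypothetical stand-in for the END_CHECK macro in the first hunk:

    /* Illustrative sketch only: FF_REG_c mirrors the asm.h definitions in the
     * diff below; END_CHECK_DEMO itself is hypothetical. */
    #if defined(__x86_64__)            /* stand-in for FFmpeg's ARCH_X86_64 */
    #   define FF_REG_c "rcx"
    #else
    #   define FF_REG_c "ecx"
    #endif

    /* String concatenation expands this to "cmp ... , %%rcx" on x86_64 and to
     * the %%ecx form on x86_32, so one template serves both builds. */
    #define END_CHECK_DEMO(end)            \
        "cmp "end" , %%"FF_REG_c" \n\t"    \
        "jge 1f                   \n\t"
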
@@ -45,7 +45,7 @@
 #define END_CHECK(end) ""
 #else
 #define END_CHECK(end) \
-"cmp "end" , %%"REG_c" \n\t"\
+"cmp "end" , %%"FF_REG_c" \n\t"\
 "jge 1f \n\t"
 #endif
@@ -92,11 +92,11 @@
 "mov "tmpbyte" , "statep" \n\t"\
 "test "lowword" , "lowword" \n\t"\
 "jnz 2f \n\t"\
-"mov "byte" , %%"REG_c" \n\t"\
+"mov "byte" , %%"FF_REG_c" \n\t"\
 END_CHECK(end)\
-"add"OPSIZE" $2 , "byte" \n\t"\
+"add"FF_OPSIZE" $2 , "byte" \n\t"\
 "1: \n\t"\
-"movzwl (%%"REG_c") , "tmp" \n\t"\
+"movzwl (%%"FF_REG_c") , "tmp" \n\t"\
 "lea -1("low") , %%ecx \n\t"\
 "xor "low" , %%ecx \n\t"\
 "shr $15 , %%ecx \n\t"\
@@ -153,11 +153,11 @@
 "mov "tmpbyte" , "statep" \n\t"\
 "test "lowword" , "lowword" \n\t"\
 " jnz 2f \n\t"\
-"mov "byte" , %%"REG_c" \n\t"\
+"mov "byte" , %%"FF_REG_c" \n\t"\
 END_CHECK(end)\
-"add"OPSIZE" $2 , "byte" \n\t"\
+"add"FF_OPSIZE" $2 , "byte" \n\t"\
 "1: \n\t"\
-"movzwl (%%"REG_c") , "tmp" \n\t"\
+"movzwl (%%"FF_REG_c") , "tmp" \n\t"\
 "lea -1("low") , %%ecx \n\t"\
 "xor "low" , %%ecx \n\t"\
 "shr $15 , %%ecx \n\t"\
@@ -203,7 +203,7 @@ static av_always_inline int get_cabac_inline_x86(CABACContext *c,
 "i"(offsetof(CABACContext, bytestream_end))
 TABLES_ARG
 ,"1"(c->low), "2"(c->range)
-: "%"REG_c, "memory"
+: "%"FF_REG_c, "memory"
 );
 return bit & 1;
 }
@@ -240,7 +240,7 @@ static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val)
 "addl %%edx, %%eax \n\t"
 "cmp %c5(%2), %1 \n\t"
 "jge 1f \n\t"
-"add"OPSIZE" $2, %c4(%2) \n\t"
+"add"FF_OPSIZE" $2, %c4(%2) \n\t"
 #endif
 "1: \n\t"
 "movl %%eax, %c3(%2) \n\t"
@@ -281,7 +281,7 @@ static av_always_inline int get_cabac_bypass_x86(CABACContext *c)
 "addl %%ecx, %%eax \n\t"
 "cmp %c5(%2), %1 \n\t"
 "jge 1f \n\t"
-"add"OPSIZE" $2, %c4(%2) \n\t"
+"add"FF_OPSIZE" $2, %c4(%2) \n\t"
 "1: \n\t"
 "movl %%eax, %c3(%2) \n\t"
...
@@ -91,13 +91,13 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
 "sub %10, %1 \n\t"
 "mov %2, %0 \n\t"
 "movl %7, %%ecx \n\t"
-"add %1, %%"REG_c" \n\t"
+"add %1, %%"FF_REG_c" \n\t"
 "movl %%ecx, (%0) \n\t"
 "test $1, %4 \n\t"
 " jnz 5f \n\t"
-"add"OPSIZE" $4, %2 \n\t"
+"add"FF_OPSIZE" $4, %2 \n\t"
 "4: \n\t"
 "add $1, %1 \n\t"
@@ -105,7 +105,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
 " jb 3b \n\t"
 "mov %2, %0 \n\t"
 "movl %7, %%ecx \n\t"
-"add %1, %%"REG_c" \n\t"
+"add %1, %%"FF_REG_c" \n\t"
 "movl %%ecx, (%0) \n\t"
 "5: \n\t"
 "add %9, %k0 \n\t"
@@ -116,7 +116,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
 "i"(offsetof(CABACContext, bytestream)),
 "i"(offsetof(CABACContext, bytestream_end))
 TABLES_ARG
-: "%"REG_c, "memory"
+: "%"FF_REG_c, "memory"
 );
 return coeff_count;
 }
@@ -183,7 +183,7 @@ static int decode_significance_8x8_x86(CABACContext *c,
 "test $1, %4 \n\t"
 " jnz 5f \n\t"
-"add"OPSIZE" $4, %2 \n\t"
+"add"FF_OPSIZE" $4, %2 \n\t"
 "4: \n\t"
 "add $1, %6 \n\t"
@@ -202,7 +202,7 @@ static int decode_significance_8x8_x86(CABACContext *c,
 "i"(offsetof(CABACContext, bytestream)),
 "i"(offsetof(CABACContext, bytestream_end)),
 "i"(H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET) TABLES_ARG
-: "%"REG_c, "memory"
+: "%"FF_REG_c, "memory"
 );
 return coeff_count;
 }
...
@@ -32,7 +32,7 @@ av_unused static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels
 {
 MOVQ_BFE(mm6);
 __asm__ volatile(
-"lea (%3, %3), %%"REG_a" \n\t"
+"lea (%3, %3), %%"FF_REG_a" \n\t"
 ".p2align 3 \n\t"
 "1: \n\t"
 "movq (%1), %%mm0 \n\t"
@@ -42,8 +42,8 @@ av_unused static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels
 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
 "movq %%mm4, (%2) \n\t"
 "movq %%mm5, (%2, %3) \n\t"
-"add %%"REG_a", %1 \n\t"
-"add %%"REG_a", %2 \n\t"
+"add %%"FF_REG_a", %1 \n\t"
+"add %%"FF_REG_a", %2 \n\t"
 "movq (%1), %%mm0 \n\t"
 "movq 1(%1), %%mm1 \n\t"
 "movq (%1, %3), %%mm2 \n\t"
@@ -51,20 +51,20 @@ av_unused static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels
 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
 "movq %%mm4, (%2) \n\t"
 "movq %%mm5, (%2, %3) \n\t"
-"add %%"REG_a", %1 \n\t"
-"add %%"REG_a", %2 \n\t"
+"add %%"FF_REG_a", %1 \n\t"
+"add %%"FF_REG_a", %2 \n\t"
 "subl $4, %0 \n\t"
 "jnz 1b \n\t"
 :"+g"(h), "+S"(pixels), "+D"(block)
 :"r"((x86_reg)line_size)
-:REG_a, "memory");
+:FF_REG_a, "memory");
 }
 av_unused static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 {
 MOVQ_BFE(mm6);
 __asm__ volatile(
-"lea (%3, %3), %%"REG_a" \n\t"
+"lea (%3, %3), %%"FF_REG_a" \n\t"
 ".p2align 3 \n\t"
 "1: \n\t"
 "movq (%1), %%mm0 \n\t"
@@ -81,8 +81,8 @@ av_unused static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixel
 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
 "movq %%mm4, 8(%2) \n\t"
 "movq %%mm5, 8(%2, %3) \n\t"
-"add %%"REG_a", %1 \n\t"
-"add %%"REG_a", %2 \n\t"
+"add %%"FF_REG_a", %1 \n\t"
+"add %%"FF_REG_a", %2 \n\t"
 "movq (%1), %%mm0 \n\t"
 "movq 1(%1), %%mm1 \n\t"
 "movq (%1, %3), %%mm2 \n\t"
@@ -97,42 +97,42 @@ av_unused static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixel
 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
 "movq %%mm4, 8(%2) \n\t"
 "movq %%mm5, 8(%2, %3) \n\t"
-"add %%"REG_a", %1 \n\t"
-"add %%"REG_a", %2 \n\t"
+"add %%"FF_REG_a", %1 \n\t"
+"add %%"FF_REG_a", %2 \n\t"
 "subl $4, %0 \n\t"
 "jnz 1b \n\t"
 :"+g"(h), "+S"(pixels), "+D"(block)
 :"r"((x86_reg)line_size)
-:REG_a, "memory");
+:FF_REG_a, "memory");
 }
 av_unused static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 {
 MOVQ_BFE(mm6);
 __asm__ volatile(
-"lea (%3, %3), %%"REG_a" \n\t"
+"lea (%3, %3), %%"FF_REG_a" \n\t"
 "movq (%1), %%mm0 \n\t"
 ".p2align 3 \n\t"
 "1: \n\t"
 "movq (%1, %3), %%mm1 \n\t"
-"movq (%1, %%"REG_a"),%%mm2 \n\t"
+"movq (%1, %%"FF_REG_a"),%%mm2\n\t"
 PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5)
 "movq %%mm4, (%2) \n\t"
 "movq %%mm5, (%2, %3) \n\t"
-"add %%"REG_a", %1 \n\t"
-"add %%"REG_a", %2 \n\t"
+"add %%"FF_REG_a", %1 \n\t"
+"add %%"FF_REG_a", %2 \n\t"
 "movq (%1, %3), %%mm1 \n\t"
-"movq (%1, %%"REG_a"),%%mm0 \n\t"
+"movq (%1, %%"FF_REG_a"),%%mm0\n\t"
 PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5)
 "movq %%mm4, (%2) \n\t"
 "movq %%mm5, (%2, %3) \n\t"
-"add %%"REG_a", %1 \n\t"
-"add %%"REG_a", %2 \n\t"
+"add %%"FF_REG_a", %1 \n\t"
+"add %%"FF_REG_a", %2 \n\t"
 "subl $4, %0 \n\t"
 "jnz 1b \n\t"
 :"+g"(h), "+S"(pixels), "+D"(block)
 :"r"((x86_reg)line_size)
-:REG_a, "memory");
+:FF_REG_a, "memory");
 }
 av_unused static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
@@ -166,12 +166,12 @@ av_unused static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels
 {
 MOVQ_BFE(mm6);
 __asm__ volatile(
-"lea (%3, %3), %%"REG_a" \n\t"
+"lea (%3, %3), %%"FF_REG_a" \n\t"
 "movq (%1), %%mm0 \n\t"
 ".p2align 3 \n\t"
 "1: \n\t"
 "movq (%1, %3), %%mm1 \n\t"
-"movq (%1, %%"REG_a"), %%mm2 \n\t"
+"movq (%1, %%"FF_REG_a"), %%mm2 \n\t"
 PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5)
 "movq (%2), %%mm3 \n\t"
 PAVGB_MMX(%%mm3, %%mm4, %%mm0, %%mm6)
@@ -179,11 +179,11 @@ av_unused static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels
 PAVGB_MMX(%%mm3, %%mm5, %%mm1, %%mm6)
 "movq %%mm0, (%2) \n\t"
 "movq %%mm1, (%2, %3) \n\t"
-"add %%"REG_a", %1 \n\t"
-"add %%"REG_a", %2 \n\t"
+"add %%"FF_REG_a", %1 \n\t"
+"add %%"FF_REG_a", %2 \n\t"
 "movq (%1, %3), %%mm1 \n\t"
-"movq (%1, %%"REG_a"), %%mm0 \n\t"
+"movq (%1, %%"FF_REG_a"), %%mm0 \n\t"
 PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5)
 "movq (%2), %%mm3 \n\t"
 PAVGB_MMX(%%mm3, %%mm4, %%mm2, %%mm6)
@@ -191,12 +191,12 @@ av_unused static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels
 PAVGB_MMX(%%mm3, %%mm5, %%mm1, %%mm6)
 "movq %%mm2, (%2) \n\t"
 "movq %%mm1, (%2, %3) \n\t"
-"add %%"REG_a", %1 \n\t"
-"add %%"REG_a", %2 \n\t"
+"add %%"FF_REG_a", %1 \n\t"
+"add %%"FF_REG_a", %2 \n\t"
 "subl $4, %0 \n\t"
 "jnz 1b \n\t"
 :"+g"(h), "+S"(pixels), "+D"(block)
 :"r"((x86_reg)line_size)
-:REG_a, "memory");
+:FF_REG_a, "memory");
 }
@@ -283,15 +283,15 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2,
 __asm__ volatile (
 ".p2align 4 \n\t"
 "1: \n\t"
-"movq (%1, %%"REG_a"), %%mm0 \n\t"
-"movq (%2, %%"REG_a"), %%mm2 \n\t"
-"movq (%2, %%"REG_a"), %%mm4 \n\t"
-"add %3, %%"REG_a" \n\t"
+"movq (%1, %%"FF_REG_a"), %%mm0 \n\t"
+"movq (%2, %%"FF_REG_a"), %%mm2 \n\t"
+"movq (%2, %%"FF_REG_a"), %%mm4 \n\t"
+"add %3, %%"FF_REG_a" \n\t"
 "psubusb %%mm0, %%mm2 \n\t"
 "psubusb %%mm4, %%mm0 \n\t"
-"movq (%1, %%"REG_a"), %%mm1 \n\t"
-"movq (%2, %%"REG_a"), %%mm3 \n\t"
-"movq (%2, %%"REG_a"), %%mm5 \n\t"
+"movq (%1, %%"FF_REG_a"), %%mm1 \n\t"
+"movq (%2, %%"FF_REG_a"), %%mm3 \n\t"
+"movq (%2, %%"FF_REG_a"), %%mm5 \n\t"
 "psubusb %%mm1, %%mm3 \n\t"
 "psubusb %%mm5, %%mm1 \n\t"
 "por %%mm2, %%mm0 \n\t"
@@ -306,7 +306,7 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2,
 "paddw %%mm3, %%mm2 \n\t"
 "paddw %%mm2, %%mm0 \n\t"
 "paddw %%mm0, %%mm6 \n\t"
-"add %3, %%"REG_a" \n\t"
+"add %3, %%"FF_REG_a" \n\t"
 " js 1b \n\t"
 : "+a" (len)
 : "r" (blk1 - len), "r" (blk2 - len), "r" (stride));
@@ -319,18 +319,18 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2,
 __asm__ volatile (
 ".p2align 4 \n\t"
 "1: \n\t"
-"movq (%1, %%"REG_a"), %%mm0 \n\t"
-"movq (%2, %%"REG_a"), %%mm1 \n\t"
-"movq (%1, %%"REG_a"), %%mm2 \n\t"
-"movq (%2, %%"REG_a"), %%mm3 \n\t"
+"movq (%1, %%"FF_REG_a"), %%mm0 \n\t"
+"movq (%2, %%"FF_REG_a"), %%mm1 \n\t"
+"movq (%1, %%"FF_REG_a"), %%mm2 \n\t"
+"movq (%2, %%"FF_REG_a"), %%mm3 \n\t"
 "punpcklbw %%mm7, %%mm0 \n\t"
 "punpcklbw %%mm7, %%mm1 \n\t"
 "punpckhbw %%mm7, %%mm2 \n\t"
 "punpckhbw %%mm7, %%mm3 \n\t"
 "paddw %%mm0, %%mm1 \n\t"
 "paddw %%mm2, %%mm3 \n\t"
-"movq (%3, %%"REG_a"), %%mm4 \n\t"
-"movq (%3, %%"REG_a"), %%mm2 \n\t"
+"movq (%3, %%"FF_REG_a"), %%mm4 \n\t"
+"movq (%3, %%"FF_REG_a"), %%mm2 \n\t"
 "paddw %%mm5, %%mm1 \n\t"
 "paddw %%mm5, %%mm3 \n\t"
 "psrlw $1, %%mm1 \n\t"
@@ -344,7 +344,7 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2,
 "punpckhbw %%mm7, %%mm1 \n\t"
 "paddw %%mm1, %%mm0 \n\t"
 "paddw %%mm0, %%mm6 \n\t"
-"add %4, %%"REG_a" \n\t"
+"add %4, %%"FF_REG_a" \n\t"
 " js 1b \n\t"
 : "+a" (len)
 : "r" (blk1a - len), "r" (blk1b - len), "r" (blk2 - len),
@@ -356,8 +356,8 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2,
 {
 x86_reg len = -stride * h;
 __asm__ volatile (
-"movq (%1, %%"REG_a"), %%mm0 \n\t"
-"movq 1(%1, %%"REG_a"), %%mm2 \n\t"
+"movq (%1, %%"FF_REG_a"), %%mm0\n\t"
+"movq 1(%1, %%"FF_REG_a"), %%mm2\n\t"
 "movq %%mm0, %%mm1 \n\t"
 "movq %%mm2, %%mm3 \n\t"
 "punpcklbw %%mm7, %%mm0 \n\t"
@@ -368,8 +368,8 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2,
 "paddw %%mm3, %%mm1 \n\t"
 ".p2align 4 \n\t"
 "1: \n\t"
-"movq (%2, %%"REG_a"), %%mm2 \n\t"
-"movq 1(%2, %%"REG_a"), %%mm4 \n\t"
+"movq (%2, %%"FF_REG_a"), %%mm2\n\t"
+"movq 1(%2, %%"FF_REG_a"), %%mm4\n\t"
 "movq %%mm2, %%mm3 \n\t"
 "movq %%mm4, %%mm5 \n\t"
 "punpcklbw %%mm7, %%mm2 \n\t"
@@ -383,8 +383,8 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2,
 "paddw %%mm3, %%mm1 \n\t"
 "paddw %%mm5, %%mm0 \n\t"
 "paddw %%mm5, %%mm1 \n\t"
-"movq (%3, %%"REG_a"), %%mm4 \n\t"
-"movq (%3, %%"REG_a"), %%mm5 \n\t"
+"movq (%3, %%"FF_REG_a"), %%mm4 \n\t"
+"movq (%3, %%"FF_REG_a"), %%mm5 \n\t"
 "psrlw $2, %%mm0 \n\t"
 "psrlw $2, %%mm1 \n\t"
 "packuswb %%mm1, %%mm0 \n\t"
@@ -398,7 +398,7 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2,
 "paddw %%mm4, %%mm6 \n\t"
 "movq %%mm2, %%mm0 \n\t"
 "movq %%mm3, %%mm1 \n\t"
-"add %4, %%"REG_a" \n\t"
+"add %4, %%"FF_REG_a" \n\t"
 " js 1b \n\t"
 : "+a" (len)
 : "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len),
...
@@ -150,32 +150,32 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
 if((s->out_format == FMT_H263 || s->out_format == FMT_H261) && s->mpeg_quant==0){
 __asm__ volatile(
-"movd %%"REG_a", "MM"3 \n\t" // last_non_zero_p1
+"movd %%"FF_REG_a", "MM"3 \n\t" // last_non_zero_p1
 SPREADW(MM"3")
 "pxor "MM"7, "MM"7 \n\t" // 0
 "pxor "MM"4, "MM"4 \n\t" // 0
 MOVQ" (%2), "MM"5 \n\t" // qmat[0]
 "pxor "MM"6, "MM"6 \n\t"
 "psubw (%3), "MM"6 \n\t" // -bias[0]
-"mov $-128, %%"REG_a" \n\t"
+"mov $-128, %%"FF_REG_a" \n\t"
 ".p2align 4 \n\t"
 "1: \n\t"
-MOVQ" (%1, %%"REG_a"), "MM"0 \n\t" // block[i]
+MOVQ" (%1, %%"FF_REG_a"), "MM"0 \n\t" // block[i]
 SAVE_SIGN(MM"1", MM"0") // ABS(block[i])
 "psubusw "MM"6, "MM"0 \n\t" // ABS(block[i]) + bias[0]
 "pmulhw "MM"5, "MM"0 \n\t" // (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16
 "por "MM"0, "MM"4 \n\t"
 RESTORE_SIGN(MM"1", MM"0") // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
-MOVQ" "MM"0, (%5, %%"REG_a") \n\t"
+MOVQ" "MM"0, (%5, %%"FF_REG_a") \n\t"
 "pcmpeqw "MM"7, "MM"0 \n\t" // out==0 ? 0xFF : 0x00
-MOVQ" (%4, %%"REG_a"), "MM"1 \n\t"
-MOVQ" "MM"7, (%1, %%"REG_a") \n\t" // 0
+MOVQ" (%4, %%"FF_REG_a"), "MM"1 \n\t"
+MOVQ" "MM"7, (%1, %%"FF_REG_a") \n\t" // 0
 "pandn "MM"1, "MM"0 \n\t"
 PMAXW(MM"0", MM"3")
-"add $"MMREG_WIDTH", %%"REG_a" \n\t"
+"add $"MMREG_WIDTH", %%"FF_REG_a" \n\t"
 " js 1b \n\t"
 PMAX(MM"3", MM"0")
-"movd "MM"3, %%"REG_a" \n\t"
+"movd "MM"3, %%"FF_REG_a" \n\t"
 "movzbl %%al, %%eax \n\t" // last_non_zero_p1
 : "+a" (last_non_zero_p1)
 : "r" (block+64), "r" (qmat), "r" (bias),
@@ -185,31 +185,31 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
 );
 }else{ // FMT_H263
 __asm__ volatile(
-"movd %%"REG_a", "MM"3 \n\t" // last_non_zero_p1
+"movd %%"FF_REG_a", "MM"3 \n\t" // last_non_zero_p1
 SPREADW(MM"3")
 "pxor "MM"7, "MM"7 \n\t" // 0
 "pxor "MM"4, "MM"4 \n\t" // 0
-"mov $-128, %%"REG_a" \n\t"
+"mov $-128, %%"FF_REG_a" \n\t"
 ".p2align 4 \n\t"
 "1: \n\t"
-MOVQ" (%1, %%"REG_a"), "MM"0 \n\t" // block[i]
+MOVQ" (%1, %%"FF_REG_a"), "MM"0 \n\t" // block[i]
 SAVE_SIGN(MM"1", MM"0") // ABS(block[i])
-MOVQ" (%3, %%"REG_a"), "MM"6 \n\t" // bias[0]
+MOVQ" (%3, %%"FF_REG_a"), "MM"6 \n\t" // bias[0]
 "paddusw "MM"6, "MM"0 \n\t" // ABS(block[i]) + bias[0]
-MOVQ" (%2, %%"REG_a"), "MM"5 \n\t" // qmat[i]
+MOVQ" (%2, %%"FF_REG_a"), "MM"5 \n\t" // qmat[i]
 "pmulhw "MM"5, "MM"0 \n\t" // (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16
 "por "MM"0, "MM"4 \n\t"
 RESTORE_SIGN(MM"1", MM"0") // out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
-MOVQ" "MM"0, (%5, %%"REG_a") \n\t"
+MOVQ" "MM"0, (%5, %%"FF_REG_a") \n\t"
 "pcmpeqw "MM"7, "MM"0 \n\t" // out==0 ? 0xFF : 0x00
-MOVQ" (%4, %%"REG_a"), "MM"1 \n\t"
-MOVQ" "MM"7, (%1, %%"REG_a") \n\t" // 0
+MOVQ" (%4, %%"FF_REG_a"), "MM"1 \n\t"
+MOVQ" "MM"7, (%1, %%"FF_REG_a") \n\t" // 0
 "pandn "MM"1, "MM"0 \n\t"
 PMAXW(MM"0", MM"3")
-"add $"MMREG_WIDTH", %%"REG_a" \n\t"
+"add $"MMREG_WIDTH", %%"FF_REG_a" \n\t"
 " js 1b \n\t"
 PMAX(MM"3", MM"0")
-"movd "MM"3, %%"REG_a" \n\t"
+"movd "MM"3, %%"FF_REG_a" \n\t"
 "movzbl %%al, %%eax \n\t" // last_non_zero_p1
 : "+a" (last_non_zero_p1)
 : "r" (block+64), "r" (qmat+64), "r" (bias+64),
...
@@ -46,12 +46,12 @@ av_unused STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixel
 "punpckhbw %%mm7, %%mm5 \n\t"
 "paddusw %%mm0, %%mm4 \n\t"
 "paddusw %%mm1, %%mm5 \n\t"
-"xor %%"REG_a", %%"REG_a" \n\t"
+"xor %%"FF_REG_a", %%"FF_REG_a" \n\t"
 "add %3, %1 \n\t"
 ".p2align 3 \n\t"
 "1: \n\t"
-"movq (%1, %%"REG_a"), %%mm0 \n\t"
-"movq 1(%1, %%"REG_a"), %%mm2 \n\t"
+"movq (%1, %%"FF_REG_a"), %%mm0 \n\t"
+"movq 1(%1, %%"FF_REG_a"), %%mm2 \n\t"
 "movq %%mm0, %%mm1 \n\t"
 "movq %%mm2, %%mm3 \n\t"
 "punpcklbw %%mm7, %%mm0 \n\t"
@@ -67,11 +67,11 @@ av_unused STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixel
 "psrlw $2, %%mm4 \n\t"
 "psrlw $2, %%mm5 \n\t"
 "packuswb %%mm5, %%mm4 \n\t"
-"movq %%mm4, (%2, %%"REG_a") \n\t"
-"add %3, %%"REG_a" \n\t"
-"movq (%1, %%"REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3
-"movq 1(%1, %%"REG_a"), %%mm4 \n\t"
+"movq %%mm4, (%2, %%"FF_REG_a") \n\t"
+"add %3, %%"FF_REG_a" \n\t"
+"movq (%1, %%"FF_REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3
+"movq 1(%1, %%"FF_REG_a"), %%mm4 \n\t"
 "movq %%mm2, %%mm3 \n\t"
 "movq %%mm4, %%mm5 \n\t"
 "punpcklbw %%mm7, %%mm2 \n\t"
@@ -87,14 +87,14 @@ av_unused STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixel
 "psrlw $2, %%mm0 \n\t"
 "psrlw $2, %%mm1 \n\t"
 "packuswb %%mm1, %%mm0 \n\t"
-"movq %%mm0, (%2, %%"REG_a") \n\t"
-"add %3, %%"REG_a" \n\t"
+"movq %%mm0, (%2, %%"FF_REG_a") \n\t"
+"add %3, %%"FF_REG_a" \n\t"
 "subl $2, %0 \n\t"
 "jnz 1b \n\t"
 :"+g"(h), "+S"(pixels)
 :"D"(block), "r"((x86_reg)line_size)
-:REG_a, "memory");
+:FF_REG_a, "memory");
 }
 // avg_pixels
@@ -115,12 +115,12 @@ av_unused STATIC void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixel
 "punpckhbw %%mm7, %%mm5 \n\t"
 "paddusw %%mm0, %%mm4 \n\t"
 "paddusw %%mm1, %%mm5 \n\t"
-"xor %%"REG_a", %%"REG_a" \n\t"
+"xor %%"FF_REG_a", %%"FF_REG_a" \n\t"
 "add %3, %1 \n\t"
 ".p2align 3 \n\t"
 "1: \n\t"
-"movq (%1, %%"REG_a"), %%mm0 \n\t"
-"movq 1(%1, %%"REG_a"), %%mm2 \n\t"
+"movq (%1, %%"FF_REG_a"), %%mm0 \n\t"
+"movq 1(%1, %%"FF_REG_a"), %%mm2 \n\t"
 "movq %%mm0, %%mm1 \n\t"
 "movq %%mm2, %%mm3 \n\t"
 "punpcklbw %%mm7, %%mm0 \n\t"
@@ -135,16 +135,16 @@ av_unused STATIC void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixel
 "paddusw %%mm1, %%mm5 \n\t"
 "psrlw $2, %%mm4 \n\t"
 "psrlw $2, %%mm5 \n\t"
-"movq (%2, %%"REG_a"), %%mm3 \n\t"
+"movq (%2, %%"FF_REG_a"), %%mm3 \n\t"
 "packuswb %%mm5, %%mm4 \n\t"
 "pcmpeqd %%mm2, %%mm2 \n\t"
 "paddb %%mm2, %%mm2 \n\t"
 PAVGB_MMX(%%mm3, %%mm4, %%mm5, %%mm2)
-"movq %%mm5, (%2, %%"REG_a") \n\t"
-"add %3, %%"REG_a" \n\t"
-"movq (%1, %%"REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3
-"movq 1(%1, %%"REG_a"), %%mm4 \n\t"
+"movq %%mm5, (%2, %%"FF_REG_a") \n\t"
+"add %3, %%"FF_REG_a" \n\t"
+"movq (%1, %%"FF_REG_a"), %%mm2 \n\t" // 0 <-> 2 1 <-> 3
+"movq 1(%1, %%"FF_REG_a"), %%mm4 \n\t"
 "movq %%mm2, %%mm3 \n\t"
 "movq %%mm4, %%mm5 \n\t"
 "punpcklbw %%mm7, %%mm2 \n\t"
@@ -159,17 +159,17 @@ av_unused STATIC void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixel
 "paddusw %%mm5, %%mm1 \n\t"
 "psrlw $2, %%mm0 \n\t"
 "psrlw $2, %%mm1 \n\t"
-"movq (%2, %%"REG_a"), %%mm3 \n\t"
+"movq (%2, %%"FF_REG_a"), %%mm3 \n\t"
 "packuswb %%mm1, %%mm0 \n\t"
 "pcmpeqd %%mm2, %%mm2 \n\t"
 "paddb %%mm2, %%mm2 \n\t"
 PAVGB_MMX(%%mm3, %%mm0, %%mm1, %%mm2)
-"movq %%mm1, (%2, %%"REG_a") \n\t"
-"add %3, %%"REG_a" \n\t"
+"movq %%mm1, (%2, %%"FF_REG_a") \n\t"
+"add %3, %%"FF_REG_a" \n\t"
 "subl $2, %0 \n\t"
 "jnz 1b \n\t"
 :"+g"(h), "+S"(pixels)
 :"D"(block), "r"((x86_reg)line_size)
-:REG_a, "memory");
+:FF_REG_a, "memory");
 }
@@ -84,7 +84,7 @@ static void OPNAME ## vc1_shift2_mmx(uint8_t *dst, const uint8_t *src,\
 {\
 rnd = 8-rnd;\
 __asm__ volatile(\
-"mov $8, %%"REG_c" \n\t"\
+"mov $8, %%"FF_REG_c" \n\t"\
 LOAD_ROUNDER_MMX("%5")\
 "movq "MANGLE(ff_pw_9)", %%mm6\n\t"\
 "1: \n\t"\
@@ -119,13 +119,13 @@ static void OPNAME ## vc1_shift2_mmx(uint8_t *dst, const uint8_t *src,\
 "movq %%mm3, (%1) \n\t"\
 "add %6, %0 \n\t"\
 "add %4, %1 \n\t"\
-"dec %%"REG_c" \n\t"\
+"dec %%"FF_REG_c" \n\t"\
 "jnz 1b \n\t"\
 : "+r"(src), "+r"(dst)\
 : "r"(offset), "r"(-2*offset), "g"(stride), "m"(rnd),\
 "g"(stride-offset)\
 NAMED_CONSTRAINTS_ADD(ff_pw_9)\
-: "%"REG_c, "memory"\
+: "%"FF_REG_c, "memory"\
 );\
 }
...
@@ -32,22 +32,22 @@ static void line_noise_mmx(uint8_t *dst, const uint8_t *src,
 noise += shift;
 __asm__ volatile(
-"mov %3, %%"REG_a" \n\t"
+"mov %3, %%"FF_REG_a" \n\t"
 "pcmpeqb %%mm7, %%mm7 \n\t"
 "psllw $15, %%mm7 \n\t"
 "packsswb %%mm7, %%mm7 \n\t"
 ".p2align 4 \n\t"
 "1: \n\t"
-"movq (%0, %%"REG_a"), %%mm0 \n\t"
-"movq (%1, %%"REG_a"), %%mm1 \n\t"
+"movq (%0, %%"FF_REG_a"), %%mm0 \n\t"
+"movq (%1, %%"FF_REG_a"), %%mm1 \n\t"
 "pxor %%mm7, %%mm0 \n\t"
 "paddsb %%mm1, %%mm0 \n\t"
 "pxor %%mm7, %%mm0 \n\t"
-"movq %%mm0, (%2, %%"REG_a") \n\t"
-"add $8, %%"REG_a" \n\t"
+"movq %%mm0, (%2, %%"FF_REG_a") \n\t"
+"add $8, %%"FF_REG_a" \n\t"
 " js 1b \n\t"
 :: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len), "g" (-mmx_len)
-: "%"REG_a
+: "%"FF_REG_a
 );
 if (mmx_len != len)
 ff_line_noise_c(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len, 0);
@@ -60,13 +60,13 @@ static void line_noise_avg_mmx(uint8_t *dst, const uint8_t *src,
 x86_reg mmx_len = len & (~7);
 __asm__ volatile(
-"mov %5, %%"REG_a" \n\t"
+"mov %5, %%"FF_REG_a" \n\t"
 ".p2align 4 \n\t"
 "1: \n\t"
-"movq (%1, %%"REG_a"), %%mm1 \n\t"
-"movq (%0, %%"REG_a"), %%mm0 \n\t"
-"paddb (%2, %%"REG_a"), %%mm1 \n\t"
-"paddb (%3, %%"REG_a"), %%mm1 \n\t"
+"movq (%1, %%"FF_REG_a"), %%mm1 \n\t"
+"movq (%0, %%"FF_REG_a"), %%mm0 \n\t"
+"paddb (%2, %%"FF_REG_a"), %%mm1\n\t"
+"paddb (%3, %%"FF_REG_a"), %%mm1\n\t"
 "movq %%mm0, %%mm2 \n\t"
 "movq %%mm1, %%mm3 \n\t"
 "punpcklbw %%mm0, %%mm0 \n\t"
@@ -82,12 +82,12 @@ static void line_noise_avg_mmx(uint8_t *dst, const uint8_t *src,
 "psrlw $8, %%mm1 \n\t"
 "psrlw $8, %%mm3 \n\t"
 "packuswb %%mm3, %%mm1 \n\t"
-"movq %%mm1, (%4, %%"REG_a") \n\t"
-"add $8, %%"REG_a" \n\t"
+"movq %%mm1, (%4, %%"FF_REG_a") \n\t"
+"add $8, %%"FF_REG_a" \n\t"
 " js 1b \n\t"
 :: "r" (src+mmx_len), "r" (shift[0]+mmx_len), "r" (shift[1]+mmx_len), "r" (shift[2]+mmx_len),
 "r" (dst+mmx_len), "g" (-mmx_len)
-: "%"REG_a
+: "%"FF_REG_a
 );
 if (mmx_len != len){
@@ -104,22 +104,22 @@ static void line_noise_mmxext(uint8_t *dst, const uint8_t *src,
 noise += shift;
 __asm__ volatile(
-"mov %3, %%"REG_a" \n\t"
+"mov %3, %%"FF_REG_a" \n\t"
 "pcmpeqb %%mm7, %%mm7 \n\t"
 "psllw $15, %%mm7 \n\t"
 "packsswb %%mm7, %%mm7 \n\t"
 ".p2align 4 \n\t"
 "1: \n\t"
-"movq (%0, %%"REG_a"), %%mm0 \n\t"
-"movq (%1, %%"REG_a"), %%mm1 \n\t"
+"movq (%0, %%"FF_REG_a"), %%mm0 \n\t"
+"movq (%1, %%"FF_REG_a"), %%mm1 \n\t"
 "pxor %%mm7, %%mm0 \n\t"
 "paddsb %%mm1, %%mm0 \n\t"
 "pxor %%mm7, %%mm0 \n\t"
-"movntq %%mm0, (%2, %%"REG_a") \n\t"
-"add $8, %%"REG_a" \n\t"
+"movntq %%mm0, (%2, %%"FF_REG_a") \n\t"
+"add $8, %%"FF_REG_a" \n\t"
 " js 1b \n\t"
 :: "r" (src+mmx_len), "r" (noise+mmx_len), "r" (dst+mmx_len), "g" (-mmx_len)
-: "%"REG_a
+: "%"FF_REG_a
 );
 if (mmx_len != len)
 ff_line_noise_c(dst+mmx_len, src+mmx_len, noise+mmx_len, len-mmx_len, 0);
...
@@ -28,46 +28,46 @@ typedef struct xmm_reg { uint64_t a, b; } xmm_reg;
 typedef struct ymm_reg { uint64_t a, b, c, d; } ymm_reg;
 #if ARCH_X86_64
-# define OPSIZE "q"
-# define REG_a "rax"
-# define REG_b "rbx"
-# define REG_c "rcx"
-# define REG_d "rdx"
-# define REG_D "rdi"
-# define REG_S "rsi"
-# define PTR_SIZE "8"
+# define FF_OPSIZE "q"
+# define FF_REG_a "rax"
+# define FF_REG_b "rbx"
+# define FF_REG_c "rcx"
+# define FF_REG_d "rdx"
+# define FF_REG_D "rdi"
+# define FF_REG_S "rsi"
+# define FF_PTR_SIZE "8"
 typedef int64_t x86_reg;
-/* REG_SP is defined in Solaris sys headers, so use REG_sp */
-# define REG_sp "rsp"
-# define REG_BP "rbp"
-# define REGBP rbp
-# define REGa rax
-# define REGb rbx
-# define REGc rcx
-# define REGd rdx
-# define REGSP rsp
+/* FF_REG_SP is defined in Solaris sys headers, so use FF_REG_sp */
+# define FF_REG_sp "rsp"
+# define FF_REG_BP "rbp"
+# define FF_REGBP rbp
+# define FF_REGa rax
+# define FF_REGb rbx
+# define FF_REGc rcx
+# define FF_REGd rdx
+# define FF_REGSP rsp
 #elif ARCH_X86_32
-# define OPSIZE "l"
-# define REG_a "eax"
-# define REG_b "ebx"
-# define REG_c "ecx"
-# define REG_d "edx"
-# define REG_D "edi"
-# define REG_S "esi"
-# define PTR_SIZE "4"
+# define FF_OPSIZE "l"
+# define FF_REG_a "eax"
+# define FF_REG_b "ebx"
+# define FF_REG_c "ecx"
+# define FF_REG_d "edx"
+# define FF_REG_D "edi"
+# define FF_REG_S "esi"
+# define FF_PTR_SIZE "4"
 typedef int32_t x86_reg;
-# define REG_sp "esp"
-# define REG_BP "ebp"
-# define REGBP ebp
-# define REGa eax
-# define REGb ebx
-# define REGc ecx
-# define REGd edx
-# define REGSP esp
+# define FF_REG_sp "esp"
+# define FF_REG_BP "ebp"
+# define FF_REGBP ebp
+# define FF_REGa eax
+# define FF_REGb ebx
+# define FF_REGc ecx
+# define FF_REGd edx
+# define FF_REGSP esp
 #else
 typedef int x86_reg;
 #endif
...
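
The hunk above is where the renamed macros live: FF_OPSIZE selects the "l"/"q" instruction suffix, FF_REG_a through FF_REG_S name the matching 32- or 64-bit registers, and x86_reg is the pointer-sized integer type, so a single asm string assembles on both x86_32 and x86_64. A minimal hedged sketch of how they compose, assuming definitions equivalent to the ones above (the two helper functions are hypothetical, not part of this commit):

    #include <stdint.h>

    #if defined(__x86_64__)            /* stand-in for FFmpeg's ARCH_X86_64 */
    #   define FF_OPSIZE "q"
    #   define FF_REG_a  "rax"
    typedef int64_t x86_reg;
    #else
    #   define FF_OPSIZE "l"
    #   define FF_REG_a  "eax"
    typedef int32_t x86_reg;
    #endif

    /* Pointer-width add, the same idiom as "add"FF_OPSIZE" $2 , "byte" in the
     * CABAC hunks above: expands to addl on x86_32 and addq on x86_64. */
    static inline x86_reg bump_by_two(x86_reg v)
    {
        __asm__ volatile("add" FF_OPSIZE " $2, %0 \n\t" : "+r"(v));
        return v;
    }

    /* Clear eax/rax by name, like the xor of FF_REG_a with itself in the
     * pixel-averaging hunks; the "=a" constraint pins the output to that reg. */
    static inline x86_reg zero_reg_a(void)
    {
        x86_reg out;
        __asm__ volatile("xor %%" FF_REG_a ", %%" FF_REG_a " \n\t" : "=a"(out));
        return out;
    }
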
@@ -41,9 +41,9 @@
 /* ebx saving is necessary for PIC. gcc seems unable to see it alone */
 #define cpuid(index, eax, ebx, ecx, edx) \
 __asm__ volatile ( \
-"mov %%"REG_b", %%"REG_S" \n\t" \
+"mov %%"FF_REG_b", %%"FF_REG_S" \n\t" \
 "cpuid \n\t" \
-"xchg %%"REG_b", %%"REG_S \
+"xchg %%"FF_REG_b", %%"FF_REG_S \
 : "=a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx) \
 : "0" (index), "2"(0))
...
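
The cpuid() wrapper above parks %ebx/%rbx in FF_REG_S around the CPUID instruction and returns the EBX result through the "=S" output, because ebx is the PIC register on 32-bit builds and GCC may be unable to allocate it directly, which is what the "ebx saving is necessary for PIC" comment refers to. A hedged usage sketch follows; it assumes the macro and the asm.h definitions above are in scope, and the feature test relies on the standard CPUID leaf 1 layout (SSE2 is bit 26 of EDX), not on anything in this diff:

    /* Hypothetical caller of the cpuid() macro shown above. */
    static int cpu_has_sse2(void)
    {
        int eax, ebx, ecx, edx;
        cpuid(1, eax, ebx, ecx, edx);      /* leaf 1: processor feature flags */
        return (edx >> 26) & 1;            /* EDX bit 26 = SSE2 */
    }
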
@@ -220,16 +220,16 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
 "movdqa %%xmm3, %%xmm4 \n\t" \
 "movdqa %%xmm3, %%xmm7 \n\t" \
 "movl %3, %%ecx \n\t" \
-"mov %0, %%"REG_d" \n\t"\
-"mov (%%"REG_d"), %%"REG_S" \n\t"\
+"mov %0, %%"FF_REG_d" \n\t"\
+"mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
 ".p2align 4 \n\t" /* FIXME Unroll? */\
 "1: \n\t"\
-"movddup 8(%%"REG_d"), %%xmm0 \n\t" /* filterCoeff */\
-"movdqa (%%"REG_S", %%"REG_c", 2), %%xmm2 \n\t" /* srcData */\
-"movdqa 16(%%"REG_S", %%"REG_c", 2), %%xmm5 \n\t" /* srcData */\
-"add $16, %%"REG_d" \n\t"\
-"mov (%%"REG_d"), %%"REG_S" \n\t"\
-"test %%"REG_S", %%"REG_S" \n\t"\
+"movddup 8(%%"FF_REG_d"), %%xmm0 \n\t" /* filterCoeff */\
+"movdqa (%%"FF_REG_S", %%"FF_REG_c", 2), %%xmm2 \n\t" /* srcData */\
+"movdqa 16(%%"FF_REG_S", %%"FF_REG_c", 2), %%xmm5 \n\t" /* srcData */\
+"add $16, %%"FF_REG_d" \n\t"\
+"mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
+"test %%"FF_REG_S", %%"FF_REG_S" \n\t"\
 "pmulhw %%xmm0, %%xmm2 \n\t"\
 "pmulhw %%xmm0, %%xmm5 \n\t"\
 "paddw %%xmm2, %%xmm3 \n\t"\
@@ -238,13 +238,13 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
 "psraw $3, %%xmm3 \n\t"\
 "psraw $3, %%xmm4 \n\t"\
 "packuswb %%xmm4, %%xmm3 \n\t"\
-"movntdq %%xmm3, (%1, %%"REG_c")\n\t"\
-"add $16, %%"REG_c" \n\t"\
-"cmp %2, %%"REG_c" \n\t"\
+"movntdq %%xmm3, (%1, %%"FF_REG_c") \n\t"\
+"add $16, %%"FF_REG_c" \n\t"\
+"cmp %2, %%"FF_REG_c" \n\t"\
 "movdqa %%xmm7, %%xmm3 \n\t" \
 "movdqa %%xmm7, %%xmm4 \n\t" \
-"mov %0, %%"REG_d" \n\t"\
-"mov (%%"REG_d"), %%"REG_S" \n\t"\
+"mov %0, %%"FF_REG_d" \n\t"\
+"mov (%%"FF_REG_d"), %%"FF_REG_S" \n\t"\
 "jb 1b \n\t"
 if (offset) {
@@ -259,7 +259,7 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
 "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset),
 "m"(filterSize), "m"(((uint64_t *) dither)[0])
 : XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" , "%xmm4" , "%xmm5" , "%xmm7" ,)
-"%"REG_d, "%"REG_S, "%"REG_c
+"%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_c
 );
 } else {
 __asm__ volatile(
@@ -269,7 +269,7 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
 "r" (dest-offset), "g" ((x86_reg)(dstW+offset)), "m" (offset),
 "m"(filterSize), "m"(((uint64_t *) dither)[0])
 : XMM_CLOBBERS("%xmm0" , "%xmm1" , "%xmm2" , "%xmm3" , "%xmm4" , "%xmm5" , "%xmm7" ,)
-"%"REG_d, "%"REG_S, "%"REG_c
+"%"FF_REG_d, "%"FF_REG_S, "%"FF_REG_c
 );
 }
 }
...