Commit a52ffc3f authored by Ronald S. Bultje

Move static inline function to a macro, so that constant propagation in
inline asm works for gcc-3.x also (hopefully). Should fix gcc-3.x FATE
breakage after r25254.

Originally committed as revision 25262 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent fc7c40c2
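
Background, as a minimal sketch (the store_zero/STORE_ZERO names below are hypothetical, not from this commit): an "i" asm-operand constraint requires a compile-time immediate. When the operand is an expression over a function parameter such as d_idx, that only works if the compiler inlines the function and constant-propagates the argument into the asm block; gcc-3.x apparently did not do this reliably, even for an av_always_inline function. A macro removes the dependency on the inliner, because the preprocessor substitutes the literal argument before the compiler ever sees the asm:

/* Hypothetical illustration of the failure mode addressed here. */
static inline void store_zero(int *p, int d_idx)
{
    /* gcc-3.x may reject this: "i" demands an immediate, which requires
     * d_idx to be constant-propagated into the inlined body. */
    __asm__ volatile("movl $0, %a0(%1)" :: "i"(d_idx * 4), "r"(p) : "memory");
}

/* Macro variant: d_idx is substituted textually, so (d_idx) * 4 is a
 * constant expression before compilation, independent of inlining. */
#define STORE_ZERO(p, d_idx) \
    __asm__ volatile("movl $0, %a0(%1)" :: "i"((d_idx) * 4), "r"(p) : "memory")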
@@ -63,123 +63,119 @@ void ff_h264_idct_add8_sse2 (uint8_t **dest, const int *block_offset, DCTEL
 
 /***********************************/
 /* deblocking */
-static av_always_inline void h264_loop_filter_strength_iteration_mmx2(int16_t bS[2][4][4], uint8_t nnz[40],
-                                                                      int8_t ref[2][40], int16_t mv[2][40][2],
-                                                                      int bidir, int edges, int step,
-                                                                      int mask_mv, int dir, const int d_idx,
-                                                                      const uint64_t mask_dir)
-{
-    x86_reg b_idx;
-    mask_mv <<= 3;
-    for( b_idx=0; b_idx<edges; b_idx+=step ) {
-        if (!mask_dir)
-        __asm__ volatile(
-                "pxor %%mm0, %%mm0 \n\t"
-                ::
-        );
-        if(!(mask_mv & b_idx)) {
-            if(bidir) {
-                __asm__ volatile(
-                        "movd %a3(%0,%2), %%mm2 \n"
-                        "punpckldq %a4(%0,%2), %%mm2 \n" // { ref0[bn], ref1[bn] }
-                        "pshufw $0x44, 12(%0,%2), %%mm0 \n" // { ref0[b], ref0[b] }
-                        "pshufw $0x44, 52(%0,%2), %%mm1 \n" // { ref1[b], ref1[b] }
-                        "pshufw $0x4E, %%mm2, %%mm3 \n"
-                        "psubb %%mm2, %%mm0 \n" // { ref0[b]!=ref0[bn], ref0[b]!=ref1[bn] }
-                        "psubb %%mm3, %%mm1 \n" // { ref1[b]!=ref1[bn], ref1[b]!=ref0[bn] }
-
-                        "por %%mm1, %%mm0 \n"
-                        "movq %a5(%1,%2,4), %%mm1 \n"
-                        "movq %a6(%1,%2,4), %%mm2 \n"
-                        "movq %%mm1, %%mm3 \n"
-                        "movq %%mm2, %%mm4 \n"
-                        "psubw 48(%1,%2,4), %%mm1 \n"
-                        "psubw 56(%1,%2,4), %%mm2 \n"
-                        "psubw 208(%1,%2,4), %%mm3 \n"
-                        "psubw 216(%1,%2,4), %%mm4 \n"
-                        "packsswb %%mm2, %%mm1 \n"
-                        "packsswb %%mm4, %%mm3 \n"
-                        "paddb %%mm6, %%mm1 \n"
-                        "paddb %%mm6, %%mm3 \n"
-                        "psubusb %%mm5, %%mm1 \n" // abs(mv[b] - mv[bn]) >= limit
-                        "psubusb %%mm5, %%mm3 \n"
-                        "packsswb %%mm3, %%mm1 \n"
-
-                        "por %%mm1, %%mm0 \n"
-                        "movq %a7(%1,%2,4), %%mm1 \n"
-                        "movq %a8(%1,%2,4), %%mm2 \n"
-                        "movq %%mm1, %%mm3 \n"
-                        "movq %%mm2, %%mm4 \n"
-                        "psubw 48(%1,%2,4), %%mm1 \n"
-                        "psubw 56(%1,%2,4), %%mm2 \n"
-                        "psubw 208(%1,%2,4), %%mm3 \n"
-                        "psubw 216(%1,%2,4), %%mm4 \n"
-                        "packsswb %%mm2, %%mm1 \n"
-                        "packsswb %%mm4, %%mm3 \n"
-                        "paddb %%mm6, %%mm1 \n"
-                        "paddb %%mm6, %%mm3 \n"
-                        "psubusb %%mm5, %%mm1 \n" // abs(mv[b] - mv[bn]) >= limit
-                        "psubusb %%mm5, %%mm3 \n"
-                        "packsswb %%mm3, %%mm1 \n"
-
-                        "pshufw $0x4E, %%mm1, %%mm1 \n"
-                        "por %%mm1, %%mm0 \n"
-                        "pshufw $0x4E, %%mm0, %%mm1 \n"
-                        "pminub %%mm1, %%mm0 \n"
-                        ::"r"(ref),
-                          "r"(mv),
-                          "r"(b_idx),
-                          "i"(d_idx+12),
-                          "i"(d_idx+52),
-                          "i"(d_idx*4+48),
-                          "i"(d_idx*4+56),
-                          "i"(d_idx*4+208),
-                          "i"(d_idx*4+216)
-                );
-            } else {
-                __asm__ volatile(
-                        "movd 12(%0,%2), %%mm0 \n"
-                        "psubb %a3(%0,%2), %%mm0 \n" // ref[b] != ref[bn]
-                        "movq 48(%1,%2,4), %%mm1 \n"
-                        "movq 56(%1,%2,4), %%mm2 \n"
-                        "psubw %a4(%1,%2,4), %%mm1 \n"
-                        "psubw %a5(%1,%2,4), %%mm2 \n"
-                        "packsswb %%mm2, %%mm1 \n"
-                        "paddb %%mm6, %%mm1 \n"
-                        "psubusb %%mm5, %%mm1 \n" // abs(mv[b] - mv[bn]) >= limit
-                        "packsswb %%mm1, %%mm1 \n"
-                        "por %%mm1, %%mm0 \n"
-                        ::"r"(ref),
-                          "r"(mv),
-                          "r"(b_idx),
-                          "i"(d_idx+12),
-                          "i"(d_idx*4+48),
-                          "i"(d_idx*4+56)
-                );
-            }
-        }
-        __asm__ volatile(
-                "movd 12(%0,%1), %%mm1 \n"
-                "por %a2(%0,%1), %%mm1 \n" // nnz[b] || nnz[bn]
-                ::"r"(nnz),
-                  "r"(b_idx),
-                  "i"(d_idx+12)
-        );
-        __asm__ volatile(
-                "pminub %%mm7, %%mm1 \n"
-                "pminub %%mm7, %%mm0 \n"
-                "psllw $1, %%mm1 \n"
-                "pxor %%mm2, %%mm2 \n"
-                "pmaxub %%mm0, %%mm1 \n"
-                "punpcklbw %%mm2, %%mm1 \n"
-                "movq %%mm1, %a1(%0,%2) \n"
-                ::"r"(bS),
-                  "i"(32*dir),
-                  "r"(b_idx)
-                :"memory"
-        );
-    }
-}
+#define h264_loop_filter_strength_iteration_mmx2(bS, nz, ref, mv, bidir, edges, step, mask_mv, dir, d_idx, mask_dir) \
+    do { \
+        x86_reg b_idx; \
+        mask_mv <<= 3; \
+        for( b_idx=0; b_idx<edges; b_idx+=step ) { \
+            if (!mask_dir) \
+            __asm__ volatile( \
+                    "pxor %%mm0, %%mm0 \n\t" \
+                    :: \
+            ); \
+            if(!(mask_mv & b_idx)) { \
+                if(bidir) { \
+                    __asm__ volatile( \
+                            "movd %a3(%0,%2), %%mm2 \n" \
+                            "punpckldq %a4(%0,%2), %%mm2 \n" /* { ref0[bn], ref1[bn] } */ \
+                            "pshufw $0x44, 12(%0,%2), %%mm0 \n" /* { ref0[b], ref0[b] } */ \
+                            "pshufw $0x44, 52(%0,%2), %%mm1 \n" /* { ref1[b], ref1[b] } */ \
+                            "pshufw $0x4E, %%mm2, %%mm3 \n" \
+                            "psubb %%mm2, %%mm0 \n" /* { ref0[b]!=ref0[bn], ref0[b]!=ref1[bn] } */ \
+                            "psubb %%mm3, %%mm1 \n" /* { ref1[b]!=ref1[bn], ref1[b]!=ref0[bn] } */ \
+                            \
+                            "por %%mm1, %%mm0 \n" \
+                            "movq %a5(%1,%2,4), %%mm1 \n" \
+                            "movq %a6(%1,%2,4), %%mm2 \n" \
+                            "movq %%mm1, %%mm3 \n" \
+                            "movq %%mm2, %%mm4 \n" \
+                            "psubw 48(%1,%2,4), %%mm1 \n" \
+                            "psubw 56(%1,%2,4), %%mm2 \n" \
+                            "psubw 208(%1,%2,4), %%mm3 \n" \
+                            "psubw 216(%1,%2,4), %%mm4 \n" \
+                            "packsswb %%mm2, %%mm1 \n" \
+                            "packsswb %%mm4, %%mm3 \n" \
+                            "paddb %%mm6, %%mm1 \n" \
+                            "paddb %%mm6, %%mm3 \n" \
+                            "psubusb %%mm5, %%mm1 \n" /* abs(mv[b] - mv[bn]) >= limit */ \
+                            "psubusb %%mm5, %%mm3 \n" \
+                            "packsswb %%mm3, %%mm1 \n" \
+                            \
+                            "por %%mm1, %%mm0 \n" \
+                            "movq %a7(%1,%2,4), %%mm1 \n" \
+                            "movq %a8(%1,%2,4), %%mm2 \n" \
+                            "movq %%mm1, %%mm3 \n" \
+                            "movq %%mm2, %%mm4 \n" \
+                            "psubw 48(%1,%2,4), %%mm1 \n" \
+                            "psubw 56(%1,%2,4), %%mm2 \n" \
+                            "psubw 208(%1,%2,4), %%mm3 \n" \
+                            "psubw 216(%1,%2,4), %%mm4 \n" \
+                            "packsswb %%mm2, %%mm1 \n" \
+                            "packsswb %%mm4, %%mm3 \n" \
+                            "paddb %%mm6, %%mm1 \n" \
+                            "paddb %%mm6, %%mm3 \n" \
+                            "psubusb %%mm5, %%mm1 \n" /* abs(mv[b] - mv[bn]) >= limit */ \
+                            "psubusb %%mm5, %%mm3 \n" \
+                            "packsswb %%mm3, %%mm1 \n" \
+                            \
+                            "pshufw $0x4E, %%mm1, %%mm1 \n" \
+                            "por %%mm1, %%mm0 \n" \
+                            "pshufw $0x4E, %%mm0, %%mm1 \n" \
+                            "pminub %%mm1, %%mm0 \n" \
+                            ::"r"(ref), \
+                              "r"(mv), \
+                              "r"(b_idx), \
+                              "i"(d_idx+12), \
+                              "i"(d_idx+52), \
+                              "i"(d_idx*4+48), \
+                              "i"(d_idx*4+56), \
+                              "i"(d_idx*4+208), \
+                              "i"(d_idx*4+216) \
+                    ); \
+                } else { \
+                    __asm__ volatile( \
+                            "movd 12(%0,%2), %%mm0 \n" \
+                            "psubb %a3(%0,%2), %%mm0 \n" /* ref[b] != ref[bn] */ \
+                            "movq 48(%1,%2,4), %%mm1 \n" \
+                            "movq 56(%1,%2,4), %%mm2 \n" \
+                            "psubw %a4(%1,%2,4), %%mm1 \n" \
+                            "psubw %a5(%1,%2,4), %%mm2 \n" \
+                            "packsswb %%mm2, %%mm1 \n" \
+                            "paddb %%mm6, %%mm1 \n" \
+                            "psubusb %%mm5, %%mm1 \n" /* abs(mv[b] - mv[bn]) >= limit */ \
+                            "packsswb %%mm1, %%mm1 \n" \
+                            "por %%mm1, %%mm0 \n" \
+                            ::"r"(ref), \
+                              "r"(mv), \
+                              "r"(b_idx), \
+                              "i"(d_idx+12), \
+                              "i"(d_idx*4+48), \
+                              "i"(d_idx*4+56) \
+                    ); \
+                } \
+            } \
+            __asm__ volatile( \
+                    "movd 12(%0,%1), %%mm1 \n" \
+                    "por %a2(%0,%1), %%mm1 \n" /* nnz[b] || nnz[bn] */ \
+                    ::"r"(nnz), \
+                      "r"(b_idx), \
+                      "i"(d_idx+12) \
+            ); \
+            __asm__ volatile( \
+                    "pminub %%mm7, %%mm1 \n" \
+                    "pminub %%mm7, %%mm0 \n" \
+                    "psllw $1, %%mm1 \n" \
+                    "pxor %%mm2, %%mm2 \n" \
+                    "pmaxub %%mm0, %%mm1 \n" \
+                    "punpcklbw %%mm2, %%mm1 \n" \
+                    "movq %%mm1, %a1(%0,%2) \n" \
+                    ::"r"(bS), \
+                      "i"(32*dir), \
+                      "r"(b_idx) \
+                    :"memory" \
+            ); \
+        } \
+    } while (0)
 
 static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40], int8_t ref[2][40], int16_t mv[2][40][2],
                                             int bidir, int edges, int step, int mask_mv0, int mask_mv1, int field ) {
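
For illustration of how the macro is invoked after this change, a hedged sketch of a call site (the argument values below are illustrative; the real call sites sit outside this hunk): every argument feeding an "i" operand, notably d_idx, must be a literal constant, so that after preprocessing an expression such as d_idx*4+48 is a plain integer constant expression regardless of the compiler's inlining decisions.

/* Illustrative call, not part of this hunk: the literal 1, -8, 0 arguments
 * are substituted textually, so "i"(d_idx*4+48) becomes "i"(-8*4+48). */
h264_loop_filter_strength_iteration_mmx2(bS, nnz, ref, mv, bidir, edges,
                                         step, mask_mv1, 1, -8, 0);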