Commit 348493db, authored by Daniel Kang, committed by Ronald S. Bultje

Update 8-bit H.264 IDCT function names to reflect bit-depth.

Signed-off-by: Ronald S. Bultje <rbultje@google.com>
parent 836f47d3
...@@ -66,7 +66,6 @@ typedef struct H264DSPContext{ ...@@ -66,7 +66,6 @@ typedef struct H264DSPContext{
void (*h264_idct_dc_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride); void (*h264_idct_dc_add)(uint8_t *dst/*align 4*/, DCTELEM *block/*align 16*/, int stride);
void (*h264_idct8_dc_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride); void (*h264_idct8_dc_add)(uint8_t *dst/*align 8*/, DCTELEM *block/*align 16*/, int stride);
void (*h264_dct)(DCTELEM block[4][4]);
void (*h264_idct_add16)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]); void (*h264_idct_add16)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
void (*h264_idct8_add4)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]); void (*h264_idct8_add4)(uint8_t *dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]); void (*h264_idct_add8)(uint8_t **dst/*align 16*/, const int *blockoffset, DCTELEM *block/*align 16*/, int stride, const uint8_t nnzc[6*8]);
......
...@@ -73,7 +73,7 @@ SECTION .text ...@@ -73,7 +73,7 @@ SECTION .text
INIT_MMX INIT_MMX
; ff_h264_idct_add_mmx(uint8_t *dst, int16_t *block, int stride) ; ff_h264_idct_add_mmx(uint8_t *dst, int16_t *block, int stride)
cglobal h264_idct_add_mmx, 3, 3, 0 cglobal h264_idct_add_8_mmx, 3, 3, 0
IDCT4_ADD r0, r1, r2 IDCT4_ADD r0, r1, r2
RET RET
...@@ -125,7 +125,7 @@ cglobal h264_idct_add_mmx, 3, 3, 0 ...@@ -125,7 +125,7 @@ cglobal h264_idct_add_mmx, 3, 3, 0
SUMSUB_BA w, 0, 4 SUMSUB_BA w, 0, 4
SUMSUB_BA w, 3, 2 SUMSUB_BA w, 3, 2
SUMSUB_BA w, 1, 5 SUMSUB_BA w, 1, 5
SWAP 7, 6, 4, 5, 2, 3, 1, 0 ; 70315246 -> 01234567 SWAP 7, 6, 4, 5, 2, 3, 1, 0 ; 70315246 -> 01234567
%endmacro %endmacro
%macro IDCT8_1D_FULL 1 %macro IDCT8_1D_FULL 1
...@@ -177,7 +177,7 @@ cglobal h264_idct_add_mmx, 3, 3, 0 ...@@ -177,7 +177,7 @@ cglobal h264_idct_add_mmx, 3, 3, 0
INIT_MMX INIT_MMX
; ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride) ; ff_h264_idct8_add_mmx(uint8_t *dst, int16_t *block, int stride)
cglobal h264_idct8_add_mmx, 3, 4, 0 cglobal h264_idct8_add_8_mmx, 3, 4, 0
%assign pad 128+4-(stack_offset&7) %assign pad 128+4-(stack_offset&7)
SUB rsp, pad SUB rsp, pad
...@@ -237,7 +237,7 @@ cglobal h264_idct8_add_mmx, 3, 4, 0 ...@@ -237,7 +237,7 @@ cglobal h264_idct8_add_mmx, 3, 4, 0
INIT_XMM INIT_XMM
; ff_h264_idct8_add_sse2(uint8_t *dst, int16_t *block, int stride) ; ff_h264_idct8_add_sse2(uint8_t *dst, int16_t *block, int stride)
cglobal h264_idct8_add_sse2, 3, 4, 10 cglobal h264_idct8_add_8_sse2, 3, 4, 10
IDCT8_ADD_SSE r0, r1, r2, r3 IDCT8_ADD_SSE r0, r1, r2, r3
RET RET
...@@ -261,7 +261,7 @@ cglobal h264_idct8_add_sse2, 3, 4, 10 ...@@ -261,7 +261,7 @@ cglobal h264_idct8_add_sse2, 3, 4, 10
packuswb m1, m1 packuswb m1, m1
%endmacro %endmacro
%macro DC_ADD_MMX2_OP 3-4 %macro DC_ADD_MMX2_OP 4
%1 m2, [%2 ] %1 m2, [%2 ]
%1 m3, [%2+%3 ] %1 m3, [%2+%3 ]
%1 m4, [%2+%3*2] %1 m4, [%2+%3*2]
...@@ -282,13 +282,13 @@ cglobal h264_idct8_add_sse2, 3, 4, 10 ...@@ -282,13 +282,13 @@ cglobal h264_idct8_add_sse2, 3, 4, 10
INIT_MMX INIT_MMX
; ff_h264_idct_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride) ; ff_h264_idct_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride)
cglobal h264_idct_dc_add_mmx2, 3, 3, 0 cglobal h264_idct_dc_add_8_mmx2, 3, 3, 0
DC_ADD_MMX2_INIT r1, r2 DC_ADD_MMX2_INIT r1, r2
DC_ADD_MMX2_OP movh, r0, r2, r1 DC_ADD_MMX2_OP movh, r0, r2, r1
RET RET
; ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride) ; ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride)
cglobal h264_idct8_dc_add_mmx2, 3, 3, 0 cglobal h264_idct8_dc_add_8_mmx2, 3, 3, 0
DC_ADD_MMX2_INIT r1, r2 DC_ADD_MMX2_INIT r1, r2
DC_ADD_MMX2_OP mova, r0, r2, r1 DC_ADD_MMX2_OP mova, r0, r2, r1
lea r0, [r0+r2*4] lea r0, [r0+r2*4]
...@@ -297,7 +297,7 @@ cglobal h264_idct8_dc_add_mmx2, 3, 3, 0 ...@@ -297,7 +297,7 @@ cglobal h264_idct8_dc_add_mmx2, 3, 3, 0
; ff_h264_idct_add16_mmx(uint8_t *dst, const int *block_offset, ; ff_h264_idct_add16_mmx(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8]) ; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal h264_idct_add16_mmx, 5, 7, 0 cglobal h264_idct_add16_8_mmx, 5, 7, 0
xor r5, r5 xor r5, r5
%ifdef PIC %ifdef PIC
lea r11, [scan8_mem] lea r11, [scan8_mem]
...@@ -319,7 +319,7 @@ cglobal h264_idct_add16_mmx, 5, 7, 0 ...@@ -319,7 +319,7 @@ cglobal h264_idct_add16_mmx, 5, 7, 0
; ff_h264_idct8_add4_mmx(uint8_t *dst, const int *block_offset, ; ff_h264_idct8_add4_mmx(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8]) ; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal h264_idct8_add4_mmx, 5, 7, 0 cglobal h264_idct8_add4_8_mmx, 5, 7, 0
%assign pad 128+4-(stack_offset&7) %assign pad 128+4-(stack_offset&7)
SUB rsp, pad SUB rsp, pad
...@@ -351,7 +351,7 @@ cglobal h264_idct8_add4_mmx, 5, 7, 0 ...@@ -351,7 +351,7 @@ cglobal h264_idct8_add4_mmx, 5, 7, 0
; ff_h264_idct_add16_mmx2(uint8_t *dst, const int *block_offset, ; ff_h264_idct_add16_mmx2(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8]) ; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal h264_idct_add16_mmx2, 5, 7, 0 cglobal h264_idct_add16_8_mmx2, 5, 7, 0
xor r5, r5 xor r5, r5
%ifdef PIC %ifdef PIC
lea r11, [scan8_mem] lea r11, [scan8_mem]
...@@ -398,7 +398,7 @@ cglobal h264_idct_add16_mmx2, 5, 7, 0 ...@@ -398,7 +398,7 @@ cglobal h264_idct_add16_mmx2, 5, 7, 0
; ff_h264_idct_add16intra_mmx(uint8_t *dst, const int *block_offset, ; ff_h264_idct_add16intra_mmx(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8]) ; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal h264_idct_add16intra_mmx, 5, 7, 0 cglobal h264_idct_add16intra_8_mmx, 5, 7, 0
xor r5, r5 xor r5, r5
%ifdef PIC %ifdef PIC
lea r11, [scan8_mem] lea r11, [scan8_mem]
...@@ -421,7 +421,7 @@ cglobal h264_idct_add16intra_mmx, 5, 7, 0 ...@@ -421,7 +421,7 @@ cglobal h264_idct_add16intra_mmx, 5, 7, 0
; ff_h264_idct_add16intra_mmx2(uint8_t *dst, const int *block_offset, ; ff_h264_idct_add16intra_mmx2(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8]) ; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal h264_idct_add16intra_mmx2, 5, 7, 0 cglobal h264_idct_add16intra_8_mmx2, 5, 7, 0
xor r5, r5 xor r5, r5
%ifdef PIC %ifdef PIC
lea r11, [scan8_mem] lea r11, [scan8_mem]
...@@ -466,7 +466,7 @@ cglobal h264_idct_add16intra_mmx2, 5, 7, 0 ...@@ -466,7 +466,7 @@ cglobal h264_idct_add16intra_mmx2, 5, 7, 0
; ff_h264_idct8_add4_mmx2(uint8_t *dst, const int *block_offset, ; ff_h264_idct8_add4_mmx2(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8]) ; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal h264_idct8_add4_mmx2, 5, 7, 0 cglobal h264_idct8_add4_8_mmx2, 5, 7, 0
%assign pad 128+4-(stack_offset&7) %assign pad 128+4-(stack_offset&7)
SUB rsp, pad SUB rsp, pad
...@@ -529,7 +529,7 @@ cglobal h264_idct8_add4_mmx2, 5, 7, 0 ...@@ -529,7 +529,7 @@ cglobal h264_idct8_add4_mmx2, 5, 7, 0
INIT_XMM INIT_XMM
; ff_h264_idct8_add4_sse2(uint8_t *dst, const int *block_offset, ; ff_h264_idct8_add4_sse2(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8]) ; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal h264_idct8_add4_sse2, 5, 7, 10 cglobal h264_idct8_add4_8_sse2, 5, 7, 10
xor r5, r5 xor r5, r5
%ifdef PIC %ifdef PIC
lea r11, [scan8_mem] lea r11, [scan8_mem]
...@@ -607,7 +607,7 @@ h264_idct_add8_mmx_plane: ...@@ -607,7 +607,7 @@ h264_idct_add8_mmx_plane:
; ff_h264_idct_add8_mmx(uint8_t **dest, const int *block_offset, ; ff_h264_idct_add8_mmx(uint8_t **dest, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8]) ; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal h264_idct_add8_mmx, 5, 7, 0 cglobal h264_idct_add8_8_mmx, 5, 7, 0
mov r5, 16 mov r5, 16
add r2, 512 add r2, 512
%ifdef PIC %ifdef PIC
...@@ -668,7 +668,7 @@ h264_idct_add8_mmx2_plane ...@@ -668,7 +668,7 @@ h264_idct_add8_mmx2_plane
; ff_h264_idct_add8_mmx2(uint8_t **dest, const int *block_offset, ; ff_h264_idct_add8_mmx2(uint8_t **dest, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8]) ; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal h264_idct_add8_mmx2, 5, 7, 0 cglobal h264_idct_add8_8_mmx2, 5, 7, 0
mov r5, 16 mov r5, 16
add r2, 512 add r2, 512
%ifdef ARCH_X86_64 %ifdef ARCH_X86_64
...@@ -744,7 +744,7 @@ x264_add8x4_idct_sse2: ...@@ -744,7 +744,7 @@ x264_add8x4_idct_sse2:
; ff_h264_idct_add16_sse2(uint8_t *dst, const int *block_offset, ; ff_h264_idct_add16_sse2(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8]) ; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal h264_idct_add16_sse2, 5, 5, 8 cglobal h264_idct_add16_8_sse2, 5, 5, 8
%ifdef ARCH_X86_64 %ifdef ARCH_X86_64
mov r10, r0 mov r10, r0
%endif %endif
...@@ -791,7 +791,7 @@ cglobal h264_idct_add16_sse2, 5, 5, 8 ...@@ -791,7 +791,7 @@ cglobal h264_idct_add16_sse2, 5, 5, 8
; ff_h264_idct_add16intra_sse2(uint8_t *dst, const int *block_offset, ; ff_h264_idct_add16intra_sse2(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8]) ; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal h264_idct_add16intra_sse2, 5, 7, 8 cglobal h264_idct_add16intra_8_sse2, 5, 7, 8
%ifdef ARCH_X86_64 %ifdef ARCH_X86_64
mov r10, r0 mov r10, r0
%endif %endif
...@@ -840,7 +840,7 @@ cglobal h264_idct_add16intra_sse2, 5, 7, 8 ...@@ -840,7 +840,7 @@ cglobal h264_idct_add16intra_sse2, 5, 7, 8
; ff_h264_idct_add8_sse2(uint8_t **dest, const int *block_offset, ; ff_h264_idct_add8_sse2(uint8_t **dest, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8]) ; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal h264_idct_add8_sse2, 5, 7, 8 cglobal h264_idct_add8_8_sse2, 5, 7, 8
add r2, 512 add r2, 512
%ifdef ARCH_X86_64 %ifdef ARCH_X86_64
mov r10, r0 mov r10, r0
......
...@@ -30,9 +30,14 @@ DECLARE_ALIGNED(8, static const uint64_t, ff_pb_3_1 ) = 0x0103010301030103ULL; ...@@ -30,9 +30,14 @@ DECLARE_ALIGNED(8, static const uint64_t, ff_pb_3_1 ) = 0x0103010301030103ULL;
#define IDCT_ADD_FUNC(NUM, DEPTH, OPT) \ #define IDCT_ADD_FUNC(NUM, DEPTH, OPT) \
void ff_h264_idct ## NUM ## _add_ ## DEPTH ## _ ## OPT (uint8_t *dst, int16_t *block, int stride); void ff_h264_idct ## NUM ## _add_ ## DEPTH ## _ ## OPT (uint8_t *dst, int16_t *block, int stride);
IDCT_ADD_FUNC(, 8, mmx)
IDCT_ADD_FUNC(, 10, sse2) IDCT_ADD_FUNC(, 10, sse2)
IDCT_ADD_FUNC(_dc, 8, mmx2)
IDCT_ADD_FUNC(_dc, 10, mmx2) IDCT_ADD_FUNC(_dc, 10, mmx2)
IDCT_ADD_FUNC(8_dc, 8, mmx2)
IDCT_ADD_FUNC(8_dc, 10, sse2) IDCT_ADD_FUNC(8_dc, 10, sse2)
IDCT_ADD_FUNC(8, 8, mmx)
IDCT_ADD_FUNC(8, 8, sse2)
IDCT_ADD_FUNC(8, 10, sse2) IDCT_ADD_FUNC(8, 10, sse2)
#if HAVE_AVX #if HAVE_AVX
IDCT_ADD_FUNC(, 10, avx) IDCT_ADD_FUNC(, 10, avx)
...@@ -46,9 +51,18 @@ void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT \ ...@@ -46,9 +51,18 @@ void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT \
(uint8_t *dst, const int *block_offset, \ (uint8_t *dst, const int *block_offset, \
DCTELEM *block, int stride, const uint8_t nnzc[6*8]); DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
IDCT_ADD_REP_FUNC(8, 4, 8, mmx)
IDCT_ADD_REP_FUNC(8, 4, 8, mmx2)
IDCT_ADD_REP_FUNC(8, 4, 8, sse2)
IDCT_ADD_REP_FUNC(8, 4, 10, sse2) IDCT_ADD_REP_FUNC(8, 4, 10, sse2)
IDCT_ADD_REP_FUNC(8, 4, 10, avx) IDCT_ADD_REP_FUNC(8, 4, 10, avx)
IDCT_ADD_REP_FUNC(, 16, 8, mmx)
IDCT_ADD_REP_FUNC(, 16, 8, mmx2)
IDCT_ADD_REP_FUNC(, 16, 8, sse2)
IDCT_ADD_REP_FUNC(, 16, 10, sse2) IDCT_ADD_REP_FUNC(, 16, 10, sse2)
IDCT_ADD_REP_FUNC(, 16intra, 8, mmx)
IDCT_ADD_REP_FUNC(, 16intra, 8, mmx2)
IDCT_ADD_REP_FUNC(, 16intra, 8, sse2)
IDCT_ADD_REP_FUNC(, 16intra, 10, sse2) IDCT_ADD_REP_FUNC(, 16intra, 10, sse2)
#if HAVE_AVX #if HAVE_AVX
IDCT_ADD_REP_FUNC(, 16, 10, avx) IDCT_ADD_REP_FUNC(, 16, 10, avx)
...@@ -60,42 +74,14 @@ IDCT_ADD_REP_FUNC(, 16intra, 10, avx) ...@@ -60,42 +74,14 @@ IDCT_ADD_REP_FUNC(, 16intra, 10, avx)
void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT \ void ff_h264_idct ## NUM ## _add ## REP ## _ ## DEPTH ## _ ## OPT \
(uint8_t **dst, const int *block_offset, \ (uint8_t **dst, const int *block_offset, \
DCTELEM *block, int stride, const uint8_t nnzc[6*8]); DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
IDCT_ADD_REP_FUNC2(, 8, 8, mmx)
IDCT_ADD_REP_FUNC2(, 8, 8, mmx2)
IDCT_ADD_REP_FUNC2(, 8, 8, sse2)
IDCT_ADD_REP_FUNC2(, 8, 10, sse2) IDCT_ADD_REP_FUNC2(, 8, 10, sse2)
#if HAVE_AVX #if HAVE_AVX
IDCT_ADD_REP_FUNC2(, 8, 10, avx) IDCT_ADD_REP_FUNC2(, 8, 10, avx)
#endif #endif
void ff_h264_idct_add_mmx (uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct8_add_mmx (uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct8_add_sse2 (uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct_dc_add_mmx2 (uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct8_dc_add_mmx2(uint8_t *dst, int16_t *block, int stride);
void ff_h264_idct_add16_mmx (uint8_t *dst, const int *block_offset,
DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct8_add4_mmx (uint8_t *dst, const int *block_offset,
DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct_add16_mmx2 (uint8_t *dst, const int *block_offset,
DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct_add16intra_mmx (uint8_t *dst, const int *block_offset,
DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct_add16intra_mmx2(uint8_t *dst, const int *block_offset,
DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct8_add4_mmx2 (uint8_t *dst, const int *block_offset,
DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct8_add4_sse2 (uint8_t *dst, const int *block_offset,
DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct_add8_mmx (uint8_t **dest, const int *block_offset,
DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct_add8_mmx2 (uint8_t **dest, const int *block_offset,
DCTELEM *block, int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct_add16_sse2 (uint8_t *dst, const int *block_offset, DCTELEM *block,
int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct_add16intra_sse2(uint8_t *dst, const int *block_offset, DCTELEM *block,
int stride, const uint8_t nnzc[6*8]);
void ff_h264_idct_add8_sse2 (uint8_t **dest, const int *block_offset, DCTELEM *block,
int stride, const uint8_t nnzc[6*8]);
void ff_h264_luma_dc_dequant_idct_mmx (DCTELEM *output, DCTELEM *input, int qmul); void ff_h264_luma_dc_dequant_idct_mmx (DCTELEM *output, DCTELEM *input, int qmul);
void ff_h264_luma_dc_dequant_idct_sse2(DCTELEM *output, DCTELEM *input, int qmul); void ff_h264_luma_dc_dequant_idct_sse2(DCTELEM *output, DCTELEM *input, int qmul);
...@@ -350,24 +336,24 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) ...@@ -350,24 +336,24 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
} }
#if HAVE_YASM #if HAVE_YASM
if (mm_flags & AV_CPU_FLAG_MMX) { if (mm_flags & AV_CPU_FLAG_MMX) {
c->h264_idct_dc_add= c->h264_idct_dc_add =
c->h264_idct_add= ff_h264_idct_add_mmx; c->h264_idct_add = ff_h264_idct_add_8_mmx;
c->h264_idct8_dc_add= c->h264_idct8_dc_add =
c->h264_idct8_add= ff_h264_idct8_add_mmx; c->h264_idct8_add = ff_h264_idct8_add_8_mmx;
c->h264_idct_add16 = ff_h264_idct_add16_mmx; c->h264_idct_add16 = ff_h264_idct_add16_8_mmx;
c->h264_idct8_add4 = ff_h264_idct8_add4_mmx; c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmx;
c->h264_idct_add8 = ff_h264_idct_add8_mmx; c->h264_idct_add8 = ff_h264_idct_add8_8_mmx;
c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx; c->h264_idct_add16intra = ff_h264_idct_add16intra_8_mmx;
c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_mmx; c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_mmx;
if (mm_flags & AV_CPU_FLAG_MMX2) { if (mm_flags & AV_CPU_FLAG_MMX2) {
c->h264_idct_dc_add= ff_h264_idct_dc_add_mmx2; c->h264_idct_dc_add = ff_h264_idct_dc_add_8_mmx2;
c->h264_idct8_dc_add= ff_h264_idct8_dc_add_mmx2; c->h264_idct8_dc_add = ff_h264_idct8_dc_add_8_mmx2;
c->h264_idct_add16 = ff_h264_idct_add16_mmx2; c->h264_idct_add16 = ff_h264_idct_add16_8_mmx2;
c->h264_idct8_add4 = ff_h264_idct8_add4_mmx2; c->h264_idct8_add4 = ff_h264_idct8_add4_8_mmx2;
c->h264_idct_add8 = ff_h264_idct_add8_mmx2; c->h264_idct_add8 = ff_h264_idct_add8_8_mmx2;
c->h264_idct_add16intra= ff_h264_idct_add16intra_mmx2; c->h264_idct_add16intra= ff_h264_idct_add16intra_8_mmx2;
c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_8_mmxext; c->h264_v_loop_filter_chroma= ff_deblock_v_chroma_8_mmxext;
c->h264_h_loop_filter_chroma= ff_deblock_h_chroma_8_mmxext; c->h264_h_loop_filter_chroma= ff_deblock_h_chroma_8_mmxext;
...@@ -398,8 +384,12 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) ...@@ -398,8 +384,12 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2; c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2;
if (mm_flags&AV_CPU_FLAG_SSE2) { if (mm_flags&AV_CPU_FLAG_SSE2) {
c->h264_idct8_add = ff_h264_idct8_add_sse2; c->h264_idct8_add = ff_h264_idct8_add_8_sse2;
c->h264_idct8_add4= ff_h264_idct8_add4_sse2;
c->h264_idct_add16 = ff_h264_idct_add16_8_sse2;
c->h264_idct8_add4 = ff_h264_idct8_add4_8_sse2;
c->h264_idct_add8 = ff_h264_idct_add8_8_sse2;
c->h264_idct_add16intra = ff_h264_idct_add16intra_8_sse2;
c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_sse2; c->h264_luma_dc_dequant_idct= ff_h264_luma_dc_dequant_idct_sse2;
c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_sse2; c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_sse2;
...@@ -420,10 +410,6 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth) ...@@ -420,10 +410,6 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_sse2; c->h264_v_loop_filter_luma_intra = ff_deblock_v_luma_intra_8_sse2;
c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2; c->h264_h_loop_filter_luma_intra = ff_deblock_h_luma_intra_8_sse2;
#endif #endif
c->h264_idct_add16 = ff_h264_idct_add16_sse2;
c->h264_idct_add8 = ff_h264_idct_add8_sse2;
c->h264_idct_add16intra = ff_h264_idct_add16intra_sse2;
} }
if (mm_flags&AV_CPU_FLAG_SSSE3) { if (mm_flags&AV_CPU_FLAG_SSSE3) {
c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_ssse3; c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_ssse3;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment