Commit 4104eb44 authored by Michael Niedermayer

Merge commit '55519926'

* commit '55519926':
  x86: Make function prototype comments in assembly code consistent

Conflicts:
	libavcodec/x86/sbrdsp.asm
Merged-by: Michael Niedermayer <michaelni@gmx.at>
parents a9b1936a 55519926
@@ -36,7 +36,7 @@ pb_bswap32: db 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
 SECTION_TEXT
 %macro SCALARPRODUCT 0
-; int scalarproduct_int16(int16_t *v1, int16_t *v2, int order)
+; int ff_scalarproduct_int16(int16_t *v1, int16_t *v2, int order)
 cglobal scalarproduct_int16, 3,3,3, v1, v2, order
     shl orderq, 1
     add v1q, orderq
@@ -66,7 +66,8 @@ cglobal scalarproduct_int16, 3,3,3, v1, v2, order
 %endif
     RET
-; int scalarproduct_and_madd_int16(int16_t *v1, int16_t *v2, int16_t *v3, int order, int mul)
+; int ff_scalarproduct_and_madd_int16(int16_t *v1, int16_t *v2, int16_t *v3,
+;                                     int order, int mul)
 cglobal scalarproduct_and_madd_int16, 4,4,8, v1, v2, v3, order, mul
     shl orderq, 1
     movd m7, mulm
@@ -162,7 +163,8 @@ align 16
 %endif
 %endmacro
-; int scalarproduct_and_madd_int16(int16_t *v1, int16_t *v2, int16_t *v3, int order, int mul)
+; int ff_scalarproduct_and_madd_int16(int16_t *v1, int16_t *v2, int16_t *v3,
+;                                     int order, int mul)
 INIT_XMM ssse3
 cglobal scalarproduct_and_madd_int16, 4,5,10, v1, v2, v3, order, mul
     shl orderq, 1
@@ -336,7 +338,9 @@ INIT_XMM ssse3, atom
 APPLY_WINDOW_INT16 1
-; void add_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top)
+; void ff_add_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *top,
+;                                           const uint8_t *diff, int w,
+;                                           int *left, int *left_top)
 INIT_MMX mmxext
 cglobal add_hfyu_median_prediction, 6,6,0, dst, top, diff, w, left, left_top
     movq mm0, [topq]
@@ -439,7 +443,8 @@ cglobal add_hfyu_median_prediction, 6,6,0, dst, top, diff, w, left, left_top
     RET
 %endmacro
-; int add_hfyu_left_prediction(uint8_t *dst, const uint8_t *src, int w, int left)
+; int ff_add_hfyu_left_prediction(uint8_t *dst, const uint8_t *src,
+;                                 int w, int left)
 INIT_MMX ssse3
 cglobal add_hfyu_left_prediction, 3,3,7, dst, src, w, left
 .skip_prologue:
@@ -601,7 +606,7 @@ VECTOR_CLIP_INT32 6, 1, 0, 0
     add r0, 16
 %endmacro
-; void bswap_buf(uint32_t *dst, const uint32_t *src, int w);
+; void ff_bswap_buf(uint32_t *dst, const uint32_t *src, int w);
 %macro BSWAP32_BUF 0
 %if cpuflag(ssse3)
 cglobal bswap32_buf, 3,4,3
...
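As a cross-reference for the prototypes above, here is a scalar C sketch (an illustration, not code from this commit) of what the two scalarproduct routines compute: a plain int16 dot product, and a dot product fused with a multiply-accumulate back into v1.

    #include <stdint.h>

    /* Scalar model of ff_scalarproduct_int16(): dot product of two
     * int16 vectors of length "order". */
    static int32_t scalarproduct_int16(const int16_t *v1, const int16_t *v2,
                                       int order)
    {
        int32_t res = 0;
        while (order--)
            res += *v1++ * *v2++;
        return res;
    }

    /* Scalar model of ff_scalarproduct_and_madd_int16(): returns
     * dot(v1, v2) and updates v1[i] += mul * v3[i] in the same pass. */
    static int32_t scalarproduct_and_madd_int16(int16_t *v1, const int16_t *v2,
                                                const int16_t *v3, int order,
                                                int mul)
    {
        int32_t res = 0;
        for (int i = 0; i < order; i++) {
            res   += v1[i] * v2[i];
            v1[i] += mul * v3[i];
        }
        return res;
    }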
@@ -209,8 +209,8 @@ hadamard8x8_diff %+ SUFFIX:
     hadamard8_16_wrapper %1, 3
 %elif cpuflag(mmx)
 ALIGN 16
-; int hadamard8_diff_##cpu(void *s, uint8_t *src1, uint8_t *src2,
-;                          int stride, int h)
+; int ff_hadamard8_diff_ ## cpu(void *s, uint8_t *src1, uint8_t *src2,
+;                               int stride, int h)
 ; r0 = void *s = unused, int h = unused (always 8)
 ; note how r1, r2 and r3 are not clobbered in this function, so 16x16
 ; can simply call this 2x2x (and that's why we access rsp+gprsize
@@ -275,7 +275,7 @@ INIT_XMM ssse3
 HADAMARD8_DIFF 9
 INIT_XMM sse2
-; sse16_sse2(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
+; int ff_sse16_sse2(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h);
 cglobal sse16, 5, 5, 8
     shr r4d, 1
     pxor m0, m0 ; mm0 = 0
@@ -335,7 +335,7 @@ cglobal sse16, 5, 5, 8
     RET
 INIT_MMX mmx
-; get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size)
+; void ff_get_pixels_mmx(int16_t *block, const uint8_t *pixels, int line_size)
 cglobal get_pixels, 3,4
     movsxdifnidn r2, r2d
     add r0, 128
@@ -392,7 +392,8 @@ cglobal get_pixels, 3, 4
     RET
 INIT_MMX mmx
-; diff_pixels_mmx(int16_t *block, const uint8_t *s1, const unint8_t *s2, stride)
+; void ff_diff_pixels_mmx(int16_t *block, const uint8_t *s1, const uint8_t *s2,
+;                         int stride);
 cglobal diff_pixels, 4,5
     movsxdifnidn r3, r3d
     pxor m7, m7
@@ -418,7 +419,7 @@ cglobal diff_pixels, 4,5
     REP_RET
 INIT_MMX mmx
-; pix_sum16_mmx(uint8_t * pix, int line_size)
+; int ff_pix_sum16_mmx(uint8_t *pix, int line_size)
 cglobal pix_sum16, 2, 3
     movsxdifnidn r1, r1d
     mov r2, r1
@@ -453,7 +454,7 @@ cglobal pix_sum16, 2, 3
     RET
 INIT_MMX mmx
-; pix_norm1_mmx(uint8_t *pix, int line_size)
+; int ff_pix_norm1_mmx(uint8_t *pix, int line_size)
 cglobal pix_norm1, 2, 4
     movsxdifnidn r1, r1d
     mov r2, 16
...
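Two of the routines above have compact scalar equivalents that are worth keeping in mind when reading the SIMD; a C sketch (mine, not from the commit):

    #include <stdint.h>

    /* Scalar model of ff_pix_sum16_mmx(): sum of a 16x16 pixel block. */
    static int pix_sum16(const uint8_t *pix, int line_size)
    {
        int sum = 0;
        for (int y = 0; y < 16; y++, pix += line_size)
            for (int x = 0; x < 16; x++)
                sum += pix[x];
        return sum;
    }

    /* Scalar model of ff_sse16_sse2(): sum of squared errors between two
     * 16-pixel-wide blocks over h rows; the void *v context is unused. */
    static int sse16(void *v, const uint8_t *pix1, const uint8_t *pix2,
                     int line_size, int h)
    {
        int sum = 0;
        for (int y = 0; y < h; y++, pix1 += line_size, pix2 += line_size)
            for (int x = 0; x < 16; x++) {
                int d = pix1[x] - pix2[x];
                sum += d * d;
            }
        return sum;
    }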
@@ -31,9 +31,10 @@ SECTION_TEXT
 %endif
 %endmacro
-;---------------------------------------------------------------------------------
-; void int32_to_float_fmul_scalar(float *dst, const int32_t *src, float mul, int len);
-;---------------------------------------------------------------------------------
+;------------------------------------------------------------------------------
+; void ff_int32_to_float_fmul_scalar(float *dst, const int32_t *src, float mul,
+;                                    int len);
+;------------------------------------------------------------------------------
 %macro INT32_TO_FLOAT_FMUL_SCALAR 1
 %if UNIX64
 cglobal int32_to_float_fmul_scalar, 3, 3, %1, dst, src, len
@@ -243,8 +244,10 @@ FLOAT_TO_INT16_INTERLEAVE2
 INIT_XMM sse2
 FLOAT_TO_INT16_INTERLEAVE2
+;-----------------------------------------------------------------------------
+; void ff_float_to_int16_interleave6(int16_t *dst, const float **src, int len)
+;-----------------------------------------------------------------------------
 %macro FLOAT_TO_INT16_INTERLEAVE6 0
-; void float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len)
 cglobal float_to_int16_interleave6, 2, 8, 0, dst, src, src1, src2, src3, src4, src5, len
 %if ARCH_X86_64
     mov lend, r2d
...
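The renamed int32_to_float_fmul_scalar prototype describes a simple per-sample operation; in scalar C (a sketch for orientation, not part of the commit) it is just:

    #include <stdint.h>

    /* Scalar model of ff_int32_to_float_fmul_scalar(): convert fixed-point
     * int32 samples to float while applying a single constant gain. */
    static void int32_to_float_fmul_scalar(float *dst, const int32_t *src,
                                           float mul, int len)
    {
        for (int i = 0; i < len; i++)
            dst[i] = src[i] * mul;
    }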
@@ -26,7 +26,8 @@
 SECTION .text
 INIT_MMX mmxext
-; void pixels(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+; void ff_put/avg_pixels(uint8_t *block, const uint8_t *pixels,
+;                        ptrdiff_t line_size, int h)
 %macro PIXELS48 2
 %if %2 == 4
 %define OP movh
@@ -65,7 +66,8 @@ PIXELS48 avg, 8
 INIT_XMM sse2
-; void put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+; void ff_put_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
+;                           ptrdiff_t line_size, int h)
 cglobal put_pixels16, 4,5,4
     lea r4, [r2*3]
 .loop:
@@ -83,7 +85,8 @@ cglobal put_pixels16, 4,5,4
     jnz .loop
     REP_RET
-; void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+; void ff_avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels,
+;                           ptrdiff_t line_size, int h)
 cglobal avg_pixels16, 4,5,4
     lea r4, [r2*3]
 .loop:
...
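The put/avg pair above is the basic full-pel motion-compensation copy; a scalar C sketch of both (illustrative only):

    #include <stddef.h>
    #include <stdint.h>

    /* Scalar model of ff_put_pixels16_sse2(): copy a 16-byte-wide block. */
    static void put_pixels16(uint8_t *block, const uint8_t *pixels,
                             ptrdiff_t line_size, int h)
    {
        for (int i = 0; i < h; i++, block += line_size, pixels += line_size)
            for (int j = 0; j < 16; j++)
                block[j] = pixels[j];
    }

    /* Scalar model of ff_avg_pixels16_sse2(): rounded average of the
     * existing destination with the source. */
    static void avg_pixels16(uint8_t *block, const uint8_t *pixels,
                             ptrdiff_t line_size, int h)
    {
        for (int i = 0; i < h; i++, block += line_size, pixels += line_size)
            for (int j = 0; j < 16; j++)
                block[j] = (block[j] + pixels[j] + 1) >> 1;
    }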
@@ -101,7 +101,7 @@ SECTION_TEXT
 %endmacro
 INIT_MMX mmx
-; void h263_v_loop_filter(uint8_t *src, int stride, int qscale)
+; void ff_h263_v_loop_filter_mmx(uint8_t *src, int stride, int qscale)
 cglobal h263_v_loop_filter, 3,5
     movsxdifnidn r1, r1d
     movsxdifnidn r2, r2d
@@ -142,7 +142,7 @@ cglobal h263_v_loop_filter, 3,5
 %endmacro
-; void h263_h_loop_filter(uint8_t *src, int stride, int qscale)
+; void ff_h263_h_loop_filter_mmx(uint8_t *src, int stride, int qscale)
 INIT_MMX mmx
 cglobal h263_h_loop_filter, 3,5,0,32
     movsxdifnidn r1, r1d
...
@@ -103,8 +103,9 @@ SECTION .text
 %else
 %define extra_regs 0
 %endif ; rv40
-; put/avg_h264_chroma_mc8_*(uint8_t *dst /*align 8*/, uint8_t *src /*align 1*/,
-;                           int stride, int h, int mx, int my)
+; void ff_put/avg_h264_chroma_mc8_*(uint8_t *dst /* align 8 */,
+;                                   uint8_t *src /* align 1 */,
+;                                   int stride, int h, int mx, int my)
 cglobal %1_%2_chroma_mc8%3, 6, 7 + extra_regs, 0
 %if ARCH_X86_64
     movsxd r2, r2d
...
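The mx/my arguments in the chroma prototypes select an eighth-pel position; the underlying operation is the standard H.264 bilinear chroma interpolation. A scalar sketch of the "put" case, assuming the usual spec weights (an illustration, not the commit's code):

    #include <stdint.h>

    /* Scalar model of a put_h264_chroma_mc8-style function: bilinear
     * interpolation with weights derived from mx, my in 0..7. */
    static void put_h264_chroma_mc8(uint8_t *dst /* align 8 */,
                                    const uint8_t *src /* align 1 */,
                                    int stride, int h, int mx, int my)
    {
        const int A = (8 - mx) * (8 - my);
        const int B =      mx  * (8 - my);
        const int C = (8 - mx) *      my;
        const int D =      mx  *      my;

        for (int i = 0; i < h; i++, dst += stride, src += stride)
            for (int j = 0; j < 8; j++)
                dst[j] = (A * src[j]          + B * src[j + 1] +
                          C * src[j + stride] + D * src[j + stride + 1] +
                          32) >> 6;
    }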
@@ -57,11 +57,10 @@ SECTION .text
 %endmacro
 ;-----------------------------------------------------------------------------
-; void put/avg_h264_chroma_mc8(pixel *dst, pixel *src, int stride, int h, int mx, int my)
+; void ff_put/avg_h264_chroma_mc8(pixel *dst, pixel *src, int stride, int h,
+;                                 int mx, int my)
 ;-----------------------------------------------------------------------------
 %macro CHROMA_MC8 1
-; put/avg_h264_chroma_mc8_*(uint8_t *dst /*align 8*/, uint8_t *src /*align 1*/,
-;                           int stride, int h, int mx, int my)
 cglobal %1_h264_chroma_mc8_10, 6,7,8
     movsxdifnidn r2, r2d
     mov r6d, r5d
@@ -150,7 +149,8 @@ cglobal %1_h264_chroma_mc8_10, 6,7,8
 %endmacro
 ;-----------------------------------------------------------------------------
-; void put/avg_h264_chroma_mc4(pixel *dst, pixel *src, int stride, int h, int mx, int my)
+; void ff_put/avg_h264_chroma_mc4(pixel *dst, pixel *src, int stride, int h,
+;                                 int mx, int my)
 ;-----------------------------------------------------------------------------
 ;TODO: xmm mc4
 %macro MC4_OP 2
@@ -200,7 +200,8 @@ cglobal %1_h264_chroma_mc4_10, 6,6,7
 %endmacro
 ;-----------------------------------------------------------------------------
-; void put/avg_h264_chroma_mc2(pixel *dst, pixel *src, int stride, int h, int mx, int my)
+; void ff_put/avg_h264_chroma_mc2(pixel *dst, pixel *src, int stride, int h,
+;                                 int mx, int my)
 ;-----------------------------------------------------------------------------
 %macro CHROMA_MC2 1
 cglobal %1_h264_chroma_mc2_10, 6,7
...
@@ -283,7 +283,8 @@ cextern pb_3
 %if ARCH_X86_64
 ;-----------------------------------------------------------------------------
-; void deblock_v_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
+; void ff_deblock_v_luma(uint8_t *pix, int stride, int alpha, int beta,
+;                        int8_t *tc0)
 ;-----------------------------------------------------------------------------
 %macro DEBLOCK_LUMA 0
 cglobal deblock_v_luma_8, 5,5,10
@@ -328,7 +329,8 @@ cglobal deblock_v_luma_8, 5,5,10
     RET
 ;-----------------------------------------------------------------------------
-; void deblock_h_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
+; void ff_deblock_h_luma(uint8_t *pix, int stride, int alpha, int beta,
+;                        int8_t *tc0)
 ;-----------------------------------------------------------------------------
 INIT_MMX cpuname
 cglobal deblock_h_luma_8, 5,9,0,0x60+16*WIN64
@@ -391,7 +393,8 @@ DEBLOCK_LUMA
 %macro DEBLOCK_LUMA 2
 ;-----------------------------------------------------------------------------
-; void deblock_v8_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
+; void ff_deblock_v8_luma(uint8_t *pix, int stride, int alpha, int beta,
+;                         int8_t *tc0)
 ;-----------------------------------------------------------------------------
 cglobal deblock_%1_luma_8, 5,5,8,2*%2
     lea r4, [r1*3]
@@ -439,7 +442,8 @@ cglobal deblock_%1_luma_8, 5,5,8,2*%2
     RET
 ;-----------------------------------------------------------------------------
-; void deblock_h_luma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
+; void ff_deblock_h_luma(uint8_t *pix, int stride, int alpha, int beta,
+;                        int8_t *tc0)
 ;-----------------------------------------------------------------------------
 INIT_MMX cpuname
 cglobal deblock_h_luma_8, 0,5,8,0x60+HAVE_ALIGNED_STACK*12
@@ -639,7 +643,7 @@ DEBLOCK_LUMA v, 16
 %endif
 ;-----------------------------------------------------------------------------
-; void deblock_v_luma_intra( uint8_t *pix, int stride, int alpha, int beta )
+; void ff_deblock_v_luma_intra(uint8_t *pix, int stride, int alpha, int beta)
 ;-----------------------------------------------------------------------------
 %if WIN64
 cglobal deblock_%1_luma_intra_8, 4,6,16,0x10
@@ -699,7 +703,7 @@ cglobal deblock_%1_luma_intra_8, 4,6,16,ARCH_X86_64*0x50-0x50
 INIT_MMX cpuname
 %if ARCH_X86_64
 ;-----------------------------------------------------------------------------
-; void deblock_h_luma_intra( uint8_t *pix, int stride, int alpha, int beta )
+; void ff_deblock_h_luma_intra(uint8_t *pix, int stride, int alpha, int beta)
 ;-----------------------------------------------------------------------------
 cglobal deblock_h_luma_intra_8, 4,9,0,0x80
     movsxd r7, r1d
@@ -804,7 +808,8 @@ INIT_MMX mmxext
 %define t6 r6
 ;-----------------------------------------------------------------------------
-; void ff_deblock_v_chroma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
+; void ff_deblock_v_chroma(uint8_t *pix, int stride, int alpha, int beta,
+;                          int8_t *tc0)
 ;-----------------------------------------------------------------------------
 cglobal deblock_v_chroma_8, 5,6
     CHROMA_V_START
@@ -818,7 +823,8 @@ cglobal deblock_v_chroma_8, 5,6
     RET
 ;-----------------------------------------------------------------------------
-; void ff_deblock_h_chroma( uint8_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
+; void ff_deblock_h_chroma(uint8_t *pix, int stride, int alpha, int beta,
+;                          int8_t *tc0)
 ;-----------------------------------------------------------------------------
 cglobal deblock_h_chroma_8, 5,7
 %if UNIX64
@@ -874,9 +880,9 @@ ff_chroma_inter_body_mmxext:
 %define t5 r4
 %define t6 r5
-;-----------------------------------------------------------------------------
-; void ff_deblock_v_chroma_intra( uint8_t *pix, int stride, int alpha, int beta )
-;-----------------------------------------------------------------------------
+;------------------------------------------------------------------------------
+; void ff_deblock_v_chroma_intra(uint8_t *pix, int stride, int alpha, int beta)
+;------------------------------------------------------------------------------
 cglobal deblock_v_chroma_intra_8, 4,5
     CHROMA_V_START
     movq m0, [t5]
@@ -888,9 +894,9 @@ cglobal deblock_v_chroma_intra_8, 4,5
     movq [r0], m2
     RET
-;-----------------------------------------------------------------------------
-; void ff_deblock_h_chroma_intra( uint8_t *pix, int stride, int alpha, int beta )
-;-----------------------------------------------------------------------------
+;------------------------------------------------------------------------------
+; void ff_deblock_h_chroma_intra(uint8_t *pix, int stride, int alpha, int beta)
+;------------------------------------------------------------------------------
 cglobal deblock_h_chroma_intra_8, 4,6
     CHROMA_H_START
     TRANSPOSE4x8_LOAD bw, wd, dq, PASS8ROWS(t5, r0, r1, t6)
@@ -914,10 +920,10 @@ ff_chroma_intra_body_mmxext:
     ret
 ;-----------------------------------------------------------------------------
-; void h264_loop_filter_strength(int16_t bs[2][4][4], uint8_t nnz[40],
-;                                int8_t ref[2][40], int16_t mv[2][40][2],
-;                                int bidir, int edges, int step,
-;                                int mask_mv0, int mask_mv1, int field);
+; void ff_h264_loop_filter_strength(int16_t bs[2][4][4], uint8_t nnz[40],
+;                                   int8_t ref[2][40], int16_t mv[2][40][2],
+;                                   int bidir, int edges, int step,
+;                                   int mask_mv0, int mask_mv1, int field);
 ;
 ; bidir is 0 or 1
 ; edges is 1 or 4
...
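All of the deblock functions above share the same gating logic from the H.264 spec: an edge is only filtered when the local pixel differences fall below the alpha and beta thresholds. A C sketch of that check (illustrative only, not this commit's code):

    #include <stdlib.h>

    /* Per the H.264 spec, for an edge p1 p0 | q0 q1, filtering is applied
     * only when all three activity tests pass. */
    static int h264_deblock_edge_active(int p1, int p0, int q0, int q1,
                                        int alpha, int beta)
    {
        return abs(p0 - q0) < alpha &&
               abs(p1 - p0) < beta  &&
               abs(q1 - q0) < beta;
    }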
@@ -152,7 +152,8 @@ cextern pw_4
 %macro DEBLOCK_LUMA 0
 ;-----------------------------------------------------------------------------
-; void deblock_v_luma( uint16_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
+; void ff_deblock_v_luma_10(uint16_t *pix, int stride, int alpha, int beta,
+;                           int8_t *tc0)
 ;-----------------------------------------------------------------------------
 cglobal deblock_v_luma_10, 5,5,8*(mmsize/16)
 %assign pad 5*mmsize+12-(stack_offset&15)
@@ -600,7 +601,8 @@ DEBLOCK_LUMA_64
 %if ARCH_X86_64
 ;-----------------------------------------------------------------------------
-; void deblock_v_luma_intra( uint16_t *pix, int stride, int alpha, int beta )
+; void ff_deblock_v_luma_intra_10(uint16_t *pix, int stride, int alpha,
+;                                 int beta)
 ;-----------------------------------------------------------------------------
 %macro DEBLOCK_LUMA_INTRA_64 0
 cglobal deblock_v_luma_intra_10, 4,7,16
@@ -651,7 +653,8 @@ cglobal deblock_v_luma_intra_10, 4,7,16
     REP_RET
 ;-----------------------------------------------------------------------------
-; void deblock_h_luma_intra( uint16_t *pix, int stride, int alpha, int beta )
+; void ff_deblock_h_luma_intra_10(uint16_t *pix, int stride, int alpha,
+;                                 int beta)
 ;-----------------------------------------------------------------------------
 cglobal deblock_h_luma_intra_10, 4,7,16
 %define t0 m15
@@ -723,7 +726,8 @@ DEBLOCK_LUMA_INTRA_64
 %macro DEBLOCK_LUMA_INTRA 0
 ;-----------------------------------------------------------------------------
-; void deblock_v_luma_intra( uint16_t *pix, int stride, int alpha, int beta )
+; void ff_deblock_v_luma_intra_10(uint16_t *pix, int stride, int alpha,
+;                                 int beta)
 ;-----------------------------------------------------------------------------
 cglobal deblock_v_luma_intra_10, 4,7,8*(mmsize/16)
     LUMA_INTRA_INIT 3
@@ -751,7 +755,8 @@ cglobal deblock_v_luma_intra_10, 4,7,8*(mmsize/16)
     RET
 ;-----------------------------------------------------------------------------
-; void deblock_h_luma_intra( uint16_t *pix, int stride, int alpha, int beta )
+; void ff_deblock_h_luma_intra_10(uint16_t *pix, int stride, int alpha,
+;                                 int beta)
 ;-----------------------------------------------------------------------------
 cglobal deblock_h_luma_intra_10, 4,7,8*(mmsize/16)
     LUMA_INTRA_INIT 8
@@ -849,7 +854,8 @@ DEBLOCK_LUMA_INTRA
 %macro DEBLOCK_CHROMA 0
 ;-----------------------------------------------------------------------------
-; void deblock_v_chroma( uint16_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
+; void ff_deblock_v_chroma_10(uint16_t *pix, int stride, int alpha, int beta,
+;                             int8_t *tc0)
 ;-----------------------------------------------------------------------------
 cglobal deblock_v_chroma_10, 5,7-(mmsize/16),8*(mmsize/16)
     mov r5, r0
@@ -883,7 +889,8 @@ cglobal deblock_v_chroma_10, 5,7-(mmsize/16),8*(mmsize/16)
 %endif
 ;-----------------------------------------------------------------------------
-; void deblock_v_chroma_intra( uint16_t *pix, int stride, int alpha, int beta )
+; void ff_deblock_v_chroma_intra_10(uint16_t *pix, int stride, int alpha,
+;                                   int beta)
 ;-----------------------------------------------------------------------------
 cglobal deblock_v_chroma_intra_10, 4,6-(mmsize/16),8*(mmsize/16)
     mov r4, r0
...
@@ -80,7 +80,7 @@ SECTION .text
 %endmacro
 INIT_MMX mmx
-; ff_h264_idct_add_8_mmx(uint8_t *dst, int16_t *block, int stride)
+; void ff_h264_idct_add_8_mmx(uint8_t *dst, int16_t *block, int stride)
 cglobal h264_idct_add_8, 3, 3, 0
     IDCT4_ADD r0, r1, r2
     RET
@@ -202,7 +202,7 @@ cglobal h264_idct_add_8, 3, 3, 0
 %endmacro
 INIT_MMX mmx
-; ff_h264_idct8_add_8_mmx(uint8_t *dst, int16_t *block, int stride)
+; void ff_h264_idct8_add_8_mmx(uint8_t *dst, int16_t *block, int stride)
 cglobal h264_idct8_add_8, 3, 4, 0
 %assign pad 128+4-(stack_offset&7)
     SUB rsp, pad
@@ -270,7 +270,7 @@ cglobal h264_idct8_add_8, 3, 4, 0
 %endmacro
 INIT_XMM sse2
-; ff_h264_idct8_add_8_sse2(uint8_t *dst, int16_t *block, int stride)
+; void ff_h264_idct8_add_8_sse2(uint8_t *dst, int16_t *block, int stride)
 cglobal h264_idct8_add_8, 3, 4, 10
     IDCT8_ADD_SSE r0, r1, r2, r3
     RET
@@ -307,7 +307,7 @@ cglobal h264_idct8_add_8, 3, 4, 10
 %endmacro
 INIT_MMX mmxext
-; ff_h264_idct_dc_add_8_mmxext(uint8_t *dst, int16_t *block, int stride)
+; void ff_h264_idct_dc_add_8_mmxext(uint8_t *dst, int16_t *block, int stride)
 %if ARCH_X86_64
 cglobal h264_idct_dc_add_8, 3, 4, 0
     movsx r3, word [r1]
@@ -316,7 +316,7 @@ cglobal h264_idct_dc_add_8, 3, 4, 0
     DC_ADD_MMXEXT_OP movh, r0, r2, r3
     RET
-; ff_h264_idct8_dc_add_8_mmxext(uint8_t *dst, int16_t *block, int stride)
+; void ff_h264_idct8_dc_add_8_mmxext(uint8_t *dst, int16_t *block, int stride)
 cglobal h264_idct8_dc_add_8, 3, 4, 0
     movsx r3, word [r1]
     mov dword [r1], 0
@@ -326,7 +326,7 @@ cglobal h264_idct8_dc_add_8, 3, 4, 0
     DC_ADD_MMXEXT_OP mova, r0, r2, r3
     RET
 %else
-; ff_h264_idct_dc_add_8_mmxext(uint8_t *dst, int16_t *block, int stride)
+; void ff_h264_idct_dc_add_8_mmxext(uint8_t *dst, int16_t *block, int stride)
 cglobal h264_idct_dc_add_8, 2, 3, 0
     movsx r2, word [r1]
     mov dword [r1], 0
@@ -335,7 +335,7 @@ cglobal h264_idct_dc_add_8, 2, 3, 0
     DC_ADD_MMXEXT_OP movh, r0, r1, r2
     RET
-; ff_h264_idct8_dc_add_8_mmxext(uint8_t *dst, int16_t *block, int stride)
+; void ff_h264_idct8_dc_add_8_mmxext(uint8_t *dst, int16_t *block, int stride)
 cglobal h264_idct8_dc_add_8, 2, 3, 0
     movsx r2, word [r1]
     mov dword [r1], 0
@@ -348,9 +348,9 @@ cglobal h264_idct8_dc_add_8, 2, 3, 0
 %endif
 INIT_MMX mmx
-; ff_h264_idct_add16_8_mmx(uint8_t *dst, const int *block_offset,
-;                          int16_t *block, int stride,
-;                          const uint8_t nnzc[6 * 8])
+; void ff_h264_idct_add16_8_mmx(uint8_t *dst, const int *block_offset,
+;                               int16_t *block, int stride,
+;                               const uint8_t nnzc[6 * 8])
 cglobal h264_idct_add16_8, 5, 7 + npicregs, 0, dst, block_offset, block, stride, nnzc, cntr, coeff, picreg
     xor r5, r5
 %ifdef PIC
@@ -371,9 +371,9 @@ cglobal h264_idct_add16_8, 5, 7 + npicregs, 0, dst, block_offset, block, stride,
     jl .nextblock
     REP_RET
-; ff_h264_idct8_add4_8_mmx(uint8_t *dst, const int *block_offset,
-;                          int16_t *block, int stride,
-;                          const uint8_t nnzc[6 * 8])
+; void ff_h264_idct8_add4_8_mmx(uint8_t *dst, const int *block_offset,
+;                               int16_t *block, int stride,
+;                               const uint8_t nnzc[6 * 8])
 cglobal h264_idct8_add4_8, 5, 7 + npicregs, 0, dst, block_offset, block, stride, nnzc, cntr, coeff, picreg
 %assign pad 128+4-(stack_offset&7)
     SUB rsp, pad
@@ -405,9 +405,9 @@ cglobal h264_idct8_add4_8, 5, 7 + npicregs, 0, dst, block_offset, block, stride,
     RET
 INIT_MMX mmxext
-; ff_h264_idct_add16_8_mmxext(uint8_t *dst, const int *block_offset,
-;                             int16_t *block, int stride,
-;                             const uint8_t nnzc[6 * 8])
+; void ff_h264_idct_add16_8_mmxext(uint8_t *dst, const int *block_offset,
+;                                  int16_t *block, int stride,
+;                                  const uint8_t nnzc[6 * 8])
 cglobal h264_idct_add16_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg
     xor r5, r5
 %ifdef PIC
@@ -452,9 +452,9 @@ cglobal h264_idct_add16_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride
     REP_RET
 INIT_MMX mmx
-; ff_h264_idct_add16intra_8_mmx(uint8_t *dst, const int *block_offset,
-;                               int16_t *block, int stride,
-;                               const uint8_t nnzc[6 * 8])
+; void ff_h264_idct_add16intra_8_mmx(uint8_t *dst, const int *block_offset,
+;                                    int16_t *block, int stride,
+;                                    const uint8_t nnzc[6 * 8])
 cglobal h264_idct_add16intra_8, 5, 7 + npicregs, 0, dst, block_offset, block, stride, nnzc, cntr, coeff, picreg
     xor r5, r5
 %ifdef PIC
@@ -477,9 +477,9 @@ cglobal h264_idct_add16intra_8, 5, 7 + npicregs, 0, dst, block_offset, block, st
     REP_RET
 INIT_MMX mmxext
-; ff_h264_idct_add16intra_8_mmxext(uint8_t *dst, const int *block_offset,
-;                                  int16_t *block, int stride,
-;                                  const uint8_t nnzc[6 * 8])
+; void ff_h264_idct_add16intra_8_mmxext(uint8_t *dst, const int *block_offset,
+;                                       int16_t *block, int stride,
+;                                       const uint8_t nnzc[6 * 8])
 cglobal h264_idct_add16intra_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg
     xor r5, r5
 %ifdef PIC
@@ -521,9 +521,9 @@ cglobal h264_idct_add16intra_8, 5, 8 + npicregs, 0, dst1, block_offset, block, s
     jl .nextblock
     REP_RET
-; ff_h264_idct8_add4_8_mmxext(uint8_t *dst, const int *block_offset,
-;                             int16_t *block, int stride,
-;                             const uint8_t nnzc[6 * 8])
+; void ff_h264_idct8_add4_8_mmxext(uint8_t *dst, const int *block_offset,
+;                                  int16_t *block, int stride,
+;                                  const uint8_t nnzc[6 * 8])
 cglobal h264_idct8_add4_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg
 %assign pad 128+4-(stack_offset&7)
     SUB rsp, pad
@@ -583,9 +583,9 @@ cglobal h264_idct8_add4_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride
     RET
 INIT_XMM sse2
-; ff_h264_idct8_add4_8_sse2(uint8_t *dst, const int *block_offset,
-;                           int16_t *block, int stride,
-;                           const uint8_t nnzc[6 * 8])
+; void ff_h264_idct8_add4_8_sse2(uint8_t *dst, const int *block_offset,
+;                                int16_t *block, int stride,
+;                                const uint8_t nnzc[6 * 8])
 cglobal h264_idct8_add4_8, 5, 8 + npicregs, 10, dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg
     xor r5, r5
 %ifdef PIC
@@ -660,8 +660,9 @@ h264_idct_add8_mmx_plane:
     jnz .nextblock
     rep ret
-; ff_h264_idct_add8_8_mmx(uint8_t **dest, const int *block_offset,
-;                         int16_t *block, int stride, const uint8_t nnzc[6 * 8])
+; void ff_h264_idct_add8_8_mmx(uint8_t **dest, const int *block_offset,
+;                              int16_t *block, int stride,
+;                              const uint8_t nnzc[6 * 8])
 cglobal h264_idct_add8_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg
     mov r5, 16
     add r2, 512
@@ -725,9 +726,9 @@ h264_idct_add8_mmxext_plane:
     rep ret
 INIT_MMX mmxext
-; ff_h264_idct_add8_8_mmxext(uint8_t **dest, const int *block_offset,
-;                            int16_t *block, int stride,
-;                            const uint8_t nnzc[6 * 8])
+; void ff_h264_idct_add8_8_mmxext(uint8_t **dest, const int *block_offset,
+;                                 int16_t *block, int stride,
+;                                 const uint8_t nnzc[6 * 8])
 cglobal h264_idct_add8_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg
     mov r5, 16
     add r2, 512
@@ -809,9 +810,9 @@ h264_add8x4_idct_sse2:
 %endif
 %endmacro
-; ff_h264_idct_add16_8_sse2(uint8_t *dst, const int *block_offset,
-;                           int16_t *block, int stride,
-;                           const uint8_t nnzc[6 * 8])
+; void ff_h264_idct_add16_8_sse2(uint8_t *dst, const int *block_offset,
+;                                int16_t *block, int stride,
+;                                const uint8_t nnzc[6 * 8])
 cglobal h264_idct_add16_8, 5, 5 + ARCH_X86_64, 8
 %if ARCH_X86_64
     mov r5, r0
@@ -857,9 +858,9 @@ cglobal h264_idct_add16_8, 5, 5 + ARCH_X86_64, 8
 %endif
 %endmacro
-; ff_h264_idct_add16intra_8_sse2(uint8_t *dst, const int *block_offset,
-;                                int16_t *block, int stride,
-;                                const uint8_t nnzc[6 * 8])
+; void ff_h264_idct_add16intra_8_sse2(uint8_t *dst, const int *block_offset,
+;                                     int16_t *block, int stride,
+;                                     const uint8_t nnzc[6 * 8])
 cglobal h264_idct_add16intra_8, 5, 7 + ARCH_X86_64, 8
 %if ARCH_X86_64
     mov r7, r0
@@ -909,9 +910,9 @@ cglobal h264_idct_add16intra_8, 5, 7 + ARCH_X86_64, 8
 %endif
 %endmacro
-; ff_h264_idct_add8_8_sse2(uint8_t **dest, const int *block_offset,
-;                          int16_t *block, int stride,
-;                          const uint8_t nnzc[6 * 8])
+; void ff_h264_idct_add8_8_sse2(uint8_t **dest, const int *block_offset,
+;                               int16_t *block, int stride,
+;                               const uint8_t nnzc[6 * 8])
 cglobal h264_idct_add8_8, 5, 7 + ARCH_X86_64, 8
     add r2, 512
 %if ARCH_X86_64
...
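The dc_add variants above are the cheap special case of the IDCT; a scalar sketch of the 8-bit 4x4 version (my reading of the prototype, not the commit's code):

    #include <stdint.h>

    static inline uint8_t clip_uint8(int v)
    {
        return v < 0 ? 0 : v > 255 ? 255 : v;
    }

    /* Scalar model of ff_h264_idct_dc_add_8_mmxext(): with only the DC
     * coefficient present, the inverse transform reduces to adding one
     * rounded, scaled value to every pixel of the 4x4 block. The asm also
     * clears the coefficient (mov dword [r1], 0), mirrored here. */
    static void h264_idct_dc_add_8(uint8_t *dst, int16_t *block, int stride)
    {
        int dc = (block[0] + 32) >> 6;
        block[0] = 0;
        for (int y = 0; y < 4; y++, dst += stride)
            for (int x = 0; x < 4; x++)
                dst[x] = clip_uint8(dst[x] + dc);
    }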
@@ -32,7 +32,7 @@ pd_32: times 4 dd 32
 SECTION .text
 ;-----------------------------------------------------------------------------
-; void h264_idct_add(pixel *dst, int16_t *block, int stride)
+; void ff_h264_idct_add_10(pixel *dst, int16_t *block, int stride)
 ;-----------------------------------------------------------------------------
 %macro STORE_DIFFx2 6
     psrad %1, 6
@@ -89,8 +89,9 @@ IDCT_ADD_10
 %endif
 ;-----------------------------------------------------------------------------
-; h264_idct_add16(pixel *dst, const int *block_offset, int16_t *block,
-;                 int stride, const uint8_t nnzc[6*8])
+; void ff_h264_idct_add16_10(pixel *dst, const int *block_offset,
+;                            int16_t *block, int stride,
+;                            const uint8_t nnzc[6*8])
 ;-----------------------------------------------------------------------------
 ;;;;;;; NO FATE SAMPLES TRIGGER THIS
 %macro ADD4x4IDCT 0
@@ -164,7 +165,7 @@ IDCT_ADD16_10
 %endif
 ;-----------------------------------------------------------------------------
-; void h264_idct_dc_add(pixel *dst, int16_t *block, int stride)
+; void ff_h264_idct_dc_add_10(pixel *dst, int16_t *block, int stride)
 ;-----------------------------------------------------------------------------
 %macro IDCT_DC_ADD_OP_10 3
     pxor m5, m5
@@ -206,7 +207,7 @@ cglobal h264_idct_dc_add_10,3,3
     RET
 ;-----------------------------------------------------------------------------
-; void h264_idct8_dc_add(pixel *dst, int16_t *block, int stride)
+; void ff_h264_idct8_dc_add_10(pixel *dst, int16_t *block, int stride)
 ;-----------------------------------------------------------------------------
 %macro IDCT8_DC_ADD 0
 cglobal h264_idct8_dc_add_10,3,4,7
@@ -231,8 +232,9 @@ IDCT8_DC_ADD
 %endif
 ;-----------------------------------------------------------------------------
-; h264_idct_add16intra(pixel *dst, const int *block_offset, int16_t *block,
-;                      int stride, const uint8_t nnzc[6*8])
+; void ff_h264_idct_add16intra_10(pixel *dst, const int *block_offset,
+;                                 int16_t *block, int stride,
+;                                 const uint8_t nnzc[6*8])
 ;-----------------------------------------------------------------------------
 %macro AC 1
 .ac%1:
@@ -306,8 +308,9 @@ IDCT_ADD16INTRA_10
 %assign last_block 36
 ;-----------------------------------------------------------------------------
-; h264_idct_add8(pixel **dst, const int *block_offset, int16_t *block,
-;                int stride, const uint8_t nnzc[6*8])
+; void ff_h264_idct_add8_10(pixel **dst, const int *block_offset,
+;                           int16_t *block, int stride,
+;                           const uint8_t nnzc[6*8])
 ;-----------------------------------------------------------------------------
 %macro IDCT_ADD8 0
 cglobal h264_idct_add8_10,5,8,7
@@ -343,7 +346,7 @@ IDCT_ADD8
 %endif
 ;-----------------------------------------------------------------------------
-; void h264_idct8_add(pixel *dst, int16_t *block, int stride)
+; void ff_h264_idct8_add_10(pixel *dst, int16_t *block, int stride)
 ;-----------------------------------------------------------------------------
 %macro IDCT8_1D 2
     SWAP 0, 1
@@ -552,8 +555,9 @@ IDCT8_ADD
 %endif
 ;-----------------------------------------------------------------------------
-; h264_idct8_add4(pixel **dst, const int *block_offset, int16_t *block,
-;                 int stride, const uint8_t nnzc[6*8])
+; void ff_h264_idct8_add4_10(pixel **dst, const int *block_offset,
+;                            int16_t *block, int stride,
+;                            const uint8_t nnzc[6*8])
 ;-----------------------------------------------------------------------------
 ;;;;;;; NO FATE SAMPLES TRIGGER THIS
 %macro IDCT8_ADD4_OP 2
...
@@ -167,7 +167,7 @@ stub_%1_h264_qpel%3_%2_10 %+ SUFFIX:
 %endmacro
 ;-----------------------------------------------------------------------------
-; void h264_qpel_mc00(uint8_t *dst, uint8_t *src, int stride)
+; void ff_h264_qpel_mc00(uint8_t *dst, uint8_t *src, int stride)
 ;-----------------------------------------------------------------------------
 %macro COPY4 0
     movu m0, [r1 ]
@@ -221,7 +221,7 @@ MC00 put
 MC00 avg
 ;-----------------------------------------------------------------------------
-; void h264_qpel_mc20(uint8_t *dst, uint8_t *src, int stride)
+; void ff_h264_qpel_mc20(uint8_t *dst, uint8_t *src, int stride)
 ;-----------------------------------------------------------------------------
 %macro MC_CACHE 1
 %define OP_MOV mova
@@ -305,7 +305,7 @@ cglobal_mc %1, mc20, %2, 3,4,9
 MC_CACHE MC20
 ;-----------------------------------------------------------------------------
-; void h264_qpel_mc30(uint8_t *dst, uint8_t *src, int stride)
+; void ff_h264_qpel_mc30(uint8_t *dst, uint8_t *src, int stride)
 ;-----------------------------------------------------------------------------
 %macro MC30 2
 cglobal_mc %1, mc30, %2, 3,5,9
@@ -316,7 +316,7 @@ cglobal_mc %1, mc30, %2, 3,5,9
 MC_CACHE MC30
 ;-----------------------------------------------------------------------------
-; void h264_qpel_mc10(uint8_t *dst, uint8_t *src, int stride)
+; void ff_h264_qpel_mc10(uint8_t *dst, uint8_t *src, int stride)
 ;-----------------------------------------------------------------------------
 %macro MC10 2
 cglobal_mc %1, mc10, %2, 3,5,9
@@ -383,7 +383,7 @@ cglobal_mc %1, mc10, %2, 3,5,9
 MC_CACHE MC10
 ;-----------------------------------------------------------------------------
-; void h264_qpel_mc02(uint8_t *dst, uint8_t *src, int stride)
+; void ff_h264_qpel_mc02(uint8_t *dst, uint8_t *src, int stride)
 ;-----------------------------------------------------------------------------
 %macro V_FILT 10
 v_filt%9_%10_10
@@ -432,7 +432,7 @@ cglobal_mc %1, mc02, %2, 3,4,8
 MC MC02
 ;-----------------------------------------------------------------------------
-; void h264_qpel_mc01(uint8_t *dst, uint8_t *src, int stride)
+; void ff_h264_qpel_mc01(uint8_t *dst, uint8_t *src, int stride)
 ;-----------------------------------------------------------------------------
 %macro MC01 2
 cglobal_mc %1, mc01, %2, 3,5,8
@@ -458,7 +458,7 @@ cglobal_mc %1, mc01, %2, 3,5,8
 MC MC01
 ;-----------------------------------------------------------------------------
-; void h264_qpel_mc03(uint8_t *dst, uint8_t *src, int stride)
+; void ff_h264_qpel_mc03(uint8_t *dst, uint8_t *src, int stride)
 ;-----------------------------------------------------------------------------
 %macro MC03 2
 cglobal_mc %1, mc03, %2, 3,5,8
@@ -469,7 +469,7 @@ cglobal_mc %1, mc03, %2, 3,5,8
 MC MC03
 ;-----------------------------------------------------------------------------
-; void h264_qpel_mc11(uint8_t *dst, uint8_t *src, int stride)
+; void ff_h264_qpel_mc11(uint8_t *dst, uint8_t *src, int stride)
 ;-----------------------------------------------------------------------------
 %macro H_FILT_AVG 2-3
 h_filt%1_%2_10:
@@ -551,7 +551,7 @@ cglobal_mc %1, mc11, %2, 3,6,8
 MC MC11
 ;-----------------------------------------------------------------------------
-; void h264_qpel_mc31(uint8_t *dst, uint8_t *src, int stride)
+; void ff_h264_qpel_mc31(uint8_t *dst, uint8_t *src, int stride)
 ;-----------------------------------------------------------------------------
 %macro MC31 2
 cglobal_mc %1, mc31, %2, 3,6,8
@@ -563,7 +563,7 @@ cglobal_mc %1, mc31, %2, 3,6,8
 MC MC31
 ;-----------------------------------------------------------------------------
-; void h264_qpel_mc13(uint8_t *dst, uint8_t *src, int stride)
+; void ff_h264_qpel_mc13(uint8_t *dst, uint8_t *src, int stride)
 ;-----------------------------------------------------------------------------
 %macro MC13 2
 cglobal_mc %1, mc13, %2, 3,7,12
@@ -574,7 +574,7 @@ cglobal_mc %1, mc13, %2, 3,7,12
 MC MC13
 ;-----------------------------------------------------------------------------
-; void h264_qpel_mc33(uint8_t *dst, uint8_t *src, int stride)
+; void ff_h264_qpel_mc33(uint8_t *dst, uint8_t *src, int stride)
 ;-----------------------------------------------------------------------------
 %macro MC33 2
 cglobal_mc %1, mc33, %2, 3,6,8
@@ -586,7 +586,7 @@ cglobal_mc %1, mc33, %2, 3,6,8
 MC MC33
 ;-----------------------------------------------------------------------------
-; void h264_qpel_mc22(uint8_t *dst, uint8_t *src, int stride)
+; void ff_h264_qpel_mc22(uint8_t *dst, uint8_t *src, int stride)
 ;-----------------------------------------------------------------------------
 %macro FILT_H2 3
     psubw %1, %2 ; a-b
@@ -757,7 +757,7 @@ cglobal_mc %1, mc22, %2, 3,7,12
 MC MC22
 ;-----------------------------------------------------------------------------
-; void h264_qpel_mc12(uint8_t *dst, uint8_t *src, int stride)
+; void ff_h264_qpel_mc12(uint8_t *dst, uint8_t *src, int stride)
 ;-----------------------------------------------------------------------------
 %macro MC12 2
 cglobal_mc %1, mc12, %2, 3,7,12
@@ -802,7 +802,7 @@ cglobal_mc %1, mc12, %2, 3,7,12
 MC MC12
 ;-----------------------------------------------------------------------------
-; void h264_qpel_mc32(uint8_t *dst, uint8_t *src, int stride)
+; void ff_h264_qpel_mc32(uint8_t *dst, uint8_t *src, int stride)
 ;-----------------------------------------------------------------------------
 %macro MC32 2
 cglobal_mc %1, mc32, %2, 3,7,12
@@ -820,7 +820,7 @@ cglobal_mc %1, mc32, %2, 3,7,12
 MC MC32
 ;-----------------------------------------------------------------------------
-; void h264_qpel_mc21(uint8_t *dst, uint8_t *src, int stride)
+; void ff_h264_qpel_mc21(uint8_t *dst, uint8_t *src, int stride)
 ;-----------------------------------------------------------------------------
 %macro H_NRD 1
 put_h%1_10:
@@ -873,7 +873,7 @@ cglobal_mc %1, mc21, %2, 3,7,12
 MC MC21
 ;-----------------------------------------------------------------------------
-; void h264_qpel_mc23(uint8_t *dst, uint8_t *src, int stride)
+; void ff_h264_qpel_mc23(uint8_t *dst, uint8_t *src, int stride)
 ;-----------------------------------------------------------------------------
 %macro MC23 2
 cglobal_mc %1, mc23, %2, 3,7,12
...
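The two digits in each mc name encode the quarter-pel x/y phase (mc00 is a plain copy, mc20 a horizontal half-pel, and so on). The half-pel positions are built on the H.264 6-tap kernel (1, -5, 20, 20, -5, 1); a scalar sketch of one horizontal tap sum, before the rounding and clipping the full routines apply:

    #include <stdint.h>

    /* Unnormalized 6-tap H.264 luma filter at a half-pel position for a
     * 10-bit plane (pixel = uint16_t); src points at the center-left tap. */
    static int h264_qpel_filter_h(const uint16_t *src)
    {
        return src[-2] - 5 * src[-1] + 20 * src[0] +
               20 * src[1] - 5 * src[2] + src[3];
    }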
@@ -28,12 +28,12 @@ SECTION .text
 ;-----------------------------------------------------------------------------
 ; biweight pred:
 ;
-; void h264_biweight_16_sse2(uint8_t *dst, uint8_t *src, int stride,
-;                            int height, int log2_denom, int weightd,
-;                            int weights, int offset);
+; void ff_h264_biweight_16_sse2(uint8_t *dst, uint8_t *src, int stride,
+;                               int height, int log2_denom, int weightd,
+;                               int weights, int offset);
 ; and
-; void h264_weight_16_sse2(uint8_t *dst, int stride, int height,
-;                          int log2_denom, int weight, int offset);
+; void ff_h264_weight_16_sse2(uint8_t *dst, int stride, int height,
+;                             int log2_denom, int weight, int offset);
 ;-----------------------------------------------------------------------------
 %macro WEIGHT_SETUP 0
...
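For orientation, the weight (unidirectional) case above implements H.264 explicit weighted prediction; in scalar C (a sketch under the usual spec semantics, with a hypothetical width parameter standing in for the fixed _16 width; not this commit's code):

    #include <stdint.h>

    static inline uint8_t clip_uint8(int v)
    {
        return v < 0 ? 0 : v > 255 ? 255 : v;
    }

    /* Scalar model of an h264_weight-style function: scale, round by
     * 2^(log2_denom-1), shift, add offset, clip. */
    static void h264_weight(uint8_t *dst, int stride, int width, int height,
                            int log2_denom, int weight, int offset)
    {
        for (int y = 0; y < height; y++, dst += stride)
            for (int x = 0; x < width; x++) {
                int v = dst[x] * weight;
                if (log2_denom)
                    v = (v + (1 << (log2_denom - 1))) >> log2_denom;
                dst[x] = clip_uint8(v + offset);
            }
    }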
@@ -35,8 +35,8 @@ cextern pw_1
 SECTION .text
 ;-----------------------------------------------------------------------------
-; void h264_weight(uint8_t *dst, int stride, int height, int log2_denom,
-;                  int weight, int offset);
+; void ff_h264_weight_16_10(uint8_t *dst, int stride, int height,
+;                           int log2_denom, int weight, int offset);
 ;-----------------------------------------------------------------------------
 %macro WEIGHT_PROLOGUE 0
 .prologue:
@@ -151,8 +151,9 @@ WEIGHT_FUNC_HALF_MM
 ;-----------------------------------------------------------------------------
-; void h264_biweight(uint8_t *dst, uint8_t *src, int stride, int height,
-;                    int log2_denom, int weightd, int weights, int offset);
+; void ff_h264_biweight_16_10(uint8_t *dst, uint8_t *src, int stride,
+;                             int height, int log2_denom, int weightd,
+;                             int weights, int offset);
 ;-----------------------------------------------------------------------------
 %if ARCH_X86_32
 DECLARE_REG_TMP 3
...
@@ -32,7 +32,7 @@ cextern pb_1
 SECTION_TEXT
-; put_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+; void ff_put_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 %macro PUT_PIXELS8_X2 0
 cglobal put_pixels8_x2, 4,5
     lea r4, [r2*2]
@@ -64,7 +64,7 @@ INIT_MMX 3dnow
 PUT_PIXELS8_X2
-; put_pixels16_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+; void ff_put_pixels16_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 %macro PUT_PIXELS_16 0
 cglobal put_pixels16_x2, 4,5
     lea r4, [r2*2]
@@ -108,7 +108,7 @@ INIT_MMX 3dnow
 PUT_PIXELS_16
-; put_no_rnd_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+; void ff_put_no_rnd_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 %macro PUT_NO_RND_PIXELS8_X2 0
 cglobal put_no_rnd_pixels8_x2, 4,5
     mova m6, [pb_1]
@@ -149,7 +149,7 @@ INIT_MMX 3dnow
 PUT_NO_RND_PIXELS8_X2
-; put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+; void ff_put_no_rnd_pixels8_x2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 %macro PUT_NO_RND_PIXELS8_X2_EXACT 0
 cglobal put_no_rnd_pixels8_x2_exact, 4,5
     lea r4, [r2*3]
@@ -196,7 +196,7 @@ INIT_MMX 3dnow
 PUT_NO_RND_PIXELS8_X2_EXACT
-; put_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+; void ff_put_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 %macro PUT_PIXELS8_Y2 0
 cglobal put_pixels8_y2, 4,5
     lea r4, [r2*2]
@@ -230,7 +230,7 @@ INIT_MMX 3dnow
 PUT_PIXELS8_Y2
-; put_no_rnd_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+; void ff_put_no_rnd_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 %macro PUT_NO_RND_PIXELS8_Y2 0
 cglobal put_no_rnd_pixels8_y2, 4,5
     mova m6, [pb_1]
@@ -267,7 +267,7 @@ INIT_MMX 3dnow
 PUT_NO_RND_PIXELS8_Y2
-; put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+; void ff_put_no_rnd_pixels8_y2_exact(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 %macro PUT_NO_RND_PIXELS8_Y2_EXACT 0
 cglobal put_no_rnd_pixels8_y2_exact, 4,5
     lea r4, [r2*3]
@@ -309,7 +309,7 @@ INIT_MMX 3dnow
 PUT_NO_RND_PIXELS8_Y2_EXACT
-; avg_pixels8(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+; void ff_avg_pixels8(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 %macro AVG_PIXELS8 0
 cglobal avg_pixels8, 4,5
     lea r4, [r2*2]
@@ -339,7 +339,7 @@ INIT_MMX 3dnow
 AVG_PIXELS8
-; avg_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
+; void ff_avg_pixels8_x2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
 %macro AVG_PIXELS8_X2 0
 cglobal avg_pixels8_x2, 4,5
     lea r4, [r2*2]
@@ -375,7 +375,7 @@ INIT_MMX 3dnow
AVG_PIXELS8_X2 AVG_PIXELS8_X2
; avg_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) ; void ff_avg_pixels8_y2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
%macro AVG_PIXELS8_Y2 0 %macro AVG_PIXELS8_Y2 0
cglobal avg_pixels8_y2, 4,5 cglobal avg_pixels8_y2, 4,5
lea r4, [r2*2] lea r4, [r2*2]
...@@ -417,7 +417,7 @@ INIT_MMX 3dnow ...@@ -417,7 +417,7 @@ INIT_MMX 3dnow
AVG_PIXELS8_Y2 AVG_PIXELS8_Y2
; avg_pixels8_xy2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h) ; void ff_avg_pixels8_xy2(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
%macro AVG_PIXELS8_XY2 0 %macro AVG_PIXELS8_XY2 0
cglobal avg_pixels8_xy2, 4,5 cglobal avg_pixels8_xy2, 4,5
mova m6, [pb_1] mova m6, [pb_1]
......
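For reference, the scalar behaviour behind these half-pel helpers: _x2 averages each pixel with its right neighbour, _y2 with the pixel one row below, the no_rnd variants round down instead of up, and the avg_ variants fold in the existing destination. A hedged sketch of two representatives (the _ref names are illustrative):

    #include <stdint.h>
    #include <stddef.h>

    #define RND_AVG(a, b)    (((a) + (b) + 1) >> 1)
    #define NO_RND_AVG(a, b) (((a) + (b)) >> 1)

    static void put_pixels8_x2_ref(uint8_t *block, const uint8_t *pixels,
                                   ptrdiff_t line_size, int h)
    {
        for (int i = 0; i < h; i++, block += line_size, pixels += line_size)
            for (int j = 0; j < 8; j++)
                block[j] = RND_AVG(pixels[j], pixels[j + 1]);
    }

    static void avg_pixels8_ref(uint8_t *block, const uint8_t *pixels,
                                ptrdiff_t line_size, int h)
    {
        for (int i = 0; i < h; i++, block += line_size, pixels += line_size)
            for (int j = 0; j < 8; j++)
                block[j] = RND_AVG(block[j], pixels[j]);
    }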
...@@ -33,7 +33,7 @@ cextern pw_20 ...@@ -33,7 +33,7 @@ cextern pw_20
SECTION_TEXT SECTION_TEXT
; put_no_rnd_pixels8_l2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) ; void ff_put_no_rnd_pixels8_l2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
%macro PUT_NO_RND_PIXELS8_L2 0 %macro PUT_NO_RND_PIXELS8_L2 0
cglobal put_no_rnd_pixels8_l2, 6,6 cglobal put_no_rnd_pixels8_l2, 6,6
movsxdifnidn r4, r4d movsxdifnidn r4, r4d
...@@ -99,7 +99,7 @@ INIT_MMX mmxext ...@@ -99,7 +99,7 @@ INIT_MMX mmxext
PUT_NO_RND_PIXELS8_L2 PUT_NO_RND_PIXELS8_L2
; put_no_rnd_pixels16_l2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) ; void ff_put_no_rnd_pixels16_l2(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
%macro PUT_NO_RND_PIXELS16_l2 0 %macro PUT_NO_RND_PIXELS16_l2 0
cglobal put_no_rnd_pixels16_l2, 6,6 cglobal put_no_rnd_pixels16_l2, 6,6
movsxdifnidn r3, r3d movsxdifnidn r3, r3d
......
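These _l2 helpers blend two source blocks with the non-rounding average. A hedged sketch, assuming (as is typical for the qpel temporaries) that src2 is packed with a row stride equal to the block width:

    #include <stdint.h>

    static void put_no_rnd_pixels8_l2_ref(uint8_t *dst, const uint8_t *src1,
                                          const uint8_t *src2, int dstStride,
                                          int src1Stride, int h)
    {
        for (int i = 0; i < h; i++) {
            for (int j = 0; j < 8; j++)
                dst[j] = (src1[j] + src2[j]) >> 1; /* no +1: rounds down */
            dst  += dstStride;
            src1 += src1Stride;
            src2 += 8; /* assumption: packed temporary */
        }
    }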
...@@ -231,8 +231,8 @@ section .text align=16 ...@@ -231,8 +231,8 @@ section .text align=16
SUMSUB_SHPK m2, m3, m4, m5, m6, m7, %2 SUMSUB_SHPK m2, m3, m4, m5, m6, m7, %2
%endmacro %endmacro
; void prores_idct_put_10_<opt>(uint8_t *pixels, int stride, ; void ff_prores_idct_put_10_<opt>(uint8_t *pixels, int stride,
; int16_t *block, const int16_t *qmat); ; int16_t *block, const int16_t *qmat);
%macro idct_put_fn 1 %macro idct_put_fn 1
cglobal prores_idct_put_10, 4, 4, %1 cglobal prores_idct_put_10, 4, 4, %1
movsxd r1, r1d movsxd r1, r1d
......
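An outline of what an idct_put of this shape does, heavily hedged: idct8x8 is a placeholder for the actual 8x8 transform, and the byte stride and full-range 10-bit clip are assumptions (the real code may clamp to a narrower legal video range).

    #include <stdint.h>

    void idct8x8(int16_t block[64]); /* placeholder, not defined here */

    static void prores_idct_put_10_ref(uint8_t *pixels, int stride,
                                       int16_t *block, const int16_t *qmat)
    {
        uint16_t *out = (uint16_t *)pixels; /* 10-bit samples in 16 bits */
        int linesize  = stride >> 1;        /* assumption: stride in bytes */
        for (int i = 0; i < 64; i++)
            block[i] *= qmat[i];            /* dequantize */
        idct8x8(block);
        for (int y = 0; y < 8; y++)
            for (int x = 0; x < 8; x++) {
                int v = block[y * 8 + x];
                out[y * linesize + x] = v < 0 ? 0 : v > 1023 ? 1023 : v;
            }
    }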
...@@ -44,7 +44,8 @@ SECTION .text ...@@ -44,7 +44,8 @@ SECTION .text
mova %2, %1 mova %2, %1
%endmacro %endmacro
; void pixels4_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) ; void ff_put/avg_pixels4_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
; int dstStride, int src1Stride, int h)
%macro PIXELS4_L2 1 %macro PIXELS4_L2 1
%define OP op_%1h %define OP op_%1h
cglobal %1_pixels4_l2, 6,6 cglobal %1_pixels4_l2, 6,6
...@@ -87,7 +88,8 @@ INIT_MMX mmxext ...@@ -87,7 +88,8 @@ INIT_MMX mmxext
PIXELS4_L2 put PIXELS4_L2 put
PIXELS4_L2 avg PIXELS4_L2 avg
; void pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) ; void ff_put/avg_pixels8_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
; int dstStride, int src1Stride, int h)
%macro PIXELS8_L2 1 %macro PIXELS8_L2 1
%define OP op_%1 %define OP op_%1
cglobal %1_pixels8_l2, 6,6 cglobal %1_pixels8_l2, 6,6
...@@ -130,7 +132,8 @@ INIT_MMX mmxext ...@@ -130,7 +132,8 @@ INIT_MMX mmxext
PIXELS8_L2 put PIXELS8_L2 put
PIXELS8_L2 avg PIXELS8_L2 avg
; void pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h) ; void ff_put/avg_pixels16_l2_mmxext(uint8_t *dst, uint8_t *src1, uint8_t *src2,
; int dstStride, int src1Stride, int h)
%macro PIXELS16_L2 1 %macro PIXELS16_L2 1
%define OP op_%1 %define OP op_%1
cglobal %1_pixels16_l2, 6,6 cglobal %1_pixels16_l2, 6,6
......
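The %1 macro argument selects between a plain store and a destination average; a hedged sketch of that distinction for the width-4 case (8/16 are analogous, and the packed src2 stride is again an assumption):

    #include <stdint.h>

    #define RND_AVG(a, b) (((a) + (b) + 1) >> 1)

    static void pixels4_l2_ref(uint8_t *dst, const uint8_t *src1,
                               const uint8_t *src2, int dstStride,
                               int src1Stride, int h, int do_avg)
    {
        for (int i = 0; i < h; i++) {
            for (int j = 0; j < 4; j++) {
                int v = RND_AVG(src1[j], src2[j]);
                dst[j] = do_avg ? RND_AVG(dst[j], v) : v;
            }
            dst  += dstStride;
            src1 += src1Stride;
            src2 += 4;
        }
    }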
...@@ -77,9 +77,9 @@ SECTION .text ...@@ -77,9 +77,9 @@ SECTION .text
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; subpel MC functions: ; subpel MC functions:
; ;
; void [put|rv40]_rv40_qpel_[h|v]_<opt>(uint8_t *dst, int deststride, ; void ff_[put|rv40]_rv40_qpel_[h|v]_<opt>(uint8_t *dst, int deststride,
; uint8_t *src, int srcstride, ; uint8_t *src, int srcstride,
; int len, int m); ; int len, int m);
;---------------------------------------------------------------------- ;----------------------------------------------------------------------
%macro LOAD 2 %macro LOAD 2
%if WIN64 %if WIN64
...@@ -438,7 +438,7 @@ FILTER_SSSE3 avg ...@@ -438,7 +438,7 @@ FILTER_SSSE3 avg
%endmacro %endmacro
; rv40_weight_func_%1(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, int stride) ; void ff_rv40_weight_func_%1(uint8_t *dst, uint8_t *src1, uint8_t *src2, int w1, int w2, int stride)
; %1=size %2=num of xmm regs ; %1=size %2=num of xmm regs
; The weights are FP0.14 notation of fractions depending on pts. ; The weights are FP0.14 notation of fractions depending on pts.
; For timebases without rounding error (i.e. PAL), the fractions ; For timebases without rounding error (i.e. PAL), the fractions
......
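As the FP0.14 comment suggests, w1 and w2 are Q14 fractions that nominally sum to one; a hedged sketch of the blend (exact intermediate precision and rounding in the real code may differ to avoid overflow):

    #include <stdint.h>

    static void rv40_weight_ref(uint8_t *dst, const uint8_t *src1,
                                const uint8_t *src2, int w1, int w2,
                                int stride, int size)
    {
        for (int y = 0; y < size; y++, dst += stride,
                 src1 += stride, src2 += stride)
            for (int x = 0; x < size; x++)
                dst[x] = (src1[x] * w1 + src2[x] * w2 + (1 << 13)) >> 14;
    }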
...@@ -124,9 +124,9 @@ cglobal sbr_hf_g_filt, 5, 6, 5 ...@@ -124,9 +124,9 @@ cglobal sbr_hf_g_filt, 5, 6, 5
.end: .end:
RET RET
; static void sbr_hf_gen_c(float (*X_high)[2], const float (*X_low)[2], ; void ff_sbr_hf_gen_sse(float (*X_high)[2], const float (*X_low)[2],
; const float alpha0[2], const float alpha1[2], ; const float alpha0[2], const float alpha1[2],
; float bw, int start, int end) ; float bw, int start, int end)
; ;
cglobal sbr_hf_gen, 4,4,8, X_high, X_low, alpha0, alpha1, BW, S, E cglobal sbr_hf_gen, 4,4,8, X_high, X_low, alpha0, alpha1, BW, S, E
; load alpha factors ; load alpha factors
...@@ -249,7 +249,7 @@ cglobal sbr_neg_odd_64, 1,2,4,z ...@@ -249,7 +249,7 @@ cglobal sbr_neg_odd_64, 1,2,4,z
jne .loop jne .loop
REP_RET REP_RET
; sbr_qmf_deint_bfly(float *v, const float *src0, const float *src1) ; void ff_sbr_qmf_deint_bfly_sse2(float *v, const float *src0, const float *src1)
%macro SBR_QMF_DEINT_BFLY 0 %macro SBR_QMF_DEINT_BFLY 0
cglobal sbr_qmf_deint_bfly, 3,5,8, v,src0,src1,vrev,c cglobal sbr_qmf_deint_bfly, 3,5,8, v,src0,src1,vrev,c
mov cq, 64*4-2*mmsize mov cq, 64*4-2*mmsize
......
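A hedged C reference for the deinterleaving butterfly: each output pair is the sum and difference of one sample from each source, with src1 read in reverse. Paraphrased from the scalar SBR DSP; verify against sbrdsp.c before relying on it.

    static void sbr_qmf_deint_bfly_ref(float *v, const float *src0,
                                       const float *src1)
    {
        for (int i = 0; i < 64; i++) {
            v[i]       = src0[i] - src1[63 - i];
            v[127 - i] = src0[i] + src1[63 - i];
        }
    }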
...@@ -153,13 +153,13 @@ cextern pw_64 ...@@ -153,13 +153,13 @@ cextern pw_64
SECTION .text SECTION .text
;----------------------------------------------------------------------------- ;-------------------------------------------------------------------------------
; subpel MC functions: ; subpel MC functions:
; ;
; void put_vp8_epel<size>_h<htap>v<vtap>_<opt>(uint8_t *dst, int deststride, ; void ff_put_vp8_epel<size>_h<htap>v<vtap>_<opt>(uint8_t *dst, int deststride,
; uint8_t *src, int srcstride, ; uint8_t *src, int srcstride,
; int height, int mx, int my); ; int height, int mx, int my);
;----------------------------------------------------------------------------- ;-------------------------------------------------------------------------------
%macro FILTER_SSSE3 1 %macro FILTER_SSSE3 1
cglobal put_vp8_epel%1_h6, 6, 6 + npicregs, 8, dst, dststride, src, srcstride, height, mx, picreg cglobal put_vp8_epel%1_h6, 6, 6 + npicregs, 8, dst, dststride, src, srcstride, height, mx, picreg
...@@ -884,7 +884,7 @@ cglobal put_vp8_pixels16, 5, 5, 2, dst, dststride, src, srcstride, height ...@@ -884,7 +884,7 @@ cglobal put_vp8_pixels16, 5, 5, 2, dst, dststride, src, srcstride, height
REP_RET REP_RET
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void vp8_idct_dc_add_<opt>(uint8_t *dst, int16_t block[16], int stride); ; void ff_vp8_idct_dc_add_<opt>(uint8_t *dst, int16_t block[16], int stride);
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
%macro ADD_DC 4 %macro ADD_DC 4
...@@ -962,7 +962,7 @@ cglobal vp8_idct_dc_add, 3, 3, 6, dst, block, stride ...@@ -962,7 +962,7 @@ cglobal vp8_idct_dc_add, 3, 3, 6, dst, block, stride
RET RET
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void vp8_idct_dc_add4y_<opt>(uint8_t *dst, int16_t block[4][16], int stride); ; void ff_vp8_idct_dc_add4y_<opt>(uint8_t *dst, int16_t block[4][16], int stride);
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
%if ARCH_X86_32 %if ARCH_X86_32
...@@ -1035,7 +1035,7 @@ cglobal vp8_idct_dc_add4y, 3, 3, 6, dst, block, stride ...@@ -1035,7 +1035,7 @@ cglobal vp8_idct_dc_add4y, 3, 3, 6, dst, block, stride
RET RET
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void vp8_idct_dc_add4uv_<opt>(uint8_t *dst, int16_t block[4][16], int stride); ; void ff_vp8_idct_dc_add4uv_<opt>(uint8_t *dst, int16_t block[4][16], int stride);
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
INIT_MMX mmx INIT_MMX mmx
...@@ -1077,7 +1077,7 @@ cglobal vp8_idct_dc_add4uv, 3, 3, 0, dst, block, stride ...@@ -1077,7 +1077,7 @@ cglobal vp8_idct_dc_add4uv, 3, 3, 0, dst, block, stride
RET RET
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void vp8_idct_add_<opt>(uint8_t *dst, int16_t block[16], int stride); ; void ff_vp8_idct_add_<opt>(uint8_t *dst, int16_t block[16], int stride);
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; calculate %1=mul_35468(%1)-mul_20091(%2); %2=mul_20091(%1)+mul_35468(%2) ; calculate %1=mul_35468(%1)-mul_20091(%2); %2=mul_20091(%1)+mul_35468(%2)
...@@ -1157,7 +1157,7 @@ INIT_MMX sse ...@@ -1157,7 +1157,7 @@ INIT_MMX sse
VP8_IDCT_ADD VP8_IDCT_ADD
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void vp8_luma_dc_wht_mmxext(int16_t block[4][4][16], int16_t dc[16]) ; void ff_vp8_luma_dc_wht(int16_t block[4][4][16], int16_t dc[16])
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
%macro SCATTER_WHT 3 %macro SCATTER_WHT 3
......
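For the DC-only paths above, VP8 scales the DC coefficient by (dc + 4) >> 3 and adds it to the 4x4 block with unsigned-8 clipping; a minimal sketch of the single-block case:

    #include <stdint.h>

    static uint8_t clip_u8(int v) { return v < 0 ? 0 : v > 255 ? 255 : v; }

    static void vp8_idct_dc_add_ref(uint8_t *dst, int16_t block[16], int stride)
    {
        int dc = (block[0] + 4) >> 3;
        block[0] = 0; /* coefficient is consumed */
        for (int y = 0; y < 4; y++, dst += stride)
            for (int x = 0; x < 4; x++)
                dst[x] = clip_u8(dst[x] + dc);
    }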
...@@ -43,7 +43,7 @@ cextern pb_80 ...@@ -43,7 +43,7 @@ cextern pb_80
SECTION .text SECTION .text
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void vp8_h/v_loop_filter_simple_<opt>(uint8_t *dst, int stride, int flim); ; void ff_vp8_h/v_loop_filter_simple_<opt>(uint8_t *dst, int stride, int flim);
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; macro called with 7 mm register indexes as argument, and 4 regular registers ; macro called with 7 mm register indexes as argument, and 4 regular registers
...@@ -429,8 +429,8 @@ INIT_XMM sse4 ...@@ -429,8 +429,8 @@ INIT_XMM sse4
SIMPLE_LOOPFILTER h, 5 SIMPLE_LOOPFILTER h, 5
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void vp8_h/v_loop_filter<size>_inner_<opt>(uint8_t *dst, [uint8_t *v,] int stride, ; void ff_vp8_h/v_loop_filter<size>_inner_<opt>(uint8_t *dst, [uint8_t *v,] int stride,
; int flimE, int flimI, int hev_thr); ; int flimE, int flimI, int hev_thr);
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
%macro INNER_LOOPFILTER 2 %macro INNER_LOOPFILTER 2
...@@ -921,8 +921,8 @@ INNER_LOOPFILTER v, 8 ...@@ -921,8 +921,8 @@ INNER_LOOPFILTER v, 8
INNER_LOOPFILTER h, 8 INNER_LOOPFILTER h, 8
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
; void vp8_h/v_loop_filter<size>_mbedge_<opt>(uint8_t *dst, [uint8_t *v,] int stride, ; void ff_vp8_h/v_loop_filter<size>_mbedge_<opt>(uint8_t *dst, [uint8_t *v,] int stride,
; int flimE, int flimI, int hev_thr); ; int flimE, int flimI, int hev_thr);
;----------------------------------------------------------------------------- ;-----------------------------------------------------------------------------
%macro MBEDGE_LOOPFILTER 2 %macro MBEDGE_LOOPFILTER 2
......
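A hedged sketch of the "simple" filter on one edge position, paraphrasing the VP8 specification: the edge is filtered only when 2*|p0-q0| + |p1-q1|/2 <= flim, and p0/q0 are then nudged by a clamped delta computed in bias-128 signed arithmetic.

    #include <stdint.h>
    #include <stdlib.h> /* abs */

    static int clamp_s8(int v) { return v < -128 ? -128 : v > 127 ? 127 : v; }

    static void vp8_simple_filter_ref(uint8_t *p1p, uint8_t *p0p,
                                      uint8_t *q0p, uint8_t *q1p, int flim)
    {
        int p1 = *p1p - 128, p0 = *p0p - 128, q0 = *q0p - 128, q1 = *q1p - 128;
        if (2 * abs(p0 - q0) + (abs(p1 - q1) >> 1) > flim)
            return; /* edge not filtered */
        int a  = clamp_s8(clamp_s8(p1 - q1) + 3 * (q0 - p0));
        int f1 = clamp_s8(a + 4) >> 3;
        int f2 = clamp_s8(a + 3) >> 3;
        *q0p = (uint8_t)(clamp_s8(q0 - f1) + 128);
        *p0p = (uint8_t)(clamp_s8(p0 + f2) + 128);
    }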