Commit 884078d2, authored by Christophe Gisquet, committed by Michael Niedermayer

x86: huffyuvdsp: add SSE2 median prediction

From 5010c to 4566 on lagarith YUY2.
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent b3dfebd6
...@@ -33,64 +33,86 @@ SECTION_TEXT ...@@ -33,64 +33,86 @@ SECTION_TEXT
; void ff_add_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *top, ; void ff_add_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *top,
; const uint8_t *diff, int w, ; const uint8_t *diff, int w,
; int *left, int *left_top) ; int *left, int *left_top)
INIT_MMX mmxext %macro LSHIFT 2
cglobal add_hfyu_median_pred, 6,6,0, dst, top, diff, w, left, left_top %if mmsize > 8
movq mm0, [topq] pslldq %1, %2
movq mm2, mm0 %else
movd mm4, [left_topq] psllq %1, 8*(%2)
psllq mm2, 8 %endif
movq mm1, mm0 %endmacro
por mm4, mm2
movd mm3, [leftq] %macro RSHIFT 2
psubb mm0, mm4 ; t-tl %if mmsize > 8
psrldq %1, %2
%else
psrlq %1, 8*(%2)
%endif
%endmacro
; HFYU_MEDIAN
; Emits add_hfyu_median_pred for the currently selected instruction set
; (the macro is expanded once per INIT_* line). Arguments, in cglobal
; order: dst, top, diff, w, left, left_top.
;
; For each byte position the code computes
;     dst = median(l, t, l + t - tl) + diff        (byte-wise, wrapping)
; where t is the byte from the top row, tl the top-row byte one position
; earlier (seeded from *left_top), and l the previously produced dst byte
; (seeded from *left).
;
; Register roles:
;   m0 = t - tl      m1 = t           m2 = residual bytes from diff
;   m3 = l, then the newly reconstructed byte (used as l for the next lane)
;   m4, m5, m6 = scratch             m7 = output bytes being assembled
;
; Each loop iteration handles mmsize bytes. Because every output byte is
; the "left" input of the next one, the %rep body processes one byte lane
; at a time: lane 0 is the active lane, m0/m1/m2 are shifted right by one
; byte after each step, and each result byte is inserted at the top of m7
; while m7 itself is shifted down.
;
; On exit the last byte written to dst is stored to *left and the last
; byte of the top row to *left_top (both zero-extended to 32 bits).
;
; NOTE(review): whole vectors are always loaded and stored, so w is
; presumably expected to be a multiple of mmsize or the buffers padded
; accordingly - confirm against the callers.
; NOTE(review): movd loads 32 bits from *left and *left_top; this looks
; like it assumes only the low byte of each is non-zero - confirm.
; NOTE(review): in this listing the lines from the pointer adjustment
; onward carry two columns each (old MMX instruction, then the new
; macro-based instruction); they are reproduced unchanged.
%macro HFYU_MEDIAN 0
cglobal add_hfyu_median_pred, 6,6,8, dst, top, diff, w, left, left_top
; m0 = first mmsize bytes of the top row (t)
movu m0, [topq]
mova m2, m0
; m4 = top-left vector: top row shifted up by one byte, with the value
; loaded from *left_top OR-ed into the low lanes
movd m4, [left_topq]
LSHIFT m2, 1
; m1 keeps an unmodified copy of t
mova m1, m0
por m4, m2
; m3 = initial left value loaded from *left
movd m3, [leftq]
psubb m0, m4 ; t-tl
add dstq, wq add dstq, wq
add topq, wq add topq, wq
add diffq, wq add diffq, wq
neg wq neg wq
jmp .skip jmp .skip
.loop: .loop:
movq mm4, [topq+wq] movu m4, [topq+wq]
movq mm0, mm4 mova m0, m4
psllq mm4, 8 LSHIFT m4, 1
por mm4, mm1 por m4, m1
movq mm1, mm0 ; t mova m1, m0 ; t
psubb mm0, mm4 ; t-tl psubb m0, m4 ; t-tl
.skip: .skip:
movq mm2, [diffq+wq] movu m2, [diffq+wq]
%assign i 0 %assign i 0
%rep 8 %rep mmsize
movq mm4, mm0 mova m4, m0
paddb mm4, mm3 ; t-tl+l paddb m4, m3 ; t-tl+l
movq mm5, mm3 mova m5, m3
pmaxub mm3, mm1 pmaxub m3, m1
pminub mm5, mm1 pminub m5, m1
pminub mm3, mm4 pminub m3, m4
pmaxub mm3, mm5 ; median pmaxub m3, m5 ; median
paddb mm3, mm2 ; +residual paddb m3, m2 ; +residual
%if i==0 %if i==0
movq mm7, mm3 mova m7, m3
psllq mm7, 56 LSHIFT m7, mmsize-1
%else %else
movq mm6, mm3 mova m6, m3
psrlq mm7, 8 RSHIFT m7, 1
psllq mm6, 56 LSHIFT m6, mmsize-1
por mm7, mm6 por m7, m6
%endif %endif
%if i<7 %if i<mmsize-1
psrlq mm0, 8 RSHIFT m0, 1
psrlq mm1, 8 RSHIFT m1, 1
psrlq mm2, 8 RSHIFT m2, 1
%endif %endif
%assign i i+1 %assign i i+1
%endrep %endrep
movq [dstq+wq], mm7 movu [dstq+wq], m7
add wq, 8 add wq, mmsize
jl .loop jl .loop
movzx r2d, byte [dstq-1] movzx r2d, byte [dstq-1]
mov [leftq], r2d mov [leftq], r2d
movzx r2d, byte [topq-1] movzx r2d, byte [topq-1]
mov [left_topq], r2d mov [left_topq], r2d
RET RET
%endmacro
INIT_MMX mmxext
HFYU_MEDIAN
INIT_XMM sse2
HFYU_MEDIAN
%macro ADD_HFYU_LEFT_LOOP 2 ; %1 = dst_is_aligned, %2 = src_is_aligned %macro ADD_HFYU_LEFT_LOOP 2 ; %1 = dst_is_aligned, %2 = src_is_aligned
......
...@@ -32,6 +32,9 @@ void ff_add_hfyu_median_pred_cmov(uint8_t *dst, const uint8_t *top, ...@@ -32,6 +32,9 @@ void ff_add_hfyu_median_pred_cmov(uint8_t *dst, const uint8_t *top,
void ff_add_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *top, void ff_add_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *top,
const uint8_t *diff, int w, const uint8_t *diff, int w,
int *left, int *left_top); int *left, int *left_top);
void ff_add_hfyu_median_pred_sse2(uint8_t *dst, const uint8_t *top,
const uint8_t *diff, int w,
int *left, int *left_top);
int ff_add_hfyu_left_pred_ssse3(uint8_t *dst, const uint8_t *src, int ff_add_hfyu_left_pred_ssse3(uint8_t *dst, const uint8_t *src,
int w, int left); int w, int left);
...@@ -58,6 +61,7 @@ av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c) ...@@ -58,6 +61,7 @@ av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c)
if (EXTERNAL_SSE2(cpu_flags)) { if (EXTERNAL_SSE2(cpu_flags)) {
c->add_bytes = ff_add_bytes_sse2; c->add_bytes = ff_add_bytes_sse2;
c->add_hfyu_median_pred = ff_add_hfyu_median_pred_sse2;
} }
if (EXTERNAL_SSSE3(cpu_flags)) { if (EXTERNAL_SSSE3(cpu_flags)) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment