Commit 884078d2 authored by Christophe Gisquet's avatar Christophe Gisquet Committed by Michael Niedermayer

x86: huffyuvdsp: add SSE2 median prediction

From 5010c to 4566 on lagarith YUY2.
Signed-off-by: 's avatarMichael Niedermayer <michaelni@gmx.at>
parent b3dfebd6
......@@ -33,64 +33,86 @@ SECTION_TEXT
; void ff_add_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *top,
; const uint8_t *diff, int w,
; int *left, int *left_top)
INIT_MMX mmxext
cglobal add_hfyu_median_pred, 6,6,0, dst, top, diff, w, left, left_top
movq mm0, [topq]
movq mm2, mm0
movd mm4, [left_topq]
psllq mm2, 8
movq mm1, mm0
por mm4, mm2
movd mm3, [leftq]
psubb mm0, mm4 ; t-tl
%macro LSHIFT 2
%if mmsize > 8
pslldq %1, %2
%else
psllq %1, 8*(%2)
%endif
%endmacro
%macro RSHIFT 2
%if mmsize > 8
psrldq %1, %2
%else
psrlq %1, 8*(%2)
%endif
%endmacro
%macro HFYU_MEDIAN 0
cglobal add_hfyu_median_pred, 6,6,8, dst, top, diff, w, left, left_top
movu m0, [topq]
mova m2, m0
movd m4, [left_topq]
LSHIFT m2, 1
mova m1, m0
por m4, m2
movd m3, [leftq]
psubb m0, m4 ; t-tl
add dstq, wq
add topq, wq
add diffq, wq
neg wq
jmp .skip
.loop:
movq mm4, [topq+wq]
movq mm0, mm4
psllq mm4, 8
por mm4, mm1
movq mm1, mm0 ; t
psubb mm0, mm4 ; t-tl
movu m4, [topq+wq]
mova m0, m4
LSHIFT m4, 1
por m4, m1
mova m1, m0 ; t
psubb m0, m4 ; t-tl
.skip:
movq mm2, [diffq+wq]
movu m2, [diffq+wq]
%assign i 0
%rep 8
movq mm4, mm0
paddb mm4, mm3 ; t-tl+l
movq mm5, mm3
pmaxub mm3, mm1
pminub mm5, mm1
pminub mm3, mm4
pmaxub mm3, mm5 ; median
paddb mm3, mm2 ; +residual
%rep mmsize
mova m4, m0
paddb m4, m3 ; t-tl+l
mova m5, m3
pmaxub m3, m1
pminub m5, m1
pminub m3, m4
pmaxub m3, m5 ; median
paddb m3, m2 ; +residual
%if i==0
movq mm7, mm3
psllq mm7, 56
mova m7, m3
LSHIFT m7, mmsize-1
%else
movq mm6, mm3
psrlq mm7, 8
psllq mm6, 56
por mm7, mm6
mova m6, m3
RSHIFT m7, 1
LSHIFT m6, mmsize-1
por m7, m6
%endif
%if i<7
psrlq mm0, 8
psrlq mm1, 8
psrlq mm2, 8
%if i<mmsize-1
RSHIFT m0, 1
RSHIFT m1, 1
RSHIFT m2, 1
%endif
%assign i i+1
%endrep
movq [dstq+wq], mm7
add wq, 8
movu [dstq+wq], m7
add wq, mmsize
jl .loop
movzx r2d, byte [dstq-1]
mov [leftq], r2d
movzx r2d, byte [topq-1]
mov [left_topq], r2d
RET
%endmacro
INIT_MMX mmxext
HFYU_MEDIAN
INIT_XMM sse2
HFYU_MEDIAN
%macro ADD_HFYU_LEFT_LOOP 2 ; %1 = dst_is_aligned, %2 = src_is_aligned
......
......@@ -32,6 +32,9 @@ void ff_add_hfyu_median_pred_cmov(uint8_t *dst, const uint8_t *top,
void ff_add_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *top,
const uint8_t *diff, int w,
int *left, int *left_top);
void ff_add_hfyu_median_pred_sse2(uint8_t *dst, const uint8_t *top,
const uint8_t *diff, int w,
int *left, int *left_top);
int ff_add_hfyu_left_pred_ssse3(uint8_t *dst, const uint8_t *src,
int w, int left);
......@@ -58,6 +61,7 @@ av_cold void ff_huffyuvdsp_init_x86(HuffYUVDSPContext *c)
if (EXTERNAL_SSE2(cpu_flags)) {
c->add_bytes = ff_add_bytes_sse2;
c->add_hfyu_median_pred = ff_add_hfyu_median_pred_sse2;
}
if (EXTERNAL_SSSE3(cpu_flags)) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment