Commit fee97f25 authored by Michael Niedermayer's avatar Michael Niedermayer

avcodec/x86/lossless_videodsp: port add_hfyu_median_prediction_mmxext to 16bit

Signed-off-by: 's avatarMichael Niedermayer <michaelni@gmx.at>
parent 631939bd
...@@ -234,3 +234,68 @@ cglobal add_hfyu_left_prediction_int16, 4,4,8, dst, src, mask, w, left ...@@ -234,3 +234,68 @@ cglobal add_hfyu_left_prediction_int16, 4,4,8, dst, src, mask, w, left
ADD_HFYU_LEFT_LOOP_INT16 0, 1 ADD_HFYU_LEFT_LOOP_INT16 0, 1
.src_unaligned: .src_unaligned:
ADD_HFYU_LEFT_LOOP_INT16 0, 0 ADD_HFYU_LEFT_LOOP_INT16 0, 0
; void add_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int mask, int w, int *left, int *left_top)
INIT_MMX mmxext
cglobal add_hfyu_median_prediction_int16, 7,7,0, dst, top, diff, mask, w, left, left_top
add wq, wq
movd mm6, maskd
SPLATW mm6, mm6
movq mm0, [topq]
movq mm2, mm0
movd mm4, [left_topq]
psllq mm2, 16
movq mm1, mm0
por mm4, mm2
movd mm3, [leftq]
psubw mm0, mm4 ; t-tl
add dstq, wq
add topq, wq
add diffq, wq
neg wq
jmp .skip
.loop:
movq mm4, [topq+wq]
movq mm0, mm4
psllq mm4, 16
por mm4, mm1
movq mm1, mm0 ; t
psubw mm0, mm4 ; t-tl
.skip:
movq mm2, [diffq+wq]
%assign i 0
%rep 4
movq mm4, mm0
paddw mm4, mm3 ; t-tl+l
pand mm4, mm6
movq mm5, mm3
pmaxsw mm3, mm1
pminsw mm5, mm1
pminsw mm3, mm4
pmaxsw mm3, mm5 ; median
paddw mm3, mm2 ; +residual
pand mm3, mm6
%if i==0
movq mm7, mm3
psllq mm7, 48
%else
movq mm4, mm3
psrlq mm7, 16
psllq mm4, 48
por mm7, mm4
%endif
%if i<3
psrlq mm0, 16
psrlq mm1, 16
psrlq mm2, 16
%endif
%assign i i+1
%endrep
movq [dstq+wq], mm7
add wq, 8
jl .loop
movzx r2d, word [dstq-2]
mov [leftq], r2d
movzx r2d, word [topq-2]
mov [left_topq], r2d
RET
...@@ -27,6 +27,7 @@ void ff_diff_int16_mmx (uint16_t *dst, const uint16_t *src1, const uint16_t *src ...@@ -27,6 +27,7 @@ void ff_diff_int16_mmx (uint16_t *dst, const uint16_t *src1, const uint16_t *src
void ff_diff_int16_sse2(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w); void ff_diff_int16_sse2(uint16_t *dst, const uint16_t *src1, const uint16_t *src2, unsigned mask, int w);
int ff_add_hfyu_left_prediction_int16_ssse3(uint16_t *dst, const uint16_t *src, unsigned mask, int w, int acc); int ff_add_hfyu_left_prediction_int16_ssse3(uint16_t *dst, const uint16_t *src, unsigned mask, int w, int acc);
int ff_add_hfyu_left_prediction_int16_sse4(uint16_t *dst, const uint16_t *src, unsigned mask, int w, int acc); int ff_add_hfyu_left_prediction_int16_sse4(uint16_t *dst, const uint16_t *src, unsigned mask, int w, int acc);
void ff_add_hfyu_median_prediction_int16_mmxext(uint16_t *dst, const uint16_t *top, const uint16_t *diff, unsigned mask, int w, int *left, int *left_top);
void ff_llviddsp_init_x86(LLVidDSPContext *c) void ff_llviddsp_init_x86(LLVidDSPContext *c)
{ {
...@@ -37,6 +38,10 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c) ...@@ -37,6 +38,10 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c)
c->diff_int16 = ff_diff_int16_mmx; c->diff_int16 = ff_diff_int16_mmx;
} }
if (EXTERNAL_MMXEXT(cpu_flags)) {
c->add_hfyu_median_prediction_int16 = ff_add_hfyu_median_prediction_int16_mmxext;
}
if (EXTERNAL_SSE2(cpu_flags)) { if (EXTERNAL_SSE2(cpu_flags)) {
c->add_int16 = ff_add_int16_sse2; c->add_int16 = ff_add_int16_sse2;
c->diff_int16 = ff_diff_int16_sse2; c->diff_int16 = ff_diff_int16_sse2;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment