Commit 97474d52 authored by Ronald S. Bultje

vp9/x86: iwht4x4 (lossless) mmx.

parent d43efa68
...@@ -173,6 +173,7 @@ itxfm_funcs(16, ssse3); ...@@ -173,6 +173,7 @@ itxfm_funcs(16, ssse3);
itxfm_funcs(16, avx); itxfm_funcs(16, avx);
itxfm_func(idct, idct, 32, ssse3); itxfm_func(idct, idct, 32, ssse3);
itxfm_func(idct, idct, 32, avx); itxfm_func(idct, idct, 32, avx);
itxfm_func(iwht, iwht, 4, mmx);
#undef itxfm_func #undef itxfm_func
#undef itxfm_funcs #undef itxfm_funcs
...@@ -223,6 +224,10 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp) ...@@ -223,6 +224,10 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp)
if (EXTERNAL_MMX(cpu_flags)) { if (EXTERNAL_MMX(cpu_flags)) {
init_fpel(4, 0, 4, put, mmx); init_fpel(4, 0, 4, put, mmx);
init_fpel(3, 0, 8, put, mmx); init_fpel(3, 0, 8, put, mmx);
dsp->itxfm_add[4 /* lossless */][DCT_DCT] =
dsp->itxfm_add[4 /* lossless */][ADST_DCT] =
dsp->itxfm_add[4 /* lossless */][DCT_ADST] =
dsp->itxfm_add[4 /* lossless */][ADST_ADST] = ff_vp9_iwht_iwht_4x4_add_mmx;
} }
if (EXTERNAL_MMXEXT(cpu_flags)) { if (EXTERNAL_MMXEXT(cpu_flags)) {
......
...@@ -151,6 +151,47 @@ SECTION .text ...@@ -151,6 +151,47 @@ SECTION .text
%endrep %endrep
%endmacro %endmacro
;-------------------------------------------------------------------------------------------
; void vp9_iwht_iwht_4x4_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
;-------------------------------------------------------------------------------------------
; VP9_IWHT4_1D: one 1-D pass of the 4-point inverse Walsh-Hadamard transform
; (the "iwht" used by the lossless 4x4 path).
;
; Takes no macro arguments. Operates in place on the registers named m0-m3,
; treating each as packed 16-bit lanes (paddw/psubw/psraw), so every lane is
; transformed independently. m4 and m5 are used as scratch. The four results
; are left under the names m0-m3.
;
; NOTE(review): SWAP is the x86inc register-renaming macro. It is understood to
; change, at assembly time, which physical register each mN name refers to and
; to emit no instructions -- its definition is not visible in this file, so
; confirm against x86inc.asm. Because of that renaming, the comments below
; describe the arithmetic in terms of the mN *names* as they stand at each
; line, not in terms of the original input order.
%macro VP9_IWHT4_1D 0
; Rename the inputs held in m1/m2/m3 into the order the butterfly expects.
SWAP 1, 2, 3
; m0 += m2
paddw m0, m2
; m3 -= m1
psubw m3, m1
; m4 = m0 - m3 (three-operand form; presumably expanded by x86inc into a
; register copy plus a two-operand psubw on MMX -- confirm).
psubw m4, m0, m3
; m4 >>= 1 (arithmetic shift, sign-preserving): the shared "half difference".
psraw m4, 1
; m5 = m4 - m1; the SWAP then makes that value the new m1.
psubw m5, m4, m1
SWAP 5, 1
; m4 -= m2; the SWAP then makes that value the new m2.
psubw m4, m2
SWAP 4, 2
; m0 -= m1 (uses the m1 computed above)
psubw m0, m1
; m3 += m2 (uses the m2 computed above)
paddw m3, m2
; Rename m1/m2/m3 again so the outputs appear in the order callers expect.
SWAP 3, 2, 1
%endmacro
; MMX implementation of the 4x4 inverse WHT + add; the C side installs it as
; ff_vp9_iwht_iwht_4x4_add_mmx for all four lossless itxfm_add entries.
;
; Named arguments: dst, stride, block, eob. The "3, 3, 0" fields follow the
; x86inc cglobal convention (presumably: 3 arguments loaded into registers,
; 3 general-purpose registers used, 0 XMM registers -- confirm against
; x86inc.asm). eob is declared in the name list but never read in this body.
INIT_MMX mmx
cglobal vp9_iwht_iwht_4x4_add, 3, 3, 0, dst, stride, block, eob
; Load the four 8-byte rows of the coefficient block (4 x int16 each) into m0-m3.
mova m0, [blockq+0*8]
mova m1, [blockq+1*8]
mova m2, [blockq+2*8]
mova m3, [blockq+3*8]
; Arithmetic right shift of every 16-bit coefficient by 2 before the first pass.
psraw m0, 2
psraw m1, 2
psraw m2, 2
psraw m3, 2
; First 1-D pass: each 16-bit lane of m0-m3 is transformed independently.
VP9_IWHT4_1D
; TRANSPOSE4x4W is defined elsewhere; presumably it transposes the 4x4 matrix
; of words held in m0-m3 using m4 as scratch, so that the second pass runs
; along the other dimension -- confirm.
TRANSPOSE4x4W 0, 1, 2, 3, 4
; Second 1-D pass (no pre-shift this time).
VP9_IWHT4_1D
; m4 = 0; passed to the store and zeroing macros below.
pxor m4, m4
; VP9_STORE_2X is defined elsewhere; presumably it adds the residual rows in
; m0/m1 to two rows of pixels at dst and writes them back, with m5/m6 as
; scratch and m4 as the zero register -- confirm.
VP9_STORE_2X 0, 1, 5, 6, 4
; Advance dst by two rows.
lea dstq, [dstq+strideq*2]
; Same store for the remaining two rows held in m2/m3.
VP9_STORE_2X 2, 3, 5, 6, 4
; ZERO_BLOCK is defined elsewhere; presumably it clears the coefficient block
; (8-byte row pitch, 4 rows) using the zero in m4 -- confirm.
ZERO_BLOCK blockq, 8, 4, m4
RET
;------------------------------------------------------------------------------------------- ;-------------------------------------------------------------------------------------------
; void vp9_idct_idct_4x4_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob); ; void vp9_idct_idct_4x4_add_<opt>(uint8_t *dst, ptrdiff_t stride, int16_t *block, int eob);
;------------------------------------------------------------------------------------------- ;-------------------------------------------------------------------------------------------
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment