Commit 9f815bc2 authored by James Almer

avcodec/jpeg2000dsp: add ff_rct_int_{sse2,avx2}

Reviewed-by: Michael Niedermayer <michaelni@gmx.at>
Signed-off-by: James Almer <jamrial@gmail.com>
parent 7912a683
......@@ -221,6 +221,7 @@ int ff_jpeg2000_init_component(Jpeg2000Component *comp,
if (!comp->f_data)
return AVERROR(ENOMEM);
} else {
csize += FF_INPUT_BUFFER_PADDING_SIZE / sizeof(*comp->i_data);
comp->f_data = NULL;
comp->i_data = av_mallocz_array(csize, sizeof(*comp->i_data));
if (!comp->i_data)
......
......@@ -106,3 +106,39 @@ INIT_XMM sse
ICT_FLOAT 10
INIT_YMM avx
ICT_FLOAT 9
;***************************************************************************
; ff_rct_int_<opt>(int32_t *src0, int32_t *src1, int32_t *src2, int csize)
;
; In-place integer transform over three parallel int32_t planes.
; For every element i, with a = src0[i], b = src1[i], c = src2[i]:
;     t       = a - ((b + c) >> 2)      (arithmetic shift)
;     src0[i] = c + t
;     src1[i] = t
;     src2[i] = b + t
;
; csize is the element count. Each iteration handles mmsize bytes
; (mmsize / 4 elements) and the loop body always runs at least once, so
; the final iteration can read and write past csize elements whenever
; csize is not a multiple of the vector width (or is zero).
; NOTE(review): this presumably relies on the planes being allocated with
; trailing padding and mmsize alignment (mova is x86inc's aligned move) --
; confirm against the allocation of comp->i_data.
;***************************************************************************
%macro RCT_INT 0
; x86inc prologue: 4 arguments, 4 general-purpose registers, 4 vector
; registers (m0-m3), with the arguments named src0/src1/src2/csize.
cglobal rct_int, 4, 4, 4, src0, src1, src2, csize
; Convert the element count into a byte count (4 bytes per int32_t).
shl csized, 2
; Point each plane at its end and negate the byte count, so that
; [srcNq+csizeq] starts at the first element and the index counts up
; towards zero; this lets the "add" below double as the loop test.
add src0q, csizeq
add src1q, csizeq
add src2q, csizeq
neg csizeq
align 16
.loop:
; Load one vector from each plane: m1 = b, m2 = c, m0 = a.
mova m1, [src1q+csizeq]
mova m2, [src2q+csizeq]
mova m0, [src0q+csizeq]
; m3 = (b + c) >> 2, using a sign-preserving (arithmetic) shift.
paddd m3, m1, m2
psrad m3, 2
; m0 = t = a - ((b + c) >> 2)
psubd m0, m3
; m1 = b + t, m2 = c + t
paddd m1, m0
paddd m2, m0
; Write back with the planes permuted: src1 <- t, src2 <- b + t,
; src0 <- c + t.
mova [src1q+csizeq], m0
mova [src2q+csizeq], m1
mova [src0q+csizeq], m2
; Advance by one vector; keep looping while the index is still negative.
add csizeq, mmsize
jl .loop
REP_RET
%endmacro
; Instantiate the RCT_INT macro once per instruction set. The matching C
; prototypes are ff_rct_int_sse2 and ff_rct_int_avx2.
; SSE2 build: INIT_XMM selects the xmm register set for the expansion.
INIT_XMM sse2
RCT_INT
; AVX2 build: INIT_YMM selects the ymm register set. It is emitted only
; when HAVE_AVX2_EXTERNAL is non-zero (presumably a build-configuration
; flag for assembler AVX2 support -- confirm in the build system).
%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
RCT_INT
%endif
......@@ -26,6 +26,8 @@
void ff_ict_float_sse(void *src0, void *src1, void *src2, int csize);
void ff_ict_float_avx(void *src0, void *src1, void *src2, int csize);
void ff_rct_int_sse2 (void *src0, void *src1, void *src2, int csize);
void ff_rct_int_avx2 (void *src0, void *src1, void *src2, int csize);
av_cold void ff_jpeg2000dsp_init_x86(Jpeg2000DSPContext *c)
{
......@@ -34,7 +36,15 @@ av_cold void ff_jpeg2000dsp_init_x86(Jpeg2000DSPContext *c)
c->mct_decode[FF_DWT97] = ff_ict_float_sse;
}
if (EXTERNAL_SSE2(cpu_flags)) {
c->mct_decode[FF_DWT53] = ff_rct_int_sse2;
}
if (EXTERNAL_AVX_FAST(cpu_flags)) {
c->mct_decode[FF_DWT97] = ff_ict_float_avx;
}
if (EXTERNAL_AVX2(cpu_flags)) {
c->mct_decode[FF_DWT53] = ff_rct_int_avx2;
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment