Commit 7a15cf42 authored by James Almer

x86/diracdsp: make ff_put_signed_rect_clamped_10_sse4 work on x86_32

Reviewed-by: Rostislav Pehlivanov <atomnuker@gmail.com>
Signed-off-by: James Almer <jamrial@gmail.com>
parent 41d7642a
...@@ -303,24 +303,30 @@ cglobal dequant_subband_32, 7, 7, 4, src, dst, stride, qf, qs, tot_v, tot_h ...@@ -303,24 +303,30 @@ cglobal dequant_subband_32, 7, 7, 4, src, dst, stride, qf, qs, tot_v, tot_h
RET RET
%if ARCH_X86_64 == 1 INIT_XMM sse4
; void put_signed_rect_clamped_10(uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int width, int height) ; void put_signed_rect_clamped_10(uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int width, int height)
cglobal put_signed_rect_clamped_10, 6, 9, 6, dst, dst_stride, src, src_stride, w, h %if ARCH_X86_64
mov r6, srcq cglobal put_signed_rect_clamped_10, 6, 8, 5, dst, dst_stride, src, src_stride, w, h, t1, t2
mov r7, dstq %else
mov r8, wq cglobal put_signed_rect_clamped_10, 5, 7, 5, dst, dst_stride, src, src_stride, w, t1, t2
%define hd r5mp
%endif
shl wd, 2
add srcq, wq
neg wq
mov t2q, dstq
mov t1q, wq
pxor m2, m2 pxor m2, m2
mova m3, [clip_10bit] mova m3, [clip_10bit]
mova m4, [convert_to_unsigned_10bit] mova m4, [convert_to_unsigned_10bit]
.loop_h: .loop_h:
mov srcq, r6 mov dstq, t2q
mov dstq, r7 mov wq, t1q
mov wq, r8
.loop_w: .loop_w:
movu m0, [srcq+0*mmsize] movu m0, [srcq+wq+0*mmsize]
movu m1, [srcq+1*mmsize] movu m1, [srcq+wq+1*mmsize]
paddd m0, m4 paddd m0, m4
paddd m1, m4 paddd m1, m4
...@@ -329,16 +335,13 @@ cglobal put_signed_rect_clamped_10, 6, 9, 6, dst, dst_stride, src, src_stride, w ...@@ -329,16 +335,13 @@ cglobal put_signed_rect_clamped_10, 6, 9, 6, dst, dst_stride, src, src_stride, w
movu [dstq], m0 movu [dstq], m0
add srcq, 2*mmsize
add dstq, 1*mmsize add dstq, 1*mmsize
sub wd, 8 add wq, 2*mmsize
jg .loop_w jl .loop_w
add r6, src_strideq add srcq, src_strideq
add r7, dst_strideq add t2q, dst_strideq
sub hd, 1 sub hd, 1
jg .loop_h jg .loop_h
RET RET
%endif
...@@ -45,9 +45,7 @@ void ff_put_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, i ...@@ -45,9 +45,7 @@ void ff_put_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, i
void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); void ff_put_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); void ff_put_signed_rect_clamped_mmx(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height); void ff_put_signed_rect_clamped_sse2(uint8_t *dst, int dst_stride, const int16_t *src, int src_stride, int width, int height);
#if ARCH_X86_64
void ff_put_signed_rect_clamped_10_sse4(uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int width, int height); void ff_put_signed_rect_clamped_10_sse4(uint8_t *dst, int dst_stride, const uint8_t *src, int src_stride, int width, int height);
#endif
void ff_dequant_subband_32_sse4(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h); void ff_dequant_subband_32_sse4(uint8_t *src, uint8_t *dst, ptrdiff_t stride, const int qf, const int qs, int tot_v, int tot_h);
...@@ -192,8 +190,6 @@ void ff_diracdsp_init_x86(DiracDSPContext* c) ...@@ -192,8 +190,6 @@ void ff_diracdsp_init_x86(DiracDSPContext* c)
if (EXTERNAL_SSE4(mm_flags)) { if (EXTERNAL_SSE4(mm_flags)) {
c->dequant_subband[1] = ff_dequant_subband_32_sse4; c->dequant_subband[1] = ff_dequant_subband_32_sse4;
#if ARCH_X86_64
c->put_signed_rect_clamped[1] = ff_put_signed_rect_clamped_10_sse4; c->put_signed_rect_clamped[1] = ff_put_signed_rect_clamped_10_sse4;
#endif
} }
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment