Commit 83cd80d1 authored by Clément Bœsch

Merge commit '12004a9a'

* commit '12004a9a':
  audiodsp/x86: yasmify vector_clipf_sse
  audiodsp: reorder arguments for vector_clipf

Merged the version from Libav after a discussion with James Almer on
IRC:

19:22 <ubitux> jamrial: opinion on 12004a9a?
19:23 <ubitux> it was apparently yasmified differently
19:23 <ubitux> (it depends on the previous commit arg shuffle)
19:24 <ubitux> i don't see the magic movsxdifnidn in your port btw
19:24 <ubitux> it's a port from 1d36defe
19:25 <jamrial> seems better thanks to said arg shuffle
19:25 <jamrial> the loop is the same, but init is simpler
19:25 <jamrial> probably worth merging
19:25 <ubitux> OK
19:25 <ubitux> thanks
19:26 <jamrial> curious they didn't make len ptrdiff_t after the previous bunch of commits, heh
19:26 <ubitux> yeah indeed

Both commits are merged at the same time to prevent a conflict with our
existing yasmified ff_vector_clipf_sse.
Merged-by: Clément Bœsch <u@pkh.me>
parents bbc3bde1 12004a9a
...@@ -121,7 +121,7 @@ static void sum_square_butterfly(AC3EncodeContext *s, float sum[4], ...@@ -121,7 +121,7 @@ static void sum_square_butterfly(AC3EncodeContext *s, float sum[4],
static void clip_coefficients(AudioDSPContext *adsp, float *coef, static void clip_coefficients(AudioDSPContext *adsp, float *coef,
unsigned int len) unsigned int len)
{ {
adsp->vector_clipf(coef, coef, COEF_MIN, COEF_MAX, len); adsp->vector_clipf(coef, coef, len, COEF_MIN, COEF_MAX);
} }
......
...@@ -25,8 +25,7 @@ ...@@ -25,8 +25,7 @@
#include "libavcodec/audiodsp.h" #include "libavcodec/audiodsp.h"
#include "audiodsp_arm.h" #include "audiodsp_arm.h"
void ff_vector_clipf_neon(float *dst, const float *src, float min, float max, void ff_vector_clipf_neon(float *dst, const float *src, int len, float min, float max);
int len);
void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min, void ff_vector_clip_int32_neon(int32_t *dst, const int32_t *src, int32_t min,
int32_t max, unsigned int len); int32_t max, unsigned int len);
......
...@@ -24,9 +24,8 @@ ...@@ -24,9 +24,8 @@
function ff_vector_clipf_neon, export=1 function ff_vector_clipf_neon, export=1
VFP vdup.32 q1, d0[1] VFP vdup.32 q1, d0[1]
VFP vdup.32 q0, d0[0] VFP vdup.32 q0, d0[0]
NOVFP vdup.32 q0, r2 NOVFP vdup.32 q0, r3
NOVFP vdup.32 q1, r3 NOVFP vld1.32 {d2[],d3[]}, [sp]
NOVFP ldr r2, [sp]
vld1.f32 {q2},[r1,:128]! vld1.f32 {q2},[r1,:128]!
vmin.f32 q10, q2, q1 vmin.f32 q10, q2, q1
vld1.f32 {q3},[r1,:128]! vld1.f32 {q3},[r1,:128]!
......
...@@ -55,8 +55,8 @@ static void vector_clipf_c_opposite_sign(float *dst, const float *src, ...@@ -55,8 +55,8 @@ static void vector_clipf_c_opposite_sign(float *dst, const float *src,
} }
} }
static void vector_clipf_c(float *dst, const float *src, static void vector_clipf_c(float *dst, const float *src, int len,
float min, float max, int len) float min, float max)
{ {
int i; int i;
......
...@@ -48,7 +48,8 @@ typedef struct AudioDSPContext { ...@@ -48,7 +48,8 @@ typedef struct AudioDSPContext {
/* assume len is a multiple of 16, and arrays are 16-byte aligned */ /* assume len is a multiple of 16, and arrays are 16-byte aligned */
void (*vector_clipf)(float *dst /* align 16 */, void (*vector_clipf)(float *dst /* align 16 */,
const float *src /* align 16 */, const float *src /* align 16 */,
float min, float max, int len /* align 16 */); int len /* align 16 */,
float min, float max);
} AudioDSPContext; } AudioDSPContext;
void ff_audiodsp_init(AudioDSPContext *c); void ff_audiodsp_init(AudioDSPContext *c);
......
...@@ -882,7 +882,7 @@ static inline void decode_bytes_and_gain(COOKContext *q, COOKSubpacket *p, ...@@ -882,7 +882,7 @@ static inline void decode_bytes_and_gain(COOKContext *q, COOKSubpacket *p,
static void saturate_output_float(COOKContext *q, float *out) static void saturate_output_float(COOKContext *q, float *out)
{ {
q->adsp.vector_clipf(out, q->mono_mdct_output + q->samples_per_channel, q->adsp.vector_clipf(out, q->mono_mdct_output + q->samples_per_channel,
-1.0f, 1.0f, FFALIGN(q->samples_per_channel, 8)); FFALIGN(q->samples_per_channel, 8), -1.0f, 1.0f);
} }
......
...@@ -132,46 +132,45 @@ VECTOR_CLIP_INT32 11, 1, 1, 0 ...@@ -132,46 +132,45 @@ VECTOR_CLIP_INT32 11, 1, 1, 0
VECTOR_CLIP_INT32 6, 1, 0, 0 VECTOR_CLIP_INT32 6, 1, 0, 0
%endif %endif
;----------------------------------------------------- ; void ff_vector_clipf_sse(float *dst, const float *src,
;void ff_vector_clipf(float *dst, const float *src, ; int len, float min, float max)
; float min, float max, int len)
;-----------------------------------------------------
INIT_XMM sse INIT_XMM sse
%if UNIX64 cglobal vector_clipf, 3, 3, 6, dst, src, len, min, max
cglobal vector_clipf, 3,3,6, dst, src, len %if ARCH_X86_32
%else VBROADCASTSS m0, minm
cglobal vector_clipf, 5,5,6, dst, src, min, max, len VBROADCASTSS m1, maxm
%endif %elif WIN64
%if WIN64 VBROADCASTSS m0, m3
SWAP 0, 2 VBROADCASTSS m1, maxm
SWAP 1, 3 %else ; 64bit sysv
%elif ARCH_X86_32 VBROADCASTSS m0, m0
movss m0, minm VBROADCASTSS m1, m1
movss m1, maxm
%endif %endif
SPLATD m0
SPLATD m1 movsxdifnidn lenq, lend
shl lend, 2
add srcq, lenq .loop
add dstq, lenq mova m2, [srcq + 4 * lenq - 4 * mmsize]
neg lenq mova m3, [srcq + 4 * lenq - 3 * mmsize]
.loop: mova m4, [srcq + 4 * lenq - 2 * mmsize]
mova m2, [srcq+lenq+mmsize*0] mova m5, [srcq + 4 * lenq - 1 * mmsize]
mova m3, [srcq+lenq+mmsize*1]
mova m4, [srcq+lenq+mmsize*2] maxps m2, m0
mova m5, [srcq+lenq+mmsize*3] maxps m3, m0
maxps m2, m0 maxps m4, m0
maxps m3, m0 maxps m5, m0
maxps m4, m0
maxps m5, m0 minps m2, m1
minps m2, m1 minps m3, m1
minps m3, m1 minps m4, m1
minps m4, m1 minps m5, m1
minps m5, m1
mova [dstq+lenq+mmsize*0], m2 mova [dstq + 4 * lenq - 4 * mmsize], m2
mova [dstq+lenq+mmsize*1], m3 mova [dstq + 4 * lenq - 3 * mmsize], m3
mova [dstq+lenq+mmsize*2], m4 mova [dstq + 4 * lenq - 2 * mmsize], m4
mova [dstq+lenq+mmsize*3], m5 mova [dstq + 4 * lenq - 1 * mmsize], m5
add lenq, mmsize*4
jl .loop sub lenq, mmsize
REP_RET jg .loop
RET
...@@ -38,7 +38,7 @@ void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src, ...@@ -38,7 +38,7 @@ void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src,
void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src, void ff_vector_clip_int32_sse4(int32_t *dst, const int32_t *src,
int32_t min, int32_t max, unsigned int len); int32_t min, int32_t max, unsigned int len);
void ff_vector_clipf_sse(float *dst, const float *src, void ff_vector_clipf_sse(float *dst, const float *src,
float min, float max, int len); int len, float min, float max);
av_cold void ff_audiodsp_init_x86(AudioDSPContext *c) av_cold void ff_audiodsp_init_x86(AudioDSPContext *c)
{ {
......
...@@ -120,7 +120,7 @@ void checkasm_check_audiodsp(void) ...@@ -120,7 +120,7 @@ void checkasm_check_audiodsp(void)
int i, len; int i, len;
declare_func_emms(AV_CPU_FLAG_MMX, void, float *dst, const float *src, declare_func_emms(AV_CPU_FLAG_MMX, void, float *dst, const float *src,
float min, float max, unsigned int len); int len, float min, float max);
val1 = (float)rnd() / (UINT_MAX >> 1) - 1.0f; val1 = (float)rnd() / (UINT_MAX >> 1) - 1.0f;
val2 = (float)rnd() / (UINT_MAX >> 1) - 1.0f; val2 = (float)rnd() / (UINT_MAX >> 1) - 1.0f;
...@@ -133,13 +133,13 @@ void checkasm_check_audiodsp(void) ...@@ -133,13 +133,13 @@ void checkasm_check_audiodsp(void)
len = rnd() % 128; len = rnd() % 128;
len = 16 * FFMAX(len, 1); len = 16 * FFMAX(len, 1);
call_ref(dst0, src, min, max, len); call_ref(dst0, src, len, min, max);
call_new(dst1, src, min, max, len); call_new(dst1, src, len, min, max);
for (i = 0; i < len; i++) { for (i = 0; i < len; i++) {
if (!float_near_ulp_array(dst0, dst1, 3, len)) if (!float_near_ulp_array(dst0, dst1, 3, len))
fail(); fail();
} }
bench_new(dst1, src, min, max, MAX_SIZE); bench_new(dst1, src, MAX_SIZE, min, max);
} }
report("audiodsp"); report("audiodsp");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment