Commit aad0e26f authored by Martin Storsjö's avatar Martin Storsjö

checkasm: aacpsdsp: Tolerate extra intermediate precision in stereo_interpolate

The stereo_interpolate functions add h_step to the values h
BUF_SIZE times. Within the stereo_interpolate C functions, the
values h (h0-h3, h00-h13) are declared as local float variables,
but the compiler is free to keep them in a register with extra
precision.

If the accumulation is rounded to 32 bit float precision after
each step, the less significant bits of h_step end up ignored
and the sum can deviate, affecting the end result more than
the currently set EPS.

By clearing the log2(BUF_SIZE) lower bits of h_step, we make sure
that the accumulation shouldn't differ significantly, regardless
of any extra precision in the accmulating register/variable.

This fixes the aacpsdsp checkasm test when built with clang for
mingw/x86_32.
Signed-off-by: 's avatarMartin Storsjö <martin@martin.st>
parent e10654de
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
*/ */
#include "libavcodec/aacpsdsp.h" #include "libavcodec/aacpsdsp.h"
#include "libavutil/intfloat.h"
#include "checkasm.h" #include "checkasm.h"
...@@ -34,6 +35,16 @@ ...@@ -34,6 +35,16 @@
#define EPS 0.005 #define EPS 0.005
static void clear_less_significant_bits(INTFLOAT *buf, int len, int bits)
{
int i;
for (i = 0; i < len; i++) {
union av_intfloat32 u = { .f = buf[i] };
u.i &= (0xffffffff << bits);
buf[i] = u.f;
}
}
static void test_add_squares(void) static void test_add_squares(void)
{ {
LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE]); LOCAL_ALIGNED_16(INTFLOAT, dst0, [BUF_SIZE]);
...@@ -198,6 +209,13 @@ static void test_stereo_interpolate(PSDSPContext *psdsp) ...@@ -198,6 +209,13 @@ static void test_stereo_interpolate(PSDSPContext *psdsp)
randomize((INTFLOAT *)h, 2 * 4); randomize((INTFLOAT *)h, 2 * 4);
randomize((INTFLOAT *)h_step, 2 * 4); randomize((INTFLOAT *)h_step, 2 * 4);
// Clear the least significant 14 bits of h_step, to avoid
// divergence when accumulating h_step BUF_SIZE times into
// a float variable which may or may not have extra intermediate
// precision. Therefore clear roughly log2(BUF_SIZE) less
// significant bits, to get the same result regardless of any
// extra precision in the accumulator.
clear_less_significant_bits((INTFLOAT *)h_step, 2 * 4, 14);
call_ref(l0, r0, h, h_step, BUF_SIZE); call_ref(l0, r0, h, h_step, BUF_SIZE);
call_new(l1, r1, h, h_step, BUF_SIZE); call_new(l1, r1, h, h_step, BUF_SIZE);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment