Commit 8e8db16a authored by Rong Yan, committed by Michael Niedermayer

avutil/ppc/float_dsp_altivec: ppc: Fix bug in ff_vector_fmul_add_altivec()...

avutil/ppc/float_dsp_altivec: ppc: Fix bug in ff_vector_fmul_add_altivec() affecting little endian environments on POWER8
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent 4a203b77
...@@ -75,20 +75,18 @@ void ff_vector_fmul_add_altivec(float *dst, const float *src0, ...@@ -75,20 +75,18 @@ void ff_vector_fmul_add_altivec(float *dst, const float *src0,
int len) int len)
{ {
int i; int i;
vector float d, s0, s1, s2, t0, t1, edges; vector float d, ss0, ss1, ss2, t0, t1, edges;
vector unsigned char align = vec_lvsr(0,dst),
mask = vec_lvsl(0, dst);
for (i = 0; i < len - 3; i += 4) { for (i = 0; i < len - 3; i += 4) {
t0 = vec_ld(0, dst + i); t0 = vec_ld(0, dst + i);
t1 = vec_ld(15, dst + i); t1 = vec_ld(15, dst + i);
s0 = vec_ld(0, src0 + i); ss0 = vec_ld(0, src0 + i);
s1 = vec_ld(0, src1 + i); ss1 = vec_ld(0, src1 + i);
s2 = vec_ld(0, src2 + i); ss2 = vec_ld(0, src2 + i);
edges = vec_perm(t1, t0, mask); edges = vec_perm(t1, t0, vcprm(0, 1, 2, 3));
d = vec_madd(s0, s1, s2); d = vec_madd(ss0, ss1, ss2);
t1 = vec_perm(d, edges, align); t1 = vec_perm(d, edges, vcprm(s0,s1,s2,s3));
t0 = vec_perm(edges, d, align); t0 = vec_perm(edges, d, vcprm(s0,s1,s2,s3));
vec_st(t1, 15, dst + i); vec_st(t1, 15, dst + i);
vec_st(t0, 0, dst + i); vec_st(t0, 0, dst + i);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment