Commit f1190576 authored by Michael Niedermayer

Merge remote-tracking branch 'qatar/master'

* qatar/master:
  ppc: reduce overreads when loading 8 pixels in altivec dsp functions
Merged-by: Michael Niedermayer <michaelni@gmx.at>
parents e136579c 98fdfa99
...@@ -285,10 +285,10 @@ static int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, in ...@@ -285,10 +285,10 @@ static int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, in
/* Read potentially unaligned pixels into t1 and t2 /* Read potentially unaligned pixels into t1 and t2
Since we're reading 16 pixels, and actually only want 8, Since we're reading 16 pixels, and actually only want 8,
mask out the last 8 pixels. The 0s don't change the sum. */ mask out the last 8 pixels. The 0s don't change the sum. */
vector unsigned char pix1l = vec_ld( 0, pix1); vector unsigned char pix1l = vec_ld(0, pix1);
vector unsigned char pix1r = vec_ld(15, pix1); vector unsigned char pix1r = vec_ld(7, pix1);
vector unsigned char pix2l = vec_ld( 0, pix2); vector unsigned char pix2l = vec_ld(0, pix2);
vector unsigned char pix2r = vec_ld(15, pix2); vector unsigned char pix2r = vec_ld(7, pix2);
t1 = vec_and(vec_perm(pix1l, pix1r, perm1), permclear); t1 = vec_and(vec_perm(pix1l, pix1r, perm1), permclear);
t2 = vec_and(vec_perm(pix2l, pix2r, perm2), permclear); t2 = vec_and(vec_perm(pix2l, pix2r, perm2), permclear);
...@@ -367,10 +367,10 @@ static int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, in ...@@ -367,10 +367,10 @@ static int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, in
/* Read potentially unaligned pixels into t1 and t2 /* Read potentially unaligned pixels into t1 and t2
Since we're reading 16 pixels, and actually only want 8, Since we're reading 16 pixels, and actually only want 8,
mask out the last 8 pixels. The 0s don't change the sum. */ mask out the last 8 pixels. The 0s don't change the sum. */
vector unsigned char pix1l = vec_ld( 0, pix1); vector unsigned char pix1l = vec_ld(0, pix1);
vector unsigned char pix1r = vec_ld(15, pix1); vector unsigned char pix1r = vec_ld(7, pix1);
vector unsigned char pix2l = vec_ld( 0, pix2); vector unsigned char pix2l = vec_ld(0, pix2);
vector unsigned char pix2r = vec_ld(15, pix2); vector unsigned char pix2r = vec_ld(7, pix2);
t1 = vec_and(vec_perm(pix1l, pix1r, perm1), permclear); t1 = vec_and(vec_perm(pix1l, pix1r, perm1), permclear);
t2 = vec_and(vec_perm(pix2l, pix2r, perm2), permclear); t2 = vec_and(vec_perm(pix2l, pix2r, perm2), permclear);
...@@ -489,8 +489,8 @@ static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels, i ...@@ -489,8 +489,8 @@ static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels, i
// Read potentially unaligned pixels. // Read potentially unaligned pixels.
// We're reading 16 pixels, and actually only want 8, // We're reading 16 pixels, and actually only want 8,
// but we simply ignore the extras. // but we simply ignore the extras.
vector unsigned char pixl = vec_ld( 0, pixels); vector unsigned char pixl = vec_ld(0, pixels);
vector unsigned char pixr = vec_ld(15, pixels); vector unsigned char pixr = vec_ld(7, pixels);
bytes = vec_perm(pixl, pixr, perm); bytes = vec_perm(pixl, pixr, perm);
// convert the bytes into shorts // convert the bytes into shorts
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment