Commit 30f3f959 authored by Diego Biurrun

ppc: dsputil: K&R formatting cosmetics

parent 82ee14d2
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#if HAVE_ALTIVEC_H #if HAVE_ALTIVEC_H
#include <altivec.h> #include <altivec.h>
#endif #endif
#include "libavutil/attributes.h" #include "libavutil/attributes.h"
#include "libavutil/ppc/types_altivec.h" #include "libavutil/ppc/types_altivec.h"
#include "libavutil/ppc/util_altivec.h" #include "libavutil/ppc/util_altivec.h"
...@@ -31,11 +32,13 @@ ...@@ -31,11 +32,13 @@
#include "libavcodec/dsputil.h" #include "libavcodec/dsputil.h"
#include "dsputil_altivec.h" #include "dsputil_altivec.h"
static int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) static int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
int line_size, int h)
{ {
int i; int i;
int s; int s;
const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); const vector unsigned char zero =
(const vector unsigned char) vec_splat_u8(0);
vector unsigned char perm1 = vec_lvsl(0, pix2); vector unsigned char perm1 = vec_lvsl(0, pix2);
vector unsigned char perm2 = vec_add(perm1, vec_splat_u8(1)); vector unsigned char perm2 = vec_add(perm1, vec_splat_u8(1));
vector unsigned char pix2l, pix2r; vector unsigned char pix2l, pix2r;
...@@ -44,13 +47,13 @@ static int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size ...@@ -44,13 +47,13 @@ static int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size
vector signed int sumdiffs; vector signed int sumdiffs;
s = 0; s = 0;
sad = (vector unsigned int)vec_splat_u32(0); sad = (vector unsigned int) vec_splat_u32(0);
for (i = 0; i < h; i++) { for (i = 0; i < h; i++) {
/* Read unaligned pixels into our vectors. The vectors are as follows: /* Read unaligned pixels into our vectors. The vectors are as follows:
* pix1v: pix1[0] - pix1[15] * pix1v: pix1[0] - pix1[15]
* pix2v: pix2[0] - pix2[15] pix2iv: pix2[1] - pix2[16] */ * pix2v: pix2[0] - pix2[15] pix2iv: pix2[1] - pix2[16] */
pix1v = vec_ld( 0, pix1); pix1v = vec_ld(0, pix1);
pix2l = vec_ld( 0, pix2); pix2l = vec_ld(0, pix2);
pix2r = vec_ld(16, pix2); pix2r = vec_ld(16, pix2);
pix2v = vec_perm(pix2l, pix2r, perm1); pix2v = vec_perm(pix2l, pix2r, perm1);
pix2iv = vec_perm(pix2l, pix2r, perm2); pix2iv = vec_perm(pix2l, pix2r, perm2);
...@@ -75,11 +78,13 @@ static int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size ...@@ -75,11 +78,13 @@ static int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size
return s; return s;
} }
static int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) static int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
int line_size, int h)
{ {
int i; int i;
int s; int s;
const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); const vector unsigned char zero =
(const vector unsigned char) vec_splat_u8(0);
vector unsigned char perm = vec_lvsl(0, pix2); vector unsigned char perm = vec_lvsl(0, pix2);
vector unsigned char pix2l, pix2r; vector unsigned char pix2l, pix2r;
vector unsigned char pix1v, pix2v, pix3v, avgv, t5; vector unsigned char pix1v, pix2v, pix3v, avgv, t5;
...@@ -88,7 +93,7 @@ static int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size ...@@ -88,7 +93,7 @@ static int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size
uint8_t *pix3 = pix2 + line_size; uint8_t *pix3 = pix2 + line_size;
s = 0; s = 0;
sad = (vector unsigned int)vec_splat_u32(0); sad = (vector unsigned int) vec_splat_u32(0);
/* Due to the fact that pix3 = pix2 + line_size, the pix3 of one /* Due to the fact that pix3 = pix2 + line_size, the pix3 of one
* iteration becomes pix2 in the next iteration. We can use this * iteration becomes pix2 in the next iteration. We can use this
...@@ -97,7 +102,7 @@ static int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size ...@@ -97,7 +102,7 @@ static int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size
* Read unaligned pixels into our vectors. The vectors are as follows: * Read unaligned pixels into our vectors. The vectors are as follows:
* pix2v: pix2[0] - pix2[15] * pix2v: pix2[0] - pix2[15]
* Split the pixel vectors into shorts. */ * Split the pixel vectors into shorts. */
pix2l = vec_ld( 0, pix2); pix2l = vec_ld(0, pix2);
pix2r = vec_ld(15, pix2); pix2r = vec_ld(15, pix2);
pix2v = vec_perm(pix2l, pix2r, perm); pix2v = vec_perm(pix2l, pix2r, perm);
...@@ -107,7 +112,7 @@ static int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size ...@@ -107,7 +112,7 @@ static int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size
* pix3v: pix3[0] - pix3[15] */ * pix3v: pix3[0] - pix3[15] */
pix1v = vec_ld(0, pix1); pix1v = vec_ld(0, pix1);
pix2l = vec_ld( 0, pix3); pix2l = vec_ld(0, pix3);
pix2r = vec_ld(15, pix3); pix2r = vec_ld(15, pix3);
pix3v = vec_perm(pix2l, pix2r, perm); pix3v = vec_perm(pix2l, pix2r, perm);
...@@ -123,7 +128,6 @@ static int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size ...@@ -123,7 +128,6 @@ static int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size
pix1 += line_size; pix1 += line_size;
pix2v = pix3v; pix2v = pix3v;
pix3 += line_size; pix3 += line_size;
} }
/* Sum up the four partial sums, and put the result into s. */ /* Sum up the four partial sums, and put the result into s. */
...@@ -133,13 +137,16 @@ static int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size ...@@ -133,13 +137,16 @@ static int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size
return s; return s;
} }
static int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) static int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
int line_size, int h)
{ {
int i; int i;
int s; int s;
uint8_t *pix3 = pix2 + line_size; uint8_t *pix3 = pix2 + line_size;
const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); const vector unsigned char zero =
const vector unsigned short two = (const vector unsigned short)vec_splat_u16(2); (const vector unsigned char) vec_splat_u8(0);
const vector unsigned short two =
(const vector unsigned short) vec_splat_u16(2);
vector unsigned char avgv, t5; vector unsigned char avgv, t5;
vector unsigned char perm1 = vec_lvsl(0, pix2); vector unsigned char perm1 = vec_lvsl(0, pix2);
vector unsigned char perm2 = vec_add(perm1, vec_splat_u8(1)); vector unsigned char perm2 = vec_add(perm1, vec_splat_u8(1));
...@@ -152,7 +159,7 @@ static int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_siz ...@@ -152,7 +159,7 @@ static int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_siz
vector unsigned int sad; vector unsigned int sad;
vector signed int sumdiffs; vector signed int sumdiffs;
sad = (vector unsigned int)vec_splat_u32(0); sad = (vector unsigned int) vec_splat_u32(0);
s = 0; s = 0;
...@@ -163,7 +170,7 @@ static int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_siz ...@@ -163,7 +170,7 @@ static int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_siz
* Read unaligned pixels into our vectors. The vectors are as follows: * Read unaligned pixels into our vectors. The vectors are as follows:
* pix2v: pix2[0] - pix2[15] pix2iv: pix2[1] - pix2[16] * pix2v: pix2[0] - pix2[15] pix2iv: pix2[1] - pix2[16]
* Split the pixel vectors into shorts. */ * Split the pixel vectors into shorts. */
pix2l = vec_ld( 0, pix2); pix2l = vec_ld(0, pix2);
pix2r = vec_ld(16, pix2); pix2r = vec_ld(16, pix2);
pix2v = vec_perm(pix2l, pix2r, perm1); pix2v = vec_perm(pix2l, pix2r, perm1);
pix2iv = vec_perm(pix2l, pix2r, perm2); pix2iv = vec_perm(pix2l, pix2r, perm2);
...@@ -181,7 +188,7 @@ static int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_siz ...@@ -181,7 +188,7 @@ static int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_siz
* pix3v: pix3[0] - pix3[15] pix3iv: pix3[1] - pix3[16] */ * pix3v: pix3[0] - pix3[15] pix3iv: pix3[1] - pix3[16] */
pix1v = vec_ld(0, pix1); pix1v = vec_ld(0, pix1);
pix2l = vec_ld( 0, pix3); pix2l = vec_ld(0, pix3);
pix2r = vec_ld(16, pix3); pix2r = vec_ld(16, pix3);
pix3v = vec_perm(pix2l, pix2r, perm1); pix3v = vec_perm(pix2l, pix2r, perm1);
pix3iv = vec_perm(pix2l, pix2r, perm2); pix3iv = vec_perm(pix2l, pix2r, perm2);
...@@ -228,22 +235,23 @@ static int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_siz ...@@ -228,22 +235,23 @@ static int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_siz
return s; return s;
} }
static int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) static int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
int line_size, int h)
{ {
int i; int i;
int s; int s;
const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); const vector unsigned int zero =
(const vector unsigned int) vec_splat_u32(0);
vector unsigned char perm = vec_lvsl(0, pix2); vector unsigned char perm = vec_lvsl(0, pix2);
vector unsigned char t1, t2, t3,t4, t5; vector unsigned char t1, t2, t3, t4, t5;
vector unsigned int sad; vector unsigned int sad;
vector signed int sumdiffs; vector signed int sumdiffs;
sad = (vector unsigned int)vec_splat_u32(0); sad = (vector unsigned int) vec_splat_u32(0);
for (i = 0; i < h; i++) { for (i = 0; i < h; i++) {
/* Read potentially unaligned pixels into t1 and t2. */ /* Read potentially unaligned pixels into t1 and t2. */
vector unsigned char pix2l = vec_ld( 0, pix2); vector unsigned char pix2l = vec_ld(0, pix2);
vector unsigned char pix2r = vec_ld(15, pix2); vector unsigned char pix2r = vec_ld(15, pix2);
t1 = vec_ld(0, pix1); t1 = vec_ld(0, pix1);
t2 = vec_perm(pix2l, pix2r, perm); t2 = vec_perm(pix2l, pix2r, perm);
...@@ -268,19 +276,23 @@ static int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, i ...@@ -268,19 +276,23 @@ static int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, i
return s; return s;
} }
static int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) static int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
int line_size, int h)
{ {
int i; int i;
int s; int s;
const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); const vector unsigned int zero =
const vector unsigned char permclear = (vector unsigned char){255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0}; (const vector unsigned int) vec_splat_u32(0);
const vector unsigned char permclear =
(vector unsigned char)
{ 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0 };
vector unsigned char perm1 = vec_lvsl(0, pix1); vector unsigned char perm1 = vec_lvsl(0, pix1);
vector unsigned char perm2 = vec_lvsl(0, pix2); vector unsigned char perm2 = vec_lvsl(0, pix2);
vector unsigned char t1, t2, t3,t4, t5; vector unsigned char t1, t2, t3, t4, t5;
vector unsigned int sad; vector unsigned int sad;
vector signed int sumdiffs; vector signed int sumdiffs;
sad = (vector unsigned int)vec_splat_u32(0); sad = (vector unsigned int) vec_splat_u32(0);
for (i = 0; i < h; i++) { for (i = 0; i < h; i++) {
/* Read potentially unaligned pixels into t1 and t2. /* Read potentially unaligned pixels into t1 and t2.
...@@ -317,18 +329,19 @@ static int pix_norm1_altivec(uint8_t *pix, int line_size) ...@@ -317,18 +329,19 @@ static int pix_norm1_altivec(uint8_t *pix, int line_size)
{ {
int i; int i;
int s; int s;
const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); const vector unsigned int zero =
(const vector unsigned int) vec_splat_u32(0);
vector unsigned char perm = vec_lvsl(0, pix); vector unsigned char perm = vec_lvsl(0, pix);
vector unsigned char pixv; vector unsigned char pixv;
vector unsigned int sv; vector unsigned int sv;
vector signed int sum; vector signed int sum;
sv = (vector unsigned int)vec_splat_u32(0); sv = (vector unsigned int) vec_splat_u32(0);
s = 0; s = 0;
for (i = 0; i < 16; i++) { for (i = 0; i < 16; i++) {
/* Read the potentially unaligned pixels. */ /* Read the potentially unaligned pixels. */
vector unsigned char pixl = vec_ld( 0, pix); vector unsigned char pixl = vec_ld(0, pix);
vector unsigned char pixr = vec_ld(15, pix); vector unsigned char pixr = vec_ld(15, pix);
pixv = vec_perm(pixl, pixr, perm); pixv = vec_perm(pixl, pixr, perm);
...@@ -347,19 +360,23 @@ static int pix_norm1_altivec(uint8_t *pix, int line_size) ...@@ -347,19 +360,23 @@ static int pix_norm1_altivec(uint8_t *pix, int line_size)
/* Sum of Squared Errors for an 8x8 block, AltiVec-enhanced. /* Sum of Squared Errors for an 8x8 block, AltiVec-enhanced.
* It's the sad8_altivec code above w/ squaring added. */ * It's the sad8_altivec code above w/ squaring added. */
static int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) static int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
int line_size, int h)
{ {
int i; int i;
int s; int s;
const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); const vector unsigned int zero =
const vector unsigned char permclear = (vector unsigned char){255,255,255,255,255,255,255,255,0,0,0,0,0,0,0,0}; (const vector unsigned int) vec_splat_u32(0);
const vector unsigned char permclear =
(vector unsigned char)
{ 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0 };
vector unsigned char perm1 = vec_lvsl(0, pix1); vector unsigned char perm1 = vec_lvsl(0, pix1);
vector unsigned char perm2 = vec_lvsl(0, pix2); vector unsigned char perm2 = vec_lvsl(0, pix2);
vector unsigned char t1, t2, t3,t4, t5; vector unsigned char t1, t2, t3, t4, t5;
vector unsigned int sum; vector unsigned int sum;
vector signed int sumsqr; vector signed int sumsqr;
sum = (vector unsigned int)vec_splat_u32(0); sum = (vector unsigned int) vec_splat_u32(0);
for (i = 0; i < h; i++) { for (i = 0; i < h; i++) {
/* Read potentially unaligned pixels into t1 and t2. /* Read potentially unaligned pixels into t1 and t2.
...@@ -397,21 +414,23 @@ static int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, in ...@@ -397,21 +414,23 @@ static int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, in
/* Sum of Squared Errors for a 16x16 block, AltiVec-enhanced. /* Sum of Squared Errors for a 16x16 block, AltiVec-enhanced.
* It's the sad16_altivec code above w/ squaring added. */ * It's the sad16_altivec code above w/ squaring added. */
static int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h) static int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
int line_size, int h)
{ {
int i; int i;
int s; int s;
const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); const vector unsigned int zero =
(const vector unsigned int) vec_splat_u32(0);
vector unsigned char perm = vec_lvsl(0, pix2); vector unsigned char perm = vec_lvsl(0, pix2);
vector unsigned char t1, t2, t3,t4, t5; vector unsigned char t1, t2, t3, t4, t5;
vector unsigned int sum; vector unsigned int sum;
vector signed int sumsqr; vector signed int sumsqr;
sum = (vector unsigned int)vec_splat_u32(0); sum = (vector unsigned int) vec_splat_u32(0);
for (i = 0; i < h; i++) { for (i = 0; i < h; i++) {
/* Read potentially unaligned pixels into t1 and t2. */ /* Read potentially unaligned pixels into t1 and t2. */
vector unsigned char pix2l = vec_ld( 0, pix2); vector unsigned char pix2l = vec_ld(0, pix2);
vector unsigned char pix2r = vec_ld(15, pix2); vector unsigned char pix2r = vec_ld(15, pix2);
t1 = vec_ld(0, pix1); t1 = vec_ld(0, pix1);
t2 = vec_perm(pix2l, pix2r, perm); t2 = vec_perm(pix2l, pix2r, perm);
...@@ -439,9 +458,10 @@ static int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, i ...@@ -439,9 +458,10 @@ static int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, i
return s; return s;
} }
static int pix_sum_altivec(uint8_t * pix, int line_size) static int pix_sum_altivec(uint8_t *pix, int line_size)
{ {
const vector unsigned int zero = (const vector unsigned int)vec_splat_u32(0); const vector unsigned int zero =
(const vector unsigned int) vec_splat_u32(0);
vector unsigned char perm = vec_lvsl(0, pix); vector unsigned char perm = vec_lvsl(0, pix);
vector unsigned char t1; vector unsigned char t1;
vector unsigned int sad; vector unsigned int sad;
...@@ -450,11 +470,11 @@ static int pix_sum_altivec(uint8_t * pix, int line_size) ...@@ -450,11 +470,11 @@ static int pix_sum_altivec(uint8_t * pix, int line_size)
int i; int i;
int s; int s;
sad = (vector unsigned int)vec_splat_u32(0); sad = (vector unsigned int) vec_splat_u32(0);
for (i = 0; i < 16; i++) { for (i = 0; i < 16; i++) {
/* Read the potentially unaligned 16 pixels into t1. */ /* Read the potentially unaligned 16 pixels into t1. */
vector unsigned char pixl = vec_ld( 0, pix); vector unsigned char pixl = vec_ld(0, pix);
vector unsigned char pixr = vec_ld(15, pix); vector unsigned char pixr = vec_ld(15, pix);
t1 = vec_perm(pixl, pixr, perm); t1 = vec_perm(pixl, pixr, perm);
...@@ -472,30 +492,29 @@ static int pix_sum_altivec(uint8_t * pix, int line_size) ...@@ -472,30 +492,29 @@ static int pix_sum_altivec(uint8_t * pix, int line_size)
return s; return s;
} }
static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels, int line_size) static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
int line_size)
{ {
int i; int i;
vector unsigned char perm = vec_lvsl(0, pixels); vector unsigned char perm = vec_lvsl(0, pixels);
vector unsigned char bytes; vector unsigned char bytes;
const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); const vector unsigned char zero =
(const vector unsigned char) vec_splat_u8(0);
vector signed short shorts; vector signed short shorts;
for (i = 0; i < 8; i++) { for (i = 0; i < 8; i++) {
/* Read potentially unaligned pixels. /* Read potentially unaligned pixels.
* We're reading 16 pixels, and actually only want 8, * We're reading 16 pixels, and actually only want 8,
* but we simply ignore the extras. */ * but we simply ignore the extras. */
// Read potentially unaligned pixels.
// We're reading 16 pixels, and actually only want 8,
// but we simply ignore the extras.
vector unsigned char pixl = vec_ld(0, pixels); vector unsigned char pixl = vec_ld(0, pixels);
vector unsigned char pixr = vec_ld(7, pixels); vector unsigned char pixr = vec_ld(7, pixels);
bytes = vec_perm(pixl, pixr, perm); bytes = vec_perm(pixl, pixr, perm);
// Convert the bytes into shorts. // Convert the bytes into shorts.
shorts = (vector signed short)vec_mergeh(zero, bytes); shorts = (vector signed short) vec_mergeh(zero, bytes);
// Save the data to the block, we assume the block is 16-byte aligned. // Save the data to the block, we assume the block is 16-byte aligned.
vec_st(shorts, i*16, (vector signed short*)block); vec_st(shorts, i * 16, (vector signed short *) block);
pixels += line_size; pixels += line_size;
} }
...@@ -508,33 +527,34 @@ static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1, ...@@ -508,33 +527,34 @@ static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
vector unsigned char perm1 = vec_lvsl(0, s1); vector unsigned char perm1 = vec_lvsl(0, s1);
vector unsigned char perm2 = vec_lvsl(0, s2); vector unsigned char perm2 = vec_lvsl(0, s2);
vector unsigned char bytes, pixl, pixr; vector unsigned char bytes, pixl, pixr;
const vector unsigned char zero = (const vector unsigned char)vec_splat_u8(0); const vector unsigned char zero =
(const vector unsigned char) vec_splat_u8(0);
vector signed short shorts1, shorts2; vector signed short shorts1, shorts2;
for (i = 0; i < 4; i++) { for (i = 0; i < 4; i++) {
/* Read potentially unaligned pixels. /* Read potentially unaligned pixels.
* We're reading 16 pixels, and actually only want 8, * We're reading 16 pixels, and actually only want 8,
* but we simply ignore the extras. */ * but we simply ignore the extras. */
pixl = vec_ld( 0, s1); pixl = vec_ld(0, s1);
pixr = vec_ld(15, s1); pixr = vec_ld(15, s1);
bytes = vec_perm(pixl, pixr, perm1); bytes = vec_perm(pixl, pixr, perm1);
// Convert the bytes into shorts. // Convert the bytes into shorts.
shorts1 = (vector signed short)vec_mergeh(zero, bytes); shorts1 = (vector signed short) vec_mergeh(zero, bytes);
// Do the same for the second block of pixels. // Do the same for the second block of pixels.
pixl = vec_ld( 0, s2); pixl = vec_ld(0, s2);
pixr = vec_ld(15, s2); pixr = vec_ld(15, s2);
bytes = vec_perm(pixl, pixr, perm2); bytes = vec_perm(pixl, pixr, perm2);
// Convert the bytes into shorts. // Convert the bytes into shorts.
shorts2 = (vector signed short)vec_mergeh(zero, bytes); shorts2 = (vector signed short) vec_mergeh(zero, bytes);
// Do the subtraction. // Do the subtraction.
shorts1 = vec_sub(shorts1, shorts2); shorts1 = vec_sub(shorts1, shorts2);
// Save the data to the block, we assume the block is 16-byte aligned. // Save the data to the block, we assume the block is 16-byte aligned.
vec_st(shorts1, 0, (vector signed short*)block); vec_st(shorts1, 0, (vector signed short *) block);
s1 += stride; s1 += stride;
s2 += stride; s2 += stride;
...@@ -546,26 +566,26 @@ static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1, ...@@ -546,26 +566,26 @@ static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
/* Read potentially unaligned pixels. /* Read potentially unaligned pixels.
* We're reading 16 pixels, and actually only want 8, * We're reading 16 pixels, and actually only want 8,
* but we simply ignore the extras. */ * but we simply ignore the extras. */
pixl = vec_ld( 0, s1); pixl = vec_ld(0, s1);
pixr = vec_ld(15, s1); pixr = vec_ld(15, s1);
bytes = vec_perm(pixl, pixr, perm1); bytes = vec_perm(pixl, pixr, perm1);
// Convert the bytes into shorts. // Convert the bytes into shorts.
shorts1 = (vector signed short)vec_mergeh(zero, bytes); shorts1 = (vector signed short) vec_mergeh(zero, bytes);
// Do the same for the second block of pixels. // Do the same for the second block of pixels.
pixl = vec_ld( 0, s2); pixl = vec_ld(0, s2);
pixr = vec_ld(15, s2); pixr = vec_ld(15, s2);
bytes = vec_perm(pixl, pixr, perm2); bytes = vec_perm(pixl, pixr, perm2);
// Convert the bytes into shorts. // Convert the bytes into shorts.
shorts2 = (vector signed short)vec_mergeh(zero, bytes); shorts2 = (vector signed short) vec_mergeh(zero, bytes);
// Do the subtraction. // Do the subtraction.
shorts1 = vec_sub(shorts1, shorts2); shorts1 = vec_sub(shorts1, shorts2);
// Save the data to the block, we assume the block is 16-byte aligned. // Save the data to the block, we assume the block is 16-byte aligned.
vec_st(shorts1, 0, (vector signed short*)block); vec_st(shorts1, 0, (vector signed short *) block);
s1 += stride; s1 += stride;
s2 += stride; s2 += stride;
...@@ -573,8 +593,8 @@ static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1, ...@@ -573,8 +593,8 @@ static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
} }
} }
static void clear_block_altivec(int16_t *block)
static void clear_block_altivec(int16_t *block) { {
LOAD_ZERO; LOAD_ZERO;
vec_st(zero_s16v, 0, block); vec_st(zero_s16v, 0, block);
vec_st(zero_s16v, 16, block); vec_st(zero_s16v, 16, block);
...@@ -586,46 +606,50 @@ static void clear_block_altivec(int16_t *block) { ...@@ -586,46 +606,50 @@ static void clear_block_altivec(int16_t *block) {
vec_st(zero_s16v, 112, block); vec_st(zero_s16v, 112, block);
} }
static void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w)
static void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) { {
register int i; register int i;
register vector unsigned char vdst, vsrc; register vector unsigned char vdst, vsrc;
/* dst and src are 16 bytes-aligned (guaranteed). */ /* dst and src are 16 bytes-aligned (guaranteed). */
for (i = 0 ; (i + 15) < w ; i+=16) { for (i = 0; (i + 15) < w; i += 16) {
vdst = vec_ld(i, (unsigned char*)dst); vdst = vec_ld(i, (unsigned char *) dst);
vsrc = vec_ld(i, (unsigned char*)src); vsrc = vec_ld(i, (unsigned char *) src);
vdst = vec_add(vsrc, vdst); vdst = vec_add(vsrc, vdst);
vec_st(vdst, i, (unsigned char*)dst); vec_st(vdst, i, (unsigned char *) dst);
} }
/* If w is not a multiple of 16. */ /* If w is not a multiple of 16. */
for (; (i < w) ; i++) { for (; (i < w); i++)
dst[i] = src[i]; dst[i] = src[i];
}
} }
static int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){ static int hadamard8_diff8x8_altivec(/* MpegEncContext */ void *s, uint8_t *dst,
uint8_t *src, int stride, int h)
{
int sum; int sum;
register const vector unsigned char vzero = register const vector unsigned char vzero =
(const vector unsigned char)vec_splat_u8(0); (const vector unsigned char) vec_splat_u8(0);
register vector signed short temp0, temp1, temp2, temp3, temp4, register vector signed short temp0, temp1, temp2, temp3, temp4,
temp5, temp6, temp7; temp5, temp6, temp7;
{ {
register const vector signed short vprod1 =(const vector signed short) register const vector signed short vprod1 =
{ 1,-1, 1,-1, 1,-1, 1,-1 }; (const vector signed short) { 1, -1, 1, -1, 1, -1, 1, -1 };
register const vector signed short vprod2 =(const vector signed short) register const vector signed short vprod2 =
{ 1, 1,-1,-1, 1, 1,-1,-1 }; (const vector signed short) { 1, 1, -1, -1, 1, 1, -1, -1 };
register const vector signed short vprod3 =(const vector signed short) register const vector signed short vprod3 =
{ 1, 1, 1, 1,-1,-1,-1,-1 }; (const vector signed short) { 1, 1, 1, 1, -1, -1, -1, -1 };
register const vector unsigned char perm1 = (const vector unsigned char) register const vector unsigned char perm1 =
{0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05, (const vector unsigned char)
0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D}; { 0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05,
register const vector unsigned char perm2 = (const vector unsigned char) 0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D };
{0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, register const vector unsigned char perm2 =
0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B}; (const vector unsigned char)
register const vector unsigned char perm3 = (const vector unsigned char) { 0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03,
{0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B };
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07}; register const vector unsigned char perm3 =
(const vector unsigned char)
{ 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 };
#define ONEITERBUTTERFLY(i, res) \ #define ONEITERBUTTERFLY(i, res) \
{ \ { \
...@@ -641,10 +665,10 @@ static int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, u ...@@ -641,10 +665,10 @@ static int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, u
dstO = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \ dstO = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \
/* Promote the unsigned chars to signed shorts. */ \ /* Promote the unsigned chars to signed shorts. */ \
/* We're in the 8x8 function, we only care for the first 8. */ \ /* We're in the 8x8 function, we only care for the first 8. */ \
srcV = (vector signed short)vec_mergeh((vector signed char)vzero, \ srcV = (vector signed short) vec_mergeh((vector signed char) vzero, \
(vector signed char)srcO); \ (vector signed char) srcO); \
dstV = (vector signed short)vec_mergeh((vector signed char)vzero, \ dstV = (vector signed short) vec_mergeh((vector signed char) vzero, \
(vector signed char)dstO); \ (vector signed char) dstO); \
/* subtractions inside the first butterfly */ \ /* subtractions inside the first butterfly */ \
but0 = vec_sub(srcV, dstV); \ but0 = vec_sub(srcV, dstV); \
op1 = vec_perm(but0, but0, perm1); \ op1 = vec_perm(but0, but0, perm1); \
...@@ -701,7 +725,7 @@ static int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, u ...@@ -701,7 +725,7 @@ static int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, u
vsum = vec_sum4s(vec_abs(line5C), vsum); vsum = vec_sum4s(vec_abs(line5C), vsum);
vsum = vec_sum4s(vec_abs(line6C), vsum); vsum = vec_sum4s(vec_abs(line6C), vsum);
vsum = vec_sum4s(vec_abs(line7C), vsum); vsum = vec_sum4s(vec_abs(line7C), vsum);
vsum = vec_sums(vsum, (vector signed int)vzero); vsum = vec_sums(vsum, (vector signed int) vzero);
vsum = vec_splat(vsum, 3); vsum = vec_splat(vsum, 3);
vec_ste(vsum, 0, &sum); vec_ste(vsum, 0, &sum);
} }
...@@ -726,7 +750,9 @@ static int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, u ...@@ -726,7 +750,9 @@ static int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, u
* On the 970, the hand-made RA is still a win (around 690 vs. around 780), * On the 970, the hand-made RA is still a win (around 690 vs. around 780),
* but xlc goes to around 660 on the regular C code... * but xlc goes to around 660 on the regular C code...
*/ */
static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h) { static int hadamard8_diff16x8_altivec(/* MpegEncContext */ void *s, uint8_t *dst,
uint8_t *src, int stride, int h)
{
int sum; int sum;
register vector signed short register vector signed short
temp0 __asm__ ("v0"), temp0 __asm__ ("v0"),
...@@ -747,37 +773,44 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, ...@@ -747,37 +773,44 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst,
temp6S __asm__ ("v14"), temp6S __asm__ ("v14"),
temp7S __asm__ ("v15"); temp7S __asm__ ("v15");
register const vector unsigned char vzero __asm__ ("v31") = register const vector unsigned char vzero __asm__ ("v31") =
(const vector unsigned char)vec_splat_u8(0); (const vector unsigned char) vec_splat_u8(0);
{ {
register const vector signed short vprod1 __asm__ ("v16") = register const vector signed short vprod1 __asm__ ("v16") =
(const vector signed short){ 1,-1, 1,-1, 1,-1, 1,-1 }; (const vector signed short) { 1, -1, 1, -1, 1, -1, 1, -1 };
register const vector signed short vprod2 __asm__ ("v17") = register const vector signed short vprod2 __asm__ ("v17") =
(const vector signed short){ 1, 1,-1,-1, 1, 1,-1,-1 }; (const vector signed short) { 1, 1, -1, -1, 1, 1, -1, -1 };
register const vector signed short vprod3 __asm__ ("v18") = register const vector signed short vprod3 __asm__ ("v18") =
(const vector signed short){ 1, 1, 1, 1,-1,-1,-1,-1 }; (const vector signed short) { 1, 1, 1, 1, -1, -1, -1, -1 };
register const vector unsigned char perm1 __asm__ ("v19") = register const vector unsigned char perm1 __asm__ ("v19") =
(const vector unsigned char) (const vector unsigned char)
{0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05, { 0x02, 0x03, 0x00, 0x01, 0x06, 0x07, 0x04, 0x05,
0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D}; 0x0A, 0x0B, 0x08, 0x09, 0x0E, 0x0F, 0x0C, 0x0D };
register const vector unsigned char perm2 __asm__ ("v20") = register const vector unsigned char perm2 __asm__ ("v20") =
(const vector unsigned char) (const vector unsigned char)
{0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03, { 0x04, 0x05, 0x06, 0x07, 0x00, 0x01, 0x02, 0x03,
0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B}; 0x0C, 0x0D, 0x0E, 0x0F, 0x08, 0x09, 0x0A, 0x0B };
register const vector unsigned char perm3 __asm__ ("v21") = register const vector unsigned char perm3 __asm__ ("v21") =
(const vector unsigned char) (const vector unsigned char)
{0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, { 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07}; 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 };
#define ONEITERBUTTERFLY(i, res1, res2) \ #define ONEITERBUTTERFLY(i, res1, res2) \
{ \ { \
register vector unsigned char src1 __asm__ ("v22"), \ register vector unsigned char \
src1 __asm__ ("v22"), \
src2 __asm__ ("v23"), \ src2 __asm__ ("v23"), \
dst1 __asm__ ("v24"), \ dst1 __asm__ ("v24"), \
dst2 __asm__ ("v25"), \ dst2 __asm__ ("v25"), \
srcO __asm__ ("v22"), \ srcO __asm__ ("v22"), \
dstO __asm__ ("v23"); \ dstO __asm__ ("v23"); \
\ \
register vector signed short srcV __asm__ ("v24"), \ register vector signed short \
srcV __asm__ ("v24"), \
dstV __asm__ ("v25"), \ dstV __asm__ ("v25"), \
srcW __asm__ ("v26"), \ srcW __asm__ ("v26"), \
dstW __asm__ ("v27"), \ dstW __asm__ ("v27"), \
...@@ -801,14 +834,14 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, ...@@ -801,14 +834,14 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst,
dst2 = vec_ld((stride * i) + 16, dst); \ dst2 = vec_ld((stride * i) + 16, dst); \
dstO = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \ dstO = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \
/* Promote the unsigned chars to signed shorts. */ \ /* Promote the unsigned chars to signed shorts. */ \
srcV = (vector signed short)vec_mergeh((vector signed char)vzero, \ srcV = (vector signed short) vec_mergeh((vector signed char) vzero, \
(vector signed char)srcO); \ (vector signed char) srcO); \
dstV = (vector signed short)vec_mergeh((vector signed char)vzero, \ dstV = (vector signed short) vec_mergeh((vector signed char) vzero, \
(vector signed char)dstO); \ (vector signed char) dstO); \
srcW = (vector signed short)vec_mergel((vector signed char)vzero, \ srcW = (vector signed short) vec_mergel((vector signed char) vzero, \
(vector signed char)srcO); \ (vector signed char) srcO); \
dstW = (vector signed short)vec_mergel((vector signed char)vzero, \ dstW = (vector signed short) vec_mergel((vector signed char) vzero, \
(vector signed char)dstO); \ (vector signed char) dstO); \
/* subtractions inside the first butterfly */ \ /* subtractions inside the first butterfly */ \
but0 = vec_sub(srcV, dstV); \ but0 = vec_sub(srcV, dstV); \
but0S = vec_sub(srcW, dstW); \ but0S = vec_sub(srcW, dstW); \
...@@ -838,10 +871,10 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, ...@@ -838,10 +871,10 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst,
{ {
register vector signed int vsum; register vector signed int vsum;
register vector signed short line0S, line1S, line2S, line3S, line4S, register vector signed short line0S, line1S, line2S, line3S, line4S,
line5S, line6S, line7S, line0BS,line2BS, line5S, line6S, line7S, line0BS, line2BS,
line1BS,line3BS,line4BS,line6BS,line5BS, line1BS, line3BS, line4BS, line6BS, line5BS,
line7BS,line0CS,line4CS,line1CS,line5CS, line7BS, line0CS, line4CS, line1CS, line5CS,
line2CS,line6CS,line3CS,line7CS; line2CS, line6CS, line3CS, line7CS;
register vector signed short line0 = vec_add(temp0, temp1); register vector signed short line0 = vec_add(temp0, temp1);
register vector signed short line1 = vec_sub(temp0, temp1); register vector signed short line1 = vec_sub(temp0, temp1);
...@@ -914,19 +947,21 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, ...@@ -914,19 +947,21 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst,
vsum = vec_sum4s(vec_abs(line5CS), vsum); vsum = vec_sum4s(vec_abs(line5CS), vsum);
vsum = vec_sum4s(vec_abs(line6CS), vsum); vsum = vec_sum4s(vec_abs(line6CS), vsum);
vsum = vec_sum4s(vec_abs(line7CS), vsum); vsum = vec_sum4s(vec_abs(line7CS), vsum);
vsum = vec_sums(vsum, (vector signed int)vzero); vsum = vec_sums(vsum, (vector signed int) vzero);
vsum = vec_splat(vsum, 3); vsum = vec_splat(vsum, 3);
vec_ste(vsum, 0, &sum); vec_ste(vsum, 0, &sum);
} }
return sum; return sum;
} }
static int hadamard8_diff16_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){ static int hadamard8_diff16_altivec(/* MpegEncContext */ void *s, uint8_t *dst,
uint8_t *src, int stride, int h)
{
int score; int score;
score = hadamard8_diff16x8_altivec(s, dst, src, stride, 8); score = hadamard8_diff16x8_altivec(s, dst, src, stride, 8);
if (h==16) { if (h == 16) {
dst += 8*stride; dst += 8 * stride;
src += 8*stride; src += 8 * stride;
score += hadamard8_diff16x8_altivec(s, dst, src, stride, 8); score += hadamard8_diff16x8_altivec(s, dst, src, stride, 8);
} }
return score; return score;
...@@ -941,14 +976,18 @@ av_cold void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx) ...@@ -941,14 +976,18 @@ av_cold void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx)
c->pix_abs[0][3] = sad16_xy2_altivec; c->pix_abs[0][3] = sad16_xy2_altivec;
c->pix_abs[0][0] = sad16_altivec; c->pix_abs[0][0] = sad16_altivec;
c->pix_abs[1][0] = sad8_altivec; c->pix_abs[1][0] = sad8_altivec;
c->sad[0]= sad16_altivec;
c->sad[1]= sad8_altivec; c->sad[0] = sad16_altivec;
c->sad[1] = sad8_altivec;
c->sse[0] = sse16_altivec;
c->sse[1] = sse8_altivec;
c->pix_norm1 = pix_norm1_altivec; c->pix_norm1 = pix_norm1_altivec;
c->sse[1]= sse8_altivec;
c->sse[0]= sse16_altivec;
c->pix_sum = pix_sum_altivec; c->pix_sum = pix_sum_altivec;
c->diff_pixels = diff_pixels_altivec; c->diff_pixels = diff_pixels_altivec;
c->add_bytes= add_bytes_altivec; c->add_bytes = add_bytes_altivec;
if (!high_bit_depth) { if (!high_bit_depth) {
c->get_pixels = get_pixels_altivec; c->get_pixels = get_pixels_altivec;
c->clear_block = clear_block_altivec; c->clear_block = clear_block_altivec;
......
...@@ -24,11 +24,13 @@ ...@@ -24,11 +24,13 @@
#define AVCODEC_PPC_DSPUTIL_ALTIVEC_H #define AVCODEC_PPC_DSPUTIL_ALTIVEC_H
#include <stdint.h> #include <stdint.h>
#include "libavcodec/dsputil.h"
void ff_put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); #include "libavcodec/dsputil.h"
void ff_avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h); void ff_avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_put_pixels16_altivec(uint8_t *block, const uint8_t *pixels,
ptrdiff_t line_size, int h);
void ff_fdct_altivec(int16_t *block); void ff_fdct_altivec(int16_t *block);
void ff_gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h, void ff_gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h,
...@@ -36,7 +38,7 @@ void ff_gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h, ...@@ -36,7 +38,7 @@ void ff_gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h,
void ff_idct_put_altivec(uint8_t *dest, int line_size, int16_t *block); void ff_idct_put_altivec(uint8_t *dest, int line_size, int16_t *block);
void ff_idct_add_altivec(uint8_t *dest, int line_size, int16_t *block); void ff_idct_add_altivec(uint8_t *dest, int line_size, int16_t *block);
void ff_dsputil_init_altivec(DSPContext* c, AVCodecContext *avctx); void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx);
void ff_int_init_altivec(DSPContext* c, AVCodecContext *avctx); void ff_int_init_altivec(DSPContext *c, AVCodecContext *avctx);
#endif /* AVCODEC_PPC_DSPUTIL_ALTIVEC_H */ #endif /* AVCODEC_PPC_DSPUTIL_ALTIVEC_H */
...@@ -51,23 +51,23 @@ ...@@ -51,23 +51,23 @@
*/ */
static void clear_blocks_dcbz32_ppc(int16_t *blocks) static void clear_blocks_dcbz32_ppc(int16_t *blocks)
{ {
register int misal = ((unsigned long)blocks & 0x00000010); register int misal = ((unsigned long) blocks & 0x00000010);
register int i = 0; register int i = 0;
if (misal) { if (misal) {
((unsigned long*)blocks)[0] = 0L; ((unsigned long *) blocks)[0] = 0L;
((unsigned long*)blocks)[1] = 0L; ((unsigned long *) blocks)[1] = 0L;
((unsigned long*)blocks)[2] = 0L; ((unsigned long *) blocks)[2] = 0L;
((unsigned long*)blocks)[3] = 0L; ((unsigned long *) blocks)[3] = 0L;
i += 16; i += 16;
} }
for ( ; i < sizeof(int16_t)*6*64-31 ; i += 32) { for (; i < sizeof(int16_t) * 6 * 64 - 31; i += 32)
__asm__ volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory"); __asm__ volatile ("dcbz %0,%1" :: "b" (blocks), "r" (i) : "memory");
}
if (misal) { if (misal) {
((unsigned long*)blocks)[188] = 0L; ((unsigned long *) blocks)[188] = 0L;
((unsigned long*)blocks)[189] = 0L; ((unsigned long *) blocks)[189] = 0L;
((unsigned long*)blocks)[190] = 0L; ((unsigned long *) blocks)[190] = 0L;
((unsigned long*)blocks)[191] = 0L; ((unsigned long *) blocks)[191] = 0L;
i += 16; i += 16;
} }
} }
...@@ -77,23 +77,23 @@ static void clear_blocks_dcbz32_ppc(int16_t *blocks) ...@@ -77,23 +77,23 @@ static void clear_blocks_dcbz32_ppc(int16_t *blocks)
#if HAVE_DCBZL #if HAVE_DCBZL
static void clear_blocks_dcbz128_ppc(int16_t *blocks) static void clear_blocks_dcbz128_ppc(int16_t *blocks)
{ {
register int misal = ((unsigned long)blocks & 0x0000007f); register int misal = ((unsigned long) blocks & 0x0000007f);
register int i = 0; register int i = 0;
if (misal) { if (misal) {
/* We could probably also optimize this case, /* We could probably also optimize this case,
* but there's not much point as the machines * but there's not much point as the machines
* aren't available yet (2003-06-26). */ * aren't available yet (2003-06-26). */
memset(blocks, 0, sizeof(int16_t)*6*64); memset(blocks, 0, sizeof(int16_t) * 6 * 64);
} } else {
else for (; i < sizeof(int16_t) * 6 * 64; i += 128)
for ( ; i < sizeof(int16_t)*6*64 ; i += 128) { __asm__ volatile ("dcbzl %0,%1" :: "b" (blocks), "r" (i) : "memory");
__asm__ volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory");
} }
} }
#else #else
static void clear_blocks_dcbz128_ppc(int16_t *blocks) static void clear_blocks_dcbz128_ppc(int16_t *blocks)
{ {
memset(blocks, 0, sizeof(int16_t)*6*64); memset(blocks, 0, sizeof(int16_t) * 6 * 64);
} }
#endif #endif
...@@ -110,9 +110,8 @@ static long check_dcbzl_effect(void) ...@@ -110,9 +110,8 @@ static long check_dcbzl_effect(void)
register long i = 0; register long i = 0;
long count = 0; long count = 0;
if (!fakedata) { if (!fakedata)
return 0L; return 0L;
}
fakedata_middle = (fakedata + 512); fakedata_middle = (fakedata + 512);
...@@ -120,12 +119,11 @@ static long check_dcbzl_effect(void) ...@@ -120,12 +119,11 @@ static long check_dcbzl_effect(void)
/* Below the constraint "b" seems to mean "address base register" /* Below the constraint "b" seems to mean "address base register"
* in gcc-3.3 / RS/6000 speaks. Seems to avoid using r0, so.... */ * in gcc-3.3 / RS/6000 speaks. Seems to avoid using r0, so.... */
__asm__ volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero)); __asm__ volatile ("dcbzl %0, %1" :: "b" (fakedata_middle), "r" (zero));
for (i = 0; i < 1024 ; i ++) { for (i = 0; i < 1024; i++)
if (fakedata[i] == (char)0) if (fakedata[i] == (char) 0)
count++; count++;
}
av_free(fakedata); av_free(fakedata);
...@@ -177,6 +175,5 @@ av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx) ...@@ -177,6 +175,5 @@ av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx)
c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
} }
} }
} }
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment