Linshizhi / ffmpeg.wasm-core · Commits

Commit b7d24fd4, authored Jan 15, 2014 by Diego Biurrun
Parent: b045283f

ppc: dsputil: Merge some declarations and initializations

Showing 6 changed files with 219 additions and 270 deletions:

    libavcodec/ppc/dsputil_altivec.c  +181  -222
    libavcodec/ppc/dsputil_ppc.c        +3    -6
    libavcodec/ppc/fdct_altivec.c       +1    -2
    libavcodec/ppc/gmc_altivec.c       +15   -16
    libavcodec/ppc/idct_altivec.c      +17   -20
    libavcodec/ppc/int_altivec.c        +2    -4
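The change is purely mechanical: wherever a variable was declared in one statement and first assigned in another, the two are folded into a single declaration with an initializer. A minimal before/after sketch of the pattern (a hypothetical helper, not code from this patch):

/* Before: declaration and first assignment are separate statements. */
static int sum_before(const int *p, int n)
{
    int i;
    int s;

    s = 0;
    for (i = 0; i < n; i++)
        s += p[i];
    return s;
}

/* After: the declaration carries the initializer, so the variable can
 * never be read before it has a value. */
static int sum_after(const int *p, int n)
{
    int i, s = 0;

    for (i = 0; i < n; i++)
        s += p[i];
    return s;
}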
libavcodec/ppc/dsputil_altivec.c (view file @ b7d24fd4)
@@ -35,34 +35,30 @@
 static int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
                             int line_size, int h)
 {
-    int i;
-    int s;
+    int i, s = 0;
     const vector unsigned char zero =
         (const vector unsigned char) vec_splat_u8(0);
     vector unsigned char perm1 = vec_lvsl(0, pix2);
     vector unsigned char perm2 = vec_add(perm1, vec_splat_u8(1));
-    vector unsigned char pix2l, pix2r;
-    vector unsigned char pix1v, pix2v, pix2iv, avgv, t5;
-    vector unsigned int sad;
+    vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
     vector signed int sumdiffs;
 
-    s = 0;
-    sad = (vector unsigned int) vec_splat_u32(0);
-
     for (i = 0; i < h; i++) {
         /* Read unaligned pixels into our vectors. The vectors are as follows:
          * pix1v: pix1[0] - pix1[15]
          * pix2v: pix2[0] - pix2[15] pix2iv: pix2[1] - pix2[16] */
-        pix1v  = vec_ld(0,  pix1);
-        pix2l  = vec_ld(0,  pix2);
-        pix2r  = vec_ld(16, pix2);
-        pix2v  = vec_perm(pix2l, pix2r, perm1);
-        pix2iv = vec_perm(pix2l, pix2r, perm2);
+        vector unsigned char pix1v  = vec_ld(0,  pix1);
+        vector unsigned char pix2l  = vec_ld(0,  pix2);
+        vector unsigned char pix2r  = vec_ld(16, pix2);
+        vector unsigned char pix2v  = vec_perm(pix2l, pix2r, perm1);
+        vector unsigned char pix2iv = vec_perm(pix2l, pix2r, perm2);
 
         /* Calculate the average vector. */
-        avgv = vec_avg(pix2v, pix2iv);
+        vector unsigned char avgv = vec_avg(pix2v, pix2iv);
 
         /* Calculate a sum of abs differences vector. */
-        t5 = vec_sub(vec_max(pix1v, avgv), vec_min(pix1v, avgv));
+        vector unsigned char t5 = vec_sub(vec_max(pix1v, avgv),
+                                          vec_min(pix1v, avgv));
 
         /* Add each 4 pixel group together and put 4 results into sad. */
         sad = vec_sum4s(t5, sad);
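For reference, a plain-C model of what sad16_x2_altivec computes, and of the unsigned abs-difference identity |a - b| == max(a, b) - min(a, b) that the vec_max/vec_min/vec_sub sequence relies on (illustrative code, not part of the patch):

#include <stdint.h>

/* Scalar model of sad16_x2: SAD between pix1 and the rounded average of
 * each pix2 pixel with its right neighbour (half-pel offset in x). */
static int sad16_x2_ref(const uint8_t *pix1, const uint8_t *pix2,
                        int line_size, int h)
{
    int i, j, s = 0;

    for (i = 0; i < h; i++) {
        for (j = 0; j < 16; j++) {
            int avg  = (pix2[j] + pix2[j + 1] + 1) >> 1; /* vec_avg rounds up */
            int diff = pix1[j] - avg;
            /* max - min, as the unsigned vector code computes |diff| */
            s += diff > 0 ? diff : -diff;
        }
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}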
@@ -81,20 +77,15 @@ static int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
 static int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
                             int line_size, int h)
 {
-    int i;
-    int s;
+    int i, s = 0;
     const vector unsigned char zero =
         (const vector unsigned char) vec_splat_u8(0);
     vector unsigned char perm = vec_lvsl(0, pix2);
-    vector unsigned char pix2l, pix2r;
-    vector unsigned char pix1v, pix2v, pix3v, avgv, t5;
-    vector unsigned int sad;
+    vector unsigned char pix1v, pix3v, avgv, t5;
+    vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
     vector signed int sumdiffs;
     uint8_t *pix3 = pix2 + line_size;
 
-    s = 0;
-    sad = (vector unsigned int) vec_splat_u32(0);
-
     /* Due to the fact that pix3 = pix2 + line_size, the pix3 of one
      * iteration becomes pix2 in the next iteration. We can use this
      * fact to avoid a potentially expensive unaligned read, each
@@ -102,9 +93,9 @@ static int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
      * Read unaligned pixels into our vectors. The vectors are as follows:
      * pix2v: pix2[0] - pix2[15]
      * Split the pixel vectors into shorts. */
-    pix2l = vec_ld(0,  pix2);
-    pix2r = vec_ld(15, pix2);
-    pix2v = vec_perm(pix2l, pix2r, perm);
+    vector unsigned char pix2l = vec_ld(0,  pix2);
+    vector unsigned char pix2r = vec_ld(15, pix2);
+    vector unsigned char pix2v = vec_perm(pix2l, pix2r, perm);
 
     for (i = 0; i < h; i++) {
         /* Read unaligned pixels into our vectors. The vectors are as follows:
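A scalar model of the rolling read described in the comment above: because pix3 == pix2 + line_size, the row loaded as pix3 in one iteration is reused as pix2 in the next, so each loop body issues only one new unaligned row load (illustrative, not from the file):

#include <stdint.h>

static int sad16_y2_ref(const uint8_t *pix1, const uint8_t *pix2,
                        int line_size, int h)
{
    const uint8_t *pix3 = pix2 + line_size;
    int i, j, s = 0;

    for (i = 0; i < h; i++) {
        for (j = 0; j < 16; j++) {
            int avg  = (pix2[j] + pix3[j] + 1) >> 1; /* half-pel in y */
            int diff = pix1[j] - avg;
            s += diff > 0 ? diff : -diff;
        }
        pix1 += line_size;
        pix2  = pix3;        /* reuse: this iteration's pix3 row */
        pix3 += line_size;   /* only one new row per iteration */
    }
    return s;
}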
@@ -140,8 +131,7 @@ static int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
 static int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
                              int line_size, int h)
 {
-    int i;
-    int s;
+    int i, s = 0;
     uint8_t *pix3 = pix2 + line_size;
     const vector unsigned char zero =
         (const vector unsigned char) vec_splat_u8(0);
@@ -150,19 +140,12 @@ static int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
     vector unsigned char avgv, t5;
     vector unsigned char perm1 = vec_lvsl(0, pix2);
     vector unsigned char perm2 = vec_add(perm1, vec_splat_u8(1));
-    vector unsigned char pix2l, pix2r;
-    vector unsigned char pix1v, pix2v, pix3v, pix2iv, pix3iv;
-    vector unsigned short pix2lv, pix2hv, pix2ilv, pix2ihv;
+    vector unsigned char pix1v, pix3v, pix3iv;
     vector unsigned short pix3lv, pix3hv, pix3ilv, pix3ihv;
     vector unsigned short avghv, avglv;
-    vector unsigned short t1, t2, t3, t4;
-    vector unsigned int sad;
+    vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
     vector signed int sumdiffs;
 
-    sad = (vector unsigned int) vec_splat_u32(0);
-
-    s = 0;
-
     /* Due to the fact that pix3 = pix2 + line_size, the pix3 of one
      * iteration becomes pix2 in the next iteration. We can use this
      * fact to avoid a potentially expensive unaligned read, as well
@@ -170,17 +153,22 @@ static int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
      * Read unaligned pixels into our vectors. The vectors are as follows:
      * pix2v: pix2[0] - pix2[15] pix2iv: pix2[1] - pix2[16]
      * Split the pixel vectors into shorts. */
-    pix2l  = vec_ld(0,  pix2);
-    pix2r  = vec_ld(16, pix2);
-    pix2v  = vec_perm(pix2l, pix2r, perm1);
-    pix2iv = vec_perm(pix2l, pix2r, perm2);
-
-    pix2hv  = (vector unsigned short) vec_mergeh(zero, pix2v);
-    pix2lv  = (vector unsigned short) vec_mergel(zero, pix2v);
-    pix2ihv = (vector unsigned short) vec_mergeh(zero, pix2iv);
-    pix2ilv = (vector unsigned short) vec_mergel(zero, pix2iv);
-    t1 = vec_add(pix2hv, pix2ihv);
-    t2 = vec_add(pix2lv, pix2ilv);
+    vector unsigned char pix2l  = vec_ld(0,  pix2);
+    vector unsigned char pix2r  = vec_ld(16, pix2);
+    vector unsigned char pix2v  = vec_perm(pix2l, pix2r, perm1);
+    vector unsigned char pix2iv = vec_perm(pix2l, pix2r, perm2);
+
+    vector unsigned short pix2hv  =
+        (vector unsigned short) vec_mergeh(zero, pix2v);
+    vector unsigned short pix2lv  =
+        (vector unsigned short) vec_mergel(zero, pix2v);
+    vector unsigned short pix2ihv =
+        (vector unsigned short) vec_mergeh(zero, pix2iv);
+    vector unsigned short pix2ilv =
+        (vector unsigned short) vec_mergel(zero, pix2iv);
+
+    vector unsigned short t1 = vec_add(pix2hv, pix2ihv);
+    vector unsigned short t2 = vec_add(pix2lv, pix2ilv);
+    vector unsigned short t3, t4;
 
     for (i = 0; i < h; i++) {
         /* Read unaligned pixels into our vectors. The vectors are as follows:
@@ -238,28 +226,24 @@ static int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
 static int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
                          int line_size, int h)
 {
-    int i;
-    int s;
+    int i, s;
     const vector unsigned int zero =
         (const vector unsigned int) vec_splat_u32(0);
     vector unsigned char perm = vec_lvsl(0, pix2);
-    vector unsigned char t1, t2, t3, t4, t5;
-    vector unsigned int sad;
+    vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
     vector signed int sumdiffs;
 
-    sad = (vector unsigned int) vec_splat_u32(0);
-
     for (i = 0; i < h; i++) {
         /* Read potentially unaligned pixels into t1 and t2. */
         vector unsigned char pix2l = vec_ld(0,  pix2);
         vector unsigned char pix2r = vec_ld(15, pix2);
-        t1 = vec_ld(0, pix1);
-        t2 = vec_perm(pix2l, pix2r, perm);
+        vector unsigned char t1 = vec_ld(0, pix1);
+        vector unsigned char t2 = vec_perm(pix2l, pix2r, perm);
 
         /* Calculate a sum of abs differences vector. */
-        t3 = vec_max(t1, t2);
-        t4 = vec_min(t1, t2);
-        t5 = vec_sub(t3, t4);
+        vector unsigned char t3 = vec_max(t1, t2);
+        vector unsigned char t4 = vec_min(t1, t2);
+        vector unsigned char t5 = vec_sub(t3, t4);
 
         /* Add each 4 pixel group together and put 4 results into sad. */
         sad = vec_sum4s(t5, sad);
@@ -279,8 +263,7 @@ static int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
 static int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
                         int line_size, int h)
 {
-    int i;
-    int s;
+    int i, s;
     const vector unsigned int zero =
         (const vector unsigned int) vec_splat_u32(0);
     const vector unsigned char permclear =
@@ -288,12 +271,9 @@ static int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
         { 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0 };
     vector unsigned char perm1 = vec_lvsl(0, pix1);
     vector unsigned char perm2 = vec_lvsl(0, pix2);
-    vector unsigned char t1, t2, t3, t4, t5;
-    vector unsigned int sad;
+    vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
     vector signed int sumdiffs;
 
-    sad = (vector unsigned int) vec_splat_u32(0);
-
     for (i = 0; i < h; i++) {
         /* Read potentially unaligned pixels into t1 and t2.
          * Since we're reading 16 pixels, and actually only want 8,
@@ -302,13 +282,15 @@ static int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
         vector unsigned char pix1r = vec_ld(7, pix1);
         vector unsigned char pix2l = vec_ld(0, pix2);
         vector unsigned char pix2r = vec_ld(7, pix2);
-        t1 = vec_and(vec_perm(pix1l, pix1r, perm1), permclear);
-        t2 = vec_and(vec_perm(pix2l, pix2r, perm2), permclear);
+        vector unsigned char t1 =
+            vec_and(vec_perm(pix1l, pix1r, perm1), permclear);
+        vector unsigned char t2 =
+            vec_and(vec_perm(pix2l, pix2r, perm2), permclear);
 
         /* Calculate a sum of abs differences vector. */
-        t3 = vec_max(t1, t2);
-        t4 = vec_min(t1, t2);
-        t5 = vec_sub(t3, t4);
+        vector unsigned char t3 = vec_max(t1, t2);
+        vector unsigned char t4 = vec_min(t1, t2);
+        vector unsigned char t5 = vec_sub(t3, t4);
 
         /* Add each 4 pixel group together and put 4 results into sad. */
         sad = vec_sum4s(t5, sad);
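The permclear constant above is the masking trick for the 8-pixel case: all 16 lanes are processed, but ANDing both inputs with eight 255s followed by eight 0s zeroes lanes 8-15, so those lanes contribute |0 - 0| == 0 to the sum. A scalar model of one row (illustrative, not from the file):

#include <stdint.h>

static int sad8_masked_ref(const uint8_t *pix1, const uint8_t *pix2)
{
    static const uint8_t permclear[16] = {
        255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0
    };
    int j, s = 0;

    for (j = 0; j < 16; j++) {
        int a = pix1[j] & permclear[j];   /* lanes 8-15 become 0 */
        int b = pix2[j] & permclear[j];
        s += a > b ? a - b : b - a;       /* max - min, as in the vector code */
    }
    return s;
}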
@@ -327,23 +309,18 @@ static int sad8_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
 static int pix_norm1_altivec(uint8_t *pix, int line_size)
 {
-    int i;
-    int s;
+    int i, s = 0;
     const vector unsigned int zero =
         (const vector unsigned int) vec_splat_u32(0);
     vector unsigned char perm = vec_lvsl(0, pix);
-    vector unsigned char pixv;
-    vector unsigned int sv;
+    vector unsigned int sv = (vector unsigned int) vec_splat_u32(0);
     vector signed int sum;
 
-    sv = (vector unsigned int) vec_splat_u32(0);
-
-    s = 0;
-
     for (i = 0; i < 16; i++) {
         /* Read the potentially unaligned pixels. */
         vector unsigned char pixl = vec_ld(0,  pix);
         vector unsigned char pixr = vec_ld(15, pix);
-        pixv = vec_perm(pixl, pixr, perm);
+        vector unsigned char pixv = vec_perm(pixl, pixr, perm);
 
         /* Square the values, and add them to our sum. */
         sv = vec_msum(pixv, pixv, sv);
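pix_norm1 sums the squares of a 16x16 block; vec_msum(pixv, pixv, sv) multiplies each byte by itself and accumulates groups of four products into 32-bit lanes. The scalar equivalent (illustrative):

#include <stdint.h>

static int pix_norm1_ref(const uint8_t *pix, int line_size)
{
    int i, j, s = 0;

    for (i = 0; i < 16; i++) {
        for (j = 0; j < 16; j++)
            s += pix[j] * pix[j];   /* what vec_msum(pixv, pixv, sv) accumulates */
        pix += line_size;
    }
    return s;
}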
@@ -363,8 +340,7 @@ static int pix_norm1_altivec(uint8_t *pix, int line_size)
 static int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
                         int line_size, int h)
 {
-    int i;
-    int s;
+    int i, s;
     const vector unsigned int zero =
         (const vector unsigned int) vec_splat_u32(0);
     const vector unsigned char permclear =
@@ -372,12 +348,9 @@ static int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
         { 255, 255, 255, 255, 255, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0 };
     vector unsigned char perm1 = vec_lvsl(0, pix1);
     vector unsigned char perm2 = vec_lvsl(0, pix2);
-    vector unsigned char t1, t2, t3, t4, t5;
-    vector unsigned int sum;
+    vector unsigned int sum = (vector unsigned int) vec_splat_u32(0);
     vector signed int sumsqr;
 
-    sum = (vector unsigned int) vec_splat_u32(0);
-
     for (i = 0; i < h; i++) {
         /* Read potentially unaligned pixels into t1 and t2.
          * Since we're reading 16 pixels, and actually only want 8,
@@ -386,16 +359,18 @@ static int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
         vector unsigned char pix1r = vec_ld(7, pix1);
         vector unsigned char pix2l = vec_ld(0, pix2);
         vector unsigned char pix2r = vec_ld(7, pix2);
-        t1 = vec_and(vec_perm(pix1l, pix1r, perm1), permclear);
-        t2 = vec_and(vec_perm(pix2l, pix2r, perm2), permclear);
+        vector unsigned char t1 =
+            vec_and(vec_perm(pix1l, pix1r, perm1), permclear);
+        vector unsigned char t2 =
+            vec_and(vec_perm(pix2l, pix2r, perm2), permclear);
 
         /* Since we want to use unsigned chars, we can take advantage
          * of the fact that abs(a - b) ^ 2 = (a - b) ^ 2. */
 
         /* Calculate abs differences vector. */
-        t3 = vec_max(t1, t2);
-        t4 = vec_min(t1, t2);
-        t5 = vec_sub(t3, t4);
+        vector unsigned char t3 = vec_max(t1, t2);
+        vector unsigned char t4 = vec_min(t1, t2);
+        vector unsigned char t5 = vec_sub(t3, t4);
 
         /* Square the values and add them to our sum. */
         sum = vec_msum(t5, t5, sum);
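The comment's point is that for unsigned pixels the sign of the difference is irrelevant once squared, so the cheap max/min form of |a - b| feeds vec_msum directly. A scalar model of one 8-pixel row (illustrative):

#include <stdint.h>

/* Since (a - b)^2 == |a - b|^2, the unsigned difference
 * max(a, b) - min(a, b) can be squared directly. */
static int sse8_row_ref(const uint8_t *a, const uint8_t *b)
{
    int j, sum = 0;

    for (j = 0; j < 8; j++) {
        int d = a[j] > b[j] ? a[j] - b[j] : b[j] - a[j];
        sum += d * d;   /* vec_msum(t5, t5, sum) does this 16 lanes at a time */
    }
    return sum;
}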
@@ -417,31 +392,27 @@ static int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
 static int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
                          int line_size, int h)
 {
-    int i;
-    int s;
+    int i, s;
     const vector unsigned int zero =
         (const vector unsigned int) vec_splat_u32(0);
     vector unsigned char perm = vec_lvsl(0, pix2);
-    vector unsigned char t1, t2, t3, t4, t5;
-    vector unsigned int sum;
+    vector unsigned int sum = (vector unsigned int) vec_splat_u32(0);
     vector signed int sumsqr;
 
-    sum = (vector unsigned int) vec_splat_u32(0);
-
     for (i = 0; i < h; i++) {
         /* Read potentially unaligned pixels into t1 and t2. */
         vector unsigned char pix2l = vec_ld(0,  pix2);
         vector unsigned char pix2r = vec_ld(15, pix2);
-        t1 = vec_ld(0, pix1);
-        t2 = vec_perm(pix2l, pix2r, perm);
+        vector unsigned char t1 = vec_ld(0, pix1);
+        vector unsigned char t2 = vec_perm(pix2l, pix2r, perm);
 
         /* Since we want to use unsigned chars, we can take advantage
          * of the fact that abs(a - b) ^ 2 = (a - b) ^ 2. */
 
         /* Calculate abs differences vector. */
-        t3 = vec_max(t1, t2);
-        t4 = vec_min(t1, t2);
-        t5 = vec_sub(t3, t4);
+        vector unsigned char t3 = vec_max(t1, t2);
+        vector unsigned char t4 = vec_min(t1, t2);
+        vector unsigned char t5 = vec_sub(t3, t4);
 
         /* Square the values and add them to our sum. */
         sum = vec_msum(t5, t5, sum);
@@ -460,23 +431,18 @@ static int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2,
 static int pix_sum_altivec(uint8_t *pix, int line_size)
 {
+    int i, s;
     const vector unsigned int zero =
         (const vector unsigned int) vec_splat_u32(0);
     vector unsigned char perm = vec_lvsl(0, pix);
-    vector unsigned char t1;
-    vector unsigned int sad;
+    vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
     vector signed int sumdiffs;
-    int i;
-    int s;
 
-    sad = (vector unsigned int) vec_splat_u32(0);
-
     for (i = 0; i < 16; i++) {
         /* Read the potentially unaligned 16 pixels into t1. */
         vector unsigned char pixl = vec_ld(0,  pix);
         vector unsigned char pixr = vec_ld(15, pix);
-        t1 = vec_perm(pixl, pixr, perm);
+        vector unsigned char t1 = vec_perm(pixl, pixr, perm);
 
         /* Add each 4 pixel group together and put 4 results into sad. */
         sad = vec_sum4s(t1, sad);
@@ -497,10 +463,8 @@ static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
 {
     int i;
     vector unsigned char perm = vec_lvsl(0, pixels);
-    vector unsigned char bytes;
     const vector unsigned char zero =
         (const vector unsigned char) vec_splat_u8(0);
-    vector signed short shorts;
 
     for (i = 0; i < 8; i++) {
         /* Read potentially unaligned pixels.
@@ -508,10 +472,11 @@ static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
          * but we simply ignore the extras. */
         vector unsigned char pixl = vec_ld(0, pixels);
         vector unsigned char pixr = vec_ld(7, pixels);
-        bytes = vec_perm(pixl, pixr, perm);
+        vector unsigned char bytes = vec_perm(pixl, pixr, perm);
 
         // Convert the bytes into shorts.
-        shorts = (vector signed short) vec_mergeh(zero, bytes);
+        vector signed short shorts =
+            (vector signed short) vec_mergeh(zero, bytes);
 
         // Save the data to the block, we assume the block is 16-byte aligned.
         vec_st(shorts, i * 16, (vector signed short *) block);
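vec_mergeh(zero, bytes) interleaves zero bytes with pixel bytes, which on big-endian AltiVec is exactly a zero-extension of the first eight u8 lanes to 16-bit values. The scalar equivalent of get_pixels (illustrative):

#include <stdint.h>

/* Widen 8 unsigned bytes per row to 16-bit signed coefficients, for 8
 * rows, into a 16-byte-aligned block; vec_mergeh(zero, bytes) performs
 * the same zero-extension 8 lanes at a time. */
static void get_pixels_ref(int16_t *block, const uint8_t *pixels,
                           int line_size)
{
    int i, j;

    for (i = 0; i < 8; i++) {
        for (j = 0; j < 8; j++)
            block[i * 8 + j] = pixels[j];
        pixels += line_size;
    }
}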
@@ -526,7 +491,6 @@ static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
     int i;
     vector unsigned char perm1 = vec_lvsl(0, s1);
     vector unsigned char perm2 = vec_lvsl(0, s2);
-    vector unsigned char bytes, pixl, pixr;
     const vector unsigned char zero =
         (const vector unsigned char) vec_splat_u8(0);
     vector signed short shorts1, shorts2;
@@ -535,9 +499,9 @@ static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
         /* Read potentially unaligned pixels.
          * We're reading 16 pixels, and actually only want 8,
          * but we simply ignore the extras. */
-        pixl  = vec_ld(0,  s1);
-        pixr  = vec_ld(15, s1);
-        bytes = vec_perm(pixl, pixr, perm1);
+        vector unsigned char pixl  = vec_ld(0,  s1);
+        vector unsigned char pixr  = vec_ld(15, s1);
+        vector unsigned char bytes = vec_perm(pixl, pixr, perm1);
 
         // Convert the bytes into shorts.
         shorts1 = (vector signed short) vec_mergeh(zero, bytes);
@@ -653,29 +617,31 @@ static int hadamard8_diff8x8_altivec(/* MpegEncContext */ void *s, uint8_t *dst,
 #define ONEITERBUTTERFLY(i, res) \
     { \
-        register vector unsigned char src1, src2, srcO; \
-        register vector unsigned char dst1, dst2, dstO; \
-        register vector signed short srcV, dstV; \
-        register vector signed short but0, but1, but2, op1, op2, op3; \
-        src1 = vec_ld(stride * i, src); \
-        src2 = vec_ld(stride * i + 15, src); \
-        srcO = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \
-        dst1 = vec_ld(stride * i, dst); \
-        dst2 = vec_ld(stride * i + 15, dst); \
-        dstO = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \
+        register vector unsigned char src1 = vec_ld(stride * i, src); \
+        register vector unsigned char src2 = vec_ld(stride * i + 15, src); \
+        register vector unsigned char srcO = \
+            vec_perm(src1, src2, vec_lvsl(stride * i, src)); \
+        register vector unsigned char dst1 = vec_ld(stride * i, dst); \
+        register vector unsigned char dst2 = vec_ld(stride * i + 15, dst); \
+        register vector unsigned char dstO = \
+            vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \
         \
         /* Promote the unsigned chars to signed shorts. */ \
        /* We're in the 8x8 function, we only care for the first 8. */ \
-        srcV = (vector signed short) vec_mergeh((vector signed char) vzero, \
+        register vector signed short srcV = \
+            (vector signed short) vec_mergeh((vector signed char) vzero, \
                                                 (vector signed char) srcO); \
-        dstV = (vector signed short) vec_mergeh((vector signed char) vzero, \
+        register vector signed short dstV = \
+            (vector signed short) vec_mergeh((vector signed char) vzero, \
                                                 (vector signed char) dstO); \
         \
         /* subtractions inside the first butterfly */ \
-        but0 = vec_sub(srcV, dstV); \
-        op1  = vec_perm(but0, but0, perm1); \
-        but1 = vec_mladd(but0, vprod1, op1); \
-        op2  = vec_perm(but1, but1, perm2); \
-        but2 = vec_mladd(but1, vprod2, op2); \
-        op3  = vec_perm(but2, but2, perm3); \
+        register vector signed short but0 = vec_sub(srcV, dstV); \
+        register vector signed short op1  = vec_perm(but0, but0, perm1); \
+        register vector signed short but1 = vec_mladd(but0, vprod1, op1); \
+        register vector signed short op2  = vec_perm(but1, but1, perm2); \
+        register vector signed short but2 = vec_mladd(but1, vprod2, op2); \
+        register vector signed short op3  = vec_perm(but2, but2, perm3); \
         res = vec_mladd(but2, vprod3, op3); \
     }
     ONEITERBUTTERFLY(0, temp0);
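Each vec_perm/vec_mladd pair above is one Hadamard butterfly stage: the permute pairs every lane with its partner and the multiply-add applies +1/-1 weights, producing sums and differences in place. Assuming vprod1 alternates +1/-1 and perm1 swaps adjacent lanes (both constants are defined outside this excerpt, so this is an assumption), one stage over eight shorts reduces to (illustrative):

#include <stdint.h>

/* One butterfly stage: lane j gets a + b, lane j+1 gets a - b, i.e.
 * v * {+1, -1, ...} + swap_adjacent(v), as vec_mladd computes. */
static void butterfly_stage_ref(int16_t v[8])
{
    int j;

    for (j = 0; j < 8; j += 2) {
        int16_t a = v[j], b = v[j + 1];
        v[j]     = a + b;   /* a * (+1) + permuted partner b */
        v[j + 1] = a - b;   /* b * (-1) + permuted partner a */
    }
}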
@@ -801,61 +767,59 @@ static int hadamard8_diff16x8_altivec(/* MpegEncContext */ void *s, uint8_t *dst
 #define ONEITERBUTTERFLY(i, res1, res2) \
     { \
-        register vector unsigned char \
-            src1 __asm__ ("v22"), \
-            src2 __asm__ ("v23"), \
-            dst1 __asm__ ("v24"), \
-            dst2 __asm__ ("v25"), \
-            srcO __asm__ ("v22"), \
-            dstO __asm__ ("v23"); \
-        \
-        register vector signed short \
-            srcV  __asm__ ("v24"), \
-            dstV  __asm__ ("v25"), \
-            srcW  __asm__ ("v26"), \
-            dstW  __asm__ ("v27"), \
-            but0  __asm__ ("v28"), \
-            but0S __asm__ ("v29"), \
-            op1   __asm__ ("v30"), \
-            but1  __asm__ ("v22"), \
-            op1S  __asm__ ("v23"), \
-            but1S __asm__ ("v24"), \
-            op2   __asm__ ("v25"), \
-            but2  __asm__ ("v26"), \
-            op2S  __asm__ ("v27"), \
-            but2S __asm__ ("v28"), \
-            op3   __asm__ ("v29"), \
-            op3S  __asm__ ("v30"); \
+        register vector unsigned char src1 __asm__ ("v22") = \
+            vec_ld(stride * i, src); \
+        register vector unsigned char src2 __asm__ ("v23") = \
+            vec_ld(stride * i + 16, src); \
+        register vector unsigned char srcO __asm__ ("v22") = \
+            vec_perm(src1, src2, vec_lvsl(stride * i, src)); \
+        register vector unsigned char dst1 __asm__ ("v24") = \
+            vec_ld(stride * i, dst); \
+        register vector unsigned char dst2 __asm__ ("v25") = \
+            vec_ld(stride * i + 16, dst); \
+        register vector unsigned char dstO __asm__ ("v23") = \
+            vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \
         \
-        src1 = vec_ld(stride * i, src); \
-        src2 = vec_ld(stride * i + 16, src); \
-        srcO = vec_perm(src1, src2, vec_lvsl(stride * i, src)); \
-        dst1 = vec_ld(stride * i, dst); \
-        dst2 = vec_ld(stride * i + 16, dst); \
-        dstO = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \
         /* Promote the unsigned chars to signed shorts. */ \
-        srcV = (vector signed short) vec_mergeh((vector signed char) vzero, \
+        register vector signed short srcV __asm__ ("v24") = \
+            (vector signed short) vec_mergeh((vector signed char) vzero, \
                                                 (vector signed char) srcO); \
-        dstV = (vector signed short) vec_mergeh((vector signed char) vzero, \
+        register vector signed short dstV __asm__ ("v25") = \
+            (vector signed short) vec_mergeh((vector signed char) vzero, \
                                                 (vector signed char) dstO); \
-        srcW = (vector signed short) vec_mergel((vector signed char) vzero, \
+        register vector signed short srcW __asm__ ("v26") = \
+            (vector signed short) vec_mergel((vector signed char) vzero, \
                                                 (vector signed char) srcO); \
-        dstW = (vector signed short) vec_mergel((vector signed char) vzero, \
+        register vector signed short dstW __asm__ ("v27") = \
+            (vector signed short) vec_mergel((vector signed char) vzero, \
                                                 (vector signed char) dstO); \
         \
         /* subtractions inside the first butterfly */ \
-        but0  = vec_sub(srcV, dstV); \
-        but0S = vec_sub(srcW, dstW); \
-        op1   = vec_perm(but0, but0, perm1); \
-        but1  = vec_mladd(but0, vprod1, op1); \
-        op1S  = vec_perm(but0S, but0S, perm1); \
-        but1S = vec_mladd(but0S, vprod1, op1S); \
-        op2   = vec_perm(but1, but1, perm2); \
-        but2  = vec_mladd(but1, vprod2, op2); \
-        op2S  = vec_perm(but1S, but1S, perm2); \
-        but2S = vec_mladd(but1S, vprod2, op2S); \
-        op3   = vec_perm(but2, but2, perm3); \
+        register vector signed short but0 __asm__ ("v28") = \
+            vec_sub(srcV, dstV); \
+        register vector signed short but0S __asm__ ("v29") = \
+            vec_sub(srcW, dstW); \
+        register vector signed short op1 __asm__ ("v30") = \
+            vec_perm(but0, but0, perm1); \
+        register vector signed short but1 __asm__ ("v22") = \
+            vec_mladd(but0, vprod1, op1); \
+        register vector signed short op1S __asm__ ("v23") = \
+            vec_perm(but0S, but0S, perm1); \
+        register vector signed short but1S __asm__ ("v24") = \
+            vec_mladd(but0S, vprod1, op1S); \
+        register vector signed short op2 __asm__ ("v25") = \
+            vec_perm(but1, but1, perm2); \
+        register vector signed short but2 __asm__ ("v26") = \
+            vec_mladd(but1, vprod2, op2); \
+        register vector signed short op2S __asm__ ("v27") = \
+            vec_perm(but1S, but1S, perm2); \
+        register vector signed short but2S __asm__ ("v28") = \
+            vec_mladd(but1S, vprod2, op2S); \
+        register vector signed short op3 __asm__ ("v29") = \
+            vec_perm(but2, but2, perm3); \
+        register vector signed short op3S __asm__ ("v30") = \
+            vec_perm(but2S, but2S, perm3); \
         res1 = vec_mladd(but2, vprod3, op3); \
-        op3S = vec_perm(but2S, but2S, perm3); \
         res2 = vec_mladd(but2S, vprod3, op3S); \
     }
     ONEITERBUTTERFLY(0, temp0, temp0S);
@@ -870,11 +834,6 @@ static int hadamard8_diff16x8_altivec(/* MpegEncContext */ void *s, uint8_t *dst
 #undef ONEITERBUTTERFLY
     {
         register vector signed int vsum;
-        register vector signed short line0S, line1S, line2S, line3S,
-                                      line4S, line5S, line6S, line7S,
-                                      line0BS, line2BS, line1BS, line3BS,
-                                      line4BS, line6BS, line5BS, line7BS,
-                                      line0CS, line4CS, line1CS, line5CS,
-                                      line2CS, line6CS, line3CS, line7CS;
         register vector signed short line0 = vec_add(temp0, temp1);
         register vector signed short line1 = vec_sub(temp0, temp1);
@@ -903,6 +862,33 @@ static int hadamard8_diff16x8_altivec(/* MpegEncContext */ void *s, uint8_t *dst
         register vector signed short line3C = vec_add(line3B, line7B);
         register vector signed short line7C = vec_sub(line3B, line7B);
 
+        register vector signed short line0S = vec_add(temp0S, temp1S);
+        register vector signed short line1S = vec_sub(temp0S, temp1S);
+        register vector signed short line2S = vec_add(temp2S, temp3S);
+        register vector signed short line3S = vec_sub(temp2S, temp3S);
+        register vector signed short line4S = vec_add(temp4S, temp5S);
+        register vector signed short line5S = vec_sub(temp4S, temp5S);
+        register vector signed short line6S = vec_add(temp6S, temp7S);
+        register vector signed short line7S = vec_sub(temp6S, temp7S);
+
+        register vector signed short line0BS = vec_add(line0S, line2S);
+        register vector signed short line2BS = vec_sub(line0S, line2S);
+        register vector signed short line1BS = vec_add(line1S, line3S);
+        register vector signed short line3BS = vec_sub(line1S, line3S);
+        register vector signed short line4BS = vec_add(line4S, line6S);
+        register vector signed short line6BS = vec_sub(line4S, line6S);
+        register vector signed short line5BS = vec_add(line5S, line7S);
+        register vector signed short line7BS = vec_sub(line5S, line7S);
+
+        register vector signed short line0CS = vec_add(line0BS, line4BS);
+        register vector signed short line4CS = vec_sub(line0BS, line4BS);
+        register vector signed short line1CS = vec_add(line1BS, line5BS);
+        register vector signed short line5CS = vec_sub(line1BS, line5BS);
+        register vector signed short line2CS = vec_add(line2BS, line6BS);
+        register vector signed short line6CS = vec_sub(line2BS, line6BS);
+        register vector signed short line3CS = vec_add(line3BS, line7BS);
+        register vector signed short line7CS = vec_sub(line3BS, line7BS);
+
         vsum = vec_sum4s(vec_abs(line0C), vec_splat_s32(0));
         vsum = vec_sum4s(vec_abs(line1C), vsum);
         vsum = vec_sum4s(vec_abs(line2C), vsum);
@@ -912,33 +898,6 @@ static int hadamard8_diff16x8_altivec(/* MpegEncContext */ void *s, uint8_t *dst
         vsum = vec_sum4s(vec_abs(line6C), vsum);
         vsum = vec_sum4s(vec_abs(line7C), vsum);
 
-        line0S = vec_add(temp0S, temp1S);
-        line1S = vec_sub(temp0S, temp1S);
-        line2S = vec_add(temp2S, temp3S);
-        line3S = vec_sub(temp2S, temp3S);
-        line4S = vec_add(temp4S, temp5S);
-        line5S = vec_sub(temp4S, temp5S);
-        line6S = vec_add(temp6S, temp7S);
-        line7S = vec_sub(temp6S, temp7S);
-
-        line0BS = vec_add(line0S, line2S);
-        line2BS = vec_sub(line0S, line2S);
-        line1BS = vec_add(line1S, line3S);
-        line3BS = vec_sub(line1S, line3S);
-        line4BS = vec_add(line4S, line6S);
-        line6BS = vec_sub(line4S, line6S);
-        line5BS = vec_add(line5S, line7S);
-        line7BS = vec_sub(line5S, line7S);
-
-        line0CS = vec_add(line0BS, line4BS);
-        line4CS = vec_sub(line0BS, line4BS);
-        line1CS = vec_add(line1BS, line5BS);
-        line5CS = vec_sub(line1BS, line5BS);
-        line2CS = vec_add(line2BS, line6BS);
-        line6CS = vec_sub(line2BS, line6BS);
-        line3CS = vec_add(line3BS, line7BS);
-        line7CS = vec_sub(line3BS, line7BS);
-
         vsum = vec_sum4s(vec_abs(line0CS), vsum);
         vsum = vec_sum4s(vec_abs(line1CS), vsum);
         vsum = vec_sum4s(vec_abs(line2CS), vsum);
@@ -957,8 +916,8 @@ static int hadamard8_diff16x8_altivec(/* MpegEncContext */ void *s, uint8_t *dst
 static int hadamard8_diff16_altivec(/* MpegEncContext */ void *s, uint8_t *dst,
                                     uint8_t *src, int stride, int h)
 {
-    int score;
-    score = hadamard8_diff16x8_altivec(s, dst, src, stride, 8);
+    int score = hadamard8_diff16x8_altivec(s, dst, src, stride, 8);
+
     if (h == 16) {
         dst += 8 * stride;
         src += 8 * stride;
libavcodec/ppc/dsputil_ppc.c (view file @ b7d24fd4)
@@ -51,8 +51,7 @@
  */
 static void clear_blocks_dcbz32_ppc(int16_t *blocks)
 {
-    register int misal = (unsigned long) blocks & 0x00000010;
-    register int i     = 0;
+    register int misal = (unsigned long) blocks & 0x00000010, i = 0;
 
     if (misal) {
         ((unsigned long *) blocks)[0] = 0L;
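clear_blocks_dcbz32_ppc zeroes the 6 x 64 coefficient block with the PowerPC dcbz instruction, which zero-fills a whole 32-byte cache line without first fetching it from memory; the misaligned head and tail are cleared with ordinary stores. A stripped-down sketch of the aligned fast path (PPC-only inline asm, assuming a 32-byte cache line and a line-aligned pointer):

#include <stdint.h>

static void clear_blocks_dcbz32_sketch(int16_t *blocks)
{
    long i;

    /* 6 blocks of 64 int16_t coefficients == 768 bytes == 24 lines. */
    for (i = 0; i < (long) (sizeof(int16_t) * 6 * 64); i += 32)
        /* Zero one 32-byte data-cache line at blocks + i. */
        __asm__ volatile ("dcbz %0, %1" : : "b" (blocks), "r" (i) : "memory");
}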
@@ -77,8 +76,7 @@ static void clear_blocks_dcbz32_ppc(int16_t *blocks)
 static void clear_blocks_dcbz128_ppc(int16_t *blocks)
 {
 #if HAVE_DCBZL
-    register int misal = (unsigned long) blocks & 0x0000007f;
-    register int i     = 0;
+    register int misal = (unsigned long) blocks & 0x0000007f, i = 0;
 
     if (misal) {
         /* We could probably also optimize this case,
@@ -104,8 +102,7 @@ static long check_dcbzl_effect(void)
 #if HAVE_DCBZL
     register char *fakedata = av_malloc(1024);
     register char *fakedata_middle;
-    register long zero = 0;
-    register long i    = 0;
+    register long zero = 0, i = 0;
 
     if (!fakedata)
         return 0L;
libavcodec/ppc/fdct_altivec.c (view file @ b7d24fd4)
@@ -191,7 +191,7 @@ static vector float fdctconsts[3] = {
 void ff_fdct_altivec(int16_t *block)
 {
     vector signed short *bp;
-    vector float *cp;
+    vector float *cp = fdctconsts;
     vector float b00, b10, b20, b30, b40, b50, b60, b70;
     vector float b01, b11, b21, b31, b41, b51, b61, b71;
     vector float mzero, cnst, cnsts0, cnsts1, cnsts2;
@@ -201,7 +201,6 @@ void ff_fdct_altivec(int16_t *block)
     /* mzero = -0.0 */
     mzero = ((vector float) vec_splat_u32(-1));
     mzero = ((vector float) vec_sl(vu32(mzero), vu32(mzero)));
-    cp = fdctconsts;
     cnsts0 = vec_ld(0, cp);
     cp++;
     cnsts1 = vec_ld(0, cp);
libavcodec/ppc/gmc_altivec.c (view file @ b7d24fd4)
@@ -30,6 +30,7 @@
 void ff_gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */,
                      int stride, int h, int x16, int y16, int rounder)
 {
+    int i;
     const DECLARE_ALIGNED(16, unsigned short, rounder_a) = rounder;
     const DECLARE_ALIGNED(16, unsigned short, ABCD)[8] = {
         (16 - x16) * (16 - y16), /* A */
@@ -42,28 +43,26 @@ void ff_gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */,
         (const vector unsigned char) vec_splat_u8(0);
     register const vector unsigned short vcsr8 =
         (const vector unsigned short) vec_splat_u16(8);
-    register vector unsigned char dstv, dstv2, src_0, src_1,
-                                  srcvA, srcvB, srcvC, srcvD;
-    register vector unsigned short Av, Bv, Cv, Dv, rounderV,
-                                   tempA, tempB, tempC, tempD;
-    int i;
+    register vector unsigned char dstv, dstv2, srcvB, srcvC, srcvD;
+    register vector unsigned short tempB, tempC, tempD;
     unsigned long dst_odd        = (unsigned long) dst & 0x0000000F;
     unsigned long src_really_odd = (unsigned long) src & 0x0000000F;
 
-    tempA    = vec_ld(0, (const unsigned short *) ABCD);
-    Av       = vec_splat(tempA, 0);
-    Bv       = vec_splat(tempA, 1);
-    Cv       = vec_splat(tempA, 2);
-    Dv       = vec_splat(tempA, 3);
-    rounderV = vec_splat((vec_u16) vec_lde(0, &rounder_a), 0);
+    register vector unsigned short tempA =
+        vec_ld(0, (const unsigned short *) ABCD);
+    register vector unsigned short Av = vec_splat(tempA, 0);
+    register vector unsigned short Bv = vec_splat(tempA, 1);
+    register vector unsigned short Cv = vec_splat(tempA, 2);
+    register vector unsigned short Dv = vec_splat(tempA, 3);
+    register vector unsigned short rounderV =
+        vec_splat((vec_u16) vec_lde(0, &rounder_a), 0);
 
     /* we'll be able to pick-up our 9 char elements at src from those
      * 32 bytes we load the first batch here, as inside the loop we can
      * reuse 'src + stride' from one iteration as the 'src' of the next. */
-    src_0 = vec_ld(0,  src);
-    src_1 = vec_ld(16, src);
-    srcvA = vec_perm(src_0, src_1, vec_lvsl(0, src));
+    register vector unsigned char src_0 = vec_ld(0,  src);
+    register vector unsigned char src_1 = vec_ld(16, src);
+    register vector unsigned char srcvA =
+        vec_perm(src_0, src_1, vec_lvsl(0, src));
 
     if (src_really_odd != 0x0000000F)
         /* If src & 0xF == 0xF, then (src + 1) is properly aligned
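The A/B/C/D table set up at the top of ff_gmc1_altivec holds the four bilinear weights of 1/16-pel global motion compensation; each output pixel is a weighted blend of its four source neighbours, rounded and scaled down by 256. A scalar model of the same arithmetic (illustrative, not part of the patch):

#include <stdint.h>

static void gmc1_ref(uint8_t *dst, const uint8_t *src, int stride, int h,
                     int x16, int y16, int rounder)
{
    const int A = (16 - x16) * (16 - y16);
    const int B = x16        * (16 - y16);
    const int C = (16 - x16) * y16;
    const int D = x16        * y16;      /* A + B + C + D == 256 */
    int i, j;

    for (i = 0; i < h; i++) {
        for (j = 0; j < 8; j++)
            dst[j] = (A * src[j]          + B * src[j + 1] +
                      C * src[j + stride] + D * src[j + stride + 1] +
                      rounder) >> 8;
        dst += stride;
        src += stride;
    }
}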
libavcodec/ppc/idct_altivec.c (view file @ b7d24fd4)
@@ -76,31 +76,28 @@
     vy4 = vec_subs(t2, t6);
 
 #define IDCT \
-    vec_s16 vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7; \
     vec_s16 vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7; \
-    vec_s16 a0, a1, a2, ma2, c4, mc4, zero, bias; \
     vec_s16 t0, t1, t2, t3, t4, t5, t6, t7, t8; \
-    vec_u16 shift; \
     \
-    c4   = vec_splat(constants[0], 0); \
-    a0   = vec_splat(constants[0], 1); \
-    a1   = vec_splat(constants[0], 2); \
-    a2   = vec_splat(constants[0], 3); \
-    mc4  = vec_splat(constants[0], 4); \
-    ma2  = vec_splat(constants[0], 5); \
-    bias = (vec_s16) vec_splat((vec_s32) constants[0], 3); \
+    vec_s16 c4   = vec_splat(constants[0], 0); \
+    vec_s16 a0   = vec_splat(constants[0], 1); \
+    vec_s16 a1   = vec_splat(constants[0], 2); \
+    vec_s16 a2   = vec_splat(constants[0], 3); \
+    vec_s16 mc4  = vec_splat(constants[0], 4); \
+    vec_s16 ma2  = vec_splat(constants[0], 5); \
+    vec_s16 bias = (vec_s16) vec_splat((vec_s32) constants[0], 3); \
     \
-    zero  = vec_splat_s16(0); \
-    shift = vec_splat_u16(4); \
+    vec_s16 zero  = vec_splat_s16(0); \
+    vec_u16 shift = vec_splat_u16(4); \
     \
-    vx0 = vec_mradds(vec_sl(block[0], shift), constants[1], zero); \
-    vx1 = vec_mradds(vec_sl(block[1], shift), constants[2], zero); \
-    vx2 = vec_mradds(vec_sl(block[2], shift), constants[3], zero); \
-    vx3 = vec_mradds(vec_sl(block[3], shift), constants[4], zero); \
-    vx4 = vec_mradds(vec_sl(block[4], shift), constants[1], zero); \
-    vx5 = vec_mradds(vec_sl(block[5], shift), constants[4], zero); \
-    vx6 = vec_mradds(vec_sl(block[6], shift), constants[3], zero); \
-    vx7 = vec_mradds(vec_sl(block[7], shift), constants[2], zero); \
+    vec_s16 vx0 = vec_mradds(vec_sl(block[0], shift), constants[1], zero); \
+    vec_s16 vx1 = vec_mradds(vec_sl(block[1], shift), constants[2], zero); \
+    vec_s16 vx2 = vec_mradds(vec_sl(block[2], shift), constants[3], zero); \
+    vec_s16 vx3 = vec_mradds(vec_sl(block[3], shift), constants[4], zero); \
+    vec_s16 vx4 = vec_mradds(vec_sl(block[4], shift), constants[1], zero); \
+    vec_s16 vx5 = vec_mradds(vec_sl(block[5], shift), constants[4], zero); \
+    vec_s16 vx6 = vec_mradds(vec_sl(block[6], shift), constants[3], zero); \
+    vec_s16 vx7 = vec_mradds(vec_sl(block[7], shift), constants[2], zero); \
     \
     IDCT_HALF \
libavcodec/ppc/int_altivec.c (view file @ b7d24fd4)
@@ -36,21 +36,19 @@
 static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2,
                                      int size)
 {
-    int i, size16;
+    int i, size16 = size >> 4;
     vector signed char vpix1;
     vector signed short vpix2, vdiff, vpix1l, vpix1h;
     union {
         vector signed int vscore;
         int32_t score[4];
-    } u;
-    u.vscore = vec_splat_s32(0);
+    } u = { .vscore = vec_splat_s32(0) };
 
 // XXX lazy way, fix it later
 #define vec_unaligned_load(b) \
     vec_perm(vec_ld(0, b), vec_ld(15, b), vec_lvsl(0, b));
 
-    size16 = size >> 4;
     while (size16) {
         // score += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]);
         // load pix1 and the first batch of pix2
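The union above lets the vector code accumulate in a vector of signed ints and then read the four 32-bit lanes back through u.score[]; the patch folds its zeroing into a C99 designated initializer at the point of declaration. The scalar computation being vectorized (illustrative):

#include <stdint.h>

static int ssd_int8_vs_int16_ref(const int8_t *pix1, const int16_t *pix2,
                                 int size)
{
    int i, score = 0;

    for (i = 0; i < size; i++) {
        int d = pix1[i] - pix2[i];
        score += d * d;
    }
    return score;
}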