Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
30f3f959
Commit
30f3f959
authored
Jan 15, 2014
by
Diego Biurrun
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ppc: dsputil: K&R formatting cosmetics
parent
82ee14d2
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
429 additions
and
391 deletions
+429
-391
dsputil_altivec.c
libavcodec/ppc/dsputil_altivec.c
+394
-355
dsputil_altivec.h
libavcodec/ppc/dsputil_altivec.h
+7
-5
dsputil_ppc.c
libavcodec/ppc/dsputil_ppc.c
+28
-31
No files found.
libavcodec/ppc/dsputil_altivec.c
View file @
30f3f959
...
...
@@ -24,6 +24,7 @@
#if HAVE_ALTIVEC_H
#include <altivec.h>
#endif
#include "libavutil/attributes.h"
#include "libavutil/ppc/types_altivec.h"
#include "libavutil/ppc/util_altivec.h"
...
...
@@ -31,11 +32,13 @@
#include "libavcodec/dsputil.h"
#include "dsputil_altivec.h"
static
int
sad16_x2_altivec
(
void
*
v
,
uint8_t
*
pix1
,
uint8_t
*
pix2
,
int
line_size
,
int
h
)
static
int
sad16_x2_altivec
(
void
*
v
,
uint8_t
*
pix1
,
uint8_t
*
pix2
,
int
line_size
,
int
h
)
{
int
i
;
int
s
;
const
vector
unsigned
char
zero
=
(
const
vector
unsigned
char
)
vec_splat_u8
(
0
);
const
vector
unsigned
char
zero
=
(
const
vector
unsigned
char
)
vec_splat_u8
(
0
);
vector
unsigned
char
perm1
=
vec_lvsl
(
0
,
pix2
);
vector
unsigned
char
perm2
=
vec_add
(
perm1
,
vec_splat_u8
(
1
));
vector
unsigned
char
pix2l
,
pix2r
;
...
...
@@ -44,13 +47,13 @@ static int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size
vector
signed
int
sumdiffs
;
s
=
0
;
sad
=
(
vector
unsigned
int
)
vec_splat_u32
(
0
);
sad
=
(
vector
unsigned
int
)
vec_splat_u32
(
0
);
for
(
i
=
0
;
i
<
h
;
i
++
)
{
/* Read unaligned pixels into our vectors. The vectors are as follows:
* pix1v: pix1[0] - pix1[15]
* pix2v: pix2[0] - pix2[15] pix2iv: pix2[1] - pix2[16] */
pix1v
=
vec_ld
(
0
,
pix1
);
pix2l
=
vec_ld
(
0
,
pix2
);
pix1v
=
vec_ld
(
0
,
pix1
);
pix2l
=
vec_ld
(
0
,
pix2
);
pix2r
=
vec_ld
(
16
,
pix2
);
pix2v
=
vec_perm
(
pix2l
,
pix2r
,
perm1
);
pix2iv
=
vec_perm
(
pix2l
,
pix2r
,
perm2
);
...
...
@@ -75,11 +78,13 @@ static int sad16_x2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size
return
s
;
}
static
int
sad16_y2_altivec
(
void
*
v
,
uint8_t
*
pix1
,
uint8_t
*
pix2
,
int
line_size
,
int
h
)
static
int
sad16_y2_altivec
(
void
*
v
,
uint8_t
*
pix1
,
uint8_t
*
pix2
,
int
line_size
,
int
h
)
{
int
i
;
int
s
;
const
vector
unsigned
char
zero
=
(
const
vector
unsigned
char
)
vec_splat_u8
(
0
);
const
vector
unsigned
char
zero
=
(
const
vector
unsigned
char
)
vec_splat_u8
(
0
);
vector
unsigned
char
perm
=
vec_lvsl
(
0
,
pix2
);
vector
unsigned
char
pix2l
,
pix2r
;
vector
unsigned
char
pix1v
,
pix2v
,
pix3v
,
avgv
,
t5
;
...
...
@@ -88,7 +93,7 @@ static int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size
uint8_t
*
pix3
=
pix2
+
line_size
;
s
=
0
;
sad
=
(
vector
unsigned
int
)
vec_splat_u32
(
0
);
sad
=
(
vector
unsigned
int
)
vec_splat_u32
(
0
);
/* Due to the fact that pix3 = pix2 + line_size, the pix3 of one
* iteration becomes pix2 in the next iteration. We can use this
...
...
@@ -97,7 +102,7 @@ static int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size
* Read unaligned pixels into our vectors. The vectors are as follows:
* pix2v: pix2[0] - pix2[15]
* Split the pixel vectors into shorts. */
pix2l
=
vec_ld
(
0
,
pix2
);
pix2l
=
vec_ld
(
0
,
pix2
);
pix2r
=
vec_ld
(
15
,
pix2
);
pix2v
=
vec_perm
(
pix2l
,
pix2r
,
perm
);
...
...
@@ -107,7 +112,7 @@ static int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size
* pix3v: pix3[0] - pix3[15] */
pix1v
=
vec_ld
(
0
,
pix1
);
pix2l
=
vec_ld
(
0
,
pix3
);
pix2l
=
vec_ld
(
0
,
pix3
);
pix2r
=
vec_ld
(
15
,
pix3
);
pix3v
=
vec_perm
(
pix2l
,
pix2r
,
perm
);
...
...
@@ -123,7 +128,6 @@ static int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size
pix1
+=
line_size
;
pix2v
=
pix3v
;
pix3
+=
line_size
;
}
/* Sum up the four partial sums, and put the result into s. */
...
...
@@ -133,13 +137,16 @@ static int sad16_y2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size
return
s
;
}
static
int
sad16_xy2_altivec
(
void
*
v
,
uint8_t
*
pix1
,
uint8_t
*
pix2
,
int
line_size
,
int
h
)
static
int
sad16_xy2_altivec
(
void
*
v
,
uint8_t
*
pix1
,
uint8_t
*
pix2
,
int
line_size
,
int
h
)
{
int
i
;
int
s
;
uint8_t
*
pix3
=
pix2
+
line_size
;
const
vector
unsigned
char
zero
=
(
const
vector
unsigned
char
)
vec_splat_u8
(
0
);
const
vector
unsigned
short
two
=
(
const
vector
unsigned
short
)
vec_splat_u16
(
2
);
const
vector
unsigned
char
zero
=
(
const
vector
unsigned
char
)
vec_splat_u8
(
0
);
const
vector
unsigned
short
two
=
(
const
vector
unsigned
short
)
vec_splat_u16
(
2
);
vector
unsigned
char
avgv
,
t5
;
vector
unsigned
char
perm1
=
vec_lvsl
(
0
,
pix2
);
vector
unsigned
char
perm2
=
vec_add
(
perm1
,
vec_splat_u8
(
1
));
...
...
@@ -152,7 +159,7 @@ static int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_siz
vector
unsigned
int
sad
;
vector
signed
int
sumdiffs
;
sad
=
(
vector
unsigned
int
)
vec_splat_u32
(
0
);
sad
=
(
vector
unsigned
int
)
vec_splat_u32
(
0
);
s
=
0
;
...
...
@@ -163,7 +170,7 @@ static int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_siz
* Read unaligned pixels into our vectors. The vectors are as follows:
* pix2v: pix2[0] - pix2[15] pix2iv: pix2[1] - pix2[16]
* Split the pixel vectors into shorts. */
pix2l
=
vec_ld
(
0
,
pix2
);
pix2l
=
vec_ld
(
0
,
pix2
);
pix2r
=
vec_ld
(
16
,
pix2
);
pix2v
=
vec_perm
(
pix2l
,
pix2r
,
perm1
);
pix2iv
=
vec_perm
(
pix2l
,
pix2r
,
perm2
);
...
...
@@ -181,7 +188,7 @@ static int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_siz
* pix3v: pix3[0] - pix3[15] pix3iv: pix3[1] - pix3[16] */
pix1v
=
vec_ld
(
0
,
pix1
);
pix2l
=
vec_ld
(
0
,
pix3
);
pix2l
=
vec_ld
(
0
,
pix3
);
pix2r
=
vec_ld
(
16
,
pix3
);
pix3v
=
vec_perm
(
pix2l
,
pix2r
,
perm1
);
pix3iv
=
vec_perm
(
pix2l
,
pix2r
,
perm2
);
...
...
@@ -228,22 +235,23 @@ static int sad16_xy2_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_siz
return
s
;
}
static
int
sad16_altivec
(
void
*
v
,
uint8_t
*
pix1
,
uint8_t
*
pix2
,
int
line_size
,
int
h
)
static
int
sad16_altivec
(
void
*
v
,
uint8_t
*
pix1
,
uint8_t
*
pix2
,
int
line_size
,
int
h
)
{
int
i
;
int
s
;
const
vector
unsigned
int
zero
=
(
const
vector
unsigned
int
)
vec_splat_u32
(
0
);
const
vector
unsigned
int
zero
=
(
const
vector
unsigned
int
)
vec_splat_u32
(
0
);
vector
unsigned
char
perm
=
vec_lvsl
(
0
,
pix2
);
vector
unsigned
char
t1
,
t2
,
t3
,
t4
,
t5
;
vector
unsigned
char
t1
,
t2
,
t3
,
t4
,
t5
;
vector
unsigned
int
sad
;
vector
signed
int
sumdiffs
;
sad
=
(
vector
unsigned
int
)
vec_splat_u32
(
0
);
sad
=
(
vector
unsigned
int
)
vec_splat_u32
(
0
);
for
(
i
=
0
;
i
<
h
;
i
++
)
{
/* Read potentially unaligned pixels into t1 and t2. */
vector
unsigned
char
pix2l
=
vec_ld
(
0
,
pix2
);
vector
unsigned
char
pix2l
=
vec_ld
(
0
,
pix2
);
vector
unsigned
char
pix2r
=
vec_ld
(
15
,
pix2
);
t1
=
vec_ld
(
0
,
pix1
);
t2
=
vec_perm
(
pix2l
,
pix2r
,
perm
);
...
...
@@ -268,19 +276,23 @@ static int sad16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, i
return
s
;
}
static
int
sad8_altivec
(
void
*
v
,
uint8_t
*
pix1
,
uint8_t
*
pix2
,
int
line_size
,
int
h
)
static
int
sad8_altivec
(
void
*
v
,
uint8_t
*
pix1
,
uint8_t
*
pix2
,
int
line_size
,
int
h
)
{
int
i
;
int
s
;
const
vector
unsigned
int
zero
=
(
const
vector
unsigned
int
)
vec_splat_u32
(
0
);
const
vector
unsigned
char
permclear
=
(
vector
unsigned
char
){
255
,
255
,
255
,
255
,
255
,
255
,
255
,
255
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
const
vector
unsigned
int
zero
=
(
const
vector
unsigned
int
)
vec_splat_u32
(
0
);
const
vector
unsigned
char
permclear
=
(
vector
unsigned
char
)
{
255
,
255
,
255
,
255
,
255
,
255
,
255
,
255
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
vector
unsigned
char
perm1
=
vec_lvsl
(
0
,
pix1
);
vector
unsigned
char
perm2
=
vec_lvsl
(
0
,
pix2
);
vector
unsigned
char
t1
,
t2
,
t3
,
t4
,
t5
;
vector
unsigned
char
t1
,
t2
,
t3
,
t4
,
t5
;
vector
unsigned
int
sad
;
vector
signed
int
sumdiffs
;
sad
=
(
vector
unsigned
int
)
vec_splat_u32
(
0
);
sad
=
(
vector
unsigned
int
)
vec_splat_u32
(
0
);
for
(
i
=
0
;
i
<
h
;
i
++
)
{
/* Read potentially unaligned pixels into t1 and t2.
...
...
@@ -317,18 +329,19 @@ static int pix_norm1_altivec(uint8_t *pix, int line_size)
{
int
i
;
int
s
;
const
vector
unsigned
int
zero
=
(
const
vector
unsigned
int
)
vec_splat_u32
(
0
);
const
vector
unsigned
int
zero
=
(
const
vector
unsigned
int
)
vec_splat_u32
(
0
);
vector
unsigned
char
perm
=
vec_lvsl
(
0
,
pix
);
vector
unsigned
char
pixv
;
vector
unsigned
int
sv
;
vector
signed
int
sum
;
sv
=
(
vector
unsigned
int
)
vec_splat_u32
(
0
);
sv
=
(
vector
unsigned
int
)
vec_splat_u32
(
0
);
s
=
0
;
for
(
i
=
0
;
i
<
16
;
i
++
)
{
/* Read the potentially unaligned pixels. */
vector
unsigned
char
pixl
=
vec_ld
(
0
,
pix
);
vector
unsigned
char
pixl
=
vec_ld
(
0
,
pix
);
vector
unsigned
char
pixr
=
vec_ld
(
15
,
pix
);
pixv
=
vec_perm
(
pixl
,
pixr
,
perm
);
...
...
@@ -347,19 +360,23 @@ static int pix_norm1_altivec(uint8_t *pix, int line_size)
/* Sum of Squared Errors for an 8x8 block, AltiVec-enhanced.
* It's the sad8_altivec code above w/ squaring added. */
static
int
sse8_altivec
(
void
*
v
,
uint8_t
*
pix1
,
uint8_t
*
pix2
,
int
line_size
,
int
h
)
static
int
sse8_altivec
(
void
*
v
,
uint8_t
*
pix1
,
uint8_t
*
pix2
,
int
line_size
,
int
h
)
{
int
i
;
int
s
;
const
vector
unsigned
int
zero
=
(
const
vector
unsigned
int
)
vec_splat_u32
(
0
);
const
vector
unsigned
char
permclear
=
(
vector
unsigned
char
){
255
,
255
,
255
,
255
,
255
,
255
,
255
,
255
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
const
vector
unsigned
int
zero
=
(
const
vector
unsigned
int
)
vec_splat_u32
(
0
);
const
vector
unsigned
char
permclear
=
(
vector
unsigned
char
)
{
255
,
255
,
255
,
255
,
255
,
255
,
255
,
255
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
};
vector
unsigned
char
perm1
=
vec_lvsl
(
0
,
pix1
);
vector
unsigned
char
perm2
=
vec_lvsl
(
0
,
pix2
);
vector
unsigned
char
t1
,
t2
,
t3
,
t4
,
t5
;
vector
unsigned
char
t1
,
t2
,
t3
,
t4
,
t5
;
vector
unsigned
int
sum
;
vector
signed
int
sumsqr
;
sum
=
(
vector
unsigned
int
)
vec_splat_u32
(
0
);
sum
=
(
vector
unsigned
int
)
vec_splat_u32
(
0
);
for
(
i
=
0
;
i
<
h
;
i
++
)
{
/* Read potentially unaligned pixels into t1 and t2.
...
...
@@ -397,21 +414,23 @@ static int sse8_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, in
/* Sum of Squared Errors for a 16x16 block, AltiVec-enhanced.
* It's the sad16_altivec code above w/ squaring added. */
static
int
sse16_altivec
(
void
*
v
,
uint8_t
*
pix1
,
uint8_t
*
pix2
,
int
line_size
,
int
h
)
static
int
sse16_altivec
(
void
*
v
,
uint8_t
*
pix1
,
uint8_t
*
pix2
,
int
line_size
,
int
h
)
{
int
i
;
int
s
;
const
vector
unsigned
int
zero
=
(
const
vector
unsigned
int
)
vec_splat_u32
(
0
);
const
vector
unsigned
int
zero
=
(
const
vector
unsigned
int
)
vec_splat_u32
(
0
);
vector
unsigned
char
perm
=
vec_lvsl
(
0
,
pix2
);
vector
unsigned
char
t1
,
t2
,
t3
,
t4
,
t5
;
vector
unsigned
char
t1
,
t2
,
t3
,
t4
,
t5
;
vector
unsigned
int
sum
;
vector
signed
int
sumsqr
;
sum
=
(
vector
unsigned
int
)
vec_splat_u32
(
0
);
sum
=
(
vector
unsigned
int
)
vec_splat_u32
(
0
);
for
(
i
=
0
;
i
<
h
;
i
++
)
{
/* Read potentially unaligned pixels into t1 and t2. */
vector
unsigned
char
pix2l
=
vec_ld
(
0
,
pix2
);
vector
unsigned
char
pix2l
=
vec_ld
(
0
,
pix2
);
vector
unsigned
char
pix2r
=
vec_ld
(
15
,
pix2
);
t1
=
vec_ld
(
0
,
pix1
);
t2
=
vec_perm
(
pix2l
,
pix2r
,
perm
);
...
...
@@ -439,9 +458,10 @@ static int sse16_altivec(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, i
return
s
;
}
static
int
pix_sum_altivec
(
uint8_t
*
pix
,
int
line_size
)
static
int
pix_sum_altivec
(
uint8_t
*
pix
,
int
line_size
)
{
const
vector
unsigned
int
zero
=
(
const
vector
unsigned
int
)
vec_splat_u32
(
0
);
const
vector
unsigned
int
zero
=
(
const
vector
unsigned
int
)
vec_splat_u32
(
0
);
vector
unsigned
char
perm
=
vec_lvsl
(
0
,
pix
);
vector
unsigned
char
t1
;
vector
unsigned
int
sad
;
...
...
@@ -450,11 +470,11 @@ static int pix_sum_altivec(uint8_t * pix, int line_size)
int
i
;
int
s
;
sad
=
(
vector
unsigned
int
)
vec_splat_u32
(
0
);
sad
=
(
vector
unsigned
int
)
vec_splat_u32
(
0
);
for
(
i
=
0
;
i
<
16
;
i
++
)
{
/* Read the potentially unaligned 16 pixels into t1. */
vector
unsigned
char
pixl
=
vec_ld
(
0
,
pix
);
vector
unsigned
char
pixl
=
vec_ld
(
0
,
pix
);
vector
unsigned
char
pixr
=
vec_ld
(
15
,
pix
);
t1
=
vec_perm
(
pixl
,
pixr
,
perm
);
...
...
@@ -472,30 +492,29 @@ static int pix_sum_altivec(uint8_t * pix, int line_size)
return
s
;
}
static
void
get_pixels_altivec
(
int16_t
*
restrict
block
,
const
uint8_t
*
pixels
,
int
line_size
)
static
void
get_pixels_altivec
(
int16_t
*
restrict
block
,
const
uint8_t
*
pixels
,
int
line_size
)
{
int
i
;
vector
unsigned
char
perm
=
vec_lvsl
(
0
,
pixels
);
vector
unsigned
char
bytes
;
const
vector
unsigned
char
zero
=
(
const
vector
unsigned
char
)
vec_splat_u8
(
0
);
const
vector
unsigned
char
zero
=
(
const
vector
unsigned
char
)
vec_splat_u8
(
0
);
vector
signed
short
shorts
;
for
(
i
=
0
;
i
<
8
;
i
++
)
{
/* Read potentially unaligned pixels.
* We're reading 16 pixels, and actually only want 8,
* but we simply ignore the extras. */
// Read potentially unaligned pixels.
// We're reading 16 pixels, and actually only want 8,
// but we simply ignore the extras.
vector
unsigned
char
pixl
=
vec_ld
(
0
,
pixels
);
vector
unsigned
char
pixr
=
vec_ld
(
7
,
pixels
);
bytes
=
vec_perm
(
pixl
,
pixr
,
perm
);
// Convert the bytes into shorts.
shorts
=
(
vector
signed
short
)
vec_mergeh
(
zero
,
bytes
);
shorts
=
(
vector
signed
short
)
vec_mergeh
(
zero
,
bytes
);
// Save the data to the block, we assume the block is 16-byte aligned.
vec_st
(
shorts
,
i
*
16
,
(
vector
signed
short
*
)
block
);
vec_st
(
shorts
,
i
*
16
,
(
vector
signed
short
*
)
block
);
pixels
+=
line_size
;
}
...
...
@@ -508,33 +527,34 @@ static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
vector
unsigned
char
perm1
=
vec_lvsl
(
0
,
s1
);
vector
unsigned
char
perm2
=
vec_lvsl
(
0
,
s2
);
vector
unsigned
char
bytes
,
pixl
,
pixr
;
const
vector
unsigned
char
zero
=
(
const
vector
unsigned
char
)
vec_splat_u8
(
0
);
const
vector
unsigned
char
zero
=
(
const
vector
unsigned
char
)
vec_splat_u8
(
0
);
vector
signed
short
shorts1
,
shorts2
;
for
(
i
=
0
;
i
<
4
;
i
++
)
{
/* Read potentially unaligned pixels.
* We're reading 16 pixels, and actually only want 8,
* but we simply ignore the extras. */
pixl
=
vec_ld
(
0
,
s1
);
pixl
=
vec_ld
(
0
,
s1
);
pixr
=
vec_ld
(
15
,
s1
);
bytes
=
vec_perm
(
pixl
,
pixr
,
perm1
);
// Convert the bytes into shorts.
shorts1
=
(
vector
signed
short
)
vec_mergeh
(
zero
,
bytes
);
shorts1
=
(
vector
signed
short
)
vec_mergeh
(
zero
,
bytes
);
// Do the same for the second block of pixels.
pixl
=
vec_ld
(
0
,
s2
);
pixl
=
vec_ld
(
0
,
s2
);
pixr
=
vec_ld
(
15
,
s2
);
bytes
=
vec_perm
(
pixl
,
pixr
,
perm2
);
// Convert the bytes into shorts.
shorts2
=
(
vector
signed
short
)
vec_mergeh
(
zero
,
bytes
);
shorts2
=
(
vector
signed
short
)
vec_mergeh
(
zero
,
bytes
);
// Do the subtraction.
shorts1
=
vec_sub
(
shorts1
,
shorts2
);
// Save the data to the block, we assume the block is 16-byte aligned.
vec_st
(
shorts1
,
0
,
(
vector
signed
short
*
)
block
);
vec_st
(
shorts1
,
0
,
(
vector
signed
short
*
)
block
);
s1
+=
stride
;
s2
+=
stride
;
...
...
@@ -546,26 +566,26 @@ static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
/* Read potentially unaligned pixels.
* We're reading 16 pixels, and actually only want 8,
* but we simply ignore the extras. */
pixl
=
vec_ld
(
0
,
s1
);
pixl
=
vec_ld
(
0
,
s1
);
pixr
=
vec_ld
(
15
,
s1
);
bytes
=
vec_perm
(
pixl
,
pixr
,
perm1
);
// Convert the bytes into shorts.
shorts1
=
(
vector
signed
short
)
vec_mergeh
(
zero
,
bytes
);
shorts1
=
(
vector
signed
short
)
vec_mergeh
(
zero
,
bytes
);
// Do the same for the second block of pixels.
pixl
=
vec_ld
(
0
,
s2
);
pixl
=
vec_ld
(
0
,
s2
);
pixr
=
vec_ld
(
15
,
s2
);
bytes
=
vec_perm
(
pixl
,
pixr
,
perm2
);
// Convert the bytes into shorts.
shorts2
=
(
vector
signed
short
)
vec_mergeh
(
zero
,
bytes
);
shorts2
=
(
vector
signed
short
)
vec_mergeh
(
zero
,
bytes
);
// Do the subtraction.
shorts1
=
vec_sub
(
shorts1
,
shorts2
);
// Save the data to the block, we assume the block is 16-byte aligned.
vec_st
(
shorts1
,
0
,
(
vector
signed
short
*
)
block
);
vec_st
(
shorts1
,
0
,
(
vector
signed
short
*
)
block
);
s1
+=
stride
;
s2
+=
stride
;
...
...
@@ -573,8 +593,8 @@ static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
}
}
static
void
clear_block_altivec
(
int16_t
*
block
)
{
static
void
clear_block_altivec
(
int16_t
*
block
)
{
LOAD_ZERO
;
vec_st
(
zero_s16v
,
0
,
block
);
vec_st
(
zero_s16v
,
16
,
block
);
...
...
@@ -586,46 +606,50 @@ static void clear_block_altivec(int16_t *block) {
vec_st
(
zero_s16v
,
112
,
block
);
}
static
void
add_bytes_altivec
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
w
)
{
static
void
add_bytes_altivec
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
w
)
{
register
int
i
;
register
vector
unsigned
char
vdst
,
vsrc
;
/* dst and src are 16 bytes-aligned (guaranteed). */
for
(
i
=
0
;
(
i
+
15
)
<
w
;
i
+=
16
)
{
vdst
=
vec_ld
(
i
,
(
unsigned
char
*
)
dst
);
vsrc
=
vec_ld
(
i
,
(
unsigned
char
*
)
src
);
for
(
i
=
0
;
(
i
+
15
)
<
w
;
i
+=
16
)
{
vdst
=
vec_ld
(
i
,
(
unsigned
char
*
)
dst
);
vsrc
=
vec_ld
(
i
,
(
unsigned
char
*
)
src
);
vdst
=
vec_add
(
vsrc
,
vdst
);
vec_st
(
vdst
,
i
,
(
unsigned
char
*
)
dst
);
vec_st
(
vdst
,
i
,
(
unsigned
char
*
)
dst
);
}
/* If w is not a multiple of 16. */
for
(;
(
i
<
w
)
;
i
++
)
{
for
(;
(
i
<
w
)
;
i
++
)
dst
[
i
]
=
src
[
i
];
}
}
static
int
hadamard8_diff8x8_altivec
(
/*MpegEncContext*/
void
*
s
,
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
){
static
int
hadamard8_diff8x8_altivec
(
/* MpegEncContext */
void
*
s
,
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
)
{
int
sum
;
register
const
vector
unsigned
char
vzero
=
(
const
vector
unsigned
char
)
vec_splat_u8
(
0
);
(
const
vector
unsigned
char
)
vec_splat_u8
(
0
);
register
vector
signed
short
temp0
,
temp1
,
temp2
,
temp3
,
temp4
,
temp5
,
temp6
,
temp7
;
{
register
const
vector
signed
short
vprod1
=
(
const
vector
signed
short
)
{
1
,
-
1
,
1
,
-
1
,
1
,
-
1
,
1
,
-
1
};
register
const
vector
signed
short
vprod2
=
(
const
vector
signed
short
)
{
1
,
1
,
-
1
,
-
1
,
1
,
1
,
-
1
,
-
1
};
register
const
vector
signed
short
vprod3
=
(
const
vector
signed
short
)
{
1
,
1
,
1
,
1
,
-
1
,
-
1
,
-
1
,
-
1
};
register
const
vector
unsigned
char
perm1
=
(
const
vector
unsigned
char
)
{
0x02
,
0x03
,
0x00
,
0x01
,
0x06
,
0x07
,
0x04
,
0x05
,
0x0A
,
0x0B
,
0x08
,
0x09
,
0x0E
,
0x0F
,
0x0C
,
0x0D
};
register
const
vector
unsigned
char
perm2
=
(
const
vector
unsigned
char
)
{
0x04
,
0x05
,
0x06
,
0x07
,
0x00
,
0x01
,
0x02
,
0x03
,
0x0C
,
0x0D
,
0x0E
,
0x0F
,
0x08
,
0x09
,
0x0A
,
0x0B
};
register
const
vector
unsigned
char
perm3
=
(
const
vector
unsigned
char
)
{
0x08
,
0x09
,
0x0A
,
0x0B
,
0x0C
,
0x0D
,
0x0E
,
0x0F
,
0x00
,
0x01
,
0x02
,
0x03
,
0x04
,
0x05
,
0x06
,
0x07
};
register
const
vector
signed
short
vprod1
=
(
const
vector
signed
short
)
{
1
,
-
1
,
1
,
-
1
,
1
,
-
1
,
1
,
-
1
};
register
const
vector
signed
short
vprod2
=
(
const
vector
signed
short
)
{
1
,
1
,
-
1
,
-
1
,
1
,
1
,
-
1
,
-
1
};
register
const
vector
signed
short
vprod3
=
(
const
vector
signed
short
)
{
1
,
1
,
1
,
1
,
-
1
,
-
1
,
-
1
,
-
1
};
register
const
vector
unsigned
char
perm1
=
(
const
vector
unsigned
char
)
{
0x02
,
0x03
,
0x00
,
0x01
,
0x06
,
0x07
,
0x04
,
0x05
,
0x0A
,
0x0B
,
0x08
,
0x09
,
0x0E
,
0x0F
,
0x0C
,
0x0D
};
register
const
vector
unsigned
char
perm2
=
(
const
vector
unsigned
char
)
{
0x04
,
0x05
,
0x06
,
0x07
,
0x00
,
0x01
,
0x02
,
0x03
,
0x0C
,
0x0D
,
0x0E
,
0x0F
,
0x08
,
0x09
,
0x0A
,
0x0B
};
register
const
vector
unsigned
char
perm3
=
(
const
vector
unsigned
char
)
{
0x08
,
0x09
,
0x0A
,
0x0B
,
0x0C
,
0x0D
,
0x0E
,
0x0F
,
0x00
,
0x01
,
0x02
,
0x03
,
0x04
,
0x05
,
0x06
,
0x07
};
#define ONEITERBUTTERFLY(i, res) \
{ \
...
...
@@ -641,10 +665,10 @@ static int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, u
dstO = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \
/* Promote the unsigned chars to signed shorts. */
\
/* We're in the 8x8 function, we only care for the first 8. */
\
srcV = (vector signed short)vec_mergeh((vector signed char)
vzero, \
(vector signed char)srcO);
\
dstV = (vector signed short)vec_mergeh((vector signed char)
vzero, \
(vector signed char)dstO);
\
srcV = (vector signed short) vec_mergeh((vector signed char)
vzero, \
(vector signed char) srcO);
\
dstV = (vector signed short) vec_mergeh((vector signed char)
vzero, \
(vector signed char) dstO);
\
/* subtractions inside the first butterfly */
\
but0 = vec_sub(srcV, dstV); \
op1 = vec_perm(but0, but0, perm1); \
...
...
@@ -701,7 +725,7 @@ static int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, u
vsum
=
vec_sum4s
(
vec_abs
(
line5C
),
vsum
);
vsum
=
vec_sum4s
(
vec_abs
(
line6C
),
vsum
);
vsum
=
vec_sum4s
(
vec_abs
(
line7C
),
vsum
);
vsum
=
vec_sums
(
vsum
,
(
vector
signed
int
)
vzero
);
vsum
=
vec_sums
(
vsum
,
(
vector
signed
int
)
vzero
);
vsum
=
vec_splat
(
vsum
,
3
);
vec_ste
(
vsum
,
0
,
&
sum
);
}
...
...
@@ -726,7 +750,9 @@ static int hadamard8_diff8x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, u
* On the 970, the hand-made RA is still a win (around 690 vs. around 780),
* but xlc goes to around 660 on the regular C code...
*/
static
int
hadamard8_diff16x8_altivec
(
/*MpegEncContext*/
void
*
s
,
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
)
{
static
int
hadamard8_diff16x8_altivec
(
/* MpegEncContext */
void
*
s
,
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
)
{
int
sum
;
register
vector
signed
short
temp0
__asm__
(
"v0"
),
...
...
@@ -747,37 +773,44 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst,
temp6S
__asm__
(
"v14"
),
temp7S
__asm__
(
"v15"
);
register
const
vector
unsigned
char
vzero
__asm__
(
"v31"
)
=
(
const
vector
unsigned
char
)
vec_splat_u8
(
0
);
(
const
vector
unsigned
char
)
vec_splat_u8
(
0
);
{
register
const
vector
signed
short
vprod1
__asm__
(
"v16"
)
=
(
const
vector
signed
short
){
1
,
-
1
,
1
,
-
1
,
1
,
-
1
,
1
,
-
1
};
(
const
vector
signed
short
)
{
1
,
-
1
,
1
,
-
1
,
1
,
-
1
,
1
,
-
1
};
register
const
vector
signed
short
vprod2
__asm__
(
"v17"
)
=
(
const
vector
signed
short
){
1
,
1
,
-
1
,
-
1
,
1
,
1
,
-
1
,
-
1
};
(
const
vector
signed
short
)
{
1
,
1
,
-
1
,
-
1
,
1
,
1
,
-
1
,
-
1
};
register
const
vector
signed
short
vprod3
__asm__
(
"v18"
)
=
(
const
vector
signed
short
){
1
,
1
,
1
,
1
,
-
1
,
-
1
,
-
1
,
-
1
};
(
const
vector
signed
short
)
{
1
,
1
,
1
,
1
,
-
1
,
-
1
,
-
1
,
-
1
};
register
const
vector
unsigned
char
perm1
__asm__
(
"v19"
)
=
(
const
vector
unsigned
char
)
{
0x02
,
0x03
,
0x00
,
0x01
,
0x06
,
0x07
,
0x04
,
0x05
,
0x0A
,
0x0B
,
0x08
,
0x09
,
0x0E
,
0x0F
,
0x0C
,
0x0D
};
{
0x02
,
0x03
,
0x00
,
0x01
,
0x06
,
0x07
,
0x04
,
0x05
,
0x0A
,
0x0B
,
0x08
,
0x09
,
0x0E
,
0x0F
,
0x0C
,
0x0D
};
register
const
vector
unsigned
char
perm2
__asm__
(
"v20"
)
=
(
const
vector
unsigned
char
)
{
0x04
,
0x05
,
0x06
,
0x07
,
0x00
,
0x01
,
0x02
,
0x03
,
0x0C
,
0x0D
,
0x0E
,
0x0F
,
0x08
,
0x09
,
0x0A
,
0x0B
};
{
0x04
,
0x05
,
0x06
,
0x07
,
0x00
,
0x01
,
0x02
,
0x03
,
0x0C
,
0x0D
,
0x0E
,
0x0F
,
0x08
,
0x09
,
0x0A
,
0x0B
};
register
const
vector
unsigned
char
perm3
__asm__
(
"v21"
)
=
(
const
vector
unsigned
char
)
{
0x08
,
0x09
,
0x0A
,
0x0B
,
0x0C
,
0x0D
,
0x0E
,
0x0F
,
0x00
,
0x01
,
0x02
,
0x03
,
0x04
,
0x05
,
0x06
,
0x07
};
{
0x08
,
0x09
,
0x0A
,
0x0B
,
0x0C
,
0x0D
,
0x0E
,
0x0F
,
0x00
,
0x01
,
0x02
,
0x03
,
0x04
,
0x05
,
0x06
,
0x07
};
#define ONEITERBUTTERFLY(i, res1, res2) \
{ \
register vector unsigned char src1 __asm__ ("v22"), \
register vector unsigned char \
src1 __asm__ ("v22"), \
src2 __asm__ ("v23"), \
dst1 __asm__ ("v24"), \
dst2 __asm__ ("v25"), \
srcO __asm__ ("v22"), \
dstO __asm__ ("v23"); \
\
register vector signed short srcV __asm__ ("v24"), \
register vector signed short \
srcV __asm__ ("v24"), \
dstV __asm__ ("v25"), \
srcW __asm__ ("v26"), \
dstW __asm__ ("v27"), \
...
...
@@ -801,14 +834,14 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst,
dst2 = vec_ld((stride * i) + 16, dst); \
dstO = vec_perm(dst1, dst2, vec_lvsl(stride * i, dst)); \
/* Promote the unsigned chars to signed shorts. */
\
srcV = (vector signed short)vec_mergeh((vector signed char)
vzero, \
(vector signed char)srcO);
\
dstV = (vector signed short)vec_mergeh((vector signed char)
vzero, \
(vector signed char)dstO);
\
srcW = (vector signed short)vec_mergel((vector signed char)
vzero, \
(vector signed char)srcO);
\
dstW = (vector signed short)vec_mergel((vector signed char)
vzero, \
(vector signed char)dstO);
\
srcV = (vector signed short) vec_mergeh((vector signed char)
vzero, \
(vector signed char) srcO);
\
dstV = (vector signed short) vec_mergeh((vector signed char)
vzero, \
(vector signed char) dstO);
\
srcW = (vector signed short) vec_mergel((vector signed char)
vzero, \
(vector signed char) srcO);
\
dstW = (vector signed short) vec_mergel((vector signed char)
vzero, \
(vector signed char) dstO);
\
/* subtractions inside the first butterfly */
\
but0 = vec_sub(srcV, dstV); \
but0S = vec_sub(srcW, dstW); \
...
...
@@ -838,10 +871,10 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst,
{
register
vector
signed
int
vsum
;
register
vector
signed
short
line0S
,
line1S
,
line2S
,
line3S
,
line4S
,
line5S
,
line6S
,
line7S
,
line0BS
,
line2BS
,
line1BS
,
line3BS
,
line4BS
,
line6BS
,
line5BS
,
line7BS
,
line0CS
,
line4CS
,
line1CS
,
line5CS
,
line2CS
,
line6CS
,
line3CS
,
line7CS
;
line5S
,
line6S
,
line7S
,
line0BS
,
line2BS
,
line1BS
,
line3BS
,
line4BS
,
line6BS
,
line5BS
,
line7BS
,
line0CS
,
line4CS
,
line1CS
,
line5CS
,
line2CS
,
line6CS
,
line3CS
,
line7CS
;
register
vector
signed
short
line0
=
vec_add
(
temp0
,
temp1
);
register
vector
signed
short
line1
=
vec_sub
(
temp0
,
temp1
);
...
...
@@ -914,19 +947,21 @@ static int hadamard8_diff16x8_altivec(/*MpegEncContext*/ void *s, uint8_t *dst,
vsum
=
vec_sum4s
(
vec_abs
(
line5CS
),
vsum
);
vsum
=
vec_sum4s
(
vec_abs
(
line6CS
),
vsum
);
vsum
=
vec_sum4s
(
vec_abs
(
line7CS
),
vsum
);
vsum
=
vec_sums
(
vsum
,
(
vector
signed
int
)
vzero
);
vsum
=
vec_sums
(
vsum
,
(
vector
signed
int
)
vzero
);
vsum
=
vec_splat
(
vsum
,
3
);
vec_ste
(
vsum
,
0
,
&
sum
);
}
return
sum
;
}
static
int
hadamard8_diff16_altivec
(
/*MpegEncContext*/
void
*
s
,
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
){
static
int
hadamard8_diff16_altivec
(
/* MpegEncContext */
void
*
s
,
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
)
{
int
score
;
score
=
hadamard8_diff16x8_altivec
(
s
,
dst
,
src
,
stride
,
8
);
if
(
h
==
16
)
{
dst
+=
8
*
stride
;
src
+=
8
*
stride
;
if
(
h
==
16
)
{
dst
+=
8
*
stride
;
src
+=
8
*
stride
;
score
+=
hadamard8_diff16x8_altivec
(
s
,
dst
,
src
,
stride
,
8
);
}
return
score
;
...
...
@@ -941,14 +976,18 @@ av_cold void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx)
c
->
pix_abs
[
0
][
3
]
=
sad16_xy2_altivec
;
c
->
pix_abs
[
0
][
0
]
=
sad16_altivec
;
c
->
pix_abs
[
1
][
0
]
=
sad8_altivec
;
c
->
sad
[
0
]
=
sad16_altivec
;
c
->
sad
[
1
]
=
sad8_altivec
;
c
->
sad
[
0
]
=
sad16_altivec
;
c
->
sad
[
1
]
=
sad8_altivec
;
c
->
sse
[
0
]
=
sse16_altivec
;
c
->
sse
[
1
]
=
sse8_altivec
;
c
->
pix_norm1
=
pix_norm1_altivec
;
c
->
sse
[
1
]
=
sse8_altivec
;
c
->
sse
[
0
]
=
sse16_altivec
;
c
->
pix_sum
=
pix_sum_altivec
;
c
->
diff_pixels
=
diff_pixels_altivec
;
c
->
add_bytes
=
add_bytes_altivec
;
c
->
add_bytes
=
add_bytes_altivec
;
if
(
!
high_bit_depth
)
{
c
->
get_pixels
=
get_pixels_altivec
;
c
->
clear_block
=
clear_block_altivec
;
...
...
libavcodec/ppc/dsputil_altivec.h
View file @
30f3f959
...
...
@@ -24,11 +24,13 @@
#define AVCODEC_PPC_DSPUTIL_ALTIVEC_H
#include <stdint.h>
#include "libavcodec/dsputil.h"
void
ff_put_pixels16_altivec
(
uint8_t
*
block
,
const
uint8_t
*
pixels
,
ptrdiff_t
line_size
,
int
h
);
#include "libavcodec/dsputil.h"
void
ff_avg_pixels16_altivec
(
uint8_t
*
block
,
const
uint8_t
*
pixels
,
ptrdiff_t
line_size
,
int
h
);
void
ff_avg_pixels16_altivec
(
uint8_t
*
block
,
const
uint8_t
*
pixels
,
ptrdiff_t
line_size
,
int
h
);
void
ff_put_pixels16_altivec
(
uint8_t
*
block
,
const
uint8_t
*
pixels
,
ptrdiff_t
line_size
,
int
h
);
void
ff_fdct_altivec
(
int16_t
*
block
);
void
ff_gmc1_altivec
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
...
...
@@ -36,7 +38,7 @@ void ff_gmc1_altivec(uint8_t *dst, uint8_t *src, int stride, int h,
void
ff_idct_put_altivec
(
uint8_t
*
dest
,
int
line_size
,
int16_t
*
block
);
void
ff_idct_add_altivec
(
uint8_t
*
dest
,
int
line_size
,
int16_t
*
block
);
void
ff_dsputil_init_altivec
(
DSPContext
*
c
,
AVCodecContext
*
avctx
);
void
ff_int_init_altivec
(
DSPContext
*
c
,
AVCodecContext
*
avctx
);
void
ff_dsputil_init_altivec
(
DSPContext
*
c
,
AVCodecContext
*
avctx
);
void
ff_int_init_altivec
(
DSPContext
*
c
,
AVCodecContext
*
avctx
);
#endif
/* AVCODEC_PPC_DSPUTIL_ALTIVEC_H */
libavcodec/ppc/dsputil_ppc.c
View file @
30f3f959
...
...
@@ -51,23 +51,23 @@
*/
static
void
clear_blocks_dcbz32_ppc
(
int16_t
*
blocks
)
{
register
int
misal
=
((
unsigned
long
)
blocks
&
0x00000010
);
register
int
misal
=
((
unsigned
long
)
blocks
&
0x00000010
);
register
int
i
=
0
;
if
(
misal
)
{
((
unsigned
long
*
)
blocks
)[
0
]
=
0L
;
((
unsigned
long
*
)
blocks
)[
1
]
=
0L
;
((
unsigned
long
*
)
blocks
)[
2
]
=
0L
;
((
unsigned
long
*
)
blocks
)[
3
]
=
0L
;
((
unsigned
long
*
)
blocks
)[
0
]
=
0L
;
((
unsigned
long
*
)
blocks
)[
1
]
=
0L
;
((
unsigned
long
*
)
blocks
)[
2
]
=
0L
;
((
unsigned
long
*
)
blocks
)[
3
]
=
0L
;
i
+=
16
;
}
for
(
;
i
<
sizeof
(
int16_t
)
*
6
*
64
-
31
;
i
+=
32
)
{
__asm__
volatile
(
"dcbz %0,%1"
:
:
"b"
(
blocks
),
"r"
(
i
)
:
"memory"
);
}
for
(;
i
<
sizeof
(
int16_t
)
*
6
*
64
-
31
;
i
+=
32
)
__asm__
volatile
(
"dcbz %0,%1"
::
"b"
(
blocks
),
"r"
(
i
)
:
"memory"
);
if
(
misal
)
{
((
unsigned
long
*
)
blocks
)[
188
]
=
0L
;
((
unsigned
long
*
)
blocks
)[
189
]
=
0L
;
((
unsigned
long
*
)
blocks
)[
190
]
=
0L
;
((
unsigned
long
*
)
blocks
)[
191
]
=
0L
;
((
unsigned
long
*
)
blocks
)[
188
]
=
0L
;
((
unsigned
long
*
)
blocks
)[
189
]
=
0L
;
((
unsigned
long
*
)
blocks
)[
190
]
=
0L
;
((
unsigned
long
*
)
blocks
)[
191
]
=
0L
;
i
+=
16
;
}
}
...
...
@@ -77,23 +77,23 @@ static void clear_blocks_dcbz32_ppc(int16_t *blocks)
#if HAVE_DCBZL
static
void
clear_blocks_dcbz128_ppc
(
int16_t
*
blocks
)
{
register
int
misal
=
((
unsigned
long
)
blocks
&
0x0000007f
);
register
int
misal
=
((
unsigned
long
)
blocks
&
0x0000007f
);
register
int
i
=
0
;
if
(
misal
)
{
/* We could probably also optimize this case,
* but there's not much point as the machines
* aren't available yet (2003-06-26). */
memset
(
blocks
,
0
,
sizeof
(
int16_t
)
*
6
*
64
);
}
else
for
(
;
i
<
sizeof
(
int16_t
)
*
6
*
64
;
i
+=
128
)
{
__asm__
volatile
(
"dcbzl %0,%1"
:
:
"b"
(
blocks
),
"r"
(
i
)
:
"memory"
);
memset
(
blocks
,
0
,
sizeof
(
int16_t
)
*
6
*
64
);
}
else
{
for
(;
i
<
sizeof
(
int16_t
)
*
6
*
64
;
i
+=
128
)
__asm__
volatile
(
"dcbzl %0,%1"
::
"b"
(
blocks
),
"r"
(
i
)
:
"memory"
);
}
}
#else
static
void
clear_blocks_dcbz128_ppc
(
int16_t
*
blocks
)
{
memset
(
blocks
,
0
,
sizeof
(
int16_t
)
*
6
*
64
);
memset
(
blocks
,
0
,
sizeof
(
int16_t
)
*
6
*
64
);
}
#endif
...
...
@@ -110,9 +110,8 @@ static long check_dcbzl_effect(void)
register
long
i
=
0
;
long
count
=
0
;
if
(
!
fakedata
)
{
if
(
!
fakedata
)
return
0L
;
}
fakedata_middle
=
(
fakedata
+
512
);
...
...
@@ -120,12 +119,11 @@ static long check_dcbzl_effect(void)
/* Below the constraint "b" seems to mean "address base register"
* in gcc-3.3 / RS/6000 speaks. Seems to avoid using r0, so.... */
__asm__
volatile
(
"dcbzl %0, %1"
:
:
"b"
(
fakedata_middle
),
"r"
(
zero
));
__asm__
volatile
(
"dcbzl %0, %1"
:
:
"b"
(
fakedata_middle
),
"r"
(
zero
));
for
(
i
=
0
;
i
<
1024
;
i
++
)
{
if
(
fakedata
[
i
]
==
(
char
)
0
)
for
(
i
=
0
;
i
<
1024
;
i
++
)
if
(
fakedata
[
i
]
==
(
char
)
0
)
count
++
;
}
av_free
(
fakedata
);
...
...
@@ -177,6 +175,5 @@ av_cold void ff_dsputil_init_ppc(DSPContext *c, AVCodecContext *avctx)
c
->
idct_permutation_type
=
FF_TRANSPOSE_IDCT_PERM
;
}
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment