Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
b7d24fd4
Commit
b7d24fd4
authored
Jan 15, 2014
by
Diego Biurrun
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ppc: dsputil: Merge some declarations and initializations
parent
b045283f
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
219 additions
and
270 deletions
+219
-270
dsputil_altivec.c
libavcodec/ppc/dsputil_altivec.c
+181
-222
dsputil_ppc.c
libavcodec/ppc/dsputil_ppc.c
+3
-6
fdct_altivec.c
libavcodec/ppc/fdct_altivec.c
+1
-2
gmc_altivec.c
libavcodec/ppc/gmc_altivec.c
+15
-16
idct_altivec.c
libavcodec/ppc/idct_altivec.c
+17
-20
int_altivec.c
libavcodec/ppc/int_altivec.c
+2
-4
No files found.
libavcodec/ppc/dsputil_altivec.c
View file @
b7d24fd4
This diff is collapsed.
Click to expand it.
libavcodec/ppc/dsputil_ppc.c
View file @
b7d24fd4
...
...
@@ -51,8 +51,7 @@
*/
static
void
clear_blocks_dcbz32_ppc
(
int16_t
*
blocks
)
{
register
int
misal
=
(
unsigned
long
)
blocks
&
0x00000010
;
register
int
i
=
0
;
register
int
misal
=
(
unsigned
long
)
blocks
&
0x00000010
,
i
=
0
;
if
(
misal
)
{
((
unsigned
long
*
)
blocks
)[
0
]
=
0L
;
...
...
@@ -77,8 +76,7 @@ static void clear_blocks_dcbz32_ppc(int16_t *blocks)
static
void
clear_blocks_dcbz128_ppc
(
int16_t
*
blocks
)
{
#if HAVE_DCBZL
register
int
misal
=
(
unsigned
long
)
blocks
&
0x0000007f
;
register
int
i
=
0
;
register
int
misal
=
(
unsigned
long
)
blocks
&
0x0000007f
,
i
=
0
;
if
(
misal
)
{
/* We could probably also optimize this case,
...
...
@@ -104,8 +102,7 @@ static long check_dcbzl_effect(void)
#if HAVE_DCBZL
register
char
*
fakedata
=
av_malloc
(
1024
);
register
char
*
fakedata_middle
;
register
long
zero
=
0
;
register
long
i
=
0
;
register
long
zero
=
0
,
i
=
0
;
if
(
!
fakedata
)
return
0L
;
...
...
libavcodec/ppc/fdct_altivec.c
View file @
b7d24fd4
...
...
@@ -191,7 +191,7 @@ static vector float fdctconsts[3] = {
void
ff_fdct_altivec
(
int16_t
*
block
)
{
vector
signed
short
*
bp
;
vector
float
*
cp
;
vector
float
*
cp
=
fdctconsts
;
vector
float
b00
,
b10
,
b20
,
b30
,
b40
,
b50
,
b60
,
b70
;
vector
float
b01
,
b11
,
b21
,
b31
,
b41
,
b51
,
b61
,
b71
;
vector
float
mzero
,
cnst
,
cnsts0
,
cnsts1
,
cnsts2
;
...
...
@@ -201,7 +201,6 @@ void ff_fdct_altivec(int16_t *block)
/* mzero = -0.0 */
mzero
=
((
vector
float
)
vec_splat_u32
(
-
1
));
mzero
=
((
vector
float
)
vec_sl
(
vu32
(
mzero
),
vu32
(
mzero
)));
cp
=
fdctconsts
;
cnsts0
=
vec_ld
(
0
,
cp
);
cp
++
;
cnsts1
=
vec_ld
(
0
,
cp
);
...
...
libavcodec/ppc/gmc_altivec.c
View file @
b7d24fd4
...
...
@@ -30,6 +30,7 @@
void
ff_gmc1_altivec
(
uint8_t
*
dst
/* align 8 */
,
uint8_t
*
src
/* align1 */
,
int
stride
,
int
h
,
int
x16
,
int
y16
,
int
rounder
)
{
int
i
;
const
DECLARE_ALIGNED
(
16
,
unsigned
short
,
rounder_a
)
=
rounder
;
const
DECLARE_ALIGNED
(
16
,
unsigned
short
,
ABCD
)[
8
]
=
{
(
16
-
x16
)
*
(
16
-
y16
),
/* A */
...
...
@@ -42,28 +43,26 @@ void ff_gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */,
(
const
vector
unsigned
char
)
vec_splat_u8
(
0
);
register
const
vector
unsigned
short
vcsr8
=
(
const
vector
unsigned
short
)
vec_splat_u16
(
8
);
register
vector
unsigned
char
dstv
,
dstv2
,
src_0
,
src_1
,
srcvA
,
srcvB
,
srcvC
,
srcvD
;
register
vector
unsigned
short
Av
,
Bv
,
Cv
,
Dv
,
rounderV
,
tempA
,
tempB
,
tempC
,
tempD
;
int
i
;
register
vector
unsigned
char
dstv
,
dstv2
,
srcvB
,
srcvC
,
srcvD
;
register
vector
unsigned
short
tempB
,
tempC
,
tempD
;
unsigned
long
dst_odd
=
(
unsigned
long
)
dst
&
0x0000000F
;
unsigned
long
src_really_odd
=
(
unsigned
long
)
src
&
0x0000000F
;
tempA
=
vec_ld
(
0
,
(
const
unsigned
short
*
)
ABCD
);
Av
=
vec_splat
(
tempA
,
0
);
Bv
=
vec_splat
(
tempA
,
1
);
Cv
=
vec_splat
(
tempA
,
2
);
Dv
=
vec_splat
(
tempA
,
3
);
rounderV
=
vec_splat
((
vec_u16
)
vec_lde
(
0
,
&
rounder_a
),
0
);
register
vector
unsigned
short
tempA
=
vec_ld
(
0
,
(
const
unsigned
short
*
)
ABCD
);
register
vector
unsigned
short
Av
=
vec_splat
(
tempA
,
0
);
register
vector
unsigned
short
Bv
=
vec_splat
(
tempA
,
1
);
register
vector
unsigned
short
Cv
=
vec_splat
(
tempA
,
2
);
register
vector
unsigned
short
Dv
=
vec_splat
(
tempA
,
3
);
register
vector
unsigned
short
rounderV
=
vec_splat
((
vec_u16
)
vec_lde
(
0
,
&
rounder_a
),
0
);
/* we'll be able to pick-up our 9 char elements at src from those
* 32 bytes we load the first batch here, as inside the loop we can
* reuse 'src + stride' from one iteration as the 'src' of the next. */
src_0
=
vec_ld
(
0
,
src
);
src_1
=
vec_ld
(
16
,
src
);
srcvA
=
vec_perm
(
src_0
,
src_1
,
vec_lvsl
(
0
,
src
));
register
vector
unsigned
char
src_0
=
vec_ld
(
0
,
src
);
register
vector
unsigned
char
src_1
=
vec_ld
(
16
,
src
);
register
vector
unsigned
char
srcvA
=
vec_perm
(
src_0
,
src_1
,
vec_lvsl
(
0
,
src
));
if
(
src_really_odd
!=
0x0000000F
)
/* If src & 0xF == 0xF, then (src + 1) is properly aligned
...
...
libavcodec/ppc/idct_altivec.c
View file @
b7d24fd4
...
...
@@ -76,31 +76,28 @@
vy4 = vec_subs(t2, t6);
#define IDCT \
vec_s16 vx0, vx1, vx2, vx3, vx4, vx5, vx6, vx7; \
vec_s16 vy0, vy1, vy2, vy3, vy4, vy5, vy6, vy7; \
vec_s16 a0, a1, a2, ma2, c4, mc4, zero, bias; \
vec_s16 t0, t1, t2, t3, t4, t5, t6, t7, t8; \
vec_u16 shift; \
\
c4 = vec_splat(constants[0], 0);
\
a0 = vec_splat(constants[0], 1);
\
a1 = vec_splat(constants[0], 2);
\
a2 = vec_splat(constants[0], 3);
\
mc4 = vec_splat(constants[0], 4);
\
ma2 = vec_splat(constants[0], 5);
\
bias = (vec_s16) vec_splat((vec_s32) constants[0], 3);
\
vec_s16 c4 = vec_splat(constants[0], 0);
\
vec_s16 a0 = vec_splat(constants[0], 1);
\
vec_s16 a1 = vec_splat(constants[0], 2);
\
vec_s16 a2 = vec_splat(constants[0], 3);
\
vec_s16 mc4 = vec_splat(constants[0], 4);
\
vec_s16 ma2 = vec_splat(constants[0], 5);
\
vec_s16 bias = (vec_s16) vec_splat((vec_s32) constants[0], 3);
\
\
zero = vec_splat_s16(0);
\
shift = vec_splat_u16(4);
\
vec_s16 zero = vec_splat_s16(0);
\
vec_u16 shift = vec_splat_u16(4);
\
\
v
x0 = vec_mradds(vec_sl(block[0], shift), constants[1], zero);
\
v
x1 = vec_mradds(vec_sl(block[1], shift), constants[2], zero);
\
v
x2 = vec_mradds(vec_sl(block[2], shift), constants[3], zero);
\
v
x3 = vec_mradds(vec_sl(block[3], shift), constants[4], zero);
\
v
x4 = vec_mradds(vec_sl(block[4], shift), constants[1], zero);
\
v
x5 = vec_mradds(vec_sl(block[5], shift), constants[4], zero);
\
v
x6 = vec_mradds(vec_sl(block[6], shift), constants[3], zero);
\
v
x7 = vec_mradds(vec_sl(block[7], shift), constants[2], zero);
\
v
ec_s16 vx0 = vec_mradds(vec_sl(block[0], shift), constants[1], zero);
\
v
ec_s16 vx1 = vec_mradds(vec_sl(block[1], shift), constants[2], zero);
\
v
ec_s16 vx2 = vec_mradds(vec_sl(block[2], shift), constants[3], zero);
\
v
ec_s16 vx3 = vec_mradds(vec_sl(block[3], shift), constants[4], zero);
\
v
ec_s16 vx4 = vec_mradds(vec_sl(block[4], shift), constants[1], zero);
\
v
ec_s16 vx5 = vec_mradds(vec_sl(block[5], shift), constants[4], zero);
\
v
ec_s16 vx6 = vec_mradds(vec_sl(block[6], shift), constants[3], zero);
\
v
ec_s16 vx7 = vec_mradds(vec_sl(block[7], shift), constants[2], zero);
\
\
IDCT_HALF \
\
...
...
libavcodec/ppc/int_altivec.c
View file @
b7d24fd4
...
...
@@ -36,21 +36,19 @@
static
int
ssd_int8_vs_int16_altivec
(
const
int8_t
*
pix1
,
const
int16_t
*
pix2
,
int
size
)
{
int
i
,
size16
;
int
i
,
size16
=
size
>>
4
;
vector
signed
char
vpix1
;
vector
signed
short
vpix2
,
vdiff
,
vpix1l
,
vpix1h
;
union
{
vector
signed
int
vscore
;
int32_t
score
[
4
];
}
u
;
u
.
vscore
=
vec_splat_s32
(
0
);
}
u
=
{
.
vscore
=
vec_splat_s32
(
0
)
};
// XXX lazy way, fix it later
#define vec_unaligned_load(b) \
vec_perm(vec_ld(0, b), vec_ld(15, b), vec_lvsl(0, b));
size16
=
size
>>
4
;
while
(
size16
)
{
// score += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]);
// load pix1 and the first batch of pix2
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment