Linshizhi / ffmpeg.wasm-core

Commit 022184a6
Authored Jan 15, 2014 by Diego Biurrun
ppc: dsputil: more K&R formatting cosmetics

Parent: 30f3f959
Showing 4 changed files with 437 additions and 416 deletions.
    libavcodec/ppc/fdct_altivec.c   +191  -193
    libavcodec/ppc/gmc_altivec.c     +42   -41
    libavcodec/ppc/idct_altivec.c   +154  -141
    libavcodec/ppc/int_altivec.c     +50   -41
libavcodec/ppc/fdct_altivec.c (view file @ 022184a6)
This diff is collapsed and not shown.
libavcodec/ppc/gmc_altivec.c (view file @ 022184a6)

@@ -27,32 +27,36 @@
/* AltiVec-enhanced gmc1. ATM this code assumes stride is a multiple of 8
 * to preserve proper dst alignment. */
void ff_gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */,
                     int stride, int h, int x16, int y16, int rounder)
{
    const DECLARE_ALIGNED(16, unsigned short, rounder_a) = rounder;
    const DECLARE_ALIGNED(16, unsigned short, ABCD)[8] = {
        (16 - x16) * (16 - y16),    /* A */
             (x16) * (16 - y16),    /* B */
        (16 - x16) * (y16),         /* C */
             (x16) * (y16),         /* D */
        0, 0, 0, 0                  /* padding */
    };
    register const vector unsigned char vczero =
        (const vector unsigned char) vec_splat_u8(0);
    register const vector unsigned short vcsr8 =
        (const vector unsigned short) vec_splat_u16(8);
    register vector unsigned char dstv, dstv2, src_0, src_1,
                                  srcvA, srcvB, srcvC, srcvD;
    register vector unsigned short Av, Bv, Cv, Dv, rounderV,
                                   tempA, tempB, tempC, tempD;
    int i;
    unsigned long dst_odd        = (unsigned long) dst & 0x0000000F;
    unsigned long src_really_odd = (unsigned long) src & 0x0000000F;

    tempA = vec_ld(0, (const unsigned short *) ABCD);
    Av    = vec_splat(tempA, 0);
    Bv    = vec_splat(tempA, 1);
    Cv    = vec_splat(tempA, 2);
    Dv    = vec_splat(tempA, 3);

    rounderV = vec_splat((vec_u16) vec_lde(0, &rounder_a), 0);

    /* we'll be able to pick-up our 9 char elements at src from those
     * 32 bytes we load the first batch here, as inside the loop we can
     * ... */
@@ -61,36 +65,34 @@ void ff_gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, int
    src_1 = vec_ld(16, src);
    srcvA = vec_perm(src_0, src_1, vec_lvsl(0, src));

    if (src_really_odd != 0x0000000F)
        /* If src & 0xF == 0xF, then (src + 1) is properly aligned
         * on the second vector. */
        srcvB = vec_perm(src_0, src_1, vec_lvsl(1, src));
    else
        srcvB = src_1;

    srcvA = vec_mergeh(vczero, srcvA);
    srcvB = vec_mergeh(vczero, srcvB);

    for (i = 0; i < h; i++) {
        dst_odd        = (unsigned long) dst & 0x0000000F;
        src_really_odd = (((unsigned long) src) + stride) & 0x0000000F;

        dstv = vec_ld(0, dst);

        /* We'll be able to pick-up our 9 char elements at src + stride from
         * those 32 bytes then reuse the resulting 2 vectors srvcC and srcvD
         * as the next srcvA and srcvB. */
        src_0 = vec_ld(stride +  0, src);
        src_1 = vec_ld(stride + 16, src);
        srcvC = vec_perm(src_0, src_1, vec_lvsl(stride + 0, src));

        if (src_really_odd != 0x0000000F)
            /* If src & 0xF == 0xF, then (src + 1) is properly aligned
             * on the second vector. */
            srcvD = vec_perm(src_0, src_1, vec_lvsl(stride + 1, src));
        else
            srcvD = src_1;

        srcvC = vec_mergeh(vczero, srcvC);
        srcvD = vec_mergeh(vczero, srcvD);
@@ -98,23 +100,22 @@ void ff_gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, int
        /* OK, now we (finally) do the math :-)
         * Those four instructions replace 32 int muls & 32 int adds.
         * Isn't AltiVec nice? */
        tempA = vec_mladd((vector unsigned short) srcvA, Av, rounderV);
        tempB = vec_mladd((vector unsigned short) srcvB, Bv, tempA);
        tempC = vec_mladd((vector unsigned short) srcvC, Cv, tempB);
        tempD = vec_mladd((vector unsigned short) srcvD, Dv, tempC);

        srcvA = srcvC;
        srcvB = srcvD;

        tempD = vec_sr(tempD, vcsr8);

        dstv2 = vec_pack(tempD, (vector unsigned short) vczero);

        if (dst_odd)
            dstv2 = vec_perm(dstv, dstv2, vcprm(0, 1, s0, s1));
        else
            dstv2 = vec_perm(dstv, dstv2, vcprm(s0, s1, 2, 3));

        vec_st(dstv2, 0, dst);
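For orientation, ff_gmc1_altivec implements gmc1 (global motion compensation with 1/16-pel accuracy): each output pixel of an 8-pixel-wide, h-row block is a bilinear blend of four neighbouring source pixels, weighted by the A/B/C/D table built at the top of the function, offset by rounder and normalized by a shift of 8 (vcsr8). Below is a minimal scalar sketch of that operation, assuming the 8-wide block shape suggested by the align-8 dst and the packing of 8 shorts per row; the function name and loop body are illustrative and not part of this commit:

#include <stdint.h>

/* Scalar sketch of the blend the AltiVec code above vectorizes.
 * A + B + C + D == 256, so the weighted sum is renormalized by >> 8. */
static void gmc1_scalar_sketch(uint8_t *dst, const uint8_t *src,
                               int stride, int h,
                               int x16, int y16, int rounder)
{
    const int A = (16 - x16) * (16 - y16);
    const int B = (x16)      * (16 - y16);
    const int C = (16 - x16) * (y16);
    const int D = (x16)      * (y16);

    for (int i = 0; i < h; i++) {
        for (int j = 0; j < 8; j++)
            dst[j] = (A * src[j]          + B * src[j + 1] +
                      C * src[j + stride] + D * src[j + stride + 1] +
                      rounder) >> 8;
        dst += stride;
        src += stride;
    }
}

Per row, the four vec_mladd calls in the last hunk stand in for this inner loop's 32 integer multiplies and 32 adds, as the source comment notes; vec_sr by 8 and vec_pack then perform the normalization and the narrowing back to bytes.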
libavcodec/ppc/idct_altivec.c (view file @ 022184a6)
This diff is collapsed and not shown.
libavcodec/ppc/int_altivec.c (view file @ 022184a6)

@@ -31,27 +31,28 @@
#include "libavutil/attributes.h"
#include "libavutil/ppc/types_altivec.h"
#include "libavcodec/dsputil.h"
#include "dsputil_altivec.h"

static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2,
                                     int size)
{
    int i, size16;
    vector signed char vpix1;
    vector signed short vpix2, vdiff, vpix1l, vpix1h;
    union {
        vector signed int vscore;
        int32_t score[4];
    } u;

    u.vscore = vec_splat_s32(0);

// XXX lazy way, fix it later
#define vec_unaligned_load(b)                                   \
    vec_perm(vec_ld(0, b), vec_ld(15, b), vec_lvsl(0, b));

    size16 = size >> 4;
    while (size16) {
        // score += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]);
        // load pix1 and the first batch of pix2
        vpix1 = vec_unaligned_load(pix1);
@@ -62,20 +63,20 @@ static int ssd_int8_vs_int16_altivec(const int8_t *pix1, const int16_t *pix2,
        vdiff  = vec_sub(vpix1h, vpix2);
        vpix1l = vec_unpackl(vpix1);
        // load another batch from pix2
        vpix2 = vec_unaligned_load(pix2);

        u.vscore = vec_msum(vdiff, vdiff, u.vscore);
        vdiff    = vec_sub(vpix1l, vpix2);
        u.vscore = vec_msum(vdiff, vdiff, u.vscore);

        pix1 += 16;
        pix2 += 8;
        size16--;
    }

    u.vscore = vec_sums(u.vscore, vec_splat_s32(0));

    size %= 16;
    for (i = 0; i < size; i++)
        u.score[3] += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]);

    return u.score[3];
}
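In scalar terms, ssd_int8_vs_int16_altivec returns the sum of squared differences between an int8_t block and an int16_t block, exactly what the tail loop above computes for the last size % 16 elements; the vec_unaligned_load() macro from the previous hunk merges two aligned vec_ld loads (offsets 0 and 15) through vec_lvsl/vec_perm so that neither pointer needs to be 16-byte aligned. A scalar equivalent, with an illustrative name that is not part of the source:

#include <stdint.h>

/* Sum of squared differences between an int8_t and an int16_t block;
 * scalar counterpart of ssd_int8_vs_int16_altivec above. */
static int ssd_int8_vs_int16_scalar(const int8_t *pix1,
                                    const int16_t *pix2, int size)
{
    int score = 0;

    for (int i = 0; i < size; i++)
        score += (pix1[i] - pix2[i]) * (pix1[i] - pix2[i]);

    return score;
}

The vector loop consumes 16 pix1 bytes per iteration, unpacking them into two vectors of eight 16-bit values (vpix1h, vpix1l) and differencing each against a batch of eight pix2 values before vec_msum accumulates the squared differences.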
@@ -88,56 +89,64 @@ static int32_t scalarproduct_int16_altivec(const int16_t *v1, const int16_t *v2,
    register vec_s32 res = vec_splat_s32(0), t;
    int32_t ires;

    for (i = 0; i < order; i += 8) {
        vec1 = vec_unaligned_load(v1);
        t    = vec_msum(vec1, vec_ld(0, v2), zero_s32v);
        res  = vec_sums(t, res);
        v1  += 8;
        v2  += 8;
    }
    res = vec_splat(res, 3);
    vec_ste(res, 0, &ires);

    return ires;
}

static int32_t scalarproduct_and_madd_int16_altivec(int16_t *v1,
                                                    const int16_t *v2,
                                                    const int16_t *v3,
                                                    int order, int mul)
{
    LOAD_ZERO;
    vec_s16 *pv1 = (vec_s16 *) v1;
    register vec_s16 muls = { mul, mul, mul, mul, mul, mul, mul, mul };
    register vec_s16 t0, t1, i0, i1, i4;
    register vec_s16 i2 = vec_ld(0, v2), i3 = vec_ld(0, v3);
    register vec_s32 res = zero_s32v;
    register vec_u8 align = vec_lvsl(0, v2);
    int32_t ires;

    order >>= 4;
    do {
        i1 = vec_ld(16, v2);
        t0 = vec_perm(i2, i1, align);
        i2 = vec_ld(32, v2);
        t1 = vec_perm(i1, i2, align);
        i0 = pv1[0];
        i1 = pv1[1];
        res = vec_msum(t0, i0, res);
        res = vec_msum(t1, i1, res);
        i4 = vec_ld(16, v3);
        t0 = vec_perm(i3, i4, align);
        i3 = vec_ld(32, v3);
        t1 = vec_perm(i4, i3, align);
        pv1[0] = vec_mladd(t0, muls, i0);
        pv1[1] = vec_mladd(t1, muls, i1);
        pv1 += 2;
        v2  += 16;
        v3  += 16;
    } while (--order);
    res = vec_splat(vec_sums(res, zero_s32v), 3);
    vec_ste(res, 0, &ires);

    return ires;
}

av_cold void ff_int_init_altivec(DSPContext *c, AVCodecContext *avctx)
{
    c->ssd_int8_vs_int16            = ssd_int8_vs_int16_altivec;
    c->scalarproduct_int16          = scalarproduct_int16_altivec;
    c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_altivec;
}
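For reference, the other two hooks installed above compute, in scalar terms: scalarproduct_int16 returns the dot product of v1 and v2, and scalarproduct_and_madd_int16 returns the dot product of v1 and v2 while rewriting v1[i] += v3[i] * mul, which is what the vec_msum accumulation into res and the vec_mladd writeback into pv1[] implement. A scalar sketch under those assumptions; the _scalar names are illustrative only, and the length constraints (order a multiple of 8, respectively 16) mirror the vector loops:

#include <stdint.h>

/* Dot product of two int16_t vectors; order is assumed to be
 * a multiple of 8, matching the AltiVec loop above. */
static int32_t scalarproduct_int16_scalar(const int16_t *v1,
                                          const int16_t *v2, int order)
{
    int32_t res = 0;

    while (order--)
        res += *v1++ * *v2++;

    return res;
}

/* Dot product of v1 and v2 using the original v1 values, while
 * updating v1 in place with v1[i] += v3[i] * mul; order is assumed
 * to be a non-zero multiple of 16, matching the do/while above. */
static int32_t scalarproduct_and_madd_int16_scalar(int16_t *v1,
                                                   const int16_t *v2,
                                                   const int16_t *v3,
                                                   int order, int mul)
{
    int32_t res = 0;

    do {
        res   += *v1 * *v2++;   /* accumulate with the pre-update v1 */
        *v1++ += *v3++ * mul;   /* in-place multiply-accumulate      */
    } while (--order);

    return res;
}

Note that the AltiVec version reuses the alignment permute computed from v2 (vec_lvsl(0, v2)) for the v3 loads as well, so it assumes v2 and v3 share the same 16-byte alignment offset.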