Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
47e5a981
Commit
47e5a981
authored
Mar 10, 2013
by
Ronald S. Bultje
Committed by
Martin Storsjö
Apr 19, 2013
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ppc: hpeldsp: Move half-pel assembly from dsputil to hpeldsp
Signed-off-by:
Martin Storsjö
<
martin@martin.st
>
parent
8db00081
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
470 additions
and
421 deletions
+470
-421
hpeldsp.c
libavcodec/hpeldsp.c
+2
-0
hpeldsp.h
libavcodec/hpeldsp.h
+1
-0
Makefile
libavcodec/ppc/Makefile
+1
-0
dsputil_altivec.c
libavcodec/ppc/dsputil_altivec.c
+0
-421
hpeldsp_altivec.c
libavcodec/ppc/hpeldsp_altivec.c
+466
-0
No files found.
libavcodec/hpeldsp.c
View file @
47e5a981
...
@@ -54,6 +54,8 @@ av_cold void ff_hpeldsp_init(HpelDSPContext *c, int flags)
...
@@ -54,6 +54,8 @@ av_cold void ff_hpeldsp_init(HpelDSPContext *c, int flags)
hpel_funcs
(
avg
,
[
3
],
2
);
hpel_funcs
(
avg
,
[
3
],
2
);
hpel_funcs
(
avg_no_rnd
,,
16
);
hpel_funcs
(
avg_no_rnd
,,
16
);
if
(
ARCH_PPC
)
ff_hpeldsp_init_ppc
(
c
,
flags
);
if
(
ARCH_X86
)
if
(
ARCH_X86
)
ff_hpeldsp_init_x86
(
c
,
flags
);
ff_hpeldsp_init_x86
(
c
,
flags
);
}
}
libavcodec/hpeldsp.h
View file @
47e5a981
...
@@ -94,6 +94,7 @@ typedef struct HpelDSPContext {
...
@@ -94,6 +94,7 @@ typedef struct HpelDSPContext {
void
ff_hpeldsp_init
(
HpelDSPContext
*
c
,
int
flags
);
void
ff_hpeldsp_init
(
HpelDSPContext
*
c
,
int
flags
);
void
ff_hpeldsp_init_ppc
(
HpelDSPContext
*
c
,
int
flags
);
void
ff_hpeldsp_init_x86
(
HpelDSPContext
*
c
,
int
flags
);
void
ff_hpeldsp_init_x86
(
HpelDSPContext
*
c
,
int
flags
);
#endif
/* AVCODEC_HPELDSP_H */
#endif
/* AVCODEC_HPELDSP_H */
libavcodec/ppc/Makefile
View file @
47e5a981
...
@@ -3,6 +3,7 @@ OBJS += ppc/dsputil_ppc.o \
...
@@ -3,6 +3,7 @@ OBJS += ppc/dsputil_ppc.o \
OBJS-$(CONFIG_H264CHROMA)
+=
ppc/h264chroma_init.o
OBJS-$(CONFIG_H264CHROMA)
+=
ppc/h264chroma_init.o
OBJS-$(CONFIG_H264QPEL)
+=
ppc/h264_qpel.o
OBJS-$(CONFIG_H264QPEL)
+=
ppc/h264_qpel.o
OBJS-$(CONFIG_HPELDSP)
+=
ppc/hpeldsp_altivec.o
OBJS-$(CONFIG_VORBIS_DECODER)
+=
ppc/vorbisdsp_altivec.o
OBJS-$(CONFIG_VORBIS_DECODER)
+=
ppc/vorbisdsp_altivec.o
OBJS-$(CONFIG_VP3DSP)
+=
ppc/vp3dsp_altivec.o
OBJS-$(CONFIG_VP3DSP)
+=
ppc/vp3dsp_altivec.o
...
...
libavcodec/ppc/dsputil_altivec.c
View file @
47e5a981
...
@@ -607,354 +607,6 @@ static void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) {
...
@@ -607,354 +607,6 @@ static void add_bytes_altivec(uint8_t *dst, uint8_t *src, int w) {
}
}
}
}
/* next one assumes that ((line_size % 16) == 0) */
void
ff_put_pixels16_altivec
(
uint8_t
*
block
,
const
uint8_t
*
pixels
,
ptrdiff_t
line_size
,
int
h
)
{
register
vector
unsigned
char
pixelsv1
,
pixelsv2
;
register
vector
unsigned
char
pixelsv1B
,
pixelsv2B
;
register
vector
unsigned
char
pixelsv1C
,
pixelsv2C
;
register
vector
unsigned
char
pixelsv1D
,
pixelsv2D
;
register
vector
unsigned
char
perm
=
vec_lvsl
(
0
,
pixels
);
int
i
;
register
ptrdiff_t
line_size_2
=
line_size
<<
1
;
register
ptrdiff_t
line_size_3
=
line_size
+
line_size_2
;
register
ptrdiff_t
line_size_4
=
line_size
<<
2
;
// hand-unrolling the loop by 4 gains about 15%
// mininum execution time goes from 74 to 60 cycles
// it's faster than -funroll-loops, but using
// -funroll-loops w/ this is bad - 74 cycles again.
// all this is on a 7450, tuning for the 7450
for
(
i
=
0
;
i
<
h
;
i
+=
4
)
{
pixelsv1
=
vec_ld
(
0
,
pixels
);
pixelsv2
=
vec_ld
(
15
,
pixels
);
pixelsv1B
=
vec_ld
(
line_size
,
pixels
);
pixelsv2B
=
vec_ld
(
15
+
line_size
,
pixels
);
pixelsv1C
=
vec_ld
(
line_size_2
,
pixels
);
pixelsv2C
=
vec_ld
(
15
+
line_size_2
,
pixels
);
pixelsv1D
=
vec_ld
(
line_size_3
,
pixels
);
pixelsv2D
=
vec_ld
(
15
+
line_size_3
,
pixels
);
vec_st
(
vec_perm
(
pixelsv1
,
pixelsv2
,
perm
),
0
,
(
unsigned
char
*
)
block
);
vec_st
(
vec_perm
(
pixelsv1B
,
pixelsv2B
,
perm
),
line_size
,
(
unsigned
char
*
)
block
);
vec_st
(
vec_perm
(
pixelsv1C
,
pixelsv2C
,
perm
),
line_size_2
,
(
unsigned
char
*
)
block
);
vec_st
(
vec_perm
(
pixelsv1D
,
pixelsv2D
,
perm
),
line_size_3
,
(
unsigned
char
*
)
block
);
pixels
+=
line_size_4
;
block
+=
line_size_4
;
}
}
/* next one assumes that ((line_size % 16) == 0) */
#define op_avg(a,b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) )
void
ff_avg_pixels16_altivec
(
uint8_t
*
block
,
const
uint8_t
*
pixels
,
ptrdiff_t
line_size
,
int
h
)
{
register
vector
unsigned
char
pixelsv1
,
pixelsv2
,
pixelsv
,
blockv
;
register
vector
unsigned
char
perm
=
vec_lvsl
(
0
,
pixels
);
int
i
;
for
(
i
=
0
;
i
<
h
;
i
++
)
{
pixelsv1
=
vec_ld
(
0
,
pixels
);
pixelsv2
=
vec_ld
(
16
,
pixels
);
blockv
=
vec_ld
(
0
,
block
);
pixelsv
=
vec_perm
(
pixelsv1
,
pixelsv2
,
perm
);
blockv
=
vec_avg
(
blockv
,
pixelsv
);
vec_st
(
blockv
,
0
,
(
unsigned
char
*
)
block
);
pixels
+=
line_size
;
block
+=
line_size
;
}
}
/* next one assumes that ((line_size % 8) == 0) */
static
void
avg_pixels8_altivec
(
uint8_t
*
block
,
const
uint8_t
*
pixels
,
ptrdiff_t
line_size
,
int
h
)
{
register
vector
unsigned
char
pixelsv1
,
pixelsv2
,
pixelsv
,
blockv
;
int
i
;
for
(
i
=
0
;
i
<
h
;
i
++
)
{
/* block is 8 bytes-aligned, so we're either in the
left block (16 bytes-aligned) or in the right block (not) */
int
rightside
=
((
unsigned
long
)
block
&
0x0000000F
);
blockv
=
vec_ld
(
0
,
block
);
pixelsv1
=
vec_ld
(
0
,
pixels
);
pixelsv2
=
vec_ld
(
16
,
pixels
);
pixelsv
=
vec_perm
(
pixelsv1
,
pixelsv2
,
vec_lvsl
(
0
,
pixels
));
if
(
rightside
)
{
pixelsv
=
vec_perm
(
blockv
,
pixelsv
,
vcprm
(
0
,
1
,
s0
,
s1
));
}
else
{
pixelsv
=
vec_perm
(
blockv
,
pixelsv
,
vcprm
(
s0
,
s1
,
2
,
3
));
}
blockv
=
vec_avg
(
blockv
,
pixelsv
);
vec_st
(
blockv
,
0
,
block
);
pixels
+=
line_size
;
block
+=
line_size
;
}
}
/* next one assumes that ((line_size % 8) == 0) */
static
void
put_pixels8_xy2_altivec
(
uint8_t
*
block
,
const
uint8_t
*
pixels
,
ptrdiff_t
line_size
,
int
h
)
{
register
int
i
;
register
vector
unsigned
char
pixelsv1
,
pixelsv2
,
pixelsavg
;
register
vector
unsigned
char
blockv
,
temp1
,
temp2
;
register
vector
unsigned
short
pixelssum1
,
pixelssum2
,
temp3
;
register
const
vector
unsigned
char
vczero
=
(
const
vector
unsigned
char
)
vec_splat_u8
(
0
);
register
const
vector
unsigned
short
vctwo
=
(
const
vector
unsigned
short
)
vec_splat_u16
(
2
);
temp1
=
vec_ld
(
0
,
pixels
);
temp2
=
vec_ld
(
16
,
pixels
);
pixelsv1
=
vec_perm
(
temp1
,
temp2
,
vec_lvsl
(
0
,
pixels
));
if
((((
unsigned
long
)
pixels
)
&
0x0000000F
)
==
0x0000000F
)
{
pixelsv2
=
temp2
;
}
else
{
pixelsv2
=
vec_perm
(
temp1
,
temp2
,
vec_lvsl
(
1
,
pixels
));
}
pixelsv1
=
vec_mergeh
(
vczero
,
pixelsv1
);
pixelsv2
=
vec_mergeh
(
vczero
,
pixelsv2
);
pixelssum1
=
vec_add
((
vector
unsigned
short
)
pixelsv1
,
(
vector
unsigned
short
)
pixelsv2
);
pixelssum1
=
vec_add
(
pixelssum1
,
vctwo
);
for
(
i
=
0
;
i
<
h
;
i
++
)
{
int
rightside
=
((
unsigned
long
)
block
&
0x0000000F
);
blockv
=
vec_ld
(
0
,
block
);
temp1
=
vec_ld
(
line_size
,
pixels
);
temp2
=
vec_ld
(
line_size
+
16
,
pixels
);
pixelsv1
=
vec_perm
(
temp1
,
temp2
,
vec_lvsl
(
line_size
,
pixels
));
if
(((((
unsigned
long
)
pixels
)
+
line_size
)
&
0x0000000F
)
==
0x0000000F
)
{
pixelsv2
=
temp2
;
}
else
{
pixelsv2
=
vec_perm
(
temp1
,
temp2
,
vec_lvsl
(
line_size
+
1
,
pixels
));
}
pixelsv1
=
vec_mergeh
(
vczero
,
pixelsv1
);
pixelsv2
=
vec_mergeh
(
vczero
,
pixelsv2
);
pixelssum2
=
vec_add
((
vector
unsigned
short
)
pixelsv1
,
(
vector
unsigned
short
)
pixelsv2
);
temp3
=
vec_add
(
pixelssum1
,
pixelssum2
);
temp3
=
vec_sra
(
temp3
,
vctwo
);
pixelssum1
=
vec_add
(
pixelssum2
,
vctwo
);
pixelsavg
=
vec_packsu
(
temp3
,
(
vector
unsigned
short
)
vczero
);
if
(
rightside
)
{
blockv
=
vec_perm
(
blockv
,
pixelsavg
,
vcprm
(
0
,
1
,
s0
,
s1
));
}
else
{
blockv
=
vec_perm
(
blockv
,
pixelsavg
,
vcprm
(
s0
,
s1
,
2
,
3
));
}
vec_st
(
blockv
,
0
,
block
);
block
+=
line_size
;
pixels
+=
line_size
;
}
}
/* next one assumes that ((line_size % 8) == 0) */
static
void
put_no_rnd_pixels8_xy2_altivec
(
uint8_t
*
block
,
const
uint8_t
*
pixels
,
ptrdiff_t
line_size
,
int
h
)
{
register
int
i
;
register
vector
unsigned
char
pixelsv1
,
pixelsv2
,
pixelsavg
;
register
vector
unsigned
char
blockv
,
temp1
,
temp2
;
register
vector
unsigned
short
pixelssum1
,
pixelssum2
,
temp3
;
register
const
vector
unsigned
char
vczero
=
(
const
vector
unsigned
char
)
vec_splat_u8
(
0
);
register
const
vector
unsigned
short
vcone
=
(
const
vector
unsigned
short
)
vec_splat_u16
(
1
);
register
const
vector
unsigned
short
vctwo
=
(
const
vector
unsigned
short
)
vec_splat_u16
(
2
);
temp1
=
vec_ld
(
0
,
pixels
);
temp2
=
vec_ld
(
16
,
pixels
);
pixelsv1
=
vec_perm
(
temp1
,
temp2
,
vec_lvsl
(
0
,
pixels
));
if
((((
unsigned
long
)
pixels
)
&
0x0000000F
)
==
0x0000000F
)
{
pixelsv2
=
temp2
;
}
else
{
pixelsv2
=
vec_perm
(
temp1
,
temp2
,
vec_lvsl
(
1
,
pixels
));
}
pixelsv1
=
vec_mergeh
(
vczero
,
pixelsv1
);
pixelsv2
=
vec_mergeh
(
vczero
,
pixelsv2
);
pixelssum1
=
vec_add
((
vector
unsigned
short
)
pixelsv1
,
(
vector
unsigned
short
)
pixelsv2
);
pixelssum1
=
vec_add
(
pixelssum1
,
vcone
);
for
(
i
=
0
;
i
<
h
;
i
++
)
{
int
rightside
=
((
unsigned
long
)
block
&
0x0000000F
);
blockv
=
vec_ld
(
0
,
block
);
temp1
=
vec_ld
(
line_size
,
pixels
);
temp2
=
vec_ld
(
line_size
+
16
,
pixels
);
pixelsv1
=
vec_perm
(
temp1
,
temp2
,
vec_lvsl
(
line_size
,
pixels
));
if
(((((
unsigned
long
)
pixels
)
+
line_size
)
&
0x0000000F
)
==
0x0000000F
)
{
pixelsv2
=
temp2
;
}
else
{
pixelsv2
=
vec_perm
(
temp1
,
temp2
,
vec_lvsl
(
line_size
+
1
,
pixels
));
}
pixelsv1
=
vec_mergeh
(
vczero
,
pixelsv1
);
pixelsv2
=
vec_mergeh
(
vczero
,
pixelsv2
);
pixelssum2
=
vec_add
((
vector
unsigned
short
)
pixelsv1
,
(
vector
unsigned
short
)
pixelsv2
);
temp3
=
vec_add
(
pixelssum1
,
pixelssum2
);
temp3
=
vec_sra
(
temp3
,
vctwo
);
pixelssum1
=
vec_add
(
pixelssum2
,
vcone
);
pixelsavg
=
vec_packsu
(
temp3
,
(
vector
unsigned
short
)
vczero
);
if
(
rightside
)
{
blockv
=
vec_perm
(
blockv
,
pixelsavg
,
vcprm
(
0
,
1
,
s0
,
s1
));
}
else
{
blockv
=
vec_perm
(
blockv
,
pixelsavg
,
vcprm
(
s0
,
s1
,
2
,
3
));
}
vec_st
(
blockv
,
0
,
block
);
block
+=
line_size
;
pixels
+=
line_size
;
}
}
/* next one assumes that ((line_size % 16) == 0) */
static
void
put_pixels16_xy2_altivec
(
uint8_t
*
block
,
const
uint8_t
*
pixels
,
ptrdiff_t
line_size
,
int
h
)
{
register
int
i
;
register
vector
unsigned
char
pixelsv1
,
pixelsv2
,
pixelsv3
,
pixelsv4
;
register
vector
unsigned
char
blockv
,
temp1
,
temp2
;
register
vector
unsigned
short
temp3
,
temp4
,
pixelssum1
,
pixelssum2
,
pixelssum3
,
pixelssum4
;
register
const
vector
unsigned
char
vczero
=
(
const
vector
unsigned
char
)
vec_splat_u8
(
0
);
register
const
vector
unsigned
short
vctwo
=
(
const
vector
unsigned
short
)
vec_splat_u16
(
2
);
temp1
=
vec_ld
(
0
,
pixels
);
temp2
=
vec_ld
(
16
,
pixels
);
pixelsv1
=
vec_perm
(
temp1
,
temp2
,
vec_lvsl
(
0
,
pixels
));
if
((((
unsigned
long
)
pixels
)
&
0x0000000F
)
==
0x0000000F
)
{
pixelsv2
=
temp2
;
}
else
{
pixelsv2
=
vec_perm
(
temp1
,
temp2
,
vec_lvsl
(
1
,
pixels
));
}
pixelsv3
=
vec_mergel
(
vczero
,
pixelsv1
);
pixelsv4
=
vec_mergel
(
vczero
,
pixelsv2
);
pixelsv1
=
vec_mergeh
(
vczero
,
pixelsv1
);
pixelsv2
=
vec_mergeh
(
vczero
,
pixelsv2
);
pixelssum3
=
vec_add
((
vector
unsigned
short
)
pixelsv3
,
(
vector
unsigned
short
)
pixelsv4
);
pixelssum3
=
vec_add
(
pixelssum3
,
vctwo
);
pixelssum1
=
vec_add
((
vector
unsigned
short
)
pixelsv1
,
(
vector
unsigned
short
)
pixelsv2
);
pixelssum1
=
vec_add
(
pixelssum1
,
vctwo
);
for
(
i
=
0
;
i
<
h
;
i
++
)
{
blockv
=
vec_ld
(
0
,
block
);
temp1
=
vec_ld
(
line_size
,
pixels
);
temp2
=
vec_ld
(
line_size
+
16
,
pixels
);
pixelsv1
=
vec_perm
(
temp1
,
temp2
,
vec_lvsl
(
line_size
,
pixels
));
if
(((((
unsigned
long
)
pixels
)
+
line_size
)
&
0x0000000F
)
==
0x0000000F
)
{
pixelsv2
=
temp2
;
}
else
{
pixelsv2
=
vec_perm
(
temp1
,
temp2
,
vec_lvsl
(
line_size
+
1
,
pixels
));
}
pixelsv3
=
vec_mergel
(
vczero
,
pixelsv1
);
pixelsv4
=
vec_mergel
(
vczero
,
pixelsv2
);
pixelsv1
=
vec_mergeh
(
vczero
,
pixelsv1
);
pixelsv2
=
vec_mergeh
(
vczero
,
pixelsv2
);
pixelssum4
=
vec_add
((
vector
unsigned
short
)
pixelsv3
,
(
vector
unsigned
short
)
pixelsv4
);
pixelssum2
=
vec_add
((
vector
unsigned
short
)
pixelsv1
,
(
vector
unsigned
short
)
pixelsv2
);
temp4
=
vec_add
(
pixelssum3
,
pixelssum4
);
temp4
=
vec_sra
(
temp4
,
vctwo
);
temp3
=
vec_add
(
pixelssum1
,
pixelssum2
);
temp3
=
vec_sra
(
temp3
,
vctwo
);
pixelssum3
=
vec_add
(
pixelssum4
,
vctwo
);
pixelssum1
=
vec_add
(
pixelssum2
,
vctwo
);
blockv
=
vec_packsu
(
temp3
,
temp4
);
vec_st
(
blockv
,
0
,
block
);
block
+=
line_size
;
pixels
+=
line_size
;
}
}
/* next one assumes that ((line_size % 16) == 0) */
static
void
put_no_rnd_pixels16_xy2_altivec
(
uint8_t
*
block
,
const
uint8_t
*
pixels
,
ptrdiff_t
line_size
,
int
h
)
{
register
int
i
;
register
vector
unsigned
char
pixelsv1
,
pixelsv2
,
pixelsv3
,
pixelsv4
;
register
vector
unsigned
char
blockv
,
temp1
,
temp2
;
register
vector
unsigned
short
temp3
,
temp4
,
pixelssum1
,
pixelssum2
,
pixelssum3
,
pixelssum4
;
register
const
vector
unsigned
char
vczero
=
(
const
vector
unsigned
char
)
vec_splat_u8
(
0
);
register
const
vector
unsigned
short
vcone
=
(
const
vector
unsigned
short
)
vec_splat_u16
(
1
);
register
const
vector
unsigned
short
vctwo
=
(
const
vector
unsigned
short
)
vec_splat_u16
(
2
);
temp1
=
vec_ld
(
0
,
pixels
);
temp2
=
vec_ld
(
16
,
pixels
);
pixelsv1
=
vec_perm
(
temp1
,
temp2
,
vec_lvsl
(
0
,
pixels
));
if
((((
unsigned
long
)
pixels
)
&
0x0000000F
)
==
0x0000000F
)
{
pixelsv2
=
temp2
;
}
else
{
pixelsv2
=
vec_perm
(
temp1
,
temp2
,
vec_lvsl
(
1
,
pixels
));
}
pixelsv3
=
vec_mergel
(
vczero
,
pixelsv1
);
pixelsv4
=
vec_mergel
(
vczero
,
pixelsv2
);
pixelsv1
=
vec_mergeh
(
vczero
,
pixelsv1
);
pixelsv2
=
vec_mergeh
(
vczero
,
pixelsv2
);
pixelssum3
=
vec_add
((
vector
unsigned
short
)
pixelsv3
,
(
vector
unsigned
short
)
pixelsv4
);
pixelssum3
=
vec_add
(
pixelssum3
,
vcone
);
pixelssum1
=
vec_add
((
vector
unsigned
short
)
pixelsv1
,
(
vector
unsigned
short
)
pixelsv2
);
pixelssum1
=
vec_add
(
pixelssum1
,
vcone
);
for
(
i
=
0
;
i
<
h
;
i
++
)
{
blockv
=
vec_ld
(
0
,
block
);
temp1
=
vec_ld
(
line_size
,
pixels
);
temp2
=
vec_ld
(
line_size
+
16
,
pixels
);
pixelsv1
=
vec_perm
(
temp1
,
temp2
,
vec_lvsl
(
line_size
,
pixels
));
if
(((((
unsigned
long
)
pixels
)
+
line_size
)
&
0x0000000F
)
==
0x0000000F
)
{
pixelsv2
=
temp2
;
}
else
{
pixelsv2
=
vec_perm
(
temp1
,
temp2
,
vec_lvsl
(
line_size
+
1
,
pixels
));
}
pixelsv3
=
vec_mergel
(
vczero
,
pixelsv1
);
pixelsv4
=
vec_mergel
(
vczero
,
pixelsv2
);
pixelsv1
=
vec_mergeh
(
vczero
,
pixelsv1
);
pixelsv2
=
vec_mergeh
(
vczero
,
pixelsv2
);
pixelssum4
=
vec_add
((
vector
unsigned
short
)
pixelsv3
,
(
vector
unsigned
short
)
pixelsv4
);
pixelssum2
=
vec_add
((
vector
unsigned
short
)
pixelsv1
,
(
vector
unsigned
short
)
pixelsv2
);
temp4
=
vec_add
(
pixelssum3
,
pixelssum4
);
temp4
=
vec_sra
(
temp4
,
vctwo
);
temp3
=
vec_add
(
pixelssum1
,
pixelssum2
);
temp3
=
vec_sra
(
temp3
,
vctwo
);
pixelssum3
=
vec_add
(
pixelssum4
,
vcone
);
pixelssum1
=
vec_add
(
pixelssum2
,
vcone
);
blockv
=
vec_packsu
(
temp3
,
temp4
);
vec_st
(
blockv
,
0
,
block
);
block
+=
line_size
;
pixels
+=
line_size
;
}
}
static
int
hadamard8_diff8x8_altivec
(
/*MpegEncContext*/
void
*
s
,
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
){
static
int
hadamard8_diff8x8_altivec
(
/*MpegEncContext*/
void
*
s
,
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
){
int
sum
;
int
sum
;
register
const
vector
unsigned
char
vzero
=
register
const
vector
unsigned
char
vzero
=
...
@@ -1284,69 +936,6 @@ static int hadamard8_diff16_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, ui
...
@@ -1284,69 +936,6 @@ static int hadamard8_diff16_altivec(/*MpegEncContext*/ void *s, uint8_t *dst, ui
return
score
;
return
score
;
}
}
/* next one assumes that ((line_size % 8) == 0) */
static
void
avg_pixels8_xy2_altivec
(
uint8_t
*
block
,
const
uint8_t
*
pixels
,
ptrdiff_t
line_size
,
int
h
)
{
register
int
i
;
register
vector
unsigned
char
pixelsv1
,
pixelsv2
,
pixelsavg
;
register
vector
unsigned
char
blockv
,
temp1
,
temp2
,
blocktemp
;
register
vector
unsigned
short
pixelssum1
,
pixelssum2
,
temp3
;
register
const
vector
unsigned
char
vczero
=
(
const
vector
unsigned
char
)
vec_splat_u8
(
0
);
register
const
vector
unsigned
short
vctwo
=
(
const
vector
unsigned
short
)
vec_splat_u16
(
2
);
temp1
=
vec_ld
(
0
,
pixels
);
temp2
=
vec_ld
(
16
,
pixels
);
pixelsv1
=
vec_perm
(
temp1
,
temp2
,
vec_lvsl
(
0
,
pixels
));
if
((((
unsigned
long
)
pixels
)
&
0x0000000F
)
==
0x0000000F
)
{
pixelsv2
=
temp2
;
}
else
{
pixelsv2
=
vec_perm
(
temp1
,
temp2
,
vec_lvsl
(
1
,
pixels
));
}
pixelsv1
=
vec_mergeh
(
vczero
,
pixelsv1
);
pixelsv2
=
vec_mergeh
(
vczero
,
pixelsv2
);
pixelssum1
=
vec_add
((
vector
unsigned
short
)
pixelsv1
,
(
vector
unsigned
short
)
pixelsv2
);
pixelssum1
=
vec_add
(
pixelssum1
,
vctwo
);
for
(
i
=
0
;
i
<
h
;
i
++
)
{
int
rightside
=
((
unsigned
long
)
block
&
0x0000000F
);
blockv
=
vec_ld
(
0
,
block
);
temp1
=
vec_ld
(
line_size
,
pixels
);
temp2
=
vec_ld
(
line_size
+
16
,
pixels
);
pixelsv1
=
vec_perm
(
temp1
,
temp2
,
vec_lvsl
(
line_size
,
pixels
));
if
(((((
unsigned
long
)
pixels
)
+
line_size
)
&
0x0000000F
)
==
0x0000000F
)
{
pixelsv2
=
temp2
;
}
else
{
pixelsv2
=
vec_perm
(
temp1
,
temp2
,
vec_lvsl
(
line_size
+
1
,
pixels
));
}
pixelsv1
=
vec_mergeh
(
vczero
,
pixelsv1
);
pixelsv2
=
vec_mergeh
(
vczero
,
pixelsv2
);
pixelssum2
=
vec_add
((
vector
unsigned
short
)
pixelsv1
,
(
vector
unsigned
short
)
pixelsv2
);
temp3
=
vec_add
(
pixelssum1
,
pixelssum2
);
temp3
=
vec_sra
(
temp3
,
vctwo
);
pixelssum1
=
vec_add
(
pixelssum2
,
vctwo
);
pixelsavg
=
vec_packsu
(
temp3
,
(
vector
unsigned
short
)
vczero
);
if
(
rightside
)
{
blocktemp
=
vec_perm
(
blockv
,
pixelsavg
,
vcprm
(
0
,
1
,
s0
,
s1
));
}
else
{
blocktemp
=
vec_perm
(
blockv
,
pixelsavg
,
vcprm
(
s0
,
s1
,
2
,
3
));
}
blockv
=
vec_avg
(
blocktemp
,
blockv
);
vec_st
(
blockv
,
0
,
block
);
block
+=
line_size
;
pixels
+=
line_size
;
}
}
av_cold
void
ff_dsputil_init_altivec
(
DSPContext
*
c
,
AVCodecContext
*
avctx
)
av_cold
void
ff_dsputil_init_altivec
(
DSPContext
*
c
,
AVCodecContext
*
avctx
)
{
{
const
int
high_bit_depth
=
avctx
->
bits_per_raw_sample
>
8
;
const
int
high_bit_depth
=
avctx
->
bits_per_raw_sample
>
8
;
...
@@ -1367,16 +956,6 @@ av_cold void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx)
...
@@ -1367,16 +956,6 @@ av_cold void ff_dsputil_init_altivec(DSPContext *c, AVCodecContext *avctx)
if
(
!
high_bit_depth
)
{
if
(
!
high_bit_depth
)
{
c
->
get_pixels
=
get_pixels_altivec
;
c
->
get_pixels
=
get_pixels_altivec
;
c
->
clear_block
=
clear_block_altivec
;
c
->
clear_block
=
clear_block_altivec
;
c
->
put_pixels_tab
[
0
][
0
]
=
ff_put_pixels16_altivec
;
/* the two functions do the same thing, so use the same code */
c
->
put_no_rnd_pixels_tab
[
0
][
0
]
=
ff_put_pixels16_altivec
;
c
->
avg_pixels_tab
[
0
][
0
]
=
ff_avg_pixels16_altivec
;
c
->
avg_pixels_tab
[
1
][
0
]
=
avg_pixels8_altivec
;
c
->
avg_pixels_tab
[
1
][
3
]
=
avg_pixels8_xy2_altivec
;
c
->
put_pixels_tab
[
1
][
3
]
=
put_pixels8_xy2_altivec
;
c
->
put_no_rnd_pixels_tab
[
1
][
3
]
=
put_no_rnd_pixels8_xy2_altivec
;
c
->
put_pixels_tab
[
0
][
3
]
=
put_pixels16_xy2_altivec
;
c
->
put_no_rnd_pixels_tab
[
0
][
3
]
=
put_no_rnd_pixels16_xy2_altivec
;
}
}
c
->
hadamard8_diff
[
0
]
=
hadamard8_diff16_altivec
;
c
->
hadamard8_diff
[
0
]
=
hadamard8_diff16_altivec
;
...
...
libavcodec/ppc/hpeldsp_altivec.c
0 → 100644
View file @
47e5a981
/*
* Copyright (c) 2002 Brian Foley
* Copyright (c) 2002 Dieter Shirley
* Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "config.h"
#include "libavutil/cpu.h"
#include "libavcodec/hpeldsp.h"
#if HAVE_ALTIVEC
#if HAVE_ALTIVEC_H
#include <altivec.h>
#endif
#include "libavutil/ppc/types_altivec.h"
#include "libavutil/ppc/util_altivec.h"
#include "dsputil_altivec.h"
/* next one assumes that ((line_size % 16) == 0) */
void
ff_put_pixels16_altivec
(
uint8_t
*
block
,
const
uint8_t
*
pixels
,
ptrdiff_t
line_size
,
int
h
)
{
register
vector
unsigned
char
pixelsv1
,
pixelsv2
;
register
vector
unsigned
char
pixelsv1B
,
pixelsv2B
;
register
vector
unsigned
char
pixelsv1C
,
pixelsv2C
;
register
vector
unsigned
char
pixelsv1D
,
pixelsv2D
;
register
vector
unsigned
char
perm
=
vec_lvsl
(
0
,
pixels
);
int
i
;
register
ptrdiff_t
line_size_2
=
line_size
<<
1
;
register
ptrdiff_t
line_size_3
=
line_size
+
line_size_2
;
register
ptrdiff_t
line_size_4
=
line_size
<<
2
;
// hand-unrolling the loop by 4 gains about 15%
// mininum execution time goes from 74 to 60 cycles
// it's faster than -funroll-loops, but using
// -funroll-loops w/ this is bad - 74 cycles again.
// all this is on a 7450, tuning for the 7450
for
(
i
=
0
;
i
<
h
;
i
+=
4
)
{
pixelsv1
=
vec_ld
(
0
,
pixels
);
pixelsv2
=
vec_ld
(
15
,
pixels
);
pixelsv1B
=
vec_ld
(
line_size
,
pixels
);
pixelsv2B
=
vec_ld
(
15
+
line_size
,
pixels
);
pixelsv1C
=
vec_ld
(
line_size_2
,
pixels
);
pixelsv2C
=
vec_ld
(
15
+
line_size_2
,
pixels
);
pixelsv1D
=
vec_ld
(
line_size_3
,
pixels
);
pixelsv2D
=
vec_ld
(
15
+
line_size_3
,
pixels
);
vec_st
(
vec_perm
(
pixelsv1
,
pixelsv2
,
perm
),
0
,
(
unsigned
char
*
)
block
);
vec_st
(
vec_perm
(
pixelsv1B
,
pixelsv2B
,
perm
),
line_size
,
(
unsigned
char
*
)
block
);
vec_st
(
vec_perm
(
pixelsv1C
,
pixelsv2C
,
perm
),
line_size_2
,
(
unsigned
char
*
)
block
);
vec_st
(
vec_perm
(
pixelsv1D
,
pixelsv2D
,
perm
),
line_size_3
,
(
unsigned
char
*
)
block
);
pixels
+=
line_size_4
;
block
+=
line_size_4
;
}
}
/* next one assumes that ((line_size % 16) == 0) */
#define op_avg(a,b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) )
void
ff_avg_pixels16_altivec
(
uint8_t
*
block
,
const
uint8_t
*
pixels
,
ptrdiff_t
line_size
,
int
h
)
{
register
vector
unsigned
char
pixelsv1
,
pixelsv2
,
pixelsv
,
blockv
;
register
vector
unsigned
char
perm
=
vec_lvsl
(
0
,
pixels
);
int
i
;
for
(
i
=
0
;
i
<
h
;
i
++
)
{
pixelsv1
=
vec_ld
(
0
,
pixels
);
pixelsv2
=
vec_ld
(
16
,
pixels
);
blockv
=
vec_ld
(
0
,
block
);
pixelsv
=
vec_perm
(
pixelsv1
,
pixelsv2
,
perm
);
blockv
=
vec_avg
(
blockv
,
pixelsv
);
vec_st
(
blockv
,
0
,
(
unsigned
char
*
)
block
);
pixels
+=
line_size
;
block
+=
line_size
;
}
}
/* next one assumes that ((line_size % 8) == 0) */
static
void
avg_pixels8_altivec
(
uint8_t
*
block
,
const
uint8_t
*
pixels
,
ptrdiff_t
line_size
,
int
h
)
{
register
vector
unsigned
char
pixelsv1
,
pixelsv2
,
pixelsv
,
blockv
;
int
i
;
for
(
i
=
0
;
i
<
h
;
i
++
)
{
/* block is 8 bytes-aligned, so we're either in the
left block (16 bytes-aligned) or in the right block (not) */
int
rightside
=
((
unsigned
long
)
block
&
0x0000000F
);
blockv
=
vec_ld
(
0
,
block
);
pixelsv1
=
vec_ld
(
0
,
pixels
);
pixelsv2
=
vec_ld
(
16
,
pixels
);
pixelsv
=
vec_perm
(
pixelsv1
,
pixelsv2
,
vec_lvsl
(
0
,
pixels
));
if
(
rightside
)
{
pixelsv
=
vec_perm
(
blockv
,
pixelsv
,
vcprm
(
0
,
1
,
s0
,
s1
));
}
else
{
pixelsv
=
vec_perm
(
blockv
,
pixelsv
,
vcprm
(
s0
,
s1
,
2
,
3
));
}
blockv
=
vec_avg
(
blockv
,
pixelsv
);
vec_st
(
blockv
,
0
,
block
);
pixels
+=
line_size
;
block
+=
line_size
;
}
}
/* next one assumes that ((line_size % 8) == 0) */
static
void
put_pixels8_xy2_altivec
(
uint8_t
*
block
,
const
uint8_t
*
pixels
,
ptrdiff_t
line_size
,
int
h
)
{
register
int
i
;
register
vector
unsigned
char
pixelsv1
,
pixelsv2
,
pixelsavg
;
register
vector
unsigned
char
blockv
,
temp1
,
temp2
;
register
vector
unsigned
short
pixelssum1
,
pixelssum2
,
temp3
;
register
const
vector
unsigned
char
vczero
=
(
const
vector
unsigned
char
)
vec_splat_u8
(
0
);
register
const
vector
unsigned
short
vctwo
=
(
const
vector
unsigned
short
)
vec_splat_u16
(
2
);
temp1
=
vec_ld
(
0
,
pixels
);
temp2
=
vec_ld
(
16
,
pixels
);
pixelsv1
=
vec_perm
(
temp1
,
temp2
,
vec_lvsl
(
0
,
pixels
));
if
((((
unsigned
long
)
pixels
)
&
0x0000000F
)
==
0x0000000F
)
{
pixelsv2
=
temp2
;
}
else
{
pixelsv2
=
vec_perm
(
temp1
,
temp2
,
vec_lvsl
(
1
,
pixels
));
}
pixelsv1
=
vec_mergeh
(
vczero
,
pixelsv1
);
pixelsv2
=
vec_mergeh
(
vczero
,
pixelsv2
);
pixelssum1
=
vec_add
((
vector
unsigned
short
)
pixelsv1
,
(
vector
unsigned
short
)
pixelsv2
);
pixelssum1
=
vec_add
(
pixelssum1
,
vctwo
);
for
(
i
=
0
;
i
<
h
;
i
++
)
{
int
rightside
=
((
unsigned
long
)
block
&
0x0000000F
);
blockv
=
vec_ld
(
0
,
block
);
temp1
=
vec_ld
(
line_size
,
pixels
);
temp2
=
vec_ld
(
line_size
+
16
,
pixels
);
pixelsv1
=
vec_perm
(
temp1
,
temp2
,
vec_lvsl
(
line_size
,
pixels
));
if
(((((
unsigned
long
)
pixels
)
+
line_size
)
&
0x0000000F
)
==
0x0000000F
)
{
pixelsv2
=
temp2
;
}
else
{
pixelsv2
=
vec_perm
(
temp1
,
temp2
,
vec_lvsl
(
line_size
+
1
,
pixels
));
}
pixelsv1
=
vec_mergeh
(
vczero
,
pixelsv1
);
pixelsv2
=
vec_mergeh
(
vczero
,
pixelsv2
);
pixelssum2
=
vec_add
((
vector
unsigned
short
)
pixelsv1
,
(
vector
unsigned
short
)
pixelsv2
);
temp3
=
vec_add
(
pixelssum1
,
pixelssum2
);
temp3
=
vec_sra
(
temp3
,
vctwo
);
pixelssum1
=
vec_add
(
pixelssum2
,
vctwo
);
pixelsavg
=
vec_packsu
(
temp3
,
(
vector
unsigned
short
)
vczero
);
if
(
rightside
)
{
blockv
=
vec_perm
(
blockv
,
pixelsavg
,
vcprm
(
0
,
1
,
s0
,
s1
));
}
else
{
blockv
=
vec_perm
(
blockv
,
pixelsavg
,
vcprm
(
s0
,
s1
,
2
,
3
));
}
vec_st
(
blockv
,
0
,
block
);
block
+=
line_size
;
pixels
+=
line_size
;
}
}
/* next one assumes that ((line_size % 8) == 0) */
static
void
put_no_rnd_pixels8_xy2_altivec
(
uint8_t
*
block
,
const
uint8_t
*
pixels
,
ptrdiff_t
line_size
,
int
h
)
{
register
int
i
;
register
vector
unsigned
char
pixelsv1
,
pixelsv2
,
pixelsavg
;
register
vector
unsigned
char
blockv
,
temp1
,
temp2
;
register
vector
unsigned
short
pixelssum1
,
pixelssum2
,
temp3
;
register
const
vector
unsigned
char
vczero
=
(
const
vector
unsigned
char
)
vec_splat_u8
(
0
);
register
const
vector
unsigned
short
vcone
=
(
const
vector
unsigned
short
)
vec_splat_u16
(
1
);
register
const
vector
unsigned
short
vctwo
=
(
const
vector
unsigned
short
)
vec_splat_u16
(
2
);
temp1
=
vec_ld
(
0
,
pixels
);
temp2
=
vec_ld
(
16
,
pixels
);
pixelsv1
=
vec_perm
(
temp1
,
temp2
,
vec_lvsl
(
0
,
pixels
));
if
((((
unsigned
long
)
pixels
)
&
0x0000000F
)
==
0x0000000F
)
{
pixelsv2
=
temp2
;
}
else
{
pixelsv2
=
vec_perm
(
temp1
,
temp2
,
vec_lvsl
(
1
,
pixels
));
}
pixelsv1
=
vec_mergeh
(
vczero
,
pixelsv1
);
pixelsv2
=
vec_mergeh
(
vczero
,
pixelsv2
);
pixelssum1
=
vec_add
((
vector
unsigned
short
)
pixelsv1
,
(
vector
unsigned
short
)
pixelsv2
);
pixelssum1
=
vec_add
(
pixelssum1
,
vcone
);
for
(
i
=
0
;
i
<
h
;
i
++
)
{
int
rightside
=
((
unsigned
long
)
block
&
0x0000000F
);
blockv
=
vec_ld
(
0
,
block
);
temp1
=
vec_ld
(
line_size
,
pixels
);
temp2
=
vec_ld
(
line_size
+
16
,
pixels
);
pixelsv1
=
vec_perm
(
temp1
,
temp2
,
vec_lvsl
(
line_size
,
pixels
));
if
(((((
unsigned
long
)
pixels
)
+
line_size
)
&
0x0000000F
)
==
0x0000000F
)
{
pixelsv2
=
temp2
;
}
else
{
pixelsv2
=
vec_perm
(
temp1
,
temp2
,
vec_lvsl
(
line_size
+
1
,
pixels
));
}
pixelsv1
=
vec_mergeh
(
vczero
,
pixelsv1
);
pixelsv2
=
vec_mergeh
(
vczero
,
pixelsv2
);
pixelssum2
=
vec_add
((
vector
unsigned
short
)
pixelsv1
,
(
vector
unsigned
short
)
pixelsv2
);
temp3
=
vec_add
(
pixelssum1
,
pixelssum2
);
temp3
=
vec_sra
(
temp3
,
vctwo
);
pixelssum1
=
vec_add
(
pixelssum2
,
vcone
);
pixelsavg
=
vec_packsu
(
temp3
,
(
vector
unsigned
short
)
vczero
);
if
(
rightside
)
{
blockv
=
vec_perm
(
blockv
,
pixelsavg
,
vcprm
(
0
,
1
,
s0
,
s1
));
}
else
{
blockv
=
vec_perm
(
blockv
,
pixelsavg
,
vcprm
(
s0
,
s1
,
2
,
3
));
}
vec_st
(
blockv
,
0
,
block
);
block
+=
line_size
;
pixels
+=
line_size
;
}
}
/* next one assumes that ((line_size % 16) == 0) */
static
void
put_pixels16_xy2_altivec
(
uint8_t
*
block
,
const
uint8_t
*
pixels
,
ptrdiff_t
line_size
,
int
h
)
{
register
int
i
;
register
vector
unsigned
char
pixelsv1
,
pixelsv2
,
pixelsv3
,
pixelsv4
;
register
vector
unsigned
char
blockv
,
temp1
,
temp2
;
register
vector
unsigned
short
temp3
,
temp4
,
pixelssum1
,
pixelssum2
,
pixelssum3
,
pixelssum4
;
register
const
vector
unsigned
char
vczero
=
(
const
vector
unsigned
char
)
vec_splat_u8
(
0
);
register
const
vector
unsigned
short
vctwo
=
(
const
vector
unsigned
short
)
vec_splat_u16
(
2
);
temp1
=
vec_ld
(
0
,
pixels
);
temp2
=
vec_ld
(
16
,
pixels
);
pixelsv1
=
vec_perm
(
temp1
,
temp2
,
vec_lvsl
(
0
,
pixels
));
if
((((
unsigned
long
)
pixels
)
&
0x0000000F
)
==
0x0000000F
)
{
pixelsv2
=
temp2
;
}
else
{
pixelsv2
=
vec_perm
(
temp1
,
temp2
,
vec_lvsl
(
1
,
pixels
));
}
pixelsv3
=
vec_mergel
(
vczero
,
pixelsv1
);
pixelsv4
=
vec_mergel
(
vczero
,
pixelsv2
);
pixelsv1
=
vec_mergeh
(
vczero
,
pixelsv1
);
pixelsv2
=
vec_mergeh
(
vczero
,
pixelsv2
);
pixelssum3
=
vec_add
((
vector
unsigned
short
)
pixelsv3
,
(
vector
unsigned
short
)
pixelsv4
);
pixelssum3
=
vec_add
(
pixelssum3
,
vctwo
);
pixelssum1
=
vec_add
((
vector
unsigned
short
)
pixelsv1
,
(
vector
unsigned
short
)
pixelsv2
);
pixelssum1
=
vec_add
(
pixelssum1
,
vctwo
);
for
(
i
=
0
;
i
<
h
;
i
++
)
{
blockv
=
vec_ld
(
0
,
block
);
temp1
=
vec_ld
(
line_size
,
pixels
);
temp2
=
vec_ld
(
line_size
+
16
,
pixels
);
pixelsv1
=
vec_perm
(
temp1
,
temp2
,
vec_lvsl
(
line_size
,
pixels
));
if
(((((
unsigned
long
)
pixels
)
+
line_size
)
&
0x0000000F
)
==
0x0000000F
)
{
pixelsv2
=
temp2
;
}
else
{
pixelsv2
=
vec_perm
(
temp1
,
temp2
,
vec_lvsl
(
line_size
+
1
,
pixels
));
}
pixelsv3
=
vec_mergel
(
vczero
,
pixelsv1
);
pixelsv4
=
vec_mergel
(
vczero
,
pixelsv2
);
pixelsv1
=
vec_mergeh
(
vczero
,
pixelsv1
);
pixelsv2
=
vec_mergeh
(
vczero
,
pixelsv2
);
pixelssum4
=
vec_add
((
vector
unsigned
short
)
pixelsv3
,
(
vector
unsigned
short
)
pixelsv4
);
pixelssum2
=
vec_add
((
vector
unsigned
short
)
pixelsv1
,
(
vector
unsigned
short
)
pixelsv2
);
temp4
=
vec_add
(
pixelssum3
,
pixelssum4
);
temp4
=
vec_sra
(
temp4
,
vctwo
);
temp3
=
vec_add
(
pixelssum1
,
pixelssum2
);
temp3
=
vec_sra
(
temp3
,
vctwo
);
pixelssum3
=
vec_add
(
pixelssum4
,
vctwo
);
pixelssum1
=
vec_add
(
pixelssum2
,
vctwo
);
blockv
=
vec_packsu
(
temp3
,
temp4
);
vec_st
(
blockv
,
0
,
block
);
block
+=
line_size
;
pixels
+=
line_size
;
}
}
/* next one assumes that ((line_size % 16) == 0) */
/**
 * 16-pixel-wide diagonal (x+1/2, y+1/2) half-pel "put" without rounding
 * bias, AltiVec version.
 *
 * For each of the h output rows, every output byte is
 *     (p[x] + p[x + 1] + p[x + line_size] + p[x + line_size + 1] + 1) >> 2
 * where p points at the current source row.  The horizontal pair sum of a
 * source row is computed once and carried over to the next iteration, so
 * each source row is loaded and widened only one time.
 *
 * @param block     destination; written with vec_st, which stores a whole
 *                  quadword at the 16-byte-aligned address, so block (and
 *                  line_size) are expected to be multiples of 16
 * @param pixels    source, may be unaligned; h + 1 rows of 17 bytes are used
 * @param line_size distance in bytes between rows of both block and pixels
 * @param h         number of rows to produce
 */
static void put_no_rnd_pixels16_xy2_altivec(uint8_t *block,
                                            const uint8_t *pixels,
                                            ptrdiff_t line_size, int h)
{
    register int i;
    register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4;
    register vector unsigned char blockv, temp1, temp2;
    register vector unsigned short temp3, temp4,
                                   pixelssum1, pixelssum2,
                                   pixelssum3, pixelssum4;
    register const vector unsigned char vczero =
        (const vector unsigned char) vec_splat_u8(0);
    register const vector unsigned short vcone =
        (const vector unsigned short) vec_splat_u16(1);
    register const vector unsigned short vctwo =
        (const vector unsigned short) vec_splat_u16(2);

    /* Row 0: build the vectors starting at pixels and at pixels + 1 from
     * the two aligned quadwords that cover them. */
    temp1    = vec_ld(0, pixels);
    temp2    = vec_ld(16, pixels);
    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
    if ((((unsigned long) pixels) & 0x0000000F) == 0x0000000F) {
        /* pixels + 1 is 16-byte aligned: vec_lvsl(1, pixels) would yield an
         * identity permute selecting temp1, so take temp2 directly. */
        pixelsv2 = temp2;
    } else {
        pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
    }

    /* Widen bytes to 16-bit lanes by interleaving with zero: mergel yields
     * the second eight pixels (sums 3/4), mergeh the first eight (1/2). */
    pixelsv3 = vec_mergel(vczero, pixelsv1);
    pixelsv4 = vec_mergel(vczero, pixelsv2);
    pixelsv1 = vec_mergeh(vczero, pixelsv1);
    pixelsv2 = vec_mergeh(vczero, pixelsv2);

    /* Horizontal pair sums of row 0, with the +1 "no round" bias folded in. */
    pixelssum3 = vec_add((vector unsigned short) pixelsv3,
                         (vector unsigned short) pixelsv4);
    pixelssum3 = vec_add(pixelssum3, vcone);
    pixelssum1 = vec_add((vector unsigned short) pixelsv1,
                         (vector unsigned short) pixelsv2);
    pixelssum1 = vec_add(pixelssum1, vcone);

    for (i = 0; i < h; i++) {
        /* The destination is fully overwritten below, so it is not loaded. */

        /* Load the next source row (current row + line_size), unaligned. */
        temp1    = vec_ld(line_size, pixels);
        temp2    = vec_ld(line_size + 16, pixels);
        pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
        if (((((unsigned long) pixels) + line_size) & 0x0000000F) == 0x0000000F) {
            pixelsv2 = temp2;
        } else {
            pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
        }

        pixelsv3 = vec_mergel(vczero, pixelsv1);
        pixelsv4 = vec_mergel(vczero, pixelsv2);
        pixelsv1 = vec_mergeh(vczero, pixelsv1);
        pixelsv2 = vec_mergeh(vczero, pixelsv2);

        /* Horizontal pair sums of the next row (no bias yet). */
        pixelssum4 = vec_add((vector unsigned short) pixelsv3,
                             (vector unsigned short) pixelsv4);
        pixelssum2 = vec_add((vector unsigned short) pixelsv1,
                             (vector unsigned short) pixelsv2);

        /* (previous row sum + 1 + next row sum) >> 2 */
        temp4 = vec_add(pixelssum3, pixelssum4);
        temp4 = vec_sra(temp4, vctwo);
        temp3 = vec_add(pixelssum1, pixelssum2);
        temp3 = vec_sra(temp3, vctwo);

        /* The next row becomes the "previous" row of the next iteration. */
        pixelssum3 = vec_add(pixelssum4, vcone);
        pixelssum1 = vec_add(pixelssum2, vcone);

        /* Narrow back to 16 bytes and store the finished row. */
        blockv = vec_packsu(temp3, temp4);
        vec_st(blockv, 0, block);

        block  += line_size;
        pixels += line_size;
    }
}
/* next one assumes that ((line_size % 8) == 0) */
static
void
avg_pixels8_xy2_altivec
(
uint8_t
*
block
,
const
uint8_t
*
pixels
,
ptrdiff_t
line_size
,
int
h
)
{
register
int
i
;
register
vector
unsigned
char
pixelsv1
,
pixelsv2
,
pixelsavg
;
register
vector
unsigned
char
blockv
,
temp1
,
temp2
,
blocktemp
;
register
vector
unsigned
short
pixelssum1
,
pixelssum2
,
temp3
;
register
const
vector
unsigned
char
vczero
=
(
const
vector
unsigned
char
)
vec_splat_u8
(
0
);
register
const
vector
unsigned
short
vctwo
=
(
const
vector
unsigned
short
)
vec_splat_u16
(
2
);
temp1
=
vec_ld
(
0
,
pixels
);
temp2
=
vec_ld
(
16
,
pixels
);
pixelsv1
=
vec_perm
(
temp1
,
temp2
,
vec_lvsl
(
0
,
pixels
));
if
((((
unsigned
long
)
pixels
)
&
0x0000000F
)
==
0x0000000F
)
{
pixelsv2
=
temp2
;
}
else
{
pixelsv2
=
vec_perm
(
temp1
,
temp2
,
vec_lvsl
(
1
,
pixels
));
}
pixelsv1
=
vec_mergeh
(
vczero
,
pixelsv1
);
pixelsv2
=
vec_mergeh
(
vczero
,
pixelsv2
);
pixelssum1
=
vec_add
((
vector
unsigned
short
)
pixelsv1
,
(
vector
unsigned
short
)
pixelsv2
);
pixelssum1
=
vec_add
(
pixelssum1
,
vctwo
);
for
(
i
=
0
;
i
<
h
;
i
++
)
{
int
rightside
=
((
unsigned
long
)
block
&
0x0000000F
);
blockv
=
vec_ld
(
0
,
block
);
temp1
=
vec_ld
(
line_size
,
pixels
);
temp2
=
vec_ld
(
line_size
+
16
,
pixels
);
pixelsv1
=
vec_perm
(
temp1
,
temp2
,
vec_lvsl
(
line_size
,
pixels
));
if
(((((
unsigned
long
)
pixels
)
+
line_size
)
&
0x0000000F
)
==
0x0000000F
)
{
pixelsv2
=
temp2
;
}
else
{
pixelsv2
=
vec_perm
(
temp1
,
temp2
,
vec_lvsl
(
line_size
+
1
,
pixels
));
}
pixelsv1
=
vec_mergeh
(
vczero
,
pixelsv1
);
pixelsv2
=
vec_mergeh
(
vczero
,
pixelsv2
);
pixelssum2
=
vec_add
((
vector
unsigned
short
)
pixelsv1
,
(
vector
unsigned
short
)
pixelsv2
);
temp3
=
vec_add
(
pixelssum1
,
pixelssum2
);
temp3
=
vec_sra
(
temp3
,
vctwo
);
pixelssum1
=
vec_add
(
pixelssum2
,
vctwo
);
pixelsavg
=
vec_packsu
(
temp3
,
(
vector
unsigned
short
)
vczero
);
if
(
rightside
)
{
blocktemp
=
vec_perm
(
blockv
,
pixelsavg
,
vcprm
(
0
,
1
,
s0
,
s1
));
}
else
{
blocktemp
=
vec_perm
(
blockv
,
pixelsavg
,
vcprm
(
s0
,
s1
,
2
,
3
));
}
blockv
=
vec_avg
(
blocktemp
,
blockv
);
vec_st
(
blockv
,
0
,
block
);
block
+=
line_size
;
pixels
+=
line_size
;
}
}
#endif
/* HAVE_ALTIVEC */
/**
 * Install the PowerPC half-pel routines into an HpelDSPContext.
 *
 * Nothing is changed unless the build has HAVE_ALTIVEC set and
 * av_get_cpu_flags() reports AV_CPU_FLAG_ALTIVEC at run time; in that case
 * the table entries listed below are replaced by their AltiVec versions and
 * all other entries are left untouched.
 *
 * @param c     context whose function tables are updated
 * @param flags not used by this function
 */
av_cold void ff_hpeldsp_init_ppc(HpelDSPContext *c, int flags)
{
#if HAVE_ALTIVEC
    /* Bail out early when the running CPU has no AltiVec unit. */
    if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
        return;

    /* put */
    c->put_pixels_tab[0][0]        = ff_put_pixels16_altivec;
    c->put_pixels_tab[0][3]        = put_pixels16_xy2_altivec;
    c->put_pixels_tab[1][3]        = put_pixels8_xy2_altivec;

    /* put, no rounding ([0][0] shares the plain put routine) */
    c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_altivec;
    c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec;
    c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec;

    /* avg */
    c->avg_pixels_tab[0][0]        = ff_avg_pixels16_altivec;
    c->avg_pixels_tab[1][0]        = avg_pixels8_altivec;
    c->avg_pixels_tab[1][3]        = avg_pixels8_xy2_altivec;
#endif /* HAVE_ALTIVEC */
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment