Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
659d4ba5
Commit
659d4ba5
authored
Feb 06, 2013
by
Daniel Kang
Committed by
Luca Barbato
Feb 06, 2013
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
dsputil: x86: Convert h263 loop filter to yasm
Signed-off-by:
Luca Barbato
<
lu_zero@gentoo.org
>
parent
12b54a1f
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
171 additions
and
177 deletions
+171
-177
dsputil.asm
libavcodec/x86/dsputil.asm
+163
-0
dsputil_mmx.c
libavcodec/x86/dsputil_mmx.c
+8
-177
No files found.
libavcodec/x86/dsputil.asm
View file @
659d4ba5
...
...
@@ -22,6 +22,8 @@
%include
"libavutil/x86/x86util.asm"
SECTION_RODATA
cextern
pb_FC
cextern
h263_loop_filter_strength
pb_f
:
times
16
db
15
pb_zzzzzzzz77777777
:
times
8
db
-
1
pb_7
:
times
8
db
7
...
...
@@ -648,3 +650,164 @@ BSWAP32_BUF
INIT_XMM
ssse3
BSWAP32_BUF
%macro
H263_LOOP_FILTER
5
pxor
m7
,
m7
mova
m0
,
[
%1
]
mova
m1
,
[
%1
]
mova
m2
,
[
%4
]
mova
m3
,
[
%4
]
punpcklbw
m0
,
m7
punpckhbw
m1
,
m7
punpcklbw
m2
,
m7
punpckhbw
m3
,
m7
psubw
m0
,
m2
psubw
m1
,
m3
mova
m2
,
[
%2
]
mova
m3
,
[
%2
]
mova
m4
,
[
%3
]
mova
m5
,
[
%3
]
punpcklbw
m2
,
m7
punpckhbw
m3
,
m7
punpcklbw
m4
,
m7
punpckhbw
m5
,
m7
psubw
m4
,
m2
psubw
m5
,
m3
psllw
m4
,
2
psllw
m5
,
2
paddw
m4
,
m0
paddw
m5
,
m1
pxor
m6
,
m6
pcmpgtw
m6
,
m4
pcmpgtw
m7
,
m5
pxor
m4
,
m6
pxor
m5
,
m7
psubw
m4
,
m6
psubw
m5
,
m7
psrlw
m4
,
3
psrlw
m5
,
3
packuswb
m4
,
m5
packsswb
m6
,
m7
pxor
m7
,
m7
movd
m2
,
%5
punpcklbw
m2
,
m2
punpcklbw
m2
,
m2
punpcklbw
m2
,
m2
psubusb
m2
,
m4
mova
m3
,
m2
psubusb
m3
,
m4
psubb
m2
,
m3
mova
m3
,
[
%2
]
mova
m4
,
[
%3
]
pxor
m3
,
m6
pxor
m4
,
m6
paddusb
m3
,
m2
psubusb
m4
,
m2
pxor
m3
,
m6
pxor
m4
,
m6
paddusb
m2
,
m2
packsswb
m0
,
m1
pcmpgtb
m7
,
m0
pxor
m0
,
m7
psubb
m0
,
m7
mova
m1
,
m0
psubusb
m0
,
m2
psubb
m1
,
m0
pand
m1
,
[
pb_FC
]
psrlw
m1
,
2
pxor
m1
,
m7
psubb
m1
,
m7
mova
m5
,
[
%1
]
mova
m6
,
[
%4
]
psubb
m5
,
m1
paddb
m6
,
m1
%endmacro
INIT_MMX
mmx
; void h263_v_loop_filter(uint8_t *src, int stride, int qscale)
cglobal
h263_v_loop_filter
,
3
,
5
movsxdifnidn
r1
,
r1d
movsxdifnidn
r2
,
r2d
lea
r4
,
[
ff_h263_loop_filter_strength
]
movzx
r3d
,
BYTE
[
r4
+
r2
]
movsx
r2
,
r3b
shl
r2
,
1
mov
r3
,
r0
sub
r3
,
r1
mov
r4
,
r3
sub
r4
,
r1
H263_LOOP_FILTER
r4
,
r3
,
r0
,
r0
+
r1
,
r2d
mova
[r3],
m3
mova
[r0],
m4
mova
[r4],
m5
mova
[
r0
+
r1
]
,
m6
RET
%macro
TRANSPOSE4X4
2
movd
m0
,
[
%1
]
movd
m1
,
[
%1
+
r1
]
movd
m2
,
[
%1
+
r1
*
2
]
movd
m3
,
[
%1
+
r3
]
punpcklbw
m0
,
m1
punpcklbw
m2
,
m3
mova
m1
,
m0
punpcklwd
m0
,
m2
punpckhwd
m1
,
m2
movd
[
%2
+
0
]
,
m0
punpckhdq
m0
,
m0
movd
[
%2
+
8
]
,
m0
movd
[
%2
+
16
]
,
m1
punpckhdq
m1
,
m1
movd
[
%2
+
24
]
,
m1
%endmacro
; void h263_h_loop_filter(uint8_t *src, int stride, int qscale)
INIT_MMX
mmx
cglobal
h263_h_loop_filter
,
3
,
5
,
0
,
32
movsxdifnidn
r1
,
r1d
movsxdifnidn
r2
,
r2d
lea
r4
,
[
ff_h263_loop_filter_strength
]
movzx
r3d
,
BYTE
[
r4
+
r2
]
movsx
r2
,
r3b
shl
r2
,
1
sub
r0
,
2
lea
r3
,
[
r1
*
3
]
TRANSPOSE4X4
r0
,
rsp
lea
r4
,
[
r0
+
r1
*
4
]
TRANSPOSE4X4
r4
,
rsp
+
4
H263_LOOP_FILTER
rsp
,
rsp
+
8
,
rsp
+
16
,
rsp
+
24
,
r2d
mova
m1
,
m5
mova
m0
,
m4
punpcklbw
m5
,
m3
punpcklbw
m4
,
m6
punpckhbw
m1
,
m3
punpckhbw
m0
,
m6
mova
m3
,
m5
mova
m6
,
m1
punpcklwd
m5
,
m4
punpcklwd
m1
,
m0
punpckhwd
m3
,
m4
punpckhwd
m6
,
m0
movd
[r0],
m5
punpckhdq
m5
,
m5
movd
[
r0
+
r1
*
1
]
,
m5
movd
[
r0
+
r1
*
2
]
,
m3
punpckhdq
m3
,
m3
movd
[
r0
+
r3
]
,
m3
movd
[r4],
m1
punpckhdq
m1
,
m1
movd
[
r4
+
r1
*
1
]
,
m1
movd
[
r4
+
r1
*
2
]
,
m6
punpckhdq
m6
,
m6
movd
[
r4
+
r3
]
,
m6
RET
libavcodec/x86/dsputil_mmx.c
View file @
659d4ba5
...
...
@@ -651,181 +651,12 @@ static void add_hfyu_median_prediction_cmov(uint8_t *dst, const uint8_t *top,
*
left_top
=
tl
;
}
#endif
#endif
/* HAVE_INLINE_ASM */
static
inline
void
transpose4x4
(
uint8_t
*
dst
,
uint8_t
*
src
,
x86_reg
dst_stride
,
x86_reg
src_stride
){
__asm__
volatile
(
//FIXME could save 1 instruction if done as 8x4 ...
"movd (%1), %%mm0
\n\t
"
"add %3, %1
\n\t
"
"movd (%1), %%mm1
\n\t
"
"movd (%1,%3,1), %%mm2
\n\t
"
"movd (%1,%3,2), %%mm3
\n\t
"
"punpcklbw %%mm1, %%mm0
\n\t
"
"punpcklbw %%mm3, %%mm2
\n\t
"
"movq %%mm0, %%mm1
\n\t
"
"punpcklwd %%mm2, %%mm0
\n\t
"
"punpckhwd %%mm2, %%mm1
\n\t
"
"movd %%mm0, (%0)
\n\t
"
"add %2, %0
\n\t
"
"punpckhdq %%mm0, %%mm0
\n\t
"
"movd %%mm0, (%0)
\n\t
"
"movd %%mm1, (%0,%2,1)
\n\t
"
"punpckhdq %%mm1, %%mm1
\n\t
"
"movd %%mm1, (%0,%2,2)
\n\t
"
:
"+&r"
(
dst
),
"+&r"
(
src
)
:
"r"
(
dst_stride
),
"r"
(
src_stride
)
:
"memory"
);
}
#define H263_LOOP_FILTER \
"pxor %%mm7, %%mm7 \n\t" \
"movq %0, %%mm0 \n\t" \
"movq %0, %%mm1 \n\t" \
"movq %3, %%mm2 \n\t" \
"movq %3, %%mm3 \n\t" \
"punpcklbw %%mm7, %%mm0 \n\t" \
"punpckhbw %%mm7, %%mm1 \n\t" \
"punpcklbw %%mm7, %%mm2 \n\t" \
"punpckhbw %%mm7, %%mm3 \n\t" \
"psubw %%mm2, %%mm0 \n\t" \
"psubw %%mm3, %%mm1 \n\t" \
"movq %1, %%mm2 \n\t" \
"movq %1, %%mm3 \n\t" \
"movq %2, %%mm4 \n\t" \
"movq %2, %%mm5 \n\t" \
"punpcklbw %%mm7, %%mm2 \n\t" \
"punpckhbw %%mm7, %%mm3 \n\t" \
"punpcklbw %%mm7, %%mm4 \n\t" \
"punpckhbw %%mm7, %%mm5 \n\t" \
"psubw %%mm2, %%mm4 \n\t" \
"psubw %%mm3, %%mm5 \n\t" \
"psllw $2, %%mm4 \n\t" \
"psllw $2, %%mm5 \n\t" \
"paddw %%mm0, %%mm4 \n\t" \
"paddw %%mm1, %%mm5 \n\t" \
"pxor %%mm6, %%mm6 \n\t" \
"pcmpgtw %%mm4, %%mm6 \n\t" \
"pcmpgtw %%mm5, %%mm7 \n\t" \
"pxor %%mm6, %%mm4 \n\t" \
"pxor %%mm7, %%mm5 \n\t" \
"psubw %%mm6, %%mm4 \n\t" \
"psubw %%mm7, %%mm5 \n\t" \
"psrlw $3, %%mm4 \n\t" \
"psrlw $3, %%mm5 \n\t" \
"packuswb %%mm5, %%mm4 \n\t" \
"packsswb %%mm7, %%mm6 \n\t" \
"pxor %%mm7, %%mm7 \n\t" \
"movd %4, %%mm2 \n\t" \
"punpcklbw %%mm2, %%mm2 \n\t" \
"punpcklbw %%mm2, %%mm2 \n\t" \
"punpcklbw %%mm2, %%mm2 \n\t" \
"psubusb %%mm4, %%mm2 \n\t" \
"movq %%mm2, %%mm3 \n\t" \
"psubusb %%mm4, %%mm3 \n\t" \
"psubb %%mm3, %%mm2 \n\t" \
"movq %1, %%mm3 \n\t" \
"movq %2, %%mm4 \n\t" \
"pxor %%mm6, %%mm3 \n\t" \
"pxor %%mm6, %%mm4 \n\t" \
"paddusb %%mm2, %%mm3 \n\t" \
"psubusb %%mm2, %%mm4 \n\t" \
"pxor %%mm6, %%mm3 \n\t" \
"pxor %%mm6, %%mm4 \n\t" \
"paddusb %%mm2, %%mm2 \n\t" \
"packsswb %%mm1, %%mm0 \n\t" \
"pcmpgtb %%mm0, %%mm7 \n\t" \
"pxor %%mm7, %%mm0 \n\t" \
"psubb %%mm7, %%mm0 \n\t" \
"movq %%mm0, %%mm1 \n\t" \
"psubusb %%mm2, %%mm0 \n\t" \
"psubb %%mm0, %%mm1 \n\t" \
"pand %5, %%mm1 \n\t" \
"psrlw $2, %%mm1 \n\t" \
"pxor %%mm7, %%mm1 \n\t" \
"psubb %%mm7, %%mm1 \n\t" \
"movq %0, %%mm5 \n\t" \
"movq %3, %%mm6 \n\t" \
"psubb %%mm1, %%mm5 \n\t" \
"paddb %%mm1, %%mm6 \n\t"
static
void
h263_v_loop_filter_mmx
(
uint8_t
*
src
,
int
stride
,
int
qscale
)
{
if
(
CONFIG_H263_DECODER
||
CONFIG_H263_ENCODER
)
{
const
int
strength
=
ff_h263_loop_filter_strength
[
qscale
];
__asm__
volatile
(
H263_LOOP_FILTER
"movq %%mm3, %1
\n\t
"
"movq %%mm4, %2
\n\t
"
"movq %%mm5, %0
\n\t
"
"movq %%mm6, %3
\n\t
"
:
"+m"
(
*
(
uint64_t
*
)(
src
-
2
*
stride
)),
"+m"
(
*
(
uint64_t
*
)(
src
-
1
*
stride
)),
"+m"
(
*
(
uint64_t
*
)(
src
+
0
*
stride
)),
"+m"
(
*
(
uint64_t
*
)(
src
+
1
*
stride
))
:
"g"
(
2
*
strength
),
"m"
(
ff_pb_FC
)
);
}
}
static
void
h263_h_loop_filter_mmx
(
uint8_t
*
src
,
int
stride
,
int
qscale
)
{
if
(
CONFIG_H263_DECODER
||
CONFIG_H263_ENCODER
)
{
const
int
strength
=
ff_h263_loop_filter_strength
[
qscale
];
DECLARE_ALIGNED
(
8
,
uint64_t
,
temp
)[
4
];
uint8_t
*
btemp
=
(
uint8_t
*
)
temp
;
src
-=
2
;
transpose4x4
(
btemp
,
src
,
8
,
stride
);
transpose4x4
(
btemp
+
4
,
src
+
4
*
stride
,
8
,
stride
);
__asm__
volatile
(
H263_LOOP_FILTER
// 5 3 4 6
:
"+m"
(
temp
[
0
]),
"+m"
(
temp
[
1
]),
"+m"
(
temp
[
2
]),
"+m"
(
temp
[
3
])
:
"g"
(
2
*
strength
),
"m"
(
ff_pb_FC
)
);
__asm__
volatile
(
"movq %%mm5, %%mm1
\n\t
"
"movq %%mm4, %%mm0
\n\t
"
"punpcklbw %%mm3, %%mm5
\n\t
"
"punpcklbw %%mm6, %%mm4
\n\t
"
"punpckhbw %%mm3, %%mm1
\n\t
"
"punpckhbw %%mm6, %%mm0
\n\t
"
"movq %%mm5, %%mm3
\n\t
"
"movq %%mm1, %%mm6
\n\t
"
"punpcklwd %%mm4, %%mm5
\n\t
"
"punpcklwd %%mm0, %%mm1
\n\t
"
"punpckhwd %%mm4, %%mm3
\n\t
"
"punpckhwd %%mm0, %%mm6
\n\t
"
"movd %%mm5, (%0)
\n\t
"
"punpckhdq %%mm5, %%mm5
\n\t
"
"movd %%mm5, (%0, %2)
\n\t
"
"movd %%mm3, (%0, %2, 2)
\n\t
"
"punpckhdq %%mm3, %%mm3
\n\t
"
"movd %%mm3, (%0, %3)
\n\t
"
"movd %%mm1, (%1)
\n\t
"
"punpckhdq %%mm1, %%mm1
\n\t
"
"movd %%mm1, (%1, %2)
\n\t
"
"movd %%mm6, (%1, %2, 2)
\n\t
"
"punpckhdq %%mm6, %%mm6
\n\t
"
"movd %%mm6, (%1, %3)
\n\t
"
::
"r"
(
src
),
"r"
(
src
+
4
*
stride
),
"r"
((
x86_reg
)
stride
),
"r"
((
x86_reg
)(
3
*
stride
))
);
}
}
void
ff_h263_v_loop_filter_mmx
(
uint8_t
*
src
,
int
stride
,
int
qscale
);
void
ff_h263_h_loop_filter_mmx
(
uint8_t
*
src
,
int
stride
,
int
qscale
);
#if HAVE_INLINE_ASM
/* Draw the edges of width 'w' of an image of size width, height
* this MMX version can only handle w == 8 || w == 16. */
static
void
draw_edges_mmx
(
uint8_t
*
buf
,
int
wrap
,
int
width
,
int
height
,
...
...
@@ -1653,14 +1484,14 @@ static av_cold void dsputil_init_mmx(DSPContext *c, AVCodecContext *avctx,
c
->
gmc
=
gmc_mmx
;
c
->
add_bytes
=
add_bytes_mmx
;
#endif
/* HAVE_INLINE_ASM */
#if HAVE_YASM
if
(
CONFIG_H263_DECODER
||
CONFIG_H263_ENCODER
)
{
c
->
h263_v_loop_filter
=
h263_v_loop_filter_mmx
;
c
->
h263_h_loop_filter
=
h263_h_loop_filter_mmx
;
c
->
h263_v_loop_filter
=
ff_
h263_v_loop_filter_mmx
;
c
->
h263_h_loop_filter
=
ff_
h263_h_loop_filter_mmx
;
}
#endif
/* HAVE_INLINE_ASM */
#if HAVE_YASM
c
->
vector_clip_int32
=
ff_vector_clip_int32_mmx
;
#endif
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment