Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
838abfc1
Commit
838abfc1
authored
Jan 31, 2016
by
Timothy Gu
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
x86: vc1dsp: Convert vc1_inv_trans_*_dc to NASM format
parent
b62825a4
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
111 additions
and
207 deletions
+111
-207
vc1dsp.asm
libavcodec/x86/vc1dsp.asm
+98
-0
vc1dsp_init.c
libavcodec/x86/vc1dsp_init.c
+13
-0
vc1dsp_mmx.c
libavcodec/x86/vc1dsp_mmx.c
+0
-207
No files found.
libavcodec/x86/vc1dsp.asm
View file @
838abfc1
...
@@ -395,3 +395,101 @@ cglobal vc1_put_ver_16b_shift2, 4,7,0, dst, src, stride
...
@@ -395,3 +395,101 @@ cglobal vc1_put_ver_16b_shift2, 4,7,0, dst, src, stride
jnz
.
loop
jnz
.
loop
REP_RET
REP_RET
%endif
; HAVE_MMX_INLINE
%endif
; HAVE_MMX_INLINE
; INV_TRANS_INIT
; Expects the final DC offset in blockd and the row stride in linesize.
; Produces:
;   m0 = dc, word-replicated then packed to bytes with unsigned saturation
;        (non-zero only when dc > 0)
;   m1 = -dc, packed the same way (non-zero only when dc < 0)
;   linesize3q = 3 * linesizeq
; The argument names are redefined to dest, linesize, linesize3, so "block"
; must not be referenced after this macro.
; NOTE(review): SPLATW, movsxdifnidn and DEFINE_ARGS are defined elsewhere
; (x86inc/x86util); SPLATW is presumed to replicate the low word -- confirm.
%macro INV_TRANS_INIT 0
    movsxdifnidn linesizeq, linesized       ; widen the int stride where needed
    movd                m0, blockd          ; m0 = dc
    SPLATW              m0, m0
    pxor                m1, m1
    psubw               m1, m0              ; m1 = 0 - dc, per word
    packuswb            m0, m0              ; negative words saturate to 0
    packuswb            m1, m1

    DEFINE_ARGS dest, linesize, linesize3
    lea         linesize3q, [linesizeq*3]   ; offset of the fourth row
%endmacro
; INV_TRANS_PROCESS size
; Applies the DC offset to four consecutive rows starting at destq.
;   %1 = suffix appended to "mov" for the row loads/stores (callers pass
;        h for the 4-pixel-wide blocks and a for the 8-pixel-wide blocks)
; Requires m0/m1, linesizeq and linesize3q as prepared by INV_TRANS_INIT.
; Every row is loaded before any row is stored. Each pixel gets m0 added and
; then m1 subtracted, both with unsigned byte saturation.
; Uses m2-m5 as temporaries.
%macro INV_TRANS_PROCESS 1
    ; load rows 0..3
    mov%1               m2, [destq+linesizeq*0]
    mov%1               m3, [destq+linesizeq*1]
    mov%1               m4, [destq+linesizeq*2]
    mov%1               m5, [destq+linesize3q]

    ; row 0
    paddusb             m2, m0
    psubusb             m2, m1
    ; row 1
    paddusb             m3, m0
    psubusb             m3, m1
    ; row 2
    paddusb             m4, m0
    psubusb             m4, m1
    ; row 3
    paddusb             m5, m0
    psubusb             m5, m1

    ; store rows 0..3
    mov%1 [destq+linesizeq*0], m2
    mov%1 [destq+linesizeq*1], m3
    mov%1 [destq+linesizeq*2], m4
    mov%1 [destq+linesize3q],  m5
%endmacro
; ff_vc1_inv_trans_?x?_dc_mmxext(uint8_t *dest, int linesize, int16_t *block)
;-----------------------------------------------------------------------------
; vc1_inv_trans_4x4_dc(uint8_t *dest, int linesize, int16_t *block)
; DC-only inverse transform of a 4x4 block:
;   dc = (17 * block[0] +  4) >> 3
;   dc = (17 * dc       + 64) >> 7
; The result is added to four rows of dest with unsigned saturation.
; r3 is used as scratch; the block pointer register is overwritten with dc.
;-----------------------------------------------------------------------------
INIT_MMX mmxext
cglobal vc1_inv_trans_4x4_dc, 3, 4, 0, dest, linesize, block
    movsx          r3d, WORD [blockq]       ; r3d = block[0], sign-extended
    ; first pass: (17 * dc + 4) >> 3
    mov         blockd, r3d
    shl         blockd, 4                   ; 16 * dc
    lea         blockd, [blockq+r3+4]       ; 16 * dc + dc + 4
    sar         blockd, 3
    ; second pass: (17 * dc + 64) >> 7
    mov            r3d, blockd
    shl         blockd, 4                   ; 16 * dc
    lea         blockd, [blockq+r3+64]      ; 16 * dc + dc + 64
    sar         blockd, 7

    INV_TRANS_INIT

    INV_TRANS_PROCESS h
    RET
;-----------------------------------------------------------------------------
; vc1_inv_trans_4x8_dc(uint8_t *dest, int linesize, int16_t *block)
; DC-only inverse transform of a block 4 pixels wide and 8 rows high:
;   dc = (17 * block[0] +  4) >> 3
;   dc = (12 * dc       + 64) >> 7
; The result is added to eight rows of dest with unsigned saturation,
; processed as two groups of four rows.
; r3 is used as scratch; the block pointer register is overwritten with dc.
;-----------------------------------------------------------------------------
INIT_MMX mmxext
cglobal vc1_inv_trans_4x8_dc, 3, 4, 0, dest, linesize, block
    movsx          r3d, WORD [blockq]       ; r3d = block[0], sign-extended
    ; first pass: (17 * dc + 4) >> 3
    mov         blockd, r3d
    shl         blockd, 4                   ; 16 * dc
    lea         blockd, [blockq+r3+4]       ; 16 * dc + dc + 4
    sar         blockd, 3
    ; second pass: (12 * dc + 64) >> 7
    shl         blockd, 2                   ; 4 * dc
    lea         blockd, [blockq*3+64]       ; 3 * (4 * dc) + 64
    sar         blockd, 7

    INV_TRANS_INIT

    INV_TRANS_PROCESS h                     ; rows 0-3
    lea          destq, [destq+linesizeq*4] ; advance to row 4
    INV_TRANS_PROCESS h                     ; rows 4-7
    RET
;-----------------------------------------------------------------------------
; vc1_inv_trans_8x4_dc(uint8_t *dest, int linesize, int16_t *block)
; DC-only inverse transform of a block 8 pixels wide and 4 rows high:
;   dc = ( 3 * block[0] +  1) >> 1
;   dc = (17 * dc       + 64) >> 7
; The result is added to four rows of dest with unsigned saturation.
; r3 is used as scratch; the block pointer register is overwritten with dc.
;-----------------------------------------------------------------------------
INIT_MMX mmxext
cglobal vc1_inv_trans_8x4_dc, 3, 4, 0, dest, linesize, block
    movsx       blockd, WORD [blockq]       ; blockd = block[0], sign-extended
    ; first pass: (3 * dc + 1) >> 1
    lea         blockd, [blockq*3+1]
    sar         blockd, 1
    ; second pass: (17 * dc + 64) >> 7
    mov            r3d, blockd
    shl         blockd, 4                   ; 16 * dc
    lea         blockd, [blockq+r3+64]      ; 16 * dc + dc + 64
    sar         blockd, 7

    INV_TRANS_INIT

    INV_TRANS_PROCESS a
    RET
;-----------------------------------------------------------------------------
; vc1_inv_trans_8x8_dc(uint8_t *dest, int linesize, int16_t *block)
; DC-only inverse transform of an 8x8 block:
;   dc = (3 * block[0] +  1) >> 1
;   dc = (3 * dc       + 16) >> 5
; The result is added to eight rows of dest with unsigned saturation,
; processed as two groups of four rows.
; Only the three argument registers are used; the block pointer register is
; overwritten with dc.
;-----------------------------------------------------------------------------
INIT_MMX mmxext
cglobal vc1_inv_trans_8x8_dc, 3, 3, 0, dest, linesize, block
    movsx       blockd, WORD [blockq]       ; blockd = block[0], sign-extended
    ; first pass: (3 * dc + 1) >> 1
    lea         blockd, [blockq*3+1]
    sar         blockd, 1
    ; second pass: (3 * dc + 16) >> 5
    lea         blockd, [blockq*3+16]
    sar         blockd, 5

    INV_TRANS_INIT

    INV_TRANS_PROCESS a                     ; rows 0-3
    lea          destq, [destq+linesizeq*4] ; advance to row 4
    INV_TRANS_PROCESS a                     ; rows 4-7
    RET
libavcodec/x86/vc1dsp_init.c
View file @
838abfc1
...
@@ -92,6 +92,14 @@ void ff_put_vc1_chroma_mc8_nornd_ssse3(uint8_t *dst, uint8_t *src,
...
@@ -92,6 +92,14 @@ void ff_put_vc1_chroma_mc8_nornd_ssse3(uint8_t *dst, uint8_t *src,
int
stride
,
int
h
,
int
x
,
int
y
);
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_avg_vc1_chroma_mc8_nornd_ssse3
(
uint8_t
*
dst
,
uint8_t
*
src
,
void
ff_avg_vc1_chroma_mc8_nornd_ssse3
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
int
stride
,
int
h
,
int
x
,
int
y
);
/* DC-only inverse transforms provided by the external assembly
 * (libavcodec/x86/vc1dsp.asm), one per block size. */
void ff_vc1_inv_trans_4x4_dc_mmxext(uint8_t *dest, int linesize,
                                    int16_t *block);
void ff_vc1_inv_trans_4x8_dc_mmxext(uint8_t *dest, int linesize,
                                    int16_t *block);
void ff_vc1_inv_trans_8x4_dc_mmxext(uint8_t *dest, int linesize,
                                    int16_t *block);
void ff_vc1_inv_trans_8x8_dc_mmxext(uint8_t *dest, int linesize,
                                    int16_t *block);
av_cold
void
ff_vc1dsp_init_x86
(
VC1DSPContext
*
dsp
)
av_cold
void
ff_vc1dsp_init_x86
(
VC1DSPContext
*
dsp
)
...
@@ -130,6 +138,11 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
...
@@ -130,6 +138,11 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
dsp
->
avg_vc1_mspel_pixels_tab
[
1
][
0
]
=
avg_vc1_mspel_mc00_8_mmxext
;
dsp
->
avg_vc1_mspel_pixels_tab
[
1
][
0
]
=
avg_vc1_mspel_mc00_8_mmxext
;
dsp
->
avg_vc1_mspel_pixels_tab
[
0
][
0
]
=
avg_vc1_mspel_mc00_16_mmxext
;
dsp
->
avg_vc1_mspel_pixels_tab
[
0
][
0
]
=
avg_vc1_mspel_mc00_16_mmxext
;
dsp
->
vc1_inv_trans_8x8_dc
=
ff_vc1_inv_trans_8x8_dc_mmxext
;
dsp
->
vc1_inv_trans_4x8_dc
=
ff_vc1_inv_trans_4x8_dc_mmxext
;
dsp
->
vc1_inv_trans_8x4_dc
=
ff_vc1_inv_trans_8x4_dc_mmxext
;
dsp
->
vc1_inv_trans_4x4_dc
=
ff_vc1_inv_trans_4x4_dc_mmxext
;
}
}
if
(
EXTERNAL_SSE2
(
cpu_flags
))
{
if
(
EXTERNAL_SSE2
(
cpu_flags
))
{
dsp
->
vc1_v_loop_filter8
=
ff_vc1_v_loop_filter8_sse2
;
dsp
->
vc1_v_loop_filter8
=
ff_vc1_v_loop_filter8_sse2
;
...
...
libavcodec/x86/vc1dsp_mmx.c
View file @
838abfc1
...
@@ -481,208 +481,6 @@ DECLARE_FUNCTION(3, 1)
...
@@ -481,208 +481,6 @@ DECLARE_FUNCTION(3, 1)
DECLARE_FUNCTION
(
3
,
2
)
DECLARE_FUNCTION
(
3
,
2
)
DECLARE_FUNCTION
(
3
,
3
)
DECLARE_FUNCTION
(
3
,
3
)
/**
 * DC-only inverse transform for a 4x4 block.
 *
 * block[0] is scaled by the two transform passes and the resulting offset is
 * added, with unsigned saturation, to 4 pixels in each of 4 rows of dest.
 *
 * @param dest     top-left pixel of the destination area
 * @param linesize distance in bytes between consecutive rows
 * @param block    coefficient block; only block[0] is read
 *
 * The second asm statement relies on mm0/mm1 still holding the values set up
 * by the first one. No emms is issued here.
 */
static void vc1_inv_trans_4x4_dc_mmxext(uint8_t *dest, int linesize,
                                        int16_t *block)
{
    const int pass1  = (17 * block[0] +  4) >> 3;
    const int offset = (17 * pass1    + 64) >> 7;
    uint8_t *const row0 = dest + 0 * linesize;
    uint8_t *const row1 = dest + 1 * linesize;
    uint8_t *const row2 = dest + 2 * linesize;
    uint8_t *const row3 = dest + 3 * linesize;

    /* mm0 = max(offset, 0) in every byte, mm1 = max(-offset, 0) in every byte */
    __asm__ volatile(
        "movd          %0, %%mm0 \n\t"
        "pshufw $0, %%mm0, %%mm0 \n\t"
        "pxor       %%mm1, %%mm1 \n\t"
        "psubw      %%mm0, %%mm1 \n\t"
        "packuswb   %%mm0, %%mm0 \n\t"
        "packuswb   %%mm1, %%mm1 \n\t"
        ::"r"(offset)
    );
    /* load four rows, apply +mm0 then -mm1 with saturation, store them back */
    __asm__ volatile(
        "movd          %0, %%mm2 \n\t"
        "movd          %1, %%mm3 \n\t"
        "movd          %2, %%mm4 \n\t"
        "movd          %3, %%mm5 \n\t"
        "paddusb    %%mm0, %%mm2 \n\t"
        "paddusb    %%mm0, %%mm3 \n\t"
        "paddusb    %%mm0, %%mm4 \n\t"
        "paddusb    %%mm0, %%mm5 \n\t"
        "psubusb    %%mm1, %%mm2 \n\t"
        "psubusb    %%mm1, %%mm3 \n\t"
        "psubusb    %%mm1, %%mm4 \n\t"
        "psubusb    %%mm1, %%mm5 \n\t"
        "movd       %%mm2, %0    \n\t"
        "movd       %%mm3, %1    \n\t"
        "movd       %%mm4, %2    \n\t"
        "movd       %%mm5, %3    \n\t"
        :"+m"(*(uint32_t *)row0),
         "+m"(*(uint32_t *)row1),
         "+m"(*(uint32_t *)row2),
         "+m"(*(uint32_t *)row3)
    );
}
/**
 * DC-only inverse transform for a block 4 pixels wide and 8 rows high.
 *
 * block[0] is scaled by the two transform passes and the resulting offset is
 * added, with unsigned saturation, to 4 pixels in each of 8 rows of dest.
 *
 * @param dest     top-left pixel of the destination area
 * @param linesize distance in bytes between consecutive rows
 * @param block    coefficient block; only block[0] is read
 *
 * The row-processing asm relies on mm0/mm1 still holding the values set up
 * by the first asm statement. No emms is issued here.
 */
static void vc1_inv_trans_4x8_dc_mmxext(uint8_t *dest, int linesize,
                                        int16_t *block)
{
    int half;
    int dc = block[0];

    dc = (17 * dc +  4) >> 3;
    dc = (12 * dc + 64) >> 7;

    /* mm0 = max(dc, 0) in every byte, mm1 = max(-dc, 0) in every byte */
    __asm__ volatile(
        "movd          %0, %%mm0 \n\t"
        "pshufw $0, %%mm0, %%mm0 \n\t"
        "pxor       %%mm1, %%mm1 \n\t"
        "psubw      %%mm0, %%mm1 \n\t"
        "packuswb   %%mm0, %%mm0 \n\t"
        "packuswb   %%mm1, %%mm1 \n\t"
        ::"r"(dc)
    );

    /* rows 0-3, then rows 4-7 */
    for (half = 0; half < 2; half++) {
        uint8_t *const rows = dest + half * 4 * linesize;

        __asm__ volatile(
            "movd          %0, %%mm2 \n\t"
            "movd          %1, %%mm3 \n\t"
            "movd          %2, %%mm4 \n\t"
            "movd          %3, %%mm5 \n\t"
            "paddusb    %%mm0, %%mm2 \n\t"
            "paddusb    %%mm0, %%mm3 \n\t"
            "paddusb    %%mm0, %%mm4 \n\t"
            "paddusb    %%mm0, %%mm5 \n\t"
            "psubusb    %%mm1, %%mm2 \n\t"
            "psubusb    %%mm1, %%mm3 \n\t"
            "psubusb    %%mm1, %%mm4 \n\t"
            "psubusb    %%mm1, %%mm5 \n\t"
            "movd       %%mm2, %0    \n\t"
            "movd       %%mm3, %1    \n\t"
            "movd       %%mm4, %2    \n\t"
            "movd       %%mm5, %3    \n\t"
            :"+m"(*(uint32_t *)(rows + 0 * linesize)),
             "+m"(*(uint32_t *)(rows + 1 * linesize)),
             "+m"(*(uint32_t *)(rows + 2 * linesize)),
             "+m"(*(uint32_t *)(rows + 3 * linesize))
        );
    }
}
/**
 * DC-only inverse transform for a block 8 pixels wide and 4 rows high.
 *
 * block[0] is scaled by the two transform passes and the resulting offset is
 * added, with unsigned saturation, to 8 pixels in each of 4 rows of dest.
 *
 * @param dest     top-left pixel of the destination area
 * @param linesize distance in bytes between consecutive rows
 * @param block    coefficient block; only block[0] is read
 *
 * The second asm statement relies on mm0/mm1 still holding the values set up
 * by the first one. No emms is issued here.
 */
static void vc1_inv_trans_8x4_dc_mmxext(uint8_t *dest, int linesize,
                                        int16_t *block)
{
    int dc = block[0];

    dc = ( 3 * dc +  1) >> 1;
    dc = (17 * dc + 64) >> 7;

    /* mm0 = max(dc, 0) in every byte, mm1 = max(-dc, 0) in every byte */
    __asm__ volatile(
        "movd          %0, %%mm0 \n\t"
        "pshufw $0, %%mm0, %%mm0 \n\t"
        "pxor       %%mm1, %%mm1 \n\t"
        "psubw      %%mm0, %%mm1 \n\t"
        "packuswb   %%mm0, %%mm0 \n\t"
        "packuswb   %%mm1, %%mm1 \n\t"
        ::"r"(dc)
    );
    /* movq reads and writes 8 bytes per row, so the memory operands are
     * declared as 64-bit objects; a 32-bit operand type would tell the
     * compiler that only the first 4 bytes of each row are touched. */
    __asm__ volatile(
        "movq          %0, %%mm2 \n\t"
        "movq          %1, %%mm3 \n\t"
        "movq          %2, %%mm4 \n\t"
        "movq          %3, %%mm5 \n\t"
        "paddusb    %%mm0, %%mm2 \n\t"
        "paddusb    %%mm0, %%mm3 \n\t"
        "paddusb    %%mm0, %%mm4 \n\t"
        "paddusb    %%mm0, %%mm5 \n\t"
        "psubusb    %%mm1, %%mm2 \n\t"
        "psubusb    %%mm1, %%mm3 \n\t"
        "psubusb    %%mm1, %%mm4 \n\t"
        "psubusb    %%mm1, %%mm5 \n\t"
        "movq       %%mm2, %0    \n\t"
        "movq       %%mm3, %1    \n\t"
        "movq       %%mm4, %2    \n\t"
        "movq       %%mm5, %3    \n\t"
        :"+m"(*(uint64_t *)(dest + 0 * linesize)),
         "+m"(*(uint64_t *)(dest + 1 * linesize)),
         "+m"(*(uint64_t *)(dest + 2 * linesize)),
         "+m"(*(uint64_t *)(dest + 3 * linesize))
    );
}
/**
 * DC-only inverse transform for an 8x8 block.
 *
 * block[0] is scaled by the two transform passes and the resulting offset is
 * added, with unsigned saturation, to 8 pixels in each of 8 rows of dest.
 *
 * @param dest     top-left pixel of the destination area
 * @param linesize distance in bytes between consecutive rows
 * @param block    coefficient block; only block[0] is read
 *
 * The row-processing asm relies on mm0/mm1 still holding the values set up
 * by the first asm statement. No emms is issued here.
 */
static void vc1_inv_trans_8x8_dc_mmxext(uint8_t *dest, int linesize,
                                        int16_t *block)
{
    int half;
    int dc = block[0];

    dc = (3 * dc +  1) >> 1;
    dc = (3 * dc + 16) >> 5;

    /* mm0 = max(dc, 0) in every byte, mm1 = max(-dc, 0) in every byte */
    __asm__ volatile(
        "movd          %0, %%mm0 \n\t"
        "pshufw $0, %%mm0, %%mm0 \n\t"
        "pxor       %%mm1, %%mm1 \n\t"
        "psubw      %%mm0, %%mm1 \n\t"
        "packuswb   %%mm0, %%mm0 \n\t"
        "packuswb   %%mm1, %%mm1 \n\t"
        ::"r"(dc)
    );

    /* rows 0-3, then rows 4-7 */
    for (half = 0; half < 2; half++) {
        uint8_t *const rows = dest + half * 4 * linesize;

        /* movq reads and writes 8 bytes per row, so the memory operands are
         * declared as 64-bit objects; a 32-bit operand type would tell the
         * compiler that only the first 4 bytes of each row are touched. */
        __asm__ volatile(
            "movq          %0, %%mm2 \n\t"
            "movq          %1, %%mm3 \n\t"
            "movq          %2, %%mm4 \n\t"
            "movq          %3, %%mm5 \n\t"
            "paddusb    %%mm0, %%mm2 \n\t"
            "paddusb    %%mm0, %%mm3 \n\t"
            "paddusb    %%mm0, %%mm4 \n\t"
            "paddusb    %%mm0, %%mm5 \n\t"
            "psubusb    %%mm1, %%mm2 \n\t"
            "psubusb    %%mm1, %%mm3 \n\t"
            "psubusb    %%mm1, %%mm4 \n\t"
            "psubusb    %%mm1, %%mm5 \n\t"
            "movq       %%mm2, %0    \n\t"
            "movq       %%mm3, %1    \n\t"
            "movq       %%mm4, %2    \n\t"
            "movq       %%mm5, %3    \n\t"
            :"+m"(*(uint64_t *)(rows + 0 * linesize)),
             "+m"(*(uint64_t *)(rows + 1 * linesize)),
             "+m"(*(uint64_t *)(rows + 2 * linesize)),
             "+m"(*(uint64_t *)(rows + 3 * linesize))
        );
    }
}
#define FN_ASSIGN(OP, X, Y, INSN) \
#define FN_ASSIGN(OP, X, Y, INSN) \
dsp->OP##vc1_mspel_pixels_tab[1][X+4*Y] = OP##vc1_mspel_mc##X##Y##INSN; \
dsp->OP##vc1_mspel_pixels_tab[1][X+4*Y] = OP##vc1_mspel_mc##X##Y##INSN; \
dsp->OP##vc1_mspel_pixels_tab[0][X+4*Y] = OP##vc1_mspel_mc##X##Y##_16##INSN
dsp->OP##vc1_mspel_pixels_tab[0][X+4*Y] = OP##vc1_mspel_mc##X##Y##_16##INSN
...
@@ -729,10 +527,5 @@ av_cold void ff_vc1dsp_init_mmxext(VC1DSPContext *dsp)
...
@@ -729,10 +527,5 @@ av_cold void ff_vc1dsp_init_mmxext(VC1DSPContext *dsp)
FN_ASSIGN
(
avg_
,
3
,
1
,
_mmxext
);
FN_ASSIGN
(
avg_
,
3
,
1
,
_mmxext
);
FN_ASSIGN
(
avg_
,
3
,
2
,
_mmxext
);
FN_ASSIGN
(
avg_
,
3
,
2
,
_mmxext
);
FN_ASSIGN
(
avg_
,
3
,
3
,
_mmxext
);
FN_ASSIGN
(
avg_
,
3
,
3
,
_mmxext
);
dsp
->
vc1_inv_trans_8x8_dc
=
vc1_inv_trans_8x8_dc_mmxext
;
dsp
->
vc1_inv_trans_4x8_dc
=
vc1_inv_trans_4x8_dc_mmxext
;
dsp
->
vc1_inv_trans_8x4_dc
=
vc1_inv_trans_8x4_dc_mmxext
;
dsp
->
vc1_inv_trans_4x4_dc
=
vc1_inv_trans_4x4_dc_mmxext
;
}
}
#endif
/* HAVE_6REGS && HAVE_INLINE_ASM && HAVE_MMX_EXTERNAL */
#endif
/* HAVE_6REGS && HAVE_INLINE_ASM && HAVE_MMX_EXTERNAL */
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment