Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
1dae7ffa
Commit
1dae7ffa
authored
Nov 28, 2016
by
James Darnley
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
avcodec/h264: mmx 4:2:2 idct add8 function
2.87 times faster (1830 vs. 638 cycles)
parent
815ea8c6
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
38 additions
and
1 deletion
+38
-1
h264_idct.asm
libavcodec/x86/h264_idct.asm
+32
-0
h264dsp_init.c
libavcodec/x86/h264dsp_init.c
+6
-1
No files found.
libavcodec/x86/h264_idct.asm
View file @
1dae7ffa
...
...
@@ -697,6 +697,38 @@ cglobal h264_idct_add8_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride,
call
h264_idct_add8_mmx_plane
RET
;-----------------------------------------------------------------------
; h264_idct_add8_422_8 (MMX): IDCT-and-add driver for 4:2:2 chroma.
;
; Named arguments (x86inc order): dst1, block_offset, block, stride, nnzc
; Extra named registers:          cntr, coeff, dst2, picreg
;
; Invokes h264_idct_add8_mmx_plane four times: twice for the first
; destination pointer, then twice more after stepping to dest[1].
;
; NOTE(review): the helper is defined outside this block.  The counter
; and pointer arithmetic below assumes each helper call advances cntr
; by 4 and moves block forward accordingly -- confirm against the
; helper before changing any of the constants.
;-----------------------------------------------------------------------
cglobal h264_idct_add8_422_8, 5, 8 + npicregs, 0, dst1, block_offset, block, stride, nnzc, cntr, coeff, dst2, picreg
    movsxdifnidn strideq, strided       ; widen the 32-bit stride when the two names differ
%ifdef PIC
    lea      picregq, [scan8_mem]       ; PIC builds address scan8_mem through picreg
%endif
%if ARCH_X86_64
    mov        dst2q, dst1q             ; working copy of the dest pointer-to-pointer
%endif

    mov        cntrq, 16                ; i = 16
    add       blockq, 512               ; i * 16 * sizeof(dctcoef), dctcoef being int16_t

    ; First destination: two helper passes, with cntr bumped by 4 in between.
    call  h264_idct_add8_mmx_plane
    add        cntrq, 4
    call  h264_idct_add8_mmx_plane

    ; Step to dest[1].  x86-64 moves the register copy; x86-32 has no
    ; dst2 register here and updates the dst1 argument slot in place.
%if ARCH_X86_64
    add        dst2q, gprsize
%else
    add         r0mp, gprsize
%endif

    add        cntrq, 4                 ; original note: i is now 32 (relies on the helper's own increments)
    add       blockq, 256               ; original note: block is now at i * 16 * sizeof(dctcoef)

    ; Second destination: same two-pass pattern.
    call  h264_idct_add8_mmx_plane
    add        cntrq, 4
    call  h264_idct_add8_mmx_plane

    RET
h264_idct_add8_mmxext_plane
:
movsxdifnidn
r3
,
r3d
.
nextblock
:
...
...
libavcodec/x86/h264dsp_init.c
View file @
1dae7ffa
...
...
@@ -78,6 +78,8 @@ IDCT_ADD_REP_FUNC2(, 8, 8, sse2)
IDCT_ADD_REP_FUNC2
(,
8
,
10
,
sse2
)
IDCT_ADD_REP_FUNC2
(,
8
,
10
,
avx
)
IDCT_ADD_REP_FUNC2
(,
8
_422
,
8
,
mmx
)
void
ff_h264_luma_dc_dequant_idct_mmx
(
int16_t
*
output
,
int16_t
*
input
,
int
qmul
);
void
ff_h264_luma_dc_dequant_idct_sse2
(
int16_t
*
output
,
int16_t
*
input
,
int
qmul
);
...
...
@@ -228,8 +230,11 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
c
->
h264_idct_add16
=
ff_h264_idct_add16_8_mmx
;
c
->
h264_idct8_add4
=
ff_h264_idct8_add4_8_mmx
;
if
(
chroma_format_idc
<=
1
)
if
(
chroma_format_idc
<=
1
)
{
c
->
h264_idct_add8
=
ff_h264_idct_add8_8_mmx
;
}
else
{
c
->
h264_idct_add8
=
ff_h264_idct_add8_422_8_mmx
;
}
c
->
h264_idct_add16intra
=
ff_h264_idct_add16intra_8_mmx
;
if
(
cpu_flags
&
AV_CPU_FLAG_CMOV
)
c
->
h264_luma_dc_dequant_idct
=
ff_h264_luma_dc_dequant_idct_mmx
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment