Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
7042a55c
Commit
7042a55c
authored
Jan 13, 2016
by
James Darnley
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
avcodec/h264: mmxext 4:2:2 chroma deblock/loop filter
2.6 times faster (366 vs. 142 cycles)
parent
95564466
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
47 additions
and
3 deletions
+47
-3
h264_deblock.asm
libavcodec/x86/h264_deblock.asm
+43
-3
h264dsp_init.c
libavcodec/x86/h264dsp_init.c
+4
-0
No files found.
libavcodec/x86/h264_deblock.asm
View file @
7042a55c
...
@@ -864,7 +864,50 @@ ff_chroma_inter_body_mmxext:
...
@@ -864,7 +864,50 @@ ff_chroma_inter_body_mmxext:
DEBLOCK_P0_Q0
DEBLOCK_P0_Q0
ret
ret
%define
t5
r4
%define
t6
r5
cglobal
deblock_h_chroma422_8
,
5
,
6
,
0
,
0
-
(
1
+
ARCH_X86_64
*
2
)
*
mmsize
%
if
ARCH_X86_64
%
define
buf0
[
rsp
+
16
]
%
define
buf1
[
rsp
+
8
]
%
else
%
define
buf0
r0m
%
define
buf1
r2m
%
endif
movd
m6
,
[r4]
punpcklbw
m6
,
m6
movq
[rsp],
m6
CHROMA_H_START
TRANSPOSE4x8B_LOAD
PASS8ROWS
(
t5
,
r0
,
r1
,
t6
)
movq
buf0
,
m0
movq
buf1
,
m3
LOAD_MASK
r2d
,
r3d
movd
m6
,
[rsp]
punpcklwd
m6
,
m6
pand
m7
,
m6
DEBLOCK_P0_Q0
movq
m0
,
buf0
movq
m3
,
buf1
TRANSPOSE8x4B_STORE
PASS8ROWS
(
t5
,
r0
,
r1
,
t6
)
lea
r0
,
[
r0
+
r1
*
8
]
lea
t5
,
[
t5
+
r1
*
8
]
TRANSPOSE4x8B_LOAD
PASS8ROWS
(
t5
,
r0
,
r1
,
t6
)
movq
buf0
,
m0
movq
buf1
,
m3
LOAD_MASK
r2d
,
r3d
movd
m6
,
[
rsp
+
4
]
punpcklwd
m6
,
m6
pand
m7
,
m6
DEBLOCK_P0_Q0
movq
m0
,
buf0
movq
m3
,
buf1
TRANSPOSE8x4B_STORE
PASS8ROWS
(
t5
,
r0
,
r1
,
t6
)
RET
; in: %1=p0 %2=p1 %3=q1
; in: %1=p0 %2=p1 %3=q1
; out: p0 = (p0 + q1 + 2*p1 + 2) >> 2
; out: p0 = (p0 + q1 + 2*p1 + 2) >> 2
...
@@ -877,9 +920,6 @@ ff_chroma_inter_body_mmxext:
...
@@ -877,9 +920,6 @@ ff_chroma_inter_body_mmxext:
pavgb
%1
,
%2
; dst = avg(p1, avg(p0,q1) - ((p0^q1)&1))
pavgb
%1
,
%2
; dst = avg(p1, avg(p0,q1) - ((p0^q1)&1))
%endmacro
%endmacro
%define
t5
r4
%define
t6
r5
;------------------------------------------------------------------------------
;------------------------------------------------------------------------------
; void ff_deblock_v_chroma_intra(uint8_t *pix, int stride, int alpha, int beta)
; void ff_deblock_v_chroma_intra(uint8_t *pix, int stride, int alpha, int beta)
;------------------------------------------------------------------------------
;------------------------------------------------------------------------------
...
...
libavcodec/x86/h264dsp_init.c
View file @
7042a55c
...
@@ -129,6 +129,8 @@ LF_IFUNC(v, chroma_intra, depth, avx)
...
@@ -129,6 +129,8 @@ LF_IFUNC(v, chroma_intra, depth, avx)
LF_FUNCS
(
uint8_t
,
8
)
LF_FUNCS
(
uint8_t
,
8
)
LF_FUNCS
(
uint16_t
,
10
)
LF_FUNCS
(
uint16_t
,
10
)
void
ff_deblock_h_chroma422_8_mmxext
(
uint8_t
*
pix
,
int
stride
,
int
alpha
,
int
beta
,
int8_t
*
tc0
);
#if ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL
#if ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL
LF_FUNC
(
v8
,
luma
,
8
,
mmxext
)
LF_FUNC
(
v8
,
luma
,
8
,
mmxext
)
static
void
deblock_v_luma_8_mmxext
(
uint8_t
*
pix
,
int
stride
,
int
alpha
,
static
void
deblock_v_luma_8_mmxext
(
uint8_t
*
pix
,
int
stride
,
int
alpha
,
...
@@ -245,6 +247,8 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
...
@@ -245,6 +247,8 @@ av_cold void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth,
if
(
chroma_format_idc
<=
1
)
{
if
(
chroma_format_idc
<=
1
)
{
c
->
h264_h_loop_filter_chroma
=
ff_deblock_h_chroma_8_mmxext
;
c
->
h264_h_loop_filter_chroma
=
ff_deblock_h_chroma_8_mmxext
;
c
->
h264_h_loop_filter_chroma_intra
=
ff_deblock_h_chroma_intra_8_mmxext
;
c
->
h264_h_loop_filter_chroma_intra
=
ff_deblock_h_chroma_intra_8_mmxext
;
}
else
{
c
->
h264_h_loop_filter_chroma
=
ff_deblock_h_chroma422_8_mmxext
;
}
}
#if ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL
#if ARCH_X86_32 && HAVE_MMXEXT_EXTERNAL
c
->
h264_v_loop_filter_luma
=
deblock_v_luma_8_mmxext
;
c
->
h264_v_loop_filter_luma
=
deblock_v_luma_8_mmxext
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment