Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
5705b020
Commit
5705b020
authored
May 11, 2011
by
Jason Garrett-Glaser
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
10-bit H.264 x86 chroma v loopfilter asm
Also delete some unused deblock asm macros.
parent
9aa91043
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
121 additions
and
42 deletions
+121
-42
h264_deblock.asm
libavcodec/x86/h264_deblock.asm
+0
-41
h264_deblock_10bit.asm
libavcodec/x86/h264_deblock_10bit.asm
+106
-0
h264dsp_mmx.c
libavcodec/x86/h264dsp_mmx.c
+15
-1
No files found.
libavcodec/x86/h264_deblock.asm
View file @
5705b020
...
...
@@ -106,47 +106,6 @@ cextern pb_A1
TRANSPOSE4x8_LOAD
bw
,
wd
,
dq
,
%1
,
%2
,
%3
,
%4
,
%5
,
%6
,
%7
,
%8
%endmacro
%macro
TRANSPOSE4x8W_LOAD
8
%if
mmsize
==
16
TRANSPOSE4x8_LOAD
wd
,
dq
,
qdq
,
%1
,
%2
,
%3
,
%4
,
%5
,
%6
,
%7
,
%8
%else
SWAP
1
,
4
,
2
,
3
mova
m0
,
[t5]
mova
m1
,
[
t5
+
r1
]
mova
m2
,
[
t5
+
r1
*
2
]
mova
m3
,
[
t5
+
t6
]
TRANSPOSE4x4W
0
,
1
,
2
,
3
,
4
%endif
%endmacro
%macro
TRANSPOSE8x2W_STORE
8
punpckhwd
m0
,
m1
,
m2
punpcklwd
m1
,
m2
%if
mmsize
==
8
movd
%3
,
m0
movd
%1
,
m1
psrlq
m1
,
32
psrlq
m0
,
32
movd
%2
,
m1
movd
%4
,
m0
%else
movd
%5
,
m0
movd
%1
,
m1
psrldq
m1
,
4
psrldq
m0
,
4
movd
%2
,
m1
movd
%6
,
m0
psrldq
m1
,
4
psrldq
m0
,
4
movd
%3
,
m1
movd
%7
,
m0
psrldq
m1
,
4
psrldq
m0
,
4
movd
%4
,
m1
movd
%8
,
m0
%endif
%endmacro
%macro
SBUTTERFLY3
4
punpckh%1
%4
,
%2
,
%3
punpckl%1
%2
,
%3
...
...
libavcodec/x86/h264_deblock_10bit.asm
View file @
5705b020
...
...
@@ -34,6 +34,7 @@ pw_pixel_max: times 8 dw ((1 << 10)-1)
SECTION
.
text
cextern
pw_2
cextern
pw_3
cextern
pw_4
; out: %4 = |%1-%2|-%3
...
...
@@ -802,3 +803,108 @@ INIT_AVX
DEBLOCK_LUMA
avx
DEBLOCK_LUMA_INTRA
avx
%endif
; in: %1=p0, %2=q0, %3=p1, %4=q1, %5=mask, %6=tmp, %7=tmp
; out: %1=p0', %2=q0'
%macro
CHROMA_DEBLOCK_P0_Q0_INTRA
7
mova
%6
,
[
pw_2
]
paddw
%6
,
%3
paddw
%6
,
%4
paddw
%7
,
%6
,
%2
paddw
%6
,
%1
paddw
%6
,
%3
paddw
%7
,
%4
psraw
%6
,
2
psraw
%7
,
2
psubw
%6
,
%1
psubw
%7
,
%2
pand
%6
,
%5
pand
%7
,
%5
paddw
%1
,
%6
paddw
%2
,
%7
%endmacro
%macro
CHROMA_V_LOAD
1
mova
m0
,
[r0]
; p1
mova
m1
,
[
r0
+
r1
]
; p0
mova
m2
,
[
%1
]
; q0
mova
m3
,
[
%1
+
r1
]
; q1
%endmacro
%macro
CHROMA_V_STORE
0
mova
[
r0
+
1
*
r1
]
,
m1
mova
[
r0
+
2
*
r1
]
,
m2
%endmacro
%macro
DEBLOCK_CHROMA
1
;-----------------------------------------------------------------------------
; void deblock_v_chroma( uint16_t *pix, int stride, int alpha, int beta, int8_t *tc0 )
;-----------------------------------------------------------------------------
cglobal
deblock_v_chroma_10_
%1
,
5
,
7
-
(
mmsize
/
16
),
8
*
(
mmsize
/
16
)
mov
r5
,
r0
sub
r0
,
r1
sub
r0
,
r1
shl
r2d
,
2
shl
r3d
,
2
%if
mmsize
<
16
mov
r6
,
16
/
mmsize
.
loop
:
%endif
CHROMA_V_LOAD
r5
LOAD_AB
m4
,
m5
,
r2
,
r3
LOAD_MASK
m0
,
m1
,
m2
,
m3
,
m4
,
m5
,
m7
,
m6
,
m4
pxor
m4
,
m4
LOAD_TC
m6
,
r4
psubw
m6
,
[
pw_3
]
pmaxsw
m6
,
m4
pand
m7
,
m6
DEBLOCK_P0_Q0
m1
,
m2
,
m0
,
m3
,
m7
,
m5
,
m6
CHROMA_V_STORE
%if
mmsize
<
16
add
r0
,
mmsize
add
r5
,
mmsize
add
r4
,
mmsize
/
8
dec
r6
jg
.
loop
REP_RET
%else
RET
%endif
;-----------------------------------------------------------------------------
; void deblock_v_chroma_intra( uint16_t *pix, int stride, int alpha, int beta )
;-----------------------------------------------------------------------------
cglobal
deblock_v_chroma_intra_10_
%1
,
4
,
6
-
(
mmsize
/
16
),
8
*
(
mmsize
/
16
)
mov
r4
,
r0
sub
r0
,
r1
sub
r0
,
r1
shl
r2d
,
2
shl
r3d
,
2
%if
mmsize
<
16
mov
r5
,
16
/
mmsize
.
loop
:
%endif
CHROMA_V_LOAD
r4
LOAD_AB
m4
,
m5
,
r2
,
r3
LOAD_MASK
m0
,
m1
,
m2
,
m3
,
m4
,
m5
,
m7
,
m6
,
m4
CHROMA_DEBLOCK_P0_Q0_INTRA
m1
,
m2
,
m0
,
m3
,
m7
,
m5
,
m6
CHROMA_V_STORE
%if
mmsize
<
16
add
r0
,
mmsize
add
r4
,
mmsize
dec
r5
jg
.
loop
REP_RET
%else
RET
%endif
%endmacro
%ifndef
ARCH_X86_64
INIT_MMX
DEBLOCK_CHROMA
mmxext
%endif
INIT_XMM
DEBLOCK_CHROMA
sse2
INIT_AVX
DEBLOCK_CHROMA
avx
libavcodec/x86/h264dsp_mmx.c
View file @
5705b020
...
...
@@ -236,10 +236,18 @@ LF_FUNC (h, luma, depth, sse2)\
LF_IFUNC(h, luma_intra, depth, sse2)\
LF_FUNC (v, luma, depth, sse2)\
LF_IFUNC(v, luma_intra, depth, sse2)\
LF_FUNC (h, chroma, depth, sse2)\
LF_IFUNC(h, chroma_intra, depth, sse2)\
LF_FUNC (v, chroma, depth, sse2)\
LF_IFUNC(v, chroma_intra, depth, sse2)\
LF_FUNC (h, luma, depth, avx)\
LF_IFUNC(h, luma_intra, depth, avx)\
LF_FUNC (v, luma, depth, avx)\
LF_IFUNC(v, luma_intra, depth, avx)
LF_IFUNC(v, luma_intra, depth, avx)\
LF_FUNC (h, chroma, depth, avx)\
LF_IFUNC(h, chroma_intra, depth, avx)\
LF_FUNC (v, chroma, depth, avx)\
LF_IFUNC(v, chroma_intra, depth, avx)
LF_FUNCS
(
uint8_t
,
8
)
LF_FUNCS
(
uint16_t
,
10
)
...
...
@@ -401,12 +409,16 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
if
(
mm_flags
&
AV_CPU_FLAG_MMX
)
{
if
(
mm_flags
&
AV_CPU_FLAG_MMX2
)
{
#if ARCH_X86_32
c
->
h264_v_loop_filter_chroma
=
ff_deblock_v_chroma_10_mmxext
;
c
->
h264_v_loop_filter_chroma_intra
=
ff_deblock_v_chroma_intra_10_mmxext
;
c
->
h264_v_loop_filter_luma
=
ff_deblock_v_luma_10_mmxext
;
c
->
h264_h_loop_filter_luma
=
ff_deblock_h_luma_10_mmxext
;
c
->
h264_v_loop_filter_luma_intra
=
ff_deblock_v_luma_intra_10_mmxext
;
c
->
h264_h_loop_filter_luma_intra
=
ff_deblock_h_luma_intra_10_mmxext
;
#endif
if
(
mm_flags
&
AV_CPU_FLAG_SSE2
)
{
c
->
h264_v_loop_filter_chroma
=
ff_deblock_v_chroma_10_sse2
;
c
->
h264_v_loop_filter_chroma_intra
=
ff_deblock_v_chroma_intra_10_sse2
;
#if HAVE_ALIGNED_STACK
c
->
h264_v_loop_filter_luma
=
ff_deblock_v_luma_10_sse2
;
c
->
h264_h_loop_filter_luma
=
ff_deblock_h_luma_10_sse2
;
...
...
@@ -415,6 +427,8 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
#endif
}
if
(
mm_flags
&
AV_CPU_FLAG_AVX
)
{
c
->
h264_v_loop_filter_chroma
=
ff_deblock_v_chroma_10_avx
;
c
->
h264_v_loop_filter_chroma_intra
=
ff_deblock_v_chroma_intra_10_avx
;
#if HAVE_ALIGNED_STACK
c
->
h264_v_loop_filter_luma
=
ff_deblock_v_luma_10_avx
;
c
->
h264_h_loop_filter_luma
=
ff_deblock_h_luma_10_avx
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment