Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
8ad77b65
Commit
8ad77b65
authored
May 10, 2011
by
Jason Garrett-Glaser
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Update x86 H.264 deblock asm
Includes AVX versions from x264.
parent
b6675279
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
277 additions
and
193 deletions
+277
-193
h264_deblock.asm
libavcodec/x86/h264_deblock.asm
+227
-168
h264dsp_mmx.c
libavcodec/x86/h264dsp_mmx.c
+34
-22
x86util.asm
libavcodec/x86/x86util.asm
+16
-3
No files found.
libavcodec/x86/h264_deblock.asm
View file @
8ad77b65
This diff is collapsed.
Click to expand it.
libavcodec/x86/h264dsp_mmx.c
View file @
8ad77b65
...
@@ -219,10 +219,10 @@ static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40]
...
@@ -219,10 +219,10 @@ static void h264_loop_filter_strength_mmx2( int16_t bS[2][4][4], uint8_t nnz[40]
}
}
#define LF_FUNC(DIR, TYPE, OPT) \
#define LF_FUNC(DIR, TYPE, OPT) \
void ff_
x264_
deblock_ ## DIR ## _ ## TYPE ## _ ## OPT (uint8_t *pix, int stride, \
void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## OPT (uint8_t *pix, int stride, \
int alpha, int beta, int8_t *tc0);
int alpha, int beta, int8_t *tc0);
#define LF_IFUNC(DIR, TYPE, OPT) \
#define LF_IFUNC(DIR, TYPE, OPT) \
void ff_
x264_
deblock_ ## DIR ## _ ## TYPE ## _ ## OPT (uint8_t *pix, int stride, \
void ff_deblock_ ## DIR ## _ ## TYPE ## _ ## OPT (uint8_t *pix, int stride, \
int alpha, int beta);
int alpha, int beta);
LF_FUNC
(
h
,
chroma
,
mmxext
)
LF_FUNC
(
h
,
chroma
,
mmxext
)
...
@@ -234,18 +234,18 @@ LF_FUNC (h, luma, mmxext)
...
@@ -234,18 +234,18 @@ LF_FUNC (h, luma, mmxext)
LF_IFUNC
(
h
,
luma_intra
,
mmxext
)
LF_IFUNC
(
h
,
luma_intra
,
mmxext
)
#if HAVE_YASM && ARCH_X86_32
#if HAVE_YASM && ARCH_X86_32
LF_FUNC
(
v8
,
luma
,
mmxext
)
LF_FUNC
(
v8
,
luma
,
mmxext
)
static
void
ff_
x264_
deblock_v_luma_mmxext
(
uint8_t
*
pix
,
int
stride
,
int
alpha
,
int
beta
,
int8_t
*
tc0
)
static
void
ff_deblock_v_luma_mmxext
(
uint8_t
*
pix
,
int
stride
,
int
alpha
,
int
beta
,
int8_t
*
tc0
)
{
{
if
((
tc0
[
0
]
&
tc0
[
1
])
>=
0
)
if
((
tc0
[
0
]
&
tc0
[
1
])
>=
0
)
ff_
x264_
deblock_v8_luma_mmxext
(
pix
+
0
,
stride
,
alpha
,
beta
,
tc0
);
ff_deblock_v8_luma_mmxext
(
pix
+
0
,
stride
,
alpha
,
beta
,
tc0
);
if
((
tc0
[
2
]
&
tc0
[
3
])
>=
0
)
if
((
tc0
[
2
]
&
tc0
[
3
])
>=
0
)
ff_
x264_
deblock_v8_luma_mmxext
(
pix
+
8
,
stride
,
alpha
,
beta
,
tc0
+
2
);
ff_deblock_v8_luma_mmxext
(
pix
+
8
,
stride
,
alpha
,
beta
,
tc0
+
2
);
}
}
LF_IFUNC
(
v8
,
luma_intra
,
mmxext
)
LF_IFUNC
(
v8
,
luma_intra
,
mmxext
)
static
void
ff_
x264_
deblock_v_luma_intra_mmxext
(
uint8_t
*
pix
,
int
stride
,
int
alpha
,
int
beta
)
static
void
ff_deblock_v_luma_intra_mmxext
(
uint8_t
*
pix
,
int
stride
,
int
alpha
,
int
beta
)
{
{
ff_
x264_
deblock_v8_luma_intra_mmxext
(
pix
+
0
,
stride
,
alpha
,
beta
);
ff_deblock_v8_luma_intra_mmxext
(
pix
+
0
,
stride
,
alpha
,
beta
);
ff_
x264_
deblock_v8_luma_intra_mmxext
(
pix
+
8
,
stride
,
alpha
,
beta
);
ff_deblock_v8_luma_intra_mmxext
(
pix
+
8
,
stride
,
alpha
,
beta
);
}
}
#endif
#endif
...
@@ -253,6 +253,10 @@ LF_FUNC (h, luma, sse2)
...
@@ -253,6 +253,10 @@ LF_FUNC (h, luma, sse2)
LF_IFUNC
(
h
,
luma_intra
,
sse2
)
LF_IFUNC
(
h
,
luma_intra
,
sse2
)
LF_FUNC
(
v
,
luma
,
sse2
)
LF_FUNC
(
v
,
luma
,
sse2
)
LF_IFUNC
(
v
,
luma_intra
,
sse2
)
LF_IFUNC
(
v
,
luma_intra
,
sse2
)
LF_FUNC
(
h
,
luma
,
avx
)
LF_IFUNC
(
h
,
luma_intra
,
avx
)
LF_FUNC
(
v
,
luma
,
avx
)
LF_IFUNC
(
v
,
luma_intra
,
avx
)
/***********************************/
/***********************************/
/* weighted prediction */
/* weighted prediction */
...
@@ -314,15 +318,15 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
...
@@ -314,15 +318,15 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
c
->
h264_idct_add8
=
ff_h264_idct_add8_mmx2
;
c
->
h264_idct_add8
=
ff_h264_idct_add8_mmx2
;
c
->
h264_idct_add16intra
=
ff_h264_idct_add16intra_mmx2
;
c
->
h264_idct_add16intra
=
ff_h264_idct_add16intra_mmx2
;
c
->
h264_v_loop_filter_chroma
=
ff_
x264_
deblock_v_chroma_mmxext
;
c
->
h264_v_loop_filter_chroma
=
ff_deblock_v_chroma_mmxext
;
c
->
h264_h_loop_filter_chroma
=
ff_
x264_
deblock_h_chroma_mmxext
;
c
->
h264_h_loop_filter_chroma
=
ff_deblock_h_chroma_mmxext
;
c
->
h264_v_loop_filter_chroma_intra
=
ff_
x264_
deblock_v_chroma_intra_mmxext
;
c
->
h264_v_loop_filter_chroma_intra
=
ff_deblock_v_chroma_intra_mmxext
;
c
->
h264_h_loop_filter_chroma_intra
=
ff_
x264_
deblock_h_chroma_intra_mmxext
;
c
->
h264_h_loop_filter_chroma_intra
=
ff_deblock_h_chroma_intra_mmxext
;
#if ARCH_X86_32
#if ARCH_X86_32
c
->
h264_v_loop_filter_luma
=
ff_
x264_
deblock_v_luma_mmxext
;
c
->
h264_v_loop_filter_luma
=
ff_deblock_v_luma_mmxext
;
c
->
h264_h_loop_filter_luma
=
ff_
x264_
deblock_h_luma_mmxext
;
c
->
h264_h_loop_filter_luma
=
ff_deblock_h_luma_mmxext
;
c
->
h264_v_loop_filter_luma_intra
=
ff_
x264_
deblock_v_luma_intra_mmxext
;
c
->
h264_v_loop_filter_luma_intra
=
ff_deblock_v_luma_intra_mmxext
;
c
->
h264_h_loop_filter_luma_intra
=
ff_
x264_
deblock_h_luma_intra_mmxext
;
c
->
h264_h_loop_filter_luma_intra
=
ff_deblock_h_luma_intra_mmxext
;
#endif
#endif
c
->
weight_h264_pixels_tab
[
0
]
=
ff_h264_weight_16x16_mmx2
;
c
->
weight_h264_pixels_tab
[
0
]
=
ff_h264_weight_16x16_mmx2
;
c
->
weight_h264_pixels_tab
[
1
]
=
ff_h264_weight_16x8_mmx2
;
c
->
weight_h264_pixels_tab
[
1
]
=
ff_h264_weight_16x8_mmx2
;
...
@@ -360,10 +364,10 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
...
@@ -360,10 +364,10 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
c
->
biweight_h264_pixels_tab
[
4
]
=
ff_h264_biweight_8x4_sse2
;
c
->
biweight_h264_pixels_tab
[
4
]
=
ff_h264_biweight_8x4_sse2
;
#if HAVE_ALIGNED_STACK
#if HAVE_ALIGNED_STACK
c
->
h264_v_loop_filter_luma
=
ff_
x264_
deblock_v_luma_sse2
;
c
->
h264_v_loop_filter_luma
=
ff_deblock_v_luma_sse2
;
c
->
h264_h_loop_filter_luma
=
ff_
x264_
deblock_h_luma_sse2
;
c
->
h264_h_loop_filter_luma
=
ff_deblock_h_luma_sse2
;
c
->
h264_v_loop_filter_luma_intra
=
ff_
x264_
deblock_v_luma_intra_sse2
;
c
->
h264_v_loop_filter_luma_intra
=
ff_deblock_v_luma_intra_sse2
;
c
->
h264_h_loop_filter_luma_intra
=
ff_
x264_
deblock_h_luma_intra_sse2
;
c
->
h264_h_loop_filter_luma_intra
=
ff_deblock_h_luma_intra_sse2
;
#endif
#endif
c
->
h264_idct_add16
=
ff_h264_idct_add16_sse2
;
c
->
h264_idct_add16
=
ff_h264_idct_add16_sse2
;
...
@@ -377,6 +381,14 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
...
@@ -377,6 +381,14 @@ void ff_h264dsp_init_x86(H264DSPContext *c, const int bit_depth)
c
->
biweight_h264_pixels_tab
[
3
]
=
ff_h264_biweight_8x8_ssse3
;
c
->
biweight_h264_pixels_tab
[
3
]
=
ff_h264_biweight_8x8_ssse3
;
c
->
biweight_h264_pixels_tab
[
4
]
=
ff_h264_biweight_8x4_ssse3
;
c
->
biweight_h264_pixels_tab
[
4
]
=
ff_h264_biweight_8x4_ssse3
;
}
}
if
(
mm_flags
&
AV_CPU_FLAG_AVX
)
{
#if HAVE_ALIGNED_STACK
c
->
h264_v_loop_filter_luma
=
ff_deblock_v_luma_avx
;
c
->
h264_h_loop_filter_luma
=
ff_deblock_h_luma_avx
;
c
->
h264_v_loop_filter_luma_intra
=
ff_deblock_v_luma_intra_avx
;
c
->
h264_h_loop_filter_luma_intra
=
ff_deblock_h_luma_intra_avx
;
#endif
}
}
}
}
}
#endif
#endif
...
...
libavcodec/x86/x86util.asm
View file @
8ad77b65
...
@@ -24,16 +24,20 @@
...
@@ -24,16 +24,20 @@
;******************************************************************************
;******************************************************************************
%macro
SBUTTERFLY
4
%macro
SBUTTERFLY
4
%if
avx_enabled
==
0
mova
m%4
,
m%2
mova
m%4
,
m%2
punpckl%1
m%2
,
m%3
punpckl%1
m%2
,
m%3
punpckh%1
m%4
,
m%3
punpckh%1
m%4
,
m%3
%else
punpckh%1
m%4
,
m%2
,
m%3
punpckl%1
m%2
,
m%3
%endif
SWAP
%3
,
%4
SWAP
%3
,
%4
%endmacro
%endmacro
%macro
SBUTTERFLY2
4
%macro
SBUTTERFLY2
4
mova
m%4
,
m%2
punpckl%1
m%4
,
m%2
,
m%3
punpckh%1
m%2
,
m%3
punpckh%1
m%2
,
m%2
,
m%3
punpckl%1
m%4
,
m%3
SWAP
%2
,
%4
,
%3
SWAP
%2
,
%4
,
%3
%endmacro
%endmacro
...
@@ -444,3 +448,12 @@
...
@@ -444,3 +448,12 @@
%macro
PMINUB_MMXEXT
3
; dst, src, ignored
%macro
PMINUB_MMXEXT
3
; dst, src, ignored
pminub
%1
,
%2
pminub
%1
,
%2
%endmacro
%endmacro
%macro
SPLATW
2
-
3
0
%if
mmsize
==
16
pshuflw
%1
,
%2
,
(
%3
)
*
0x55
punpcklqdq
%1
,
%1
%else
pshufw
%1
,
%2
,
(
%3
)
*
0x55
%endif
%endmacro
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment