Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
942e22c6
Commit
942e22c6
authored
Jun 16, 2014
by
plepere
Committed by
Michael Niedermayer
Jun 25, 2014
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
avcodec/x86/hevc: add avx2 dc idct
Signed-off-by:
Michael Niedermayer
<
michaelni@gmx.at
>
parent
a30f1b15
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
72 additions
and
3 deletions
+72
-3
hevc_idct.asm
libavcodec/x86/hevc_idct.asm
+48
-3
hevcdsp.h
libavcodec/x86/hevcdsp.h
+6
-0
hevcdsp_init.c
libavcodec/x86/hevcdsp_init.c
+18
-0
No files found.
libavcodec/x86/hevc_idct.asm
View file @
942e22c6
...
@@ -20,12 +20,12 @@
...
@@ -20,12 +20,12 @@
; */
; */
%include
"libavutil/x86/x86util.asm"
%include
"libavutil/x86/x86util.asm"
SECTION_RODATA
SECTION_RODATA
32
max_pixels_10
:
times
8
dw
((
1
<<
10
)
-
1
)
max_pixels_10
:
times
16
dw
((
1
<<
10
)
-
1
)
dc_add_10
:
times
4
dd
((
1
<<
14
-
10
)
+
1
)
dc_add_10
:
times
4
dd
((
1
<<
14
-
10
)
+
1
)
SECTION
.
text
SECTION
_TEXT
32
;the idct_dc_add macros and functions were largely inspired by x264 project's code in the h264_idct.asm file
;the idct_dc_add macros and functions were largely inspired by x264 project's code in the h264_idct.asm file
...
@@ -41,6 +41,18 @@ SECTION .text
...
@@ -41,6 +41,18 @@ SECTION .text
packuswb
m1
,
m1
packuswb
m1
,
m1
%endmacro
%endmacro
;-----------------------------------------------------------------------------
; DC_ADD_INIT_AVX2 coef, stride
;
; %1 (in)  : general-purpose register whose low 16 bits hold the signed DC
;            coefficient.
; %1 (out) : 3 * stride (the coefficient is no longer needed after the
;            broadcast, so the register is reused).
; %2       : general-purpose register holding the stride; not modified.
;
; Result:
;   m0 = dc            packed to unsigned bytes (negative values saturate to 0)
;   m1 = -dc           packed to unsigned bytes (negative values saturate to 0)
;   where dc = (coef + (1 << (14 - 8)) + 1) >> (15 - 8), i.e. 8-bit rounding.
;
; Clobbers: m0, m1, flags.
;
; The rounding is done on the 32-bit register view: a 16-bit add would wrap
; for coef > 32702 and flip the sign of the resulting DC value.
; NOTE(review): m0/m1 are presumably consumed by DC_ADD_OP as the values to
; add/subtract with saturation - confirm against the DC_ADD_OP definition.
;-----------------------------------------------------------------------------
%macro DC_ADD_INIT_AVX2 2
    movsx             %1d, %1w                      ; widen so the add below cannot wrap
    add               %1d, ((1 << (14 - 8)) + 1)    ; rounding offset for 8-bit depth
    sar               %1d, (15 - 8)
    movd              xm0, %1d
    vpbroadcastw       m0, xm0                      ;SPLATW
    lea                %1, [%2*3]                   ; %1 = 3 * stride
    pxor               m1, m1
    psubw              m1, m0                       ; m1 = -dc (words)
    packuswb           m0, m0                       ; words -> saturated unsigned bytes
    packuswb           m1, m1
%endmacro
%macro
DC_ADD_OP
4
%macro
DC_ADD_OP
4
%1
m2
,
[
%2
]
%1
m2
,
[
%2
]
%1
m3
,
[
%2
+
%3
]
%1
m3
,
[
%2
+
%3
]
...
@@ -112,6 +124,19 @@ cglobal hevc_idct16_dc_add_8, 3, 4, 0
...
@@ -112,6 +124,19 @@ cglobal hevc_idct16_dc_add_8, 3, 4, 0
DC_ADD_OP
mova
,
r0
,
r2
,
r3
DC_ADD_OP
mova
,
r0
,
r2
,
r3
RET
RET
%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
;-----------------------------------------------------------------------------
; void ff_hevc_idct32_dc_add_8_avx2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
;
; In:    r0 = dst, r1 = coeffs, r2 = stride
; Uses:  r3 as scratch (DC coefficient, then 3 * stride after
;        DC_ADD_INIT_AVX2); six vector registers are declared to cglobal.
;
; Only coeffs[0] is read. DC_ADD_OP is invoked eight times, with dst advanced
; by 4 * stride between invocations.
; NOTE(review): DC_ADD_OP is defined elsewhere in this file; it presumably
; handles the four rows dst, dst+stride, dst+2*stride and dst+3*stride per
; invocation (8 * 4 = 32 rows) - confirm against its definition.
; NOTE(review): mova with ymm registers assumes dst rows are 32-byte
; aligned - confirm with the callers.
;-----------------------------------------------------------------------------
cglobal hevc_idct32_dc_add_8, 3, 4, 6
    movsx              r3, word [r1]        ; r3 = sign-extended coeffs[0]
    DC_ADD_INIT_AVX2   r3, r2               ; m0/m1 = +/- dc bytes, r3 = 3 * stride
    DC_ADD_OP        mova, r0, r2, r3       ; exactly four arguments: no trailing comma
%rep 7
    lea                r0, [r0+r2*4]        ; dst += 4 * stride
    DC_ADD_OP        mova, r0, r2, r3
%endrep
    RET
%endif ;HAVE_AVX2_EXTERNAL
;-----------------------------------------------------------------------------
;-----------------------------------------------------------------------------
; void ff_hevc_idct_dc_add_10(pixel *dst, int16_t *block, int stride)
; void ff_hevc_idct_dc_add_10(pixel *dst, int16_t *block, int stride)
;-----------------------------------------------------------------------------
;-----------------------------------------------------------------------------
...
@@ -178,3 +203,23 @@ IDCT8_DC_ADD
...
@@ -178,3 +203,23 @@ IDCT8_DC_ADD
INIT_XMM
avx
INIT_XMM
avx
IDCT8_DC_ADD
IDCT8_DC_ADD
%endif
%endif
%if HAVE_AVX2_EXTERNAL
INIT_YMM avx2
;-----------------------------------------------------------------------------
; void ff_hevc_idct16_dc_add_10_avx2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
;
; In:    r0 = dst, r1 = coeffs, r2 = stride
; Uses:  r1 is reused as 3 * stride once coeffs[0] has been loaded;
;        m0 = broadcast DC word, m6 = max_pixels_10 (32-byte load).
;
; dc = (coeffs[0] + (1 << 4) + 1) >> 5, i.e. 10-bit rounding. The arithmetic
; is done on the 32-bit register view: a 16-bit add would wrap for
; coeffs[0] > 32750 and flip the sign of the DC value.
;
; IDCT_DC_ADD_OP_10 is invoked four times, with dst advanced by 4 * stride
; between invocations.
; NOTE(review): IDCT_DC_ADD_OP_10 is defined elsewhere in this file; it
; presumably adds m0 to four rows and clamps against m6 - confirm against its
; definition.
;-----------------------------------------------------------------------------
cglobal hevc_idct16_dc_add_10, 3, 4, 7
    movsx              r1d, word [r1]       ; sign-extended coeffs[0]
    add                r1d, ((1 << 4) + 1)  ; rounding offset for 10-bit depth
    sar                r1d, 5
    movd               xm0, r1d
    lea                 r1, [r2*3]          ; r1 = 3 * stride (coefficient already in xm0)
    vpbroadcastw        m0, xm0             ;SPLATW
    mova                m6, [max_pixels_10]
    IDCT_DC_ADD_OP_10   r0, r2, r1
%rep 3
    lea                 r0, [r0+r2*4]       ; dst += 4 * stride
    IDCT_DC_ADD_OP_10   r0, r2, r1
%endrep
    RET
%endif ;HAVE_AVX2_EXTERNAL
libavcodec/x86/hevcdsp.h
View file @
942e22c6
...
@@ -133,6 +133,8 @@ idct_dc_proto(8, 8,mmxext);
...
@@ -133,6 +133,8 @@ idct_dc_proto(8, 8,mmxext);
idct_dc_proto
(
16
,
8
,
sse2
);
idct_dc_proto
(
16
,
8
,
sse2
);
idct_dc_proto
(
32
,
8
,
sse2
);
idct_dc_proto
(
32
,
8
,
sse2
);
idct_dc_proto
(
32
,
8
,
avx2
);
idct_dc_proto
(
4
,
10
,
mmxext
);
idct_dc_proto
(
4
,
10
,
mmxext
);
idct_dc_proto
(
8
,
10
,
sse2
);
idct_dc_proto
(
8
,
10
,
sse2
);
...
@@ -142,6 +144,10 @@ idct_dc_proto(8, 10, avx);
...
@@ -142,6 +144,10 @@ idct_dc_proto(8, 10, avx);
idct_dc_proto
(
16
,
10
,
avx
);
idct_dc_proto
(
16
,
10
,
avx
);
idct_dc_proto
(
32
,
10
,
avx
);
idct_dc_proto
(
32
,
10
,
avx
);
idct_dc_proto
(
16
,
10
,
avx2
);
idct_dc_proto
(
32
,
10
,
avx2
);
...
...
libavcodec/x86/hevcdsp_init.c
View file @
942e22c6
...
@@ -92,6 +92,17 @@ void ff_hevc_idct32_dc_add_10_avx(uint8_t *dst, int16_t *coeffs, ptrdiff_t strid
...
@@ -92,6 +92,17 @@ void ff_hevc_idct32_dc_add_10_avx(uint8_t *dst, int16_t *coeffs, ptrdiff_t strid
}
}
#endif //HAVE_AVX_EXTERNAL
#endif //HAVE_AVX_EXTERNAL
#if HAVE_AVX2_EXTERNAL
/*
 * 32x32 DC add for 10-bit content, built from the 16x16 AVX2 routine.
 *
 * The 16x16 routine is called once per quadrant, in the order top-left,
 * top-right, bottom-left, bottom-right. Quadrants are 32 bytes apart
 * horizontally and 16 * stride bytes apart vertically; every call receives
 * the same coeffs pointer and stride.
 */
void ff_hevc_idct32_dc_add_10_avx2(uint8_t *dst, int16_t *coeffs, ptrdiff_t stride)
{
    int row, col;

    for (row = 0; row < 2; row++) {
        uint8_t *line = dst + row * 16 * stride;

        for (col = 0; col < 2; col++)
            ff_hevc_idct16_dc_add_10_avx2(line + col * 32, coeffs, stride);
    }
}
#endif //HAVE_AVX2_EXTERNAL
#define mc_rep_func(name, bitd, step, W, opt) \
#define mc_rep_func(name, bitd, step, W, opt) \
void ff_hevc_put_hevc_##name##W##_##bitd##_##opt(int16_t *_dst, ptrdiff_t dststride, \
void ff_hevc_put_hevc_##name##W##_##bitd##_##opt(int16_t *_dst, ptrdiff_t dststride, \
uint8_t *_src, ptrdiff_t _srcstride, int height, \
uint8_t *_src, ptrdiff_t _srcstride, int height, \
...
@@ -438,6 +449,9 @@ void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
...
@@ -438,6 +449,9 @@ void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
QPEL_LINKS
(
c
->
put_hevc_qpel
,
1
,
0
,
qpel_v
,
8
,
sse4
);
QPEL_LINKS
(
c
->
put_hevc_qpel
,
1
,
0
,
qpel_v
,
8
,
sse4
);
QPEL_LINKS
(
c
->
put_hevc_qpel
,
1
,
1
,
qpel_hv
,
8
,
sse4
);
QPEL_LINKS
(
c
->
put_hevc_qpel
,
1
,
1
,
qpel_hv
,
8
,
sse4
);
}
}
if
(
EXTERNAL_AVX2
(
mm_flags
))
{
c
->
transform_dc_add
[
3
]
=
ff_hevc_idct32_dc_add_8_avx2
;
}
}
else
if
(
bit_depth
==
10
)
{
}
else
if
(
bit_depth
==
10
)
{
if
(
EXTERNAL_MMXEXT
(
mm_flags
))
{
if
(
EXTERNAL_MMXEXT
(
mm_flags
))
{
c
->
transform_dc_add
[
0
]
=
ff_hevc_idct4_dc_add_10_mmxext
;
c
->
transform_dc_add
[
0
]
=
ff_hevc_idct4_dc_add_10_mmxext
;
...
@@ -473,6 +487,10 @@ void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
...
@@ -473,6 +487,10 @@ void ff_hevcdsp_init_x86(HEVCDSPContext *c, const int bit_depth)
c
->
transform_dc_add
[
2
]
=
ff_hevc_idct16_dc_add_10_avx
;
c
->
transform_dc_add
[
2
]
=
ff_hevc_idct16_dc_add_10_avx
;
c
->
transform_dc_add
[
3
]
=
ff_hevc_idct32_dc_add_10_avx
;
c
->
transform_dc_add
[
3
]
=
ff_hevc_idct32_dc_add_10_avx
;
}
}
if
(
EXTERNAL_AVX2
(
mm_flags
))
{
c
->
transform_dc_add
[
2
]
=
ff_hevc_idct16_dc_add_10_avx2
;
c
->
transform_dc_add
[
3
]
=
ff_hevc_idct32_dc_add_10_avx2
;
}
}
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment