Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
f99a5ef9
Commit
f99a5ef9
authored
Jun 30, 2011
by
Justin Ruggles
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
ac3dsp: add x86-optimized versions of ac3dsp.extract_exponents().
parent
8b7b2d6a
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
111 additions
and
0 deletions
+111
-0
ac3dsp.asm
libavcodec/x86/ac3dsp.asm
+102
-0
ac3dsp_mmx.c
libavcodec/x86/ac3dsp_mmx.c
+9
-0
No files found.
libavcodec/x86/ac3dsp.asm
View file @
f99a5ef9
...
@@ -32,6 +32,11 @@ cextern ac3_bap_bits
...
@@ -32,6 +32,11 @@ cextern ac3_bap_bits
pw_bap_mul1
:
dw
21846
,
21846
,
0
,
32768
,
21846
,
21846
,
0
,
32768
pw_bap_mul1
:
dw
21846
,
21846
,
0
,
32768
,
21846
,
21846
,
0
,
32768
pw_bap_mul2
:
dw
5
,
7
,
0
,
7
,
5
,
7
,
0
,
7
pw_bap_mul2
:
dw
5
,
7
,
0
,
7
,
5
,
7
,
0
,
7
; used in ff_ac3_extract_exponents()
; pd_1: OR-ed into each doubled |coef| before the int->float conversion
pd_1:   times 4 dd 1
; pd_151: the float exponent field (value >> 23) is subtracted from this
pd_151: times 4 dd 151
; pb_shuf_4dwb: byte-shuffle control that picks byte 0 of each of 4 dwords
pb_shuf_4dwb: db 0, 4, 8, 12
SECTION
.
text
SECTION
.
text
;-----------------------------------------------------------------------------
;-----------------------------------------------------------------------------
...
@@ -346,3 +351,100 @@ cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum
...
@@ -346,3 +351,100 @@ cglobal ac3_compute_mantissa_size_sse2, 1,2,4, mant_cnt, sum
movd
eax
,
m0
movd
eax
,
m0
add
eax
,
sumd
add
eax
,
sumd
RET
RET
;------------------------------------------------------------------------------
; void ff_ac3_extract_exponents(uint8_t *exp, int32_t *coef, int nb_coefs)
;------------------------------------------------------------------------------
; PABSD_MMX reg, tmp
; Per-dword absolute value for instruction sets that lack pabsd.
;   %1 = source and destination
;   %2 = scratch; holds the per-dword sign mask (-1 or 0) on exit
%macro PABSD_MMX 2 ; src/dst, tmp
    mova    %2, %1
    psrad   %2, 31          ; tmp = (x < 0) ? -1 : 0
    pxor    %1, %2          ; flip the bits of the negative lanes ...
    psubd   %1, %2          ; ... and add 1 to them: two's-complement negate
%endmacro

; PABSD_SSSE3 reg [, ignored]
; Per-dword absolute value using the native pabsd instruction.
; The optional second operand is accepted so that call sites can be written
; the same way as for PABSD_MMX; it is never read or written.
%macro PABSD_SSSE3 1-2 ; src/dst, unused
    pabsd   %1, %1
%endmacro
%ifdef HAVE_AMD3DNOW
INIT_MMX
; 3DNow! variant: two coefficients per MMX register, four exponents stored
; per loop iteration.
;   exp  = output byte array
;   coef = input array of 32-bit coefficients
;   len  = element count; the loop advances by 4 and always runs at least once
cglobal ac3_extract_exponents_3dnow, 3,3,0, exp, coef, len
    ; Point both arrays at their ends and walk a negative index up to zero.
    lea     coefq, [coefq+4*lenq]
    add     expq, lenq
    neg     lenq
    movq    m4, [pd_151]
    movq    m3, [pd_1]
.loop:
    ; coefs 0-1: m0 = 151 - (float(2*|coef| + 1) >> 23)
    movq    m0, [coefq+4*lenq]
    PABSD_MMX m0, m2
    pslld   m0, 1
    por     m0, m3
    pi2fd   m2, m0
    psrld   m2, 23
    movq    m0, m4
    psubd   m0, m2

    ; coefs 2-3: same computation into m1
    movq    m1, [coefq+4*lenq+8]
    PABSD_MMX m1, m5
    pslld   m1, 1
    por     m1, m3
    pi2fd   m5, m1
    psrld   m5, 23
    movq    m1, m4
    psubd   m1, m5

    ; Narrow the four dword results to four bytes with saturation
    ; (signed to words, then unsigned to bytes) and store them.
    packssdw m0, m1
    packuswb m0, m0
    movd    [expq+lenq], m0

    add     lenq, 4
    jl .loop
    REP_RET
%endif
; AC3_EXTRACT_EXPONENTS suffix
; Emits ac3_extract_exponents_<suffix>(exp, coef, len).
;   exp  = output byte array, one exponent per coefficient
;   coef = input array of 32-bit coefficients, read with mova
;          (presumably requires 16-byte alignment - confirm against callers)
;   len  = element count; the loop advances by 4 and always runs at least once
; PABSD must be %define'd to PABSD_MMX or PABSD_SSSE3 before invoking this.
; Each exponent is 151 - (float(2*|coef| + 1) >> 23), clipped to 0..255.
%macro AC3_EXTRACT_EXPONENTS 1
cglobal ac3_extract_exponents_%1, 3,3,4, exp, coef, len
    ; Point both arrays at their ends and walk a negative index up to zero.
    add     expq, lenq
    lea     coefq, [coefq+4*lenq]
    neg     lenq
    mova    m2, [pd_1]
    mova    m3, [pd_151]
.loop:
    ; move 4 32-bit coefs to xmm0
    mova    m0, [coefq+4*lenq]
    ; absolute value
    PABSD   m0, m1
    ; convert to float and extract exponents
    pslld   m0, 1
    por     m0, m2
    cvtdq2ps m1, m0
    psrld   m1, 23
    mova    m0, m3
    psubd   m0, m1
    ; move the lowest byte in each of 4 dwords to the low dword
    ; NOTE: the low bytes must not simply be picked out with pshufb.
    ;       cvtdq2ps can round 2*|coef|+1 up to the next power of two
    ;       (e.g. |coef| = 16777215), which makes the dword result -1.
    ;       The saturating packs clip that to 0; a plain byte extract
    ;       would store 255 instead.
    packssdw m0, m0
    packuswb m0, m0
    movd    [expq+lenq], m0

    add     lenq, 4
    jl .loop
    REP_RET
%endmacro
; Instantiate the XMM versions of ac3_extract_exponents. PABSD is re-pointed
; before each expansion so the macro body picks up the right abs() helper.
%ifdef HAVE_SSE
INIT_XMM
; sse2: no pabsd instruction, use the multi-instruction PABSD_MMX emulation
%define PABSD PABSD_MMX
AC3_EXTRACT_EXPONENTS sse2
%ifdef HAVE_SSSE3
; ssse3: PABSD maps to the native pabsd instruction
%define PABSD PABSD_SSSE3
AC3_EXTRACT_EXPONENTS ssse3
%endif
%endif
libavcodec/x86/ac3dsp_mmx.c
View file @
f99a5ef9
...
@@ -44,6 +44,10 @@ extern void ff_float_to_fixed24_sse2 (int32_t *dst, const float *src, unsigned i
...
@@ -44,6 +44,10 @@ extern void ff_float_to_fixed24_sse2 (int32_t *dst, const float *src, unsigned i
extern
int
ff_ac3_compute_mantissa_size_sse2
(
uint16_t
mant_cnt
[
6
][
16
]);
extern
int
ff_ac3_compute_mantissa_size_sse2
(
uint16_t
mant_cnt
[
6
][
16
]);
/* x86 versions of extract_exponents(); ff_ac3dsp_init_x86() below installs
 * them into AC3DSPContext.extract_exponents according to the CPU flags.
 * Presumably these are the routines emitted in ac3dsp.asm - confirm. */
extern void ff_ac3_extract_exponents_3dnow(uint8_t *exp, int32_t *coef, int nb_coefs);
extern void ff_ac3_extract_exponents_sse2 (uint8_t *exp, int32_t *coef, int nb_coefs);
extern void ff_ac3_extract_exponents_ssse3(uint8_t *exp, int32_t *coef, int nb_coefs);
av_cold
void
ff_ac3dsp_init_x86
(
AC3DSPContext
*
c
,
int
bit_exact
)
av_cold
void
ff_ac3dsp_init_x86
(
AC3DSPContext
*
c
,
int
bit_exact
)
{
{
int
mm_flags
=
av_get_cpu_flags
();
int
mm_flags
=
av_get_cpu_flags
();
...
@@ -56,6 +60,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
...
@@ -56,6 +60,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
c
->
ac3_rshift_int32
=
ff_ac3_rshift_int32_mmx
;
c
->
ac3_rshift_int32
=
ff_ac3_rshift_int32_mmx
;
}
}
if
(
mm_flags
&
AV_CPU_FLAG_3DNOW
&&
HAVE_AMD3DNOW
)
{
if
(
mm_flags
&
AV_CPU_FLAG_3DNOW
&&
HAVE_AMD3DNOW
)
{
c
->
extract_exponents
=
ff_ac3_extract_exponents_3dnow
;
if
(
!
bit_exact
)
{
if
(
!
bit_exact
)
{
c
->
float_to_fixed24
=
ff_float_to_fixed24_3dnow
;
c
->
float_to_fixed24
=
ff_float_to_fixed24_3dnow
;
}
}
...
@@ -72,6 +77,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
...
@@ -72,6 +77,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
c
->
ac3_max_msb_abs_int16
=
ff_ac3_max_msb_abs_int16_sse2
;
c
->
ac3_max_msb_abs_int16
=
ff_ac3_max_msb_abs_int16_sse2
;
c
->
float_to_fixed24
=
ff_float_to_fixed24_sse2
;
c
->
float_to_fixed24
=
ff_float_to_fixed24_sse2
;
c
->
compute_mantissa_size
=
ff_ac3_compute_mantissa_size_sse2
;
c
->
compute_mantissa_size
=
ff_ac3_compute_mantissa_size_sse2
;
c
->
extract_exponents
=
ff_ac3_extract_exponents_sse2
;
if
(
!
(
mm_flags
&
AV_CPU_FLAG_SSE2SLOW
))
{
if
(
!
(
mm_flags
&
AV_CPU_FLAG_SSE2SLOW
))
{
c
->
ac3_lshift_int16
=
ff_ac3_lshift_int16_sse2
;
c
->
ac3_lshift_int16
=
ff_ac3_lshift_int16_sse2
;
c
->
ac3_rshift_int32
=
ff_ac3_rshift_int32_sse2
;
c
->
ac3_rshift_int32
=
ff_ac3_rshift_int32_sse2
;
...
@@ -79,6 +85,9 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
...
@@ -79,6 +85,9 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
}
}
if
(
mm_flags
&
AV_CPU_FLAG_SSSE3
&&
HAVE_SSSE3
)
{
if
(
mm_flags
&
AV_CPU_FLAG_SSSE3
&&
HAVE_SSSE3
)
{
c
->
ac3_max_msb_abs_int16
=
ff_ac3_max_msb_abs_int16_ssse3
;
c
->
ac3_max_msb_abs_int16
=
ff_ac3_max_msb_abs_int16_ssse3
;
if
(
!
(
mm_flags
&
AV_CPU_FLAG_ATOM
))
{
c
->
extract_exponents
=
ff_ac3_extract_exponents_ssse3
;
}
}
}
#endif
#endif
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment