Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
e6e98234
Commit
e6e98234
authored
Mar 20, 2011
by
Justin Ruggles
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add apply_window_int16() to DSPContext with x86-optimized versions and use it
in the ac3_fixed encoder.
parent
e971d813
Hide whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
210 additions
and
36 deletions
+210
-36
ac3enc.c
libavcodec/ac3enc.c
+1
-1
ac3enc_fixed.c
libavcodec/ac3enc_fixed.c
+2
-8
ac3enc_float.c
libavcodec/ac3enc_float.c
+2
-2
ac3tab.c
libavcodec/ac3tab.c
+1
-1
dsputil.c
libavcodec/dsputil.c
+14
-0
dsputil.h
libavcodec/dsputil.h
+14
-0
dsputil_mmx.c
libavcodec/x86/dsputil_mmx.c
+38
-2
dsputil_yasm.asm
libavcodec/x86/dsputil_yasm.asm
+126
-0
ac3_fixed
tests/ref/acodec/ac3_fixed
+1
-1
rm
tests/ref/lavf/rm
+1
-1
ac3_rm
tests/ref/seek/ac3_rm
+10
-20
No files found.
libavcodec/ac3enc.c
View file @
e6e98234
...
...
@@ -167,7 +167,7 @@ static av_cold int mdct_init(AVCodecContext *avctx, AC3MDCTContext *mdct,
static
void
mdct512
(
AC3MDCTContext
*
mdct
,
CoefType
*
out
,
SampleType
*
in
);
static
void
apply_window
(
DSPContext
*
dsp
,
SampleType
*
output
,
const
SampleType
*
input
,
const
SampleType
*
window
,
int
n
);
const
SampleType
*
window
,
unsigned
int
le
n
);
static
int
normalize_samples
(
AC3EncodeContext
*
s
);
...
...
libavcodec/ac3enc_fixed.c
View file @
e6e98234
...
...
@@ -252,15 +252,9 @@ static void mdct512(AC3MDCTContext *mdct, int32_t *out, int16_t *in)
* Apply KBD window to input samples prior to MDCT.
*/
static
void
apply_window
(
DSPContext
*
dsp
,
int16_t
*
output
,
const
int16_t
*
input
,
const
int16_t
*
window
,
int
n
)
const
int16_t
*
window
,
unsigned
int
le
n
)
{
int
i
;
int
n2
=
n
>>
1
;
for
(
i
=
0
;
i
<
n2
;
i
++
)
{
output
[
i
]
=
MUL16
(
input
[
i
],
window
[
i
])
>>
15
;
output
[
n
-
i
-
1
]
=
MUL16
(
input
[
n
-
i
-
1
],
window
[
i
])
>>
15
;
}
dsp
->
apply_window_int16
(
output
,
input
,
window
,
len
);
}
...
...
libavcodec/ac3enc_float.c
View file @
e6e98234
...
...
@@ -83,9 +83,9 @@ static void mdct512(AC3MDCTContext *mdct, float *out, float *in)
* Apply KBD window to input samples prior to MDCT.
*/
static
void
apply_window
(
DSPContext
*
dsp
,
float
*
output
,
const
float
*
input
,
const
float
*
window
,
int
n
)
const
float
*
window
,
unsigned
int
le
n
)
{
dsp
->
vector_fmul
(
output
,
input
,
window
,
n
);
dsp
->
vector_fmul
(
output
,
input
,
window
,
le
n
);
}
...
...
libavcodec/ac3tab.c
View file @
e6e98234
...
...
@@ -141,7 +141,7 @@ const uint8_t ff_ac3_rematrix_band_tab[5] = { 13, 25, 37, 61, 253 };
/* AC-3 MDCT window */
/* MDCT window */
const
int16_t
ff_ac3_window
[
AC3_WINDOW_SIZE
/
2
]
=
{
DECLARE_ALIGNED
(
16
,
const
int16_t
,
ff_ac3_window
)
[
AC3_WINDOW_SIZE
/
2
]
=
{
4
,
7
,
12
,
16
,
21
,
28
,
34
,
42
,
51
,
61
,
72
,
84
,
97
,
111
,
127
,
145
,
164
,
184
,
207
,
231
,
257
,
285
,
315
,
347
,
...
...
libavcodec/dsputil.c
View file @
e6e98234
...
...
@@ -3890,6 +3890,19 @@ static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, co
return
res
;
}
/**
 * Apply a symmetric window to 16-bit samples (plain C version).
 *
 * The first len/2 samples are multiplied by window[0 .. len/2-1] in order;
 * the last len/2 samples are multiplied by the same coefficients in reverse
 * order. Each product has (1 << 14) added before it is shifted right by 15.
 * When len is odd the middle sample is not written.
 *
 * @param output destination array, indexed up to len-1
 * @param input  source array, indexed up to len-1
 * @param window window coefficients, len/2 elements are read
 * @param len    full window length
 */
static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len)
{
    const int half = len >> 1;
    int head;

    for (head = 0; head < half; head++) {
        /* index of the sample mirrored around the centre of the window */
        const unsigned int tail = len - head - 1;
        const int16_t coef      = window[head];

        output[head] = (MUL16(input[head], coef) + (1 << 14)) >> 15;
        output[tail] = (MUL16(input[tail], coef) + (1 << 14)) >> 15;
    }
}
#define W0 2048
#define W1 2841
/* 2048*sqrt (2)*cos (1*pi/16) */
#define W2 2676
/* 2048*sqrt (2)*cos (2*pi/16) */
...
...
@@ -4364,6 +4377,7 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
c
->
vector_clipf
=
vector_clipf_c
;
c
->
scalarproduct_int16
=
scalarproduct_int16_c
;
c
->
scalarproduct_and_madd_int16
=
scalarproduct_and_madd_int16_c
;
c
->
apply_window_int16
=
apply_window_int16_c
;
c
->
scalarproduct_float
=
scalarproduct_float_c
;
c
->
butterflies_float
=
butterflies_float_c
;
c
->
vector_fmul_scalar
=
vector_fmul_scalar_c
;
...
...
libavcodec/dsputil.h
View file @
e6e98234
...
...
@@ -524,6 +524,20 @@ typedef struct DSPContext {
*/
int32_t
(
*
scalarproduct_and_madd_int16
)(
int16_t
*
v1
/*align 16*/
,
const
int16_t
*
v2
,
const
int16_t
*
v3
,
int
len
,
int
mul
);
/**
* Apply symmetric window in 16-bit fixed-point.
* @param output destination array
* constraints: 16-byte aligned
* @param input source array
* constraints: 16-byte aligned
* @param window window array
* constraints: 16-byte aligned, at least len/2 elements
* @param len full window length
* constraints: multiple of 16 (the SSE2/SSSE3 loops consume 16 bytes of each half per iteration), greater than zero
*/
void
(
*
apply_window_int16
)(
int16_t
*
output
,
const
int16_t
*
input
,
const
int16_t
*
window
,
unsigned
int
len
);
/* rv30 functions */
qpel_mc_func
put_rv30_tpel_pixels_tab
[
4
][
16
];
qpel_mc_func
avg_rv30_tpel_pixels_tab
[
4
][
16
];
...
...
libavcodec/x86/dsputil_mmx.c
View file @
e6e98234
...
...
@@ -2388,6 +2388,20 @@ int32_t ff_scalarproduct_int16_sse2(const int16_t *v1, const int16_t *v2, int or
int32_t
ff_scalarproduct_and_madd_int16_mmx2
(
int16_t
*
v1
,
const
int16_t
*
v2
,
const
int16_t
*
v3
,
int
order
,
int
mul
);
int32_t
ff_scalarproduct_and_madd_int16_sse2
(
int16_t
*
v1
,
const
int16_t
*
v2
,
const
int16_t
*
v3
,
int
order
,
int
mul
);
int32_t
ff_scalarproduct_and_madd_int16_ssse3
(
int16_t
*
v1
,
const
int16_t
*
v2
,
const
int16_t
*
v3
,
int
order
,
int
mul
);
/*
 * Prototypes for the apply_window_int16 implementations that
 * dsputil_init_mmx() installs into DSPContext.apply_window_int16.
 * The "_ba" variants are the ones chosen when CODEC_FLAG_BITEXACT is set.
 */
void ff_apply_window_int16_mmxext    (int16_t *output, const int16_t *input,
                                      const int16_t *window, unsigned int len);
void ff_apply_window_int16_mmxext_ba (int16_t *output, const int16_t *input,
                                      const int16_t *window, unsigned int len);
void ff_apply_window_int16_sse2      (int16_t *output, const int16_t *input,
                                      const int16_t *window, unsigned int len);
void ff_apply_window_int16_sse2_ba   (int16_t *output, const int16_t *input,
                                      const int16_t *window, unsigned int len);
void ff_apply_window_int16_ssse3     (int16_t *output, const int16_t *input,
                                      const int16_t *window, unsigned int len);
void ff_apply_window_int16_ssse3_atom(int16_t *output, const int16_t *input,
                                      const int16_t *window, unsigned int len);
void
ff_add_hfyu_median_prediction_mmx2
(
uint8_t
*
dst
,
const
uint8_t
*
top
,
const
uint8_t
*
diff
,
int
w
,
int
*
left
,
int
*
left_top
);
int
ff_add_hfyu_left_prediction_ssse3
(
uint8_t
*
dst
,
const
uint8_t
*
src
,
int
w
,
int
left
);
int
ff_add_hfyu_left_prediction_sse4
(
uint8_t
*
dst
,
const
uint8_t
*
src
,
int
w
,
int
left
);
...
...
@@ -2749,6 +2763,11 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
#if HAVE_YASM
c
->
scalarproduct_int16
=
ff_scalarproduct_int16_mmx2
;
c
->
scalarproduct_and_madd_int16
=
ff_scalarproduct_and_madd_int16_mmx2
;
if
(
avctx
->
flags
&
CODEC_FLAG_BITEXACT
)
{
c
->
apply_window_int16
=
ff_apply_window_int16_mmxext_ba
;
}
else
{
c
->
apply_window_int16
=
ff_apply_window_int16_mmxext
;
}
#endif
}
if
(
mm_flags
&
AV_CPU_FLAG_SSE
){
...
...
@@ -2771,13 +2790,30 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
#if HAVE_YASM
c
->
scalarproduct_int16
=
ff_scalarproduct_int16_sse2
;
c
->
scalarproduct_and_madd_int16
=
ff_scalarproduct_and_madd_int16_sse2
;
if
(
avctx
->
flags
&
CODEC_FLAG_BITEXACT
)
{
c
->
apply_window_int16
=
ff_apply_window_int16_sse2_ba
;
}
else
{
if
(
!
(
mm_flags
&
AV_CPU_FLAG_SSE2SLOW
))
{
c
->
apply_window_int16
=
ff_apply_window_int16_sse2
;
}
}
c
->
emulated_edge_mc
=
emulated_edge_mc_sse
;
c
->
gmc
=
gmc_sse
;
#endif
}
if
((
mm_flags
&
AV_CPU_FLAG_SSSE3
)
&&
!
(
mm_flags
&
(
AV_CPU_FLAG_SSE42
|
AV_CPU_FLAG_3DNOW
))
&&
HAVE_YASM
)
// cachesplit
c
->
scalarproduct_and_madd_int16
=
ff_scalarproduct_and_madd_int16_ssse3
;
if
(
mm_flags
&
AV_CPU_FLAG_SSSE3
)
{
#if HAVE_YASM
if
(
mm_flags
&
AV_CPU_FLAG_ATOM
)
{
c
->
apply_window_int16
=
ff_apply_window_int16_ssse3_atom
;
}
else
{
c
->
apply_window_int16
=
ff_apply_window_int16_ssse3
;
}
if
(
!
(
mm_flags
&
(
AV_CPU_FLAG_SSE42
|
AV_CPU_FLAG_3DNOW
)))
{
// cachesplit
c
->
scalarproduct_and_madd_int16
=
ff_scalarproduct_and_madd_int16_ssse3
;
}
#endif
}
}
if
(
CONFIG_ENCODERS
)
...
...
libavcodec/x86/dsputil_yasm.asm
View file @
e6e98234
...
...
@@ -27,6 +27,8 @@ pb_zzzzzzzz77777777: times 8 db -1
pb_7
:
times
8
db
7
pb_zzzz3333zzzzbbbb
:
db
-
1
,
-
1
,
-
1
,
-
1
,
3
,
3
,
3
,
3
,
-
1
,
-
1
,
-
1
,
-
1
,
11
,
11
,
11
,
11
pb_zz11zz55zz99zzdd
:
db
-
1
,
-
1
,
1
,
1
,
-
1
,
-
1
,
5
,
5
,
-
1
,
-
1
,
9
,
9
,
-
1
,
-
1
,
13
,
13
pb_revwords
:
db
14
,
15
,
12
,
13
,
10
,
11
,
8
,
9
,
6
,
7
,
4
,
5
,
2
,
3
,
0
,
1
pd_16384
:
times
4
dd
16384
section
.
text
align
=
16
...
...
@@ -202,6 +204,130 @@ SCALARPRODUCT_LOOP 0
RET
;-----------------------------------------------------------------------------
; void ff_apply_window_int16(int16_t *output, const int16_t *input,
; const int16_t *window, unsigned int len)
;-----------------------------------------------------------------------------
; Reverse the order of the four 16-bit words held in %1.
; 0x1B = 00_01_10_11b, i.e. destination words 0..3 take source words 3..0.
; An optional second argument is accepted and ignored, so that call sites
; written for the two-argument SSSE3 variant also assemble with this one.
%macro REVERSE_WORDS_MMXEXT 1-2
    pshufw   %1, %1, 0x1B
%endmacro
; Reverse the order of the eight 16-bit words held in %1.
; The low and the high group of four words are each reversed in place
; (0x1B), then the two 64-bit halves are exchanged (0x4E = 01_00_11_10b).
; An optional second argument is accepted and ignored.
%macro REVERSE_WORDS_SSE2 1-2
    pshuflw  %1, %1, 0x1B
    pshufhw  %1, %1, 0x1B
    pshufd   %1, %1, 0x4E
%endmacro
; Reverse the words of %1 with a single byte shuffle.
; %2 must hold the shuffle mask; APPLY_WINDOW_INT16 passes m5, which the
; ssse3 build loads from pb_revwords before entering its loop.
%macro REVERSE_WORDS_SSSE3 2
    pshufb   %1, %2
%endmacro
; dst = (dst * src) >> 15, per 16-bit word, without rounding.
; pmulhw only yields bits 31..16 of each product, so bit 15 is missing.
; It is recovered from the pmullw result (its top bit), the high half is
; shifted left by one and the two are merged.
; %1 = dst (in/out), %2 = src, %3 = scratch register (clobbered)
%macro MUL16FIXED_MMXEXT 3 ; dst, src, temp
    mova     %3, %1
    pmulhw   %1, %2
    pmullw   %3, %2
    psrlw    %3, 15
    psllw    %1, 1
    por      %1, %3
%endmacro
; dst = ((dst * src) + (1<<14)) >> 15, per 16-bit word.
; The third argument exists only to keep the same arity as
; MUL16FIXED_MMXEXT; it is not used.
; NOTE(review): no use of this macro is visible in this hunk; the %3 path of
; APPLY_WINDOW_INT16 issues pmulhrsw directly.
%macro MUL16FIXED_SSSE3 3 ; dst, src, unused
    pmulhrsw %1, %2
%endmacro
; Emit one apply_window_int16_<%1> function.
;   %1 = instruction set suffix used in the function name
;   %2 = 1 to build the mmxext/sse2 bit exact version
;   %3 = 1 if pmulhrsw (ssse3) may be used
;
; The 4th C argument (len) is used directly as a byte offset: since samples
; are 2 bytes wide, "offset" starts at the middle of input/output and walks
; up, while "offset2" starts one vector below the middle and walks down to 0.
; The window is always read at offset2, and word-reversed for the upper half.
; REVERSE_WORDS and MUL16FIXED must be %define'd before the macro is expanded.
%macro APPLY_WINDOW_INT16 3 ; %1=instruction set, %2=mmxext/sse2 bit exact version, %3=has_ssse3
cglobal apply_window_int16_%1, 4,5,6, output, input, window, offset, offset2
    lea          offset2q, [offsetq-mmsize]
%if %2
    ; m5 = rounding constant added to every 32-bit product
    mova               m5, [pd_16384]
%elifidn %1, ssse3
    ; m5 = pshufb mask consumed by REVERSE_WORDS_SSSE3
    mova               m5, [pb_revwords]
    ALIGN 16
%endif
.loop:
%if %2
    ; This version expands 16-bit to 32-bit, multiplies by the window,
    ; adds 16384 for rounding, right shifts 15, then repacks back to words to
    ; save to the output. The window is reversed for the second half.
    ;
    ; Unpacking against a zeroed register puts each window word next to a
    ; zero word, so pmaddwd produces window*input and the stale words that
    ; punpck leaves in m1 are multiplied by zero.
    mova               m3, [windowq+offset2q]
    mova               m4, [ inputq+offset2q]
    pxor               m0, m0
    punpcklwd          m0, m3
    punpcklwd          m1, m4
    pmaddwd            m0, m1
    paddd              m0, m5
    psrad              m0, 15
    pxor               m2, m2
    punpckhwd          m2, m3
    punpckhwd          m1, m4
    pmaddwd            m2, m1
    paddd              m2, m5
    psrad              m2, 15
    packssdw           m0, m2
    mova  [outputq+offset2q], m0
    ; upper half: same window vector, word order reversed
    REVERSE_WORDS      m3
    mova               m4, [ inputq+offsetq]
    pxor               m0, m0
    punpcklwd          m0, m3
    punpcklwd          m1, m4
    pmaddwd            m0, m1
    paddd              m0, m5
    psrad              m0, 15
    pxor               m2, m2
    punpckhwd          m2, m3
    punpckhwd          m1, m4
    pmaddwd            m2, m1
    paddd              m2, m5
    psrad              m2, 15
    packssdw           m0, m2
    mova  [outputq+offsetq], m0
%elif %3
    ; This version does the 16x16->16 multiplication in-place without expanding
    ; to 32-bit. The ssse3 version is bit-identical.
    mova               m0, [windowq+offset2q]
    mova               m1, [ inputq+offset2q]
    pmulhrsw           m1, m0
    REVERSE_WORDS      m0, m5
    pmulhrsw           m0, [ inputq+offsetq ]
    mova  [outputq+offset2q], m1
    mova  [outputq+offsetq ], m0
%else
    ; This version does the 16x16->16 multiplication in-place without expanding
    ; to 32-bit. The mmxext and sse2 versions do not use rounding, and
    ; therefore are not bit-identical to the C version.
    mova               m0, [windowq+offset2q]
    mova               m1, [ inputq+offset2q]
    mova               m2, [ inputq+offsetq ]
    MUL16FIXED         m1, m0, m3
    REVERSE_WORDS      m0
    MUL16FIXED         m2, m0, m3
    mova  [outputq+offset2q], m1
    mova  [outputq+offsetq ], m2
%endif
    ; advance outwards from the centre; the subtraction borrows (CF set)
    ; once offset2 has gone below zero, which ends the loop
    add           offsetd, mmsize
    sub          offset2d, mmsize
    jae .loop
    REP_RET
%endmacro
; Instantiate every apply_window_int16 variant. The order matters: each
; expansion picks up whichever REVERSE_WORDS / MUL16FIXED is %define'd at
; that point.

; 64-bit MMX registers
INIT_MMX
%define REVERSE_WORDS REVERSE_WORDS_MMXEXT
%define MUL16FIXED    MUL16FIXED_MMXEXT
APPLY_WINDOW_INT16 mmxext,     0, 0
APPLY_WINDOW_INT16 mmxext_ba,  1, 0

; 128-bit XMM registers; MUL16FIXED is left pointing at MUL16FIXED_MMXEXT
INIT_XMM
%define REVERSE_WORDS REVERSE_WORDS_SSE2
APPLY_WINDOW_INT16 sse2,       0, 0
APPLY_WINDOW_INT16 sse2_ba,    1, 0
; ssse3_atom: pmulhrsw multiply, but still the SSE2 word reversal
APPLY_WINDOW_INT16 ssse3_atom, 0, 1
%define REVERSE_WORDS REVERSE_WORDS_SSSE3
APPLY_WINDOW_INT16 ssse3,      0, 1
; void add_hfyu_median_prediction_mmx2(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top)
cglobal
add_hfyu_median_prediction_mmx2
,
6
,
6
,
0
,
dst
,
top
,
diff
,
w
,
left
,
left_top
...
...
tests/ref/acodec/ac3_fixed
View file @
e6e98234
b3a8f0a8809a58b2ece90744f06fff96
*./tests/data/acodec/ac3.rm
346073c97eada69330f61e103a170ca1
*./tests/data/acodec/ac3.rm
98751 ./tests/data/acodec/ac3.rm
tests/ref/lavf/rm
View file @
e6e98234
7
da378131db880bcf2e58305d54418ec
*./tests/data/lavf/lavf.rm
7
b7ede9548a09346675edad36acfbf19
*./tests/data/lavf/lavf.rm
346706 ./tests/data/lavf/lavf.rm
tests/ref/seek/ac3_rm
View file @
e6e98234
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
ret: 0 st:-1 flags:0 ts:-1.000000
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
ret: 0 st:-1 flags:1 ts: 1.894167
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
ret: 0 st: 0 flags:0 ts: 0.788000
ret: 0 st: 0 flags:1 dts:12581.487000 pts:12581.487000 pos: 5822 size: 916
ret:-1 st:-1 flags:1 ts: 1.894167
ret:-1 st: 0 flags:0 ts: 0.788000
ret: 0 st: 0 flags:1 ts:-0.317000
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
ret: 0 st:-1 flags:0 ts: 2.576668
ret: 0 st: 0 flags:1 dts:524.800000 pts:524.800000 pos: 6155 size: 244
ret:-1 st:-1 flags:0 ts: 2.576668
ret:-1 st:-1 flags:1 ts: 1.470835
ret: 0 st: 0 flags:0 ts: 0.365000
ret: 0 st: 0 flags:1 dts:12581.487000 pts:12581.487000 pos: 5822 size: 916
ret:-1 st: 0 flags:0 ts: 0.365000
ret: 0 st: 0 flags:1 ts:-0.741000
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
ret:-1 st:-1 flags:0 ts: 2.153336
ret: 0 st:-1 flags:1 ts: 1.047503
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
ret:-1 st:-1 flags:1 ts: 1.047503
ret: 0 st: 0 flags:0 ts:-0.058000
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
ret: 0 st: 0 flags:1 ts: 2.836000
ret: 0 st: 0 flags:1 dts: 2.681000 pts: 2.681000 pos: 44105 size: 558
ret:-1 st: 0 flags:1 ts: 2.836000
ret:-1 st:-1 flags:0 ts: 1.730004
ret: 0 st:-1 flags:1 ts: 0.624171
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
ret:-1 st:-1 flags:1 ts: 0.624171
ret: 0 st: 0 flags:0 ts:-0.482000
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
ret:-1 st: 0 flags:1 ts: 2.413000
ret:-1 st:-1 flags:0 ts: 1.306672
ret: 0 st:-1 flags:1 ts: 0.200839
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
ret:-1 st:-1 flags:1 ts: 0.200839
ret: 0 st: 0 flags:0 ts:-0.905000
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
ret:-1 st: 0 flags:1 ts: 1.989000
ret: 0 st:-1 flags:0 ts: 0.883340
ret: 0 st: 0 flags:1 dts:12581.487000 pts:12581.487000 pos: 5822 size: 916
ret:-1 st:-1 flags:0 ts: 0.883340
ret: 0 st:-1 flags:1 ts:-0.222493
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
ret:-1 st: 0 flags:0 ts: 2.672000
ret:-1 st: 0 flags:1 ts: 1.566000
ret: 0 st:-1 flags:0 ts: 0.460008
ret: 0 st: 0 flags:1 dts:12581.487000 pts:12581.487000 pos: 5822 size: 916
ret:-1 st:-1 flags:0 ts: 0.460008
ret: 0 st:-1 flags:1 ts:-0.645825
ret: 0 st: 0 flags:1 dts: 0.000000 pts: 0.000000 pos: 271 size: 556
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment