Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
f27e1d64
Commit
f27e1d64
authored
Jul 13, 2008
by
Loren Merritt
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
simplify vorbis windowing
Originally committed as revision 14205 to
svn://svn.ffmpeg.org/ffmpeg/trunk
parent
6647ab80
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
201 additions
and
81 deletions
+201
-81
dsputil.c
libavcodec/dsputil.c
+33
-8
dsputil.h
libavcodec/dsputil.h
+5
-0
dsputil_mmx.c
libavcodec/i386/dsputil_mmx.c
+120
-1
vorbis_dec.c
libavcodec/vorbis_dec.c
+39
-72
x86_cpu.h
libavutil/x86_cpu.h
+4
-0
No files found.
libavcodec/dsputil.c
View file @
f27e1d64
...
@@ -3930,17 +3930,40 @@ void ff_vector_fmul_add_add_c(float *dst, const float *src0, const float *src1,
...
@@ -3930,17 +3930,40 @@ void ff_vector_fmul_add_add_c(float *dst, const float *src0, const float *src1,
dst
[
i
*
step
]
=
src0
[
i
]
*
src1
[
i
]
+
src2
[
i
]
+
src3
;
dst
[
i
*
step
]
=
src0
[
i
]
*
src1
[
i
]
+
src2
[
i
]
+
src3
;
}
}
void
ff_
float_to_int16_c
(
int16_t
*
dst
,
const
float
*
src
,
long
len
){
void
ff_
vector_fmul_window_c
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
float
add_bias
,
int
len
){
int
i
;
int
i
;
for
(
i
=
0
;
i
<
len
;
i
++
)
{
for
(
i
=
0
;
i
<
len
;
i
++
)
int_fast32_t
tmp
=
((
const
int32_t
*
)
src
)[
i
];
dst
[
i
]
=
src0
[
i
]
*
win
[
len
-
i
-
1
]
+
src1
[
i
]
*
win
[
i
]
+
add_bias
;
}
static
av_always_inline
int
float_to_int16_one
(
const
float
*
src
){
int_fast32_t
tmp
=
*
(
const
int32_t
*
)
src
;
if
(
tmp
&
0xf0000
){
if
(
tmp
&
0xf0000
){
tmp
=
(
0x43c0ffff
-
tmp
)
>>
31
;
tmp
=
(
0x43c0ffff
-
tmp
)
>>
31
;
// is this faster on some gcc/cpu combinations?
// is this faster on some gcc/cpu combinations?
// if(tmp > 0x43c0ffff) tmp = 0xFFFF;
// if(tmp > 0x43c0ffff) tmp = 0xFFFF;
// else tmp = 0;
// else tmp = 0;
}
}
dst
[
i
]
=
tmp
-
0x8000
;
return
tmp
-
0x8000
;
}
void
ff_float_to_int16_c
(
int16_t
*
dst
,
const
float
*
src
,
long
len
){
int
i
;
for
(
i
=
0
;
i
<
len
;
i
++
)
dst
[
i
]
=
float_to_int16_one
(
src
+
i
);
}
void
ff_float_to_int16_interleave_c
(
int16_t
*
dst
,
const
float
*
src
,
long
len
,
int
channels
){
int
i
,
j
,
c
;
if
(
channels
==
2
){
for
(
i
=
0
;
i
<
len
;
i
++
){
dst
[
2
*
i
]
=
float_to_int16_one
(
src
+
i
);
dst
[
2
*
i
+
1
]
=
float_to_int16_one
(
src
+
i
+
len
);
}
}
else
{
for
(
c
=
0
;
c
<
channels
;
c
++
,
src
+=
len
)
for
(
i
=
0
,
j
=
c
;
i
<
len
;
i
++
,
j
+=
channels
)
dst
[
j
]
=
float_to_int16_one
(
src
+
i
);
}
}
}
}
...
@@ -4450,7 +4473,9 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
...
@@ -4450,7 +4473,9 @@ void dsputil_init(DSPContext* c, AVCodecContext *avctx)
c
->
vector_fmul
=
vector_fmul_c
;
c
->
vector_fmul
=
vector_fmul_c
;
c
->
vector_fmul_reverse
=
vector_fmul_reverse_c
;
c
->
vector_fmul_reverse
=
vector_fmul_reverse_c
;
c
->
vector_fmul_add_add
=
ff_vector_fmul_add_add_c
;
c
->
vector_fmul_add_add
=
ff_vector_fmul_add_add_c
;
c
->
vector_fmul_window
=
ff_vector_fmul_window_c
;
c
->
float_to_int16
=
ff_float_to_int16_c
;
c
->
float_to_int16
=
ff_float_to_int16_c
;
c
->
float_to_int16_interleave
=
ff_float_to_int16_interleave_c
;
c
->
add_int16
=
add_int16_c
;
c
->
add_int16
=
add_int16_c
;
c
->
sub_int16
=
sub_int16_c
;
c
->
sub_int16
=
sub_int16_c
;
c
->
scalarproduct_int16
=
scalarproduct_int16_c
;
c
->
scalarproduct_int16
=
scalarproduct_int16_c
;
...
...
libavcodec/dsputil.h
View file @
f27e1d64
...
@@ -63,6 +63,8 @@ void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block);
...
@@ -63,6 +63,8 @@ void ff_h264_lowres_idct_put_c(uint8_t *dst, int stride, DCTELEM *block);
void
ff_vector_fmul_add_add_c
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
void
ff_vector_fmul_add_add_c
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
src3
,
int
blocksize
,
int
step
);
const
float
*
src2
,
int
src3
,
int
blocksize
,
int
step
);
void
ff_vector_fmul_window_c
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
float
add_bias
,
int
len
);
void
ff_float_to_int16_c
(
int16_t
*
dst
,
const
float
*
src
,
long
len
);
void
ff_float_to_int16_c
(
int16_t
*
dst
,
const
float
*
src
,
long
len
);
/* encoding scans */
/* encoding scans */
...
@@ -364,10 +366,13 @@ typedef struct DSPContext {
...
@@ -364,10 +366,13 @@ typedef struct DSPContext {
void
(
*
vector_fmul_reverse
)(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
int
len
);
void
(
*
vector_fmul_reverse
)(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
int
len
);
/* assume len is a multiple of 8, and src arrays are 16-byte aligned */
/* assume len is a multiple of 8, and src arrays are 16-byte aligned */
void
(
*
vector_fmul_add_add
)(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
src3
,
int
len
,
int
step
);
void
(
*
vector_fmul_add_add
)(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
src2
,
int
src3
,
int
len
,
int
step
);
/* assume len is a multiple of 4, and arrays are 16-byte aligned */
void
(
*
vector_fmul_window
)(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
float
add_bias
,
int
len
);
/* C version: convert floats from the range [384.0,386.0] to ints in [-32768,32767]
/* C version: convert floats from the range [384.0,386.0] to ints in [-32768,32767]
* simd versions: convert floats from [-32768.0,32767.0] without rescaling and arrays are 16byte aligned */
* simd versions: convert floats from [-32768.0,32767.0] without rescaling and arrays are 16byte aligned */
void
(
*
float_to_int16
)(
int16_t
*
dst
,
const
float
*
src
,
long
len
);
void
(
*
float_to_int16
)(
int16_t
*
dst
,
const
float
*
src
,
long
len
);
void
(
*
float_to_int16_interleave
)(
int16_t
*
dst
,
const
float
*
src
,
long
len
,
int
channels
);
/* (I)DCT */
/* (I)DCT */
void
(
*
fdct
)(
DCTELEM
*
block
/* align 16*/
);
void
(
*
fdct
)(
DCTELEM
*
block
/* align 16*/
);
...
...
libavcodec/i386/dsputil_mmx.c
View file @
f27e1d64
...
@@ -2022,6 +2022,39 @@ static void vector_fmul_add_add_sse(float *dst, const float *src0, const float *
...
@@ -2022,6 +2022,39 @@ static void vector_fmul_add_add_sse(float *dst, const float *src0, const float *
ff_vector_fmul_add_add_c
(
dst
,
src0
,
src1
,
src2
,
src3
,
len
,
step
);
ff_vector_fmul_add_add_c
(
dst
,
src0
,
src1
,
src2
,
src3
,
len
,
step
);
}
}
static
void
vector_fmul_window_sse
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
const
float
*
win
,
float
add_bias
,
int
len
){
#ifdef HAVE_6REGS
if
(
add_bias
==
0
){
x86_reg
i
=
-
len
*
2
;
x86_reg
j
=
len
*
2
-
16
;
asm
volatile
(
"1:
\n
"
"movaps (%5,%0), %%xmm0
\n
"
"movaps (%5,%1), %%xmm1
\n
"
"movaps %%xmm0, %%xmm2
\n
"
"movaps %%xmm1, %%xmm3
\n
"
"shufps $0x1b, %%xmm2, %%xmm2
\n
"
"shufps $0x1b, %%xmm3, %%xmm3
\n
"
"mulps (%4,%0), %%xmm0
\n
"
"mulps (%4,%1), %%xmm1
\n
"
"mulps (%3,%0), %%xmm3
\n
"
"mulps (%3,%1), %%xmm2
\n
"
"addps %%xmm3, %%xmm0
\n
"
"addps %%xmm2, %%xmm1
\n
"
"movaps %%xmm0, (%2,%0)
\n
"
"movaps %%xmm1, (%2,%1)
\n
"
"sub $16, %1
\n
"
"add $16, %0
\n
"
"jl 1b
\n
"
:
"+r"
(
i
),
"+r"
(
j
)
:
"r"
(
dst
+
len
/
2
),
"r"
(
src0
+
len
/
2
),
"r"
(
src1
+
len
/
2
),
"r"
(
win
+
len
/
2
)
);
}
else
#endif
ff_vector_fmul_window_c
(
dst
,
src0
,
src1
,
win
,
add_bias
,
len
);
}
static
void
float_to_int16_3dnow
(
int16_t
*
dst
,
const
float
*
src
,
long
len
){
static
void
float_to_int16_3dnow
(
int16_t
*
dst
,
const
float
*
src
,
long
len
){
// not bit-exact: pf2id uses different rounding than C and SSE
// not bit-exact: pf2id uses different rounding than C and SSE
asm
volatile
(
asm
volatile
(
...
@@ -2083,6 +2116,87 @@ static void float_to_int16_sse2(int16_t *dst, const float *src, long len){
...
@@ -2083,6 +2116,87 @@ static void float_to_int16_sse2(int16_t *dst, const float *src, long len){
);
);
}
}
#define FLOAT_TO_INT16_INTERLEAVE(cpu, body) \
/* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/
\
static av_noinline void float_to_int16_interleave2_##cpu(int16_t *dst, const float *src, long len, int channels){\
DECLARE_ALIGNED_16(int16_t, tmp[len*channels]);\
int i,j,c;\
float_to_int16_##cpu(tmp, src, len*channels);\
for(c=0; c<channels; c++){\
int16_t *ptmp = tmp+c*len;\
for(i=0, j=c; i<len; i++, j+=channels)\
dst[j] = ptmp[i];\
}\
}\
\
static void float_to_int16_interleave_##cpu(int16_t *dst, const float *src, long len, int channels){\
if(channels==1)\
float_to_int16_##cpu(dst, src, len);\
else if(channels>2)\
float_to_int16_interleave2_##cpu(dst, src, len, channels);\
else{\
float *src1;\
asm volatile(\
"shl $2, %0 \n"\
"add %0, %1 \n"\
"add %0, %2 \n"\
"lea (%2,%0), %3 \n"\
"neg %0 \n"\
body\
:"+r"(len), "+r"(dst), "+r"(src), "=r"(src1)\
);\
}\
}
FLOAT_TO_INT16_INTERLEAVE
(
3
dnow
,
"1:
\n
"
"pf2id (%2,%0), %%mm0
\n
"
"pf2id 8(%2,%0), %%mm1
\n
"
"pf2id (%3,%0), %%mm2
\n
"
"pf2id 8(%3,%0), %%mm3
\n
"
"packssdw %%mm1, %%mm0
\n
"
"packssdw %%mm3, %%mm2
\n
"
"movq %%mm0, %%mm1
\n
"
"punpcklwd %%mm2, %%mm0
\n
"
"punpckhwd %%mm2, %%mm1
\n
"
"movq %%mm0, (%1,%0)
\n
"
"movq %%mm0, 8(%1,%0)
\n
"
"add $16, %0
\n
"
"js 1b
\n
"
"femms
\n
"
)
FLOAT_TO_INT16_INTERLEAVE
(
sse
,
"1:
\n
"
"cvtps2pi (%2,%0), %%mm0
\n
"
"cvtps2pi 8(%2,%0), %%mm1
\n
"
"cvtps2pi (%3,%0), %%mm2
\n
"
"cvtps2pi 8(%3,%0), %%mm3
\n
"
"packssdw %%mm1, %%mm0
\n
"
"packssdw %%mm3, %%mm2
\n
"
"movq %%mm0, %%mm1
\n
"
"punpcklwd %%mm2, %%mm0
\n
"
"punpckhwd %%mm2, %%mm1
\n
"
"movq %%mm0, (%1,%0)
\n
"
"movq %%mm0, 8(%1,%0)
\n
"
"add $16, %0
\n
"
"js 1b
\n
"
"emms
\n
"
)
FLOAT_TO_INT16_INTERLEAVE
(
sse2
,
"1:
\n
"
"cvtps2dq (%2,%0), %%xmm0
\n
"
"cvtps2dq (%3,%0), %%xmm1
\n
"
"packssdw %%xmm1, %%xmm0
\n
"
"movhlps %%xmm0, %%xmm1
\n
"
"punpcklwd %%xmm1, %%xmm0
\n
"
"movdqa %%xmm0, (%1,%0)
\n
"
"add $16, %0
\n
"
"js 1b
\n
"
)
extern
void
ff_snow_horizontal_compose97i_sse2
(
IDWTELEM
*
b
,
int
width
);
extern
void
ff_snow_horizontal_compose97i_sse2
(
IDWTELEM
*
b
,
int
width
);
extern
void
ff_snow_horizontal_compose97i_mmx
(
IDWTELEM
*
b
,
int
width
);
extern
void
ff_snow_horizontal_compose97i_mmx
(
IDWTELEM
*
b
,
int
width
);
extern
void
ff_snow_vertical_compose97i_sse2
(
IDWTELEM
*
b0
,
IDWTELEM
*
b1
,
IDWTELEM
*
b2
,
IDWTELEM
*
b3
,
IDWTELEM
*
b4
,
IDWTELEM
*
b5
,
int
width
);
extern
void
ff_snow_vertical_compose97i_sse2
(
IDWTELEM
*
b0
,
IDWTELEM
*
b1
,
IDWTELEM
*
b2
,
IDWTELEM
*
b3
,
IDWTELEM
*
b4
,
IDWTELEM
*
b5
,
int
width
);
...
@@ -2519,8 +2633,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
...
@@ -2519,8 +2633,10 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
if
(
mm_flags
&
MM_3DNOW
){
if
(
mm_flags
&
MM_3DNOW
){
c
->
vorbis_inverse_coupling
=
vorbis_inverse_coupling_3dnow
;
c
->
vorbis_inverse_coupling
=
vorbis_inverse_coupling_3dnow
;
c
->
vector_fmul
=
vector_fmul_3dnow
;
c
->
vector_fmul
=
vector_fmul_3dnow
;
if
(
!
(
avctx
->
flags
&
CODEC_FLAG_BITEXACT
))
if
(
!
(
avctx
->
flags
&
CODEC_FLAG_BITEXACT
))
{
c
->
float_to_int16
=
float_to_int16_3dnow
;
c
->
float_to_int16
=
float_to_int16_3dnow
;
c
->
float_to_int16_interleave
=
float_to_int16_interleave_3dnow
;
}
}
}
if
(
mm_flags
&
MM_3DNOWEXT
)
if
(
mm_flags
&
MM_3DNOWEXT
)
c
->
vector_fmul_reverse
=
vector_fmul_reverse_3dnow2
;
c
->
vector_fmul_reverse
=
vector_fmul_reverse_3dnow2
;
...
@@ -2528,11 +2644,14 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
...
@@ -2528,11 +2644,14 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c
->
vorbis_inverse_coupling
=
vorbis_inverse_coupling_sse
;
c
->
vorbis_inverse_coupling
=
vorbis_inverse_coupling_sse
;
c
->
vector_fmul
=
vector_fmul_sse
;
c
->
vector_fmul
=
vector_fmul_sse
;
c
->
float_to_int16
=
float_to_int16_sse
;
c
->
float_to_int16
=
float_to_int16_sse
;
c
->
float_to_int16_interleave
=
float_to_int16_interleave_sse
;
c
->
vector_fmul_reverse
=
vector_fmul_reverse_sse
;
c
->
vector_fmul_reverse
=
vector_fmul_reverse_sse
;
c
->
vector_fmul_add_add
=
vector_fmul_add_add_sse
;
c
->
vector_fmul_add_add
=
vector_fmul_add_add_sse
;
c
->
vector_fmul_window
=
vector_fmul_window_sse
;
}
}
if
(
mm_flags
&
MM_SSE2
){
if
(
mm_flags
&
MM_SSE2
){
c
->
float_to_int16
=
float_to_int16_sse2
;
c
->
float_to_int16
=
float_to_int16_sse2
;
c
->
float_to_int16_interleave
=
float_to_int16_interleave_sse2
;
}
}
if
(
mm_flags
&
MM_3DNOW
)
if
(
mm_flags
&
MM_3DNOW
)
c
->
vector_fmul_add_add
=
vector_fmul_add_add_3dnow
;
// faster than sse
c
->
vector_fmul_add_add
=
vector_fmul_add_add_3dnow
;
// faster than sse
...
...
libavcodec/vorbis_dec.c
View file @
f27e1d64
...
@@ -149,10 +149,10 @@ typedef struct vorbis_context_s {
...
@@ -149,10 +149,10 @@ typedef struct vorbis_context_s {
uint_fast8_t
mode_count
;
uint_fast8_t
mode_count
;
vorbis_mode
*
modes
;
vorbis_mode
*
modes
;
uint_fast8_t
mode_number
;
// mode number for the current packet
uint_fast8_t
mode_number
;
// mode number for the current packet
uint_fast8_t
previous_window
;
float
*
channel_residues
;
float
*
channel_residues
;
float
*
channel_floors
;
float
*
channel_floors
;
float
*
saved
;
float
*
saved
;
uint_fast16_t
saved_start
;
float
*
ret
;
float
*
ret
;
float
*
buf
;
float
*
buf
;
float
*
buf_tmp
;
float
*
buf_tmp
;
...
@@ -903,7 +903,7 @@ static int vorbis_parse_id_hdr(vorbis_context *vc){
...
@@ -903,7 +903,7 @@ static int vorbis_parse_id_hdr(vorbis_context *vc){
vc
->
ret
=
av_malloc
((
vc
->
blocksize
[
1
]
/
2
)
*
vc
->
audio_channels
*
sizeof
(
float
));
vc
->
ret
=
av_malloc
((
vc
->
blocksize
[
1
]
/
2
)
*
vc
->
audio_channels
*
sizeof
(
float
));
vc
->
buf
=
av_malloc
(
vc
->
blocksize
[
1
]
*
sizeof
(
float
));
vc
->
buf
=
av_malloc
(
vc
->
blocksize
[
1
]
*
sizeof
(
float
));
vc
->
buf_tmp
=
av_malloc
(
vc
->
blocksize
[
1
]
*
sizeof
(
float
));
vc
->
buf_tmp
=
av_malloc
(
vc
->
blocksize
[
1
]
*
sizeof
(
float
));
vc
->
saved_start
=
0
;
vc
->
previous_window
=
0
;
ff_mdct_init
(
&
vc
->
mdct
[
0
],
bl0
,
1
);
ff_mdct_init
(
&
vc
->
mdct
[
0
],
bl0
,
1
);
ff_mdct_init
(
&
vc
->
mdct
[
1
],
bl1
,
1
);
ff_mdct_init
(
&
vc
->
mdct
[
1
],
bl1
,
1
);
...
@@ -1394,13 +1394,26 @@ void vorbis_inverse_coupling(float *mag, float *ang, int blocksize)
...
@@ -1394,13 +1394,26 @@ void vorbis_inverse_coupling(float *mag, float *ang, int blocksize)
}
}
}
}
static
void
copy_normalize
(
float
*
dst
,
float
*
src
,
int
len
,
int
exp_bias
,
float
add_bias
)
{
int
i
;
if
(
exp_bias
)
{
for
(
i
=
0
;
i
<
len
;
i
++
)
((
uint32_t
*
)
dst
)[
i
]
=
((
uint32_t
*
)
src
)[
i
]
+
exp_bias
;
// dst[k]=src[i]*(1<<bias)
}
else
{
for
(
i
=
0
;
i
<
len
;
i
++
)
dst
[
i
]
=
src
[
i
]
+
add_bias
;
}
}
// Decode the audio packet using the functions above
// Decode the audio packet using the functions above
static
int
vorbis_parse_audio_packet
(
vorbis_context
*
vc
)
{
static
int
vorbis_parse_audio_packet
(
vorbis_context
*
vc
)
{
GetBitContext
*
gb
=&
vc
->
gb
;
GetBitContext
*
gb
=&
vc
->
gb
;
uint_fast8_t
previous_window
=
0
,
next_window
=
0
;
uint_fast8_t
previous_window
=
vc
->
previous_window
;
uint_fast8_t
mode_number
;
uint_fast8_t
mode_number
;
uint_fast8_t
blockflag
;
uint_fast16_t
blocksize
;
uint_fast16_t
blocksize
;
int_fast32_t
i
,
j
;
int_fast32_t
i
,
j
;
uint_fast8_t
no_residue
[
vc
->
audio_channels
];
uint_fast8_t
no_residue
[
vc
->
audio_channels
];
...
@@ -1411,7 +1424,6 @@ static int vorbis_parse_audio_packet(vorbis_context *vc) {
...
@@ -1411,7 +1424,6 @@ static int vorbis_parse_audio_packet(vorbis_context *vc) {
uint_fast8_t
res_chan
[
vc
->
audio_channels
];
uint_fast8_t
res_chan
[
vc
->
audio_channels
];
uint_fast8_t
res_num
=
0
;
uint_fast8_t
res_num
=
0
;
int_fast16_t
retlen
=
0
;
int_fast16_t
retlen
=
0
;
uint_fast16_t
saved_start
=
0
;
float
fadd_bias
=
vc
->
add_bias
;
float
fadd_bias
=
vc
->
add_bias
;
if
(
get_bits1
(
gb
))
{
if
(
get_bits1
(
gb
))
{
...
@@ -1429,12 +1441,12 @@ static int vorbis_parse_audio_packet(vorbis_context *vc) {
...
@@ -1429,12 +1441,12 @@ static int vorbis_parse_audio_packet(vorbis_context *vc) {
AV_DEBUG
(
" Mode number: %d , mapping: %d , blocktype %d
\n
"
,
mode_number
,
vc
->
modes
[
mode_number
].
mapping
,
vc
->
modes
[
mode_number
].
blockflag
);
AV_DEBUG
(
" Mode number: %d , mapping: %d , blocktype %d
\n
"
,
mode_number
,
vc
->
modes
[
mode_number
].
mapping
,
vc
->
modes
[
mode_number
].
blockflag
);
if
(
vc
->
modes
[
mode_number
].
blockflag
)
{
blockflag
=
vc
->
modes
[
mode_number
].
blockflag
;
previous_window
=
get_bits1
(
gb
);
blocksize
=
vc
->
blocksize
[
blockflag
];
next_window
=
get_bits1
(
gb
);
if
(
blockflag
)
{
skip_bits
(
gb
,
2
);
// previous_window, next_window
}
}
blocksize
=
vc
->
blocksize
[
vc
->
modes
[
mode_number
].
blockflag
];
memset
(
ch_res_ptr
,
0
,
sizeof
(
float
)
*
vc
->
audio_channels
*
blocksize
/
2
);
//FIXME can this be removed ?
memset
(
ch_res_ptr
,
0
,
sizeof
(
float
)
*
vc
->
audio_channels
*
blocksize
/
2
);
//FIXME can this be removed ?
memset
(
ch_floor_ptr
,
0
,
sizeof
(
float
)
*
vc
->
audio_channels
*
blocksize
/
2
);
//FIXME can this be removed ?
memset
(
ch_floor_ptr
,
0
,
sizeof
(
float
)
*
vc
->
audio_channels
*
blocksize
/
2
);
//FIXME can this be removed ?
...
@@ -1504,76 +1516,31 @@ static int vorbis_parse_audio_packet(vorbis_context *vc) {
...
@@ -1504,76 +1516,31 @@ static int vorbis_parse_audio_packet(vorbis_context *vc) {
// MDCT, overlap/add, save data for next overlapping FPMATH
// MDCT, overlap/add, save data for next overlapping FPMATH
retlen
=
(
blocksize
+
vc
->
blocksize
[
previous_window
])
/
4
;
for
(
j
=
0
;
j
<
vc
->
audio_channels
;
++
j
)
{
for
(
j
=
0
;
j
<
vc
->
audio_channels
;
++
j
)
{
uint_fast8_t
step
=
vc
->
audio_channels
;
uint_fast16_t
bs0
=
vc
->
blocksize
[
0
];
uint_fast16_t
k
;
uint_fast16_t
bs1
=
vc
->
blocksize
[
1
];
float
*
saved
=
vc
->
saved
+
j
*
vc
->
blocksize
[
1
]
/
2
;
float
*
saved
=
vc
->
saved
+
j
*
bs1
/
2
;
float
*
ret
=
vc
->
ret
;
float
*
ret
=
vc
->
ret
+
j
*
retlen
;
const
float
*
lwin
=
vc
->
win
[
1
];
const
float
*
swin
=
vc
->
win
[
0
];
float
*
buf
=
vc
->
buf
;
float
*
buf
=
vc
->
buf
;
float
*
buf_tmp
=
vc
->
buf_tmp
;
const
float
*
win
=
vc
->
win
[
blockflag
&
previous_window
];
ch_floor_ptr
=
vc
->
channel_floors
+
j
*
blocksize
/
2
;
saved_start
=
vc
->
saved_start
;
vc
->
mdct
[
0
].
fft
.
imdct_calc
(
&
vc
->
mdct
[
vc
->
modes
[
mode_number
].
blockflag
],
buf
,
ch_floor_ptr
,
buf_tmp
);
vc
->
mdct
[
0
].
fft
.
imdct_calc
(
&
vc
->
mdct
[
blockflag
],
buf
,
vc
->
channel_floors
+
j
*
blocksize
/
2
,
vc
->
buf_tmp
);
//FIXME process channels together, to allow faster simd vector_fmul_add_add?
if
(
blockflag
==
previous_window
)
{
if
(
vc
->
modes
[
mode_number
].
blockflag
)
{
vc
->
dsp
.
vector_fmul_window
(
ret
,
saved
,
buf
,
win
,
fadd_bias
,
blocksize
/
2
);
// -- overlap/add
}
else
if
(
blockflag
>
previous_window
)
{
if
(
previous_window
)
{
vc
->
dsp
.
vector_fmul_window
(
ret
,
saved
,
buf
+
(
bs1
-
bs0
)
/
4
,
win
,
fadd_bias
,
bs0
/
2
);
vc
->
dsp
.
vector_fmul_add_add
(
ret
+
j
,
buf
,
lwin
,
saved
,
vc
->
add_bias
,
vc
->
blocksize
[
1
]
/
2
,
step
);
copy_normalize
(
ret
+
bs0
/
2
,
buf
+
(
bs1
+
bs0
)
/
4
,
(
bs1
-
bs0
)
/
4
,
vc
->
exp_bias
,
fadd_bias
);
retlen
=
vc
->
blocksize
[
1
]
/
2
;
}
else
{
}
else
{
int
len
=
(
vc
->
blocksize
[
1
]
-
vc
->
blocksize
[
0
])
/
4
;
copy_normalize
(
ret
,
saved
,
(
bs1
-
bs0
)
/
4
,
vc
->
exp_bias
,
fadd_bias
);
buf
+=
len
;
vc
->
dsp
.
vector_fmul_window
(
ret
+
(
bs1
-
bs0
)
/
4
,
saved
+
(
bs1
-
bs0
)
/
4
,
buf
,
win
,
fadd_bias
,
bs0
/
2
);
vc
->
dsp
.
vector_fmul_add_add
(
ret
+
j
,
buf
,
swin
,
saved
,
vc
->
add_bias
,
vc
->
blocksize
[
0
]
/
2
,
step
);
k
=
vc
->
blocksize
[
0
]
/
2
*
step
+
j
;
buf
+=
vc
->
blocksize
[
0
]
/
2
;
if
(
vc
->
exp_bias
){
for
(
i
=
0
;
i
<
len
;
i
++
,
k
+=
step
)
((
uint32_t
*
)
ret
)[
k
]
=
((
uint32_t
*
)
buf
)[
i
]
+
vc
->
exp_bias
;
// ret[k]=buf[i]*(1<<bias)
}
else
{
for
(
i
=
0
;
i
<
len
;
i
++
,
k
+=
step
)
ret
[
k
]
=
buf
[
i
]
+
fadd_bias
;
}
buf
=
vc
->
buf
;
retlen
=
vc
->
blocksize
[
0
]
/
2
+
len
;
}
// -- save
if
(
next_window
)
{
buf
+=
vc
->
blocksize
[
1
]
/
2
;
vc
->
dsp
.
vector_fmul_reverse
(
saved
,
buf
,
lwin
,
vc
->
blocksize
[
1
]
/
2
);
saved_start
=
0
;
}
else
{
saved_start
=
(
vc
->
blocksize
[
1
]
-
vc
->
blocksize
[
0
])
/
4
;
buf
+=
vc
->
blocksize
[
1
]
/
2
;
for
(
i
=
0
;
i
<
saved_start
;
i
++
)
((
uint32_t
*
)
saved
)[
i
]
=
((
uint32_t
*
)
buf
)[
i
]
+
vc
->
exp_bias
;
vc
->
dsp
.
vector_fmul_reverse
(
saved
+
saved_start
,
buf
+
saved_start
,
swin
,
vc
->
blocksize
[
0
]
/
2
);
}
}
else
{
// --overlap/add
if
(
vc
->
add_bias
)
{
for
(
k
=
j
,
i
=
0
;
i
<
saved_start
;
++
i
,
k
+=
step
)
ret
[
k
]
=
saved
[
i
]
+
fadd_bias
;
}
else
{
for
(
k
=
j
,
i
=
0
;
i
<
saved_start
;
++
i
,
k
+=
step
)
ret
[
k
]
=
saved
[
i
];
}
vc
->
dsp
.
vector_fmul_add_add
(
ret
+
k
,
buf
,
swin
,
saved
+
saved_start
,
vc
->
add_bias
,
vc
->
blocksize
[
0
]
/
2
,
step
);
retlen
=
saved_start
+
vc
->
blocksize
[
0
]
/
2
;
// -- save
buf
+=
vc
->
blocksize
[
0
]
/
2
;
vc
->
dsp
.
vector_fmul_reverse
(
saved
,
buf
,
swin
,
vc
->
blocksize
[
0
]
/
2
);
saved_start
=
0
;
}
}
memcpy
(
saved
,
buf
+
blocksize
/
2
,
blocksize
/
2
*
sizeof
(
float
));
}
}
vc
->
saved_start
=
saved_start
;
return
retlen
*
vc
->
audio_channels
;
vc
->
previous_window
=
blockflag
;
return
retlen
;
}
}
// Return the decoded audio packet through the standard api
// Return the decoded audio packet through the standard api
...
@@ -1610,8 +1577,8 @@ static int vorbis_decode_frame(AVCodecContext *avccontext,
...
@@ -1610,8 +1577,8 @@ static int vorbis_decode_frame(AVCodecContext *avccontext,
AV_DEBUG
(
"parsed %d bytes %d bits, returned %d samples (*ch*bits)
\n
"
,
get_bits_count
(
gb
)
/
8
,
get_bits_count
(
gb
)
%
8
,
len
);
AV_DEBUG
(
"parsed %d bytes %d bits, returned %d samples (*ch*bits)
\n
"
,
get_bits_count
(
gb
)
/
8
,
get_bits_count
(
gb
)
%
8
,
len
);
vc
->
dsp
.
float_to_int16
(
data
,
vc
->
ret
,
len
);
vc
->
dsp
.
float_to_int16
_interleave
(
data
,
vc
->
ret
,
len
,
vc
->
audio_channels
);
*
data_size
=
len
*
2
;
*
data_size
=
len
*
2
*
vc
->
audio_channels
;
return
buf_size
;
return
buf_size
;
}
}
...
...
libavutil/x86_cpu.h
View file @
f27e1d64
...
@@ -68,6 +68,10 @@ typedef int32_t x86_reg;
...
@@ -68,6 +68,10 @@ typedef int32_t x86_reg;
# define HAVE_7REGS 1
# define HAVE_7REGS 1
#endif
#endif
#if defined(ARCH_X86_64) || (defined(ARCH_X86_32) && (defined(HAVE_EBX_AVAILABLE) || defined(HAVE_EBP_AVAILABLE)))
# define HAVE_6REGS 1
#endif
#if defined(ARCH_X86_64) && defined(PIC)
#if defined(ARCH_X86_64) && defined(PIC)
# define BROKEN_RELOCATIONS 1
# define BROKEN_RELOCATIONS 1
#endif
#endif
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment