Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
9d06037d
Commit
9d06037d
authored
Oct 30, 2011
by
Justin Ruggles
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
twinvq: add SSE/AVX optimized sum/difference stereo interleaving
parent
7b966566
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
101 additions
and
18 deletions
+101
-18
dsputil.c
libavcodec/dsputil.c
+13
-0
dsputil.h
libavcodec/dsputil.h
+17
-0
twinvq.c
libavcodec/twinvq.c
+16
-18
dsputil_mmx.c
libavcodec/x86/dsputil_mmx.c
+7
-0
dsputil_yasm.asm
libavcodec/x86/dsputil_yasm.asm
+48
-0
No files found.
libavcodec/dsputil.c
View file @
9d06037d
...
@@ -2509,6 +2509,18 @@ static void butterflies_float_c(float *restrict v1, float *restrict v2,
...
@@ -2509,6 +2509,18 @@ static void butterflies_float_c(float *restrict v1, float *restrict v2,
}
}
}
}
/**
 * Plain C version of the sum/difference interleave.
 *
 * For every index i in [0, len) this stores
 *     dst[2*i]     = src0[i] + src1[i]
 *     dst[2*i + 1] = src0[i] - src1[i]
 *
 * @param dst  output vector; 2 * len floats are written
 * @param src0 first input vector, len floats are read
 * @param src1 second input vector, len floats are read
 * @param len  number of elements in each input; nothing is written when
 *             len <= 0
 */
static void butterflies_float_interleave_c(float *dst, const float *src0,
                                           const float *src1, int len)
{
    const float *a = src0;
    const float *b = src1;
    float *o       = dst;
    int remaining;

    for (remaining = len; remaining > 0; remaining--) {
        /* Read both inputs before storing anything for this pair. */
        const float lhs = *a++;
        const float rhs = *b++;

        *o++ = lhs + rhs;
        *o++ = lhs - rhs;
    }
}
static
float
scalarproduct_float_c
(
const
float
*
v1
,
const
float
*
v2
,
int
len
)
static
float
scalarproduct_float_c
(
const
float
*
v1
,
const
float
*
v2
,
int
len
)
{
{
float
p
=
0
.
0
;
float
p
=
0
.
0
;
...
@@ -3036,6 +3048,7 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
...
@@ -3036,6 +3048,7 @@ av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
c
->
vector_clip_int32
=
vector_clip_int32_c
;
c
->
vector_clip_int32
=
vector_clip_int32_c
;
c
->
scalarproduct_float
=
scalarproduct_float_c
;
c
->
scalarproduct_float
=
scalarproduct_float_c
;
c
->
butterflies_float
=
butterflies_float_c
;
c
->
butterflies_float
=
butterflies_float_c
;
c
->
butterflies_float_interleave
=
butterflies_float_interleave_c
;
c
->
vector_fmul_scalar
=
vector_fmul_scalar_c
;
c
->
vector_fmul_scalar
=
vector_fmul_scalar_c
;
c
->
vector_fmac_scalar
=
vector_fmac_scalar_c
;
c
->
vector_fmac_scalar
=
vector_fmac_scalar_c
;
...
...
libavcodec/dsputil.h
View file @
9d06037d
...
@@ -453,6 +453,23 @@ typedef struct DSPContext {
...
@@ -453,6 +453,23 @@ typedef struct DSPContext {
*/
*/
void
(
*
butterflies_float
)(
float
*
restrict
v1
,
float
*
restrict
v2
,
int
len
);
void
(
*
butterflies_float
)(
float
*
restrict
v1
,
float
*
restrict
v2
,
int
len
);
/**
* Calculate the sum and difference of two vectors of floats and interleave
* results into a separate output vector of floats, with each sum
* positioned before the corresponding difference.
*
* @param dst output vector (receives 2 * len floats)
* constraints: 16-byte aligned
* @param src0 first input vector
* constraints: 32-byte aligned
* @param src1 second input vector
* constraints: 32-byte aligned
* @param len number of elements in the input
* constraints: multiple of 8
*/
void
(
*
butterflies_float_interleave
)(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
int
len
);
/* (I)DCT */
/* (I)DCT */
void
(
*
fdct
)(
DCTELEM
*
block
/* align 16*/
);
void
(
*
fdct
)(
DCTELEM
*
block
/* align 16*/
);
void
(
*
fdct248
)(
DCTELEM
*
block
/* align 16*/
);
void
(
*
fdct248
)(
DCTELEM
*
block
/* align 16*/
);
...
...
libavcodec/twinvq.c
View file @
9d06037d
...
@@ -665,8 +665,9 @@ static void imdct_output(TwinContext *tctx, enum FrameType ftype, int wtype,
...
@@ -665,8 +665,9 @@ static void imdct_output(TwinContext *tctx, enum FrameType ftype, int wtype,
float
*
out
)
float
*
out
)
{
{
const
ModeTab
*
mtab
=
tctx
->
mtab
;
const
ModeTab
*
mtab
=
tctx
->
mtab
;
int
size1
,
size2
;
float
*
prev_buf
=
tctx
->
prev_frame
+
tctx
->
last_block_pos
[
0
];
float
*
prev_buf
=
tctx
->
prev_frame
+
tctx
->
last_block_pos
[
0
];
int
i
,
j
;
int
i
;
for
(
i
=
0
;
i
<
tctx
->
avctx
->
channels
;
i
++
)
{
for
(
i
=
0
;
i
<
tctx
->
avctx
->
channels
;
i
++
)
{
imdct_and_window
(
tctx
,
ftype
,
wtype
,
imdct_and_window
(
tctx
,
ftype
,
wtype
,
...
@@ -675,27 +676,24 @@ static void imdct_output(TwinContext *tctx, enum FrameType ftype, int wtype,
...
@@ -675,27 +676,24 @@ static void imdct_output(TwinContext *tctx, enum FrameType ftype, int wtype,
i
);
i
);
}
}
size2
=
tctx
->
last_block_pos
[
0
];
size1
=
mtab
->
size
-
size2
;
if
(
tctx
->
avctx
->
channels
==
2
)
{
if
(
tctx
->
avctx
->
channels
==
2
)
{
for
(
i
=
0
;
i
<
mtab
->
size
-
tctx
->
last_block_pos
[
0
];
i
++
)
{
tctx
->
dsp
.
butterflies_float_interleave
(
out
,
prev_buf
,
float
f1
=
prev_buf
[
i
];
&
prev_buf
[
2
*
mtab
->
size
],
float
f2
=
prev_buf
[
2
*
mtab
->
size
+
i
];
size1
);
out
[
2
*
i
]
=
f1
+
f2
;
out
[
2
*
i
+
1
]
=
f1
-
f2
;
out
+=
2
*
size1
;
}
for
(
j
=
0
;
i
<
mtab
->
size
;
j
++
,
i
++
)
{
tctx
->
dsp
.
butterflies_float_interleave
(
out
,
tctx
->
curr_frame
,
float
f1
=
tctx
->
curr_frame
[
j
];
&
tctx
->
curr_frame
[
2
*
mtab
->
size
],
float
f2
=
tctx
->
curr_frame
[
2
*
mtab
->
size
+
j
];
size2
);
out
[
2
*
i
]
=
f1
+
f2
;
out
[
2
*
i
+
1
]
=
f1
-
f2
;
}
}
else
{
}
else
{
memcpy
(
out
,
prev_buf
,
memcpy
(
out
,
prev_buf
,
size1
*
sizeof
(
*
out
));
(
mtab
->
size
-
tctx
->
last_block_pos
[
0
])
*
sizeof
(
*
out
));
out
+=
mtab
->
size
-
tctx
->
last_block_pos
[
0
]
;
out
+=
size1
;
memcpy
(
out
,
tctx
->
curr_frame
,
memcpy
(
out
,
tctx
->
curr_frame
,
size2
*
sizeof
(
*
out
));
(
tctx
->
last_block_pos
[
0
])
*
sizeof
(
*
out
));
}
}
}
}
...
...
libavcodec/x86/dsputil_mmx.c
View file @
9d06037d
...
@@ -2424,6 +2424,11 @@ void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src, int32_t min
...
@@ -2424,6 +2424,11 @@ void ff_vector_clip_int32_int_sse2(int32_t *dst, const int32_t *src, int32_t min
void
ff_vector_clip_int32_sse4
(
int32_t
*
dst
,
const
int32_t
*
src
,
int32_t
min
,
void
ff_vector_clip_int32_sse4
(
int32_t
*
dst
,
const
int32_t
*
src
,
int32_t
min
,
int32_t
max
,
unsigned
int
len
);
int32_t
max
,
unsigned
int
len
);
/*
 * Prototypes for the SSE and AVX versions of the sum/difference interleave.
 * Both take the same arguments as the butterflies_float_interleave function
 * pointer they are assigned to in dsputil_init_mmx().
 * NOTE(review): presumably these symbols are emitted by the
 * BUTTERFLIES_FLOAT_INTERLEAVE macro in dsputil_yasm.asm -- confirm the
 * ff_ prefix / _sse, _avx suffix naming against x86inc.asm.
 */
extern
void
ff_butterflies_float_interleave_sse
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
int
len
);
extern
void
ff_butterflies_float_interleave_avx
(
float
*
dst
,
const
float
*
src0
,
const
float
*
src1
,
int
len
);
void
dsputil_init_mmx
(
DSPContext
*
c
,
AVCodecContext
*
avctx
)
void
dsputil_init_mmx
(
DSPContext
*
c
,
AVCodecContext
*
avctx
)
{
{
int
mm_flags
=
av_get_cpu_flags
();
int
mm_flags
=
av_get_cpu_flags
();
...
@@ -2868,6 +2873,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
...
@@ -2868,6 +2873,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c
->
vector_clipf
=
vector_clipf_sse
;
c
->
vector_clipf
=
vector_clipf_sse
;
#if HAVE_YASM
#if HAVE_YASM
c
->
scalarproduct_float
=
ff_scalarproduct_float_sse
;
c
->
scalarproduct_float
=
ff_scalarproduct_float_sse
;
c
->
butterflies_float_interleave
=
ff_butterflies_float_interleave_sse
;
#endif
#endif
}
}
if
(
HAVE_AMD3DNOW
&&
(
mm_flags
&
AV_CPU_FLAG_3DNOW
))
if
(
HAVE_AMD3DNOW
&&
(
mm_flags
&
AV_CPU_FLAG_3DNOW
))
...
@@ -2925,6 +2931,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
...
@@ -2925,6 +2931,7 @@ void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
c
->
put_h264_chroma_pixels_tab
[
0
]
=
ff_put_h264_chroma_mc8_10_avx
;
c
->
put_h264_chroma_pixels_tab
[
0
]
=
ff_put_h264_chroma_mc8_10_avx
;
c
->
avg_h264_chroma_pixels_tab
[
0
]
=
ff_avg_h264_chroma_mc8_10_avx
;
c
->
avg_h264_chroma_pixels_tab
[
0
]
=
ff_avg_h264_chroma_mc8_10_avx
;
}
}
c
->
butterflies_float_interleave
=
ff_butterflies_float_interleave_avx
;
}
}
#endif
#endif
}
}
...
...
libavcodec/x86/dsputil_yasm.asm
View file @
9d06037d
...
@@ -1129,3 +1129,51 @@ VECTOR_CLIP_INT32 11, 1, 1, 0
...
@@ -1129,3 +1129,51 @@ VECTOR_CLIP_INT32 11, 1, 1, 0
%else
%else
VECTOR_CLIP_INT32
6
,
1
,
0
,
0
VECTOR_CLIP_INT32
6
,
1
,
0
,
0
%endif
%endif
;-----------------------------------------------------------------------------
; void ff_butterflies_float_interleave(float *dst, const float *src0,
; const float *src1, int len);
;-----------------------------------------------------------------------------
; Stores src0[i] + src1[i] and src0[i] - src1[i] as adjacent pairs in dst.
; Each loop iteration consumes mmsize bytes from both inputs and writes
; 2 * mmsize bytes of output.
; Only len == 0 is checked; the loop body runs at least once for any other
; value and always handles a full vector.
; NOTE(review): callers presumably must pass a positive len that is a
; multiple of the vector width in floats -- confirm against the C-side docs.
%macro
BUTTERFLIES_FLOAT_INTERLEAVE
0
cglobal
butterflies_float_interleave
,
4
,
4
,
3
,
dst
,
src0
,
src1
,
len
; len is a 32-bit int; widen it to 64 bits before using it in addressing
%ifdef
ARCH_X86_64
movsxd
lenq
,
lend
%endif
; nothing to do for len == 0
test
lenq
,
lenq
jz
.
end
; len *= 4: element count -> byte count (4-byte floats)
shl
lenq
,
2
; point src0/src1 at the end of their data, and dst at the end of its
; (twice as large) data, so a negative offset can count up towards zero
lea
src0q
,
[
src0q
+
lenq
]
lea
src1q
,
[
src1q
+
lenq
]
lea
dstq
,
[
dstq
+
2
*
lenq
]
neg
lenq
.
loop
:
; m0 = next vector from src0, m1 = next vector from src1
mova
m0
,
[
src0q
+
lenq
]
mova
m1
,
[
src1q
+
lenq
]
; m2 = differences (src0 - src1)
subps
m2
,
m0
,
m1
; m0 = sums (src0 + src1)
addps
m0
,
m0
,
m1
; m1 = sum/difference pairs built from the low halves
unpcklps
m1
,
m0
,
m2
; m0 = sum/difference pairs built from the high halves
unpckhps
m0
,
m0
,
m2
%if
cpuflag
(
avx
)
; The 256-bit unpack instructions work within each 128-bit lane, so the
; output order is: lane 0 of m1, lane 0 of m0, lane 1 of m1, lane 1 of m0.
; Each store is a 16-byte write.
vextractf128
[
dstq
+
2
*
lenq
]
,
m1
,
0
vextractf128
[
dstq
+
2
*
lenq
+
16
]
,
m0
,
0
vextractf128
[
dstq
+
2
*
lenq
+
32
]
,
m1
,
1
vextractf128
[
dstq
+
2
*
lenq
+
48
]
,
m0
,
1
%else
; non-AVX: the low-half pairs are stored first, then the high-half pairs
mova
[
dstq
+
2
*
lenq
]
,
m1
mova
[
dstq
+
2
*
lenq
+
mmsize
]
,
m0
%endif
; advance the (negative) byte offset; keep looping while it is below zero
add
lenq
,
mmsize
jl
.
loop
; 32-byte vector build: clear the upper register halves, then return
%if
mmsize
==
32
vzeroupper
RET
%endif
.
end
:
REP_RET
%endmacro
; Instantiate the macro twice: once with INIT_XMM sse, once with INIT_YMM avx
INIT_XMM
sse
BUTTERFLIES_FLOAT_INTERLEAVE
INIT_YMM
avx
BUTTERFLIES_FLOAT_INTERLEAVE
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment