Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
99200bae
Commit
99200bae
authored
Oct 24, 2003
by
Michael Niedermayer
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
optimizing
Originally committed as revision 2428 to
svn://svn.ffmpeg.org/ffmpeg/trunk
parent
1b245cc2
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
60 additions
and
23 deletions
+60
-23
fdct_mmx.c
libavcodec/i386/fdct_mmx.c
+60
-23
No files found.
libavcodec/i386/fdct_mmx.c
View file @
99200bae
...
...
@@ -210,64 +210,101 @@ static always_inline void fdct_col(const int16_t *in, int16_t *out, int offset)
movq_r2m
(
mm3
,
*
(
out
+
offset
+
7
*
8
));
}
static
always_inline
void
fdct_row
(
const
int16_t
*
in
,
int16_t
*
out
,
const
int16_t
*
table
,
int
mmx2
)
static
always_inline
void
fdct_row
_mmx2
(
const
int16_t
*
in
,
int16_t
*
out
,
const
int16_t
*
table
)
{
if
(
mmx2
){
pshufw_m2r
(
*
(
in
+
4
),
mm5
,
0x1B
);
movq_m2r
(
*
(
in
+
0
),
mm0
);
}
else
{
movq_r2r
(
mm0
,
mm1
);
paddsw_r2r
(
mm5
,
mm0
);
psubsw_r2r
(
mm5
,
mm1
);
pshufw_r2r
(
mm1
,
mm5
,
0x88
);
pshufw_r2r
(
mm1
,
mm6
,
0xDD
);
pshufw_r2r
(
mm0
,
mm1
,
0xDD
);
pshufw_r2r
(
mm0
,
mm0
,
0x88
);
movq_m2r
(
*
(
table
+
0
),
mm3
);
movq_m2r
(
*
(
table
+
4
),
mm4
);
movq_m2r
(
*
(
table
+
16
),
mm2
);
movq_m2r
(
*
(
table
+
20
),
mm7
);
pmaddwd_r2r
(
mm0
,
mm3
);
pmaddwd_r2r
(
mm1
,
mm4
);
pmaddwd_r2r
(
mm5
,
mm2
);
pmaddwd_r2r
(
mm6
,
mm7
);
pmaddwd_m2r
(
*
(
table
+
8
),
mm0
);
pmaddwd_m2r
(
*
(
table
+
12
),
mm1
);
pmaddwd_m2r
(
*
(
table
+
24
),
mm5
);
pmaddwd_m2r
(
*
(
table
+
28
),
mm6
);
paddd_r2r
(
mm1
,
mm0
);
paddd_r2r
(
mm6
,
mm5
);
movq_m2r
(
*
fdct_r_row
,
mm7
);
paddd_r2r
(
mm7
,
mm3
);
paddd_r2r
(
mm7
,
mm0
);
paddd_r2r
(
mm7
,
mm2
);
paddd_r2r
(
mm7
,
mm5
);
psrad_i2r
(
SHIFT_FRW_ROW
,
mm3
);
psrad_i2r
(
SHIFT_FRW_ROW
,
mm2
);
psrad_i2r
(
SHIFT_FRW_ROW
,
mm0
);
psrad_i2r
(
SHIFT_FRW_ROW
,
mm5
);
packssdw_r2r
(
mm0
,
mm3
);
packssdw_r2r
(
mm5
,
mm2
);
movq_r2r
(
mm3
,
mm6
);
punpcklwd_r2r
(
mm2
,
mm3
);
punpckhwd_r2r
(
mm2
,
mm6
);
movq_r2m
(
mm3
,
*
(
out
+
0
));
movq_r2m
(
mm6
,
*
(
out
+
4
));
}
static
always_inline
void
fdct_row_mmx
(
const
int16_t
*
in
,
int16_t
*
out
,
const
int16_t
*
table
)
{
movd_m2r
(
*
(
in
+
6
),
mm5
);
punpcklwd_m2r
(
*
(
in
+
4
),
mm5
);
movq_r2r
(
mm5
,
mm2
);
psrlq_i2r
(
0x20
,
mm5
);
movq_m2r
(
*
(
in
+
0
),
mm0
);
punpcklwd_r2r
(
mm2
,
mm5
);
}
movq_r2r
(
mm0
,
mm1
);
paddsw_r2r
(
mm5
,
mm0
);
psubsw_r2r
(
mm5
,
mm1
);
movq_r2r
(
mm0
,
mm2
);
punpcklwd_r2r
(
mm1
,
mm0
);
punpckhwd_r2r
(
mm1
,
mm2
);
movq_r2r
(
mm2
,
mm1
);
movq_r2r
(
mm0
,
mm2
);
movq_r2r
(
mm0
,
mm1
);
movq_m2r
(
*
(
table
+
0
),
mm3
);
punpcklwd_r2r
(
mm
1
,
mm0
);
punpcklwd_r2r
(
mm
2
,
mm0
);
movq_r2r
(
mm0
,
mm5
);
punpckldq_r2r
(
mm0
,
mm0
);
movq_m2r
(
*
(
table
+
4
),
mm4
);
punpckhwd_r2r
(
mm
1
,
mm2
);
punpckhwd_r2r
(
mm
2
,
mm1
);
pmaddwd_r2r
(
mm0
,
mm3
);
movq_r2r
(
mm
2
,
mm6
);
movq_m2r
(
*
(
table
+
16
),
mm
1
);
punpckldq_r2r
(
mm
2
,
mm2
);
pmaddwd_r2r
(
mm
2
,
mm4
);
movq_r2r
(
mm
1
,
mm6
);
movq_m2r
(
*
(
table
+
16
),
mm
2
);
punpckldq_r2r
(
mm
1
,
mm1
);
pmaddwd_r2r
(
mm
1
,
mm4
);
punpckhdq_r2r
(
mm5
,
mm5
);
pmaddwd_m2r
(
*
(
table
+
8
),
mm0
);
punpckhdq_r2r
(
mm6
,
mm6
);
movq_m2r
(
*
(
table
+
20
),
mm7
);
pmaddwd_r2r
(
mm5
,
mm
1
);
pmaddwd_r2r
(
mm5
,
mm
2
);
paddd_m2r
(
*
fdct_r_row
,
mm3
);
pmaddwd_r2r
(
mm6
,
mm7
);
pmaddwd_m2r
(
*
(
table
+
12
),
mm
2
);
pmaddwd_m2r
(
*
(
table
+
12
),
mm
1
);
paddd_r2r
(
mm4
,
mm3
);
pmaddwd_m2r
(
*
(
table
+
24
),
mm5
);
pmaddwd_m2r
(
*
(
table
+
28
),
mm6
);
paddd_r2r
(
mm7
,
mm
1
);
paddd_r2r
(
mm7
,
mm
2
);
paddd_m2r
(
*
fdct_r_row
,
mm0
);
psrad_i2r
(
SHIFT_FRW_ROW
,
mm3
);
paddd_m2r
(
*
fdct_r_row
,
mm
1
);
paddd_r2r
(
mm
2
,
mm0
);
paddd_m2r
(
*
fdct_r_row
,
mm
2
);
paddd_r2r
(
mm
1
,
mm0
);
paddd_m2r
(
*
fdct_r_row
,
mm5
);
psrad_i2r
(
SHIFT_FRW_ROW
,
mm
1
);
psrad_i2r
(
SHIFT_FRW_ROW
,
mm
2
);
paddd_r2r
(
mm6
,
mm5
);
psrad_i2r
(
SHIFT_FRW_ROW
,
mm0
);
psrad_i2r
(
SHIFT_FRW_ROW
,
mm5
);
packssdw_r2r
(
mm0
,
mm3
);
packssdw_r2r
(
mm5
,
mm
1
);
packssdw_r2r
(
mm5
,
mm
2
);
movq_r2r
(
mm3
,
mm6
);
punpcklwd_r2r
(
mm
1
,
mm3
);
punpckhwd_r2r
(
mm
1
,
mm6
);
punpcklwd_r2r
(
mm
2
,
mm3
);
punpckhwd_r2r
(
mm
2
,
mm6
);
movq_r2m
(
mm3
,
*
(
out
+
0
));
movq_r2m
(
mm6
,
*
(
out
+
4
));
}
...
...
@@ -288,7 +325,7 @@ void ff_fdct_mmx(int16_t *block)
table
=
tab_frw_01234567
;
out
=
block
;
for
(
i
=
8
;
i
>
0
;
i
--
)
{
fdct_row
(
block1
,
out
,
table
,
0
);
fdct_row
_mmx
(
block1
,
out
,
table
);
block1
+=
8
;
table
+=
32
;
out
+=
8
;
...
...
@@ -311,7 +348,7 @@ void ff_fdct_mmx2(int16_t *block)
table
=
tab_frw_01234567
;
out
=
block
;
for
(
i
=
8
;
i
>
0
;
i
--
)
{
fdct_row
(
block1
,
out
,
table
,
1
);
fdct_row
_mmx2
(
block1
,
out
,
table
);
block1
+=
8
;
table
+=
32
;
out
+=
8
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment