Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
2dd2f716
Commit
2dd2f716
authored
Jun 29, 2010
by
Ronald S. Bultje
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
MMX idct_add for VP8.
Originally committed as revision 23886 to
svn://svn.ffmpeg.org/ffmpeg/trunk
parent
29e71937
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
106 additions
and
0 deletions
+106
-0
vp8dsp-init.c
libavcodec/x86/vp8dsp-init.c
+2
-0
vp8dsp.asm
libavcodec/x86/vp8dsp.asm
+89
-0
x86util.asm
libavcodec/x86/x86util.asm
+15
-0
No files found.
libavcodec/x86/vp8dsp-init.c
View file @
2dd2f716
...
...
@@ -196,6 +196,7 @@ HVBILIN(ssse3, 8, 16, 16)
/* Prototypes for the VP8 inverse-transform routines that are implemented
 * outside this C file; they are only declared here so that the init code
 * can store their addresses in the VP8DSPContext function pointers. */
extern void ff_vp8_idct_dc_add_mmx(uint8_t *dst, DCTELEM block[16], int stride);
extern void ff_vp8_idct_dc_add_sse4(uint8_t *dst, DCTELEM block[16], int stride);
extern void ff_vp8_luma_dc_wht_mmxext(DCTELEM block[4][4][16], DCTELEM dc[16]);
extern void ff_vp8_idct_add_mmx(uint8_t *dst, DCTELEM block[16], int stride);
#endif
#define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \
...
...
@@ -229,6 +230,7 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
#if HAVE_YASM
if
(
mm_flags
&
FF_MM_MMX
)
{
c
->
vp8_idct_dc_add
=
ff_vp8_idct_dc_add_mmx
;
c
->
vp8_idct_add
=
ff_vp8_idct_add_mmx
;
c
->
put_vp8_epel_pixels_tab
[
0
][
0
][
0
]
=
c
->
put_vp8_bilinear_pixels_tab
[
0
][
0
][
0
]
=
ff_put_vp8_pixels16_mmx
;
c
->
put_vp8_epel_pixels_tab
[
1
][
0
][
0
]
=
...
...
libavcodec/x86/vp8dsp.asm
View file @
2dd2f716
...
...
@@ -142,6 +142,9 @@ filter_h6_shuf1: db 0, 5, 1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 6, 11, 7, 12
; 16-entry byte index tables (presumably pshufb masks for the 6-tap
; horizontal filter -- confirm against their users elsewhere in the file)
filter_h6_shuf2: db 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7,  8,  8,  9
filter_h6_shuf3: db 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11

; IDCT multipliers, each replicated into four 16-bit words
pw_20091: times 4 dw 20091
pw_17734: times 4 dw 17734          ; 17734 = 35468 / 2

; word constants defined in another object file
cextern pw_3
cextern pw_4
cextern pw_64
...
...
@@ -923,6 +926,92 @@ cglobal vp8_idct_dc_add_sse4, 3, 3, 6
pextrd
[
r1
+
r2
]
,
xmm2
,
3
RET
;-----------------------------------------------------------------------------
; void vp8_idct_add_<opt>(uint8_t *dst, DCTELEM block[16], int stride);
;-----------------------------------------------------------------------------
; SUMSUB a, b, tmp
;   a   <- a + b
;   b   <- b - a      (a as it was on entry, i.e. before the add)
;   tmp <- copy of the entry value of a (clobbered)
%macro SUMSUB 3
    mova    %3, %1                  ; save a: the add below overwrites it
    paddw   %1, %2
    psubw   %2, %3
%endmacro
; VP8_MULTIPLY_SUMSUB a, b, tmp1, tmp2
;   a <- mul_35468(a) - mul_20091(b)
;   b <- mul_20091(a) + mul_35468(b)      (a, b on the right are entry values)
; where, per 16-bit lane,
;   mul_20091(x) = x + ((x * 20091) >> 16)
;   mul_35468(x) = ((2 * x) * 17734) >> 16
; Requires m6 = words of 20091 and m7 = words of 17734 to be loaded by the
; caller. tmp1/tmp2 are clobbered.
%macro VP8_MULTIPLY_SUMSUB 4
    mova    %3, %1
    mova    %4, %2
    pmulhw  %3, m6                  ; high word of a * 20091
    pmulhw  %4, m6                  ; high word of b * 20091
    paddw   %3, %1                  ; tmp1 = mul_20091(a)
    paddw   %4, %2                  ; tmp2 = mul_20091(b)
    psllw   %1, 1                   ; double the input so that 17734 acts as 35468
    psllw   %2, 1
    pmulhw  %1, m7                  ; a = mul_35468(a)
    pmulhw  %2, m7                  ; b = mul_35468(b)
    psubw   %1, %4
    paddw   %2, %3
%endmacro
; VP8_IDCT_TRANSFORM4x4_1D in0, in1, in2, in3, tmpA, tmpB
; All six arguments are register *numbers* (the macro prepends "m").
; One 1-D pass of the 4-point inverse transform:
;   x0 = in0 + in2                           x1 = in0 - in2
;   x2 = mul_35468(in1) - mul_20091(in3)     x3 = mul_20091(in1) + mul_35468(in3)
; Results, after the register renaming at the end:
;   m<in0> = x0 + x3    m<in1> = x1 + x2    m<in2> = x1 - x2    m<in3> = x0 - x3
; Requires m6/m7 to hold the 20091/17734 word constants; tmpA/tmpB are scratch.
%macro VP8_IDCT_TRANSFORM4x4_1D 6
    SUMSUB_BA           m%3, m%1, m%5       ; m%3 = x0, m%1 = x1
    VP8_MULTIPLY_SUMSUB m%2, m%4, m%5, m%6  ; m%2 = x2, m%4 = x3
    SUMSUB_BA           m%4, m%3, m%5       ; m%4 = x0 + x3, m%3 = x0 - x3
    SUMSUB_BA           m%2, m%1, m%5       ; m%2 = x1 + x2, m%1 = x1 - x2
    ; rename registers so the outputs end up in argument order
    SWAP                %4, %1
    SWAP                %4, %3
%endmacro
; TRANSPOSE4x4 r0, r1, r2, r3, tmp
; Transposes a 4x4 matrix of 16-bit words held one row per register.
; All arguments are register *numbers* (the macro prepends "m").
; The unpack sequence leaves the columns in %1/%4/%5/%3; the trailing SWAPs
; rename them so that m%1..m%4 = col0..col3. m%5 is scratch afterwards.
%macro TRANSPOSE4x4 5
    ; interleave words of rows 0/1 and rows 2/3
    mova        m%5, m%1
    punpcklwd   m%1, m%2
    punpckhwd   m%5, m%2
    mova        m%2, m%3
    punpcklwd   m%3, m%4
    punpckhwd   m%2, m%4
    ; interleave dwords to form the columns
    mova        m%4, m%1
    punpckldq   m%1, m%3            ; col0
    punpckhdq   m%4, m%3            ; col1
    mova        m%3, m%5
    punpckldq   m%5, m%2            ; col2
    punpckhdq   m%3, m%2            ; col3
    ; put the columns back into argument order
    SWAP        %4, %2
    SWAP        %4, %5
    SWAP        %4, %3
%endmacro
;-----------------------------------------------------------------------------
; void vp8_idct_add_mmx(uint8_t *dst, DCTELEM block[16], int stride);
;
; Runs the 4x4 inverse transform on block and adds the result to the 4x4
; pixel area at dst (rows are stride bytes apart), saturating to 0..255.
;
; In:    r0 = dst, r1 = block (read as four 8-byte rows), r2 = stride
; Uses:  m0-m7; r1 is reused as dst + 2*stride for rows 2 and 3
;-----------------------------------------------------------------------------
INIT_MMX
cglobal vp8_idct_add_mmx, 3, 3
    ; stride is declared as a 32-bit int, but r2 is used at full register
    ; width in the addresses below. On x86-64 the upper half of the register
    ; is unspecified for an int argument, so sign-extend it first.
    ; NOTE(review): relies on the movsxdifnidn helper from x86inc.asm, which
    ; expands to nothing when r2 and r2d are the same register (x86-32).
    movsxdifnidn r2, r2d

    ; load block data, one row of four coefficients per register
    movq    m0, [r1]
    movq    m1, [r1+8]
    movq    m2, [r1+16]
    movq    m3, [r1+24]
    ; multiplier constants expected in m6/m7 by VP8_IDCT_TRANSFORM4x4_1D
    movq    m6, [pw_20091]
    movq    m7, [pw_17734]

    ; actual IDCT: 1-D pass, transpose, 1-D pass, transpose
    VP8_IDCT_TRANSFORM4x4_1D 0, 1, 2, 3, 4, 5
    TRANSPOSE4x4W            0, 1, 2, 3, 4
    ; rounding bias for the >>3 done in STORE_DIFFx2; m0 feeds every output
    ; of the second pass, so adding it here biases all 16 results
    paddw   m0, [pw_4]
    VP8_IDCT_TRANSFORM4x4_1D 0, 1, 2, 3, 4, 5
    TRANSPOSE4x4W            0, 1, 2, 3, 4

    ; store: m4 = 0 is the unpack/pack partner, m6/m7 are now free as scratch
    pxor    m4, m4
    lea     r1, [r0+2*r2]           ; r1 = address of row 2
    STORE_DIFFx2 m0, m1, m6, m7, m4, 3, r0, r2     ; rows 0 and 1
    STORE_DIFFx2 m2, m3, m6, m7, m4, 3, r1, r2     ; rows 2 and 3
    RET
;-----------------------------------------------------------------------------
; void vp8_luma_dc_wht_mmxext(DCTELEM block[4][4][16], DCTELEM dc[16])
;-----------------------------------------------------------------------------
...
...
libavcodec/x86/x86util.asm
View file @
2dd2f716
...
...
@@ -365,3 +365,18 @@
packuswb
%1
,
%1
movh
%4
,
%1
%endmacro
; STORE_DIFFx2 add1, add2, reg1, reg2, zero, shift, source, stride
; Adds two rows of 16-bit residuals to two rows of 8-bit pixels in memory:
;   row at [source]          += add1 >> shift
;   row at [source + stride] += add2 >> shift
; with unsigned saturation when packing back to bytes.
;   add1/add2  residual registers (arithmetically shifted in place: clobbered)
;   reg1/reg2  scratch registers for the loaded pixels
;   zero       register that must contain all zeroes
%macro STORE_DIFFx2 8
    movh        %3, [%7]
    movh        %4, [%7+%8]
    punpcklbw   %3, %5              ; widen pixels from bytes to words
    punpcklbw   %4, %5
    psraw       %1, %6
    psraw       %2, %6
    paddw       %3, %1
    paddw       %4, %2
    packuswb    %3, %5              ; narrow back to bytes, saturating
    packuswb    %4, %5
    movh        [%7], %3
    movh        [%7+%8], %4
%endmacro
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment