Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
15ce1601
Commit
15ce1601
authored Mar 10, 2015 by Christophe Gisquet
Committed by Michael Niedermayer, Mar 14, 2015
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
x86: xvid_idct: SSE2 merged add version

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent
decd5193
Hide whitespace changes
Inline
Side-by-side
Showing 2 changed files with 91 additions and 10 deletions
+91
-10
xvididct.asm
libavcodec/x86/xvididct.asm
+89
-3
xvididct_init.c
libavcodec/x86/xvididct_init.c
+2
-7
No files found.
libavcodec/x86/xvididct.asm
View file @
15ce1601
...
...
@@ -384,6 +384,12 @@ SECTION .text
; Must now load args as gprs are no longer used for masks
; DEST is set to where address of dest was loaded
%
if
ARCH_X86_32
%
if
%2
==
2
; Not enough xmms, store
movdqa
[
%1
+
1
*
16
]
,
TAN3
movdqa
[
%1
+
2
*
16
]
,
xmm3
movdqa
[
%1
+
5
*
16
]
,
REG0
movdqa
[
%1
+
6
*
16
]
,
xmm5
%
endif
%
xdefine
DEST
r2q
; BLOCK is r0, stride r1
movifnidn
DEST
,
destm
movifnidn
strideq
,
stridem
...
...
@@ -397,8 +403,6 @@ SECTION .text
movq
[
DEST
+
strideq
]
,
TAN3
movhps
[
DEST
+
2
*
strideq
]
,
TAN3
; REG0 and TAN3 are now available (and likely used in second half)
%
else
%
warning
Unimplemented
%
endif
%endif
%endmacro
...
...
@@ -427,7 +431,88 @@ SECTION .text
movq
[
DEST
+
2
*
strideq
]
,
xmm5
movhps
[
DEST
+
strideq
]
,
xmm5
%elif
%2
==
2
%warning
Unimplemented
pxor
xmm0
,
xmm0
%
if
ARCH_X86_32
; free: m3 REG0=m4 m5
; input: m1, m7, m2, m6
movq
xmm3
,
[
DEST
+
0
*
strideq
]
movq
xmm4
,
[
DEST
+
1
*
strideq
]
punpcklbw
xmm3
,
xmm0
punpcklbw
xmm4
,
xmm0
paddsw
xmm3
,
%3
paddsw
xmm4
,
[
%1
+
1
*
16
]
movq
%3
,
[
DEST
+
2
*
strideq
]
movq
xmm5
,
[
DEST
+
r3q
]
punpcklbw
%3
,
xmm0
punpcklbw
xmm5
,
xmm0
paddsw
%3
,
[
%1
+
2
*
16
]
paddsw
xmm5
,
%5
packuswb
xmm3
,
xmm4
packuswb
%3
,
xmm5
movq
[
DEST
+
0
*
strideq
]
,
xmm3
movhps
[
DEST
+
1
*
strideq
]
,
xmm3
movq
[
DEST
+
2
*
strideq
]
,
%3
movhps
[
DEST
+
r3q
]
,
%3
lea
DEST
,
[
DEST
+
4
*
strideq
]
movq
xmm3
,
[
DEST
+
0
*
strideq
]
movq
xmm4
,
[
DEST
+
1
*
strideq
]
movq
%3
,
[
DEST
+
2
*
strideq
]
movq
xmm5
,
[
DEST
+
r3q
]
punpcklbw
xmm3
,
xmm0
punpcklbw
xmm4
,
xmm0
punpcklbw
%3
,
xmm0
punpcklbw
xmm5
,
xmm0
paddsw
xmm3
,
%6
paddsw
xmm4
,
[
%1
+
5
*
16
]
paddsw
%3
,
[
%1
+
6
*
16
]
paddsw
xmm5
,
%4
packuswb
xmm3
,
xmm4
packuswb
%3
,
xmm5
movq
[
DEST
+
0
*
strideq
]
,
xmm3
movhps
[
DEST
+
1
*
strideq
]
,
xmm3
movq
[
DEST
+
2
*
strideq
]
,
%3
movhps
[
DEST
+
r3q
]
,
%3
%
else
; l1:TAN3=m13 l2:m3 l5:REG0=m8 l6=m5
; input: m1, m7/SREG2=m9, TAN1=m14, REG4=m10
movq
xmm2
,
[
DEST
+
0
*
strideq
]
movq
xmm4
,
[
DEST
+
1
*
strideq
]
movq
xmm12
,
[
DEST
+
2
*
strideq
]
movq
xmm11
,
[
DEST
+
r3q
]
punpcklbw
xmm2
,
xmm0
punpcklbw
xmm4
,
xmm0
punpcklbw
xmm12
,
xmm0
punpcklbw
xmm11
,
xmm0
paddsw
xmm2
,
%3
paddsw
xmm4
,
TAN3
paddsw
xmm12
,
xmm3
paddsw
xmm11
,
%5
packuswb
xmm2
,
xmm4
packuswb
xmm12
,
xmm11
movq
[
DEST
+
0
*
strideq
]
,
xmm2
movhps
[
DEST
+
1
*
strideq
]
,
xmm2
movq
[
DEST
+
2
*
strideq
]
,
xmm12
movhps
[
DEST
+
r3q
]
,
xmm12
lea
DEST
,
[
DEST
+
4
*
strideq
]
movq
xmm2
,
[
DEST
+
0
*
strideq
]
movq
xmm4
,
[
DEST
+
1
*
strideq
]
movq
xmm12
,
[
DEST
+
2
*
strideq
]
movq
xmm11
,
[
DEST
+
r3q
]
punpcklbw
xmm2
,
xmm0
punpcklbw
xmm4
,
xmm0
punpcklbw
xmm12
,
xmm0
punpcklbw
xmm11
,
xmm0
paddsw
xmm2
,
%6
paddsw
xmm4
,
REG0
paddsw
xmm12
,
xmm5
paddsw
xmm11
,
%4
packuswb
xmm2
,
xmm4
packuswb
xmm12
,
xmm11
movq
[
DEST
+
0
*
strideq
]
,
xmm2
movhps
[
DEST
+
1
*
strideq
]
,
xmm2
movq
[
DEST
+
2
*
strideq
]
,
xmm12
movhps
[
DEST
+
r3q
]
,
xmm12
%
endif
%endif
%endmacro
...
...
@@ -623,6 +708,7 @@ cglobal xvid_idct_add, 0, NUM_GPRS, 8+7*ARCH_X86_64, dest, stride, block
INIT_XMM
sse2
IDCT_SSE2
0
IDCT_SSE2
1
IDCT_SSE2
2
%if
ARCH_X86_32
...
...
libavcodec/x86/xvididct_init.c
View file @
15ce1601
...
...
@@ -27,12 +27,7 @@
#include "xvididct.h"
void
ff_xvid_idct_put_sse2
(
uint8_t
*
dest
,
int
line_size
,
short
*
block
);
static
void
xvid_idct_sse2_add
(
uint8_t
*
dest
,
int
line_size
,
short
*
block
)
{
ff_xvid_idct_sse2
(
block
);
ff_add_pixels_clamped
(
block
,
dest
,
line_size
);
}
void
ff_xvid_idct_add_sse2
(
uint8_t
*
dest
,
int
line_size
,
short
*
block
);
#if ARCH_X86_32
static
void
xvid_idct_mmx_put
(
uint8_t
*
dest
,
int
line_size
,
short
*
block
)
...
...
@@ -88,7 +83,7 @@ av_cold void ff_xvid_idct_init_x86(IDCTDSPContext *c, AVCodecContext *avctx,
if
(
EXTERNAL_SSE2
(
cpu_flags
))
{
c
->
idct_put
=
ff_xvid_idct_put_sse2
;
c
->
idct_add
=
xvid_idct_sse2_add
;
c
->
idct_add
=
ff_xvid_idct_add_sse2
;
c
->
idct
=
ff_xvid_idct_sse2
;
c
->
perm_type
=
FF_IDCT_PERM_SSE2
;
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please register or sign in to comment