Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
57b5b84e
Commit
57b5b84e
authored
Mar 26, 2014
by
Diego Biurrun
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
x86: dsputil: Move ff_apply_window_int16_* bits to ac3dsp, where they belong
parent
c2c5be57
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
131 additions
and
130 deletions
+131
-130
ac3dsp.asm
libavcodec/x86/ac3dsp.asm
+131
-0
dsputil.asm
libavcodec/x86/dsputil.asm
+0
-130
No files found.
libavcodec/x86/ac3dsp.asm
View file @
57b5b84e
...
...
@@ -35,6 +35,10 @@ pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7
pd_1:        times 4 dd 1
pd_151:      times 4 dd 151
; used in ff_apply_window_int16()
; Word indices 7..0: APPLY_WINDOW_INT16 loads this into m5 and REVERSE_WORDS
; passes it to pshufb (ssse3 without atom only) to reverse the words of a
; register. SHUFFLE_MASK_W is defined elsewhere; presumably it expands each
; word index into the matching pair of byte indices -- confirm.
pb_revwords: SHUFFLE_MASK_W 7, 6, 5, 4, 3, 2, 1, 0
; 16384 = 1 << 14 in every dword: the rounding term added before the >> 15 in
; the bit-exact mmxext/sse2 path of APPLY_WINDOW_INT16.
pd_16384:    times 4 dd 16384

SECTION .text
;-----------------------------------------------------------------------------
...
...
@@ -419,3 +423,130 @@ AC3_EXTRACT_EXPONENTS
INIT_XMM
ssse3
AC3_EXTRACT_EXPONENTS
%endif
;-----------------------------------------------------------------------------
; void ff_apply_window_int16(int16_t *output, const int16_t *input,
; const int16_t *window, unsigned int len)
;-----------------------------------------------------------------------------
; REVERSE_WORDS reg[, mask]
; Reverses the order of the 16-bit words in %1, in place.
;   %1 = register to reverse (source and destination)
;   %2 = shuffle mask register for pshufb. Only read by the ssse3-without-atom
;        variant, where the result is a reversal only if the mask encodes one
;        (the caller in this file passes pb_revwords). The other variants
;        ignore %2, so it may be omitted there.
%macro REVERSE_WORDS 1-2
%if cpuflag(ssse3) && notcpuflag(atom)
    pshufb   %1, %2
%elif cpuflag(sse2)
    ; 0x1B selects words 3,2,1,0: reverse the four words inside the low half,
    ; then inside the high half ...
    pshuflw  %1, %1, 0x1B
    pshufhw  %1, %1, 0x1B
    ; ... and 0x4E selects dwords 2,3,0,1: swap the two 64-bit halves.
    pshufd   %1, %1, 0x4E
%elif cpuflag(mmxext)
    ; a 64-bit MMX register holds four words; 0x1B reverses them
    pshufw   %1, %1, 0x1B
%endif
%endmacro
; MUL16FIXED dst, src, tmp
; Multiplies the packed signed words in %1 by those in %2 and keeps the
; product shifted right by 15, leaving the result in %1.
;   %1 = multiplicand and destination
;   %2 = multiplier (not modified)
;   %3 = scratch register, clobbered by the mmxext variant, unused on ssse3
; The ssse3 variant rounds (pmulhrsw); the mmxext variant truncates.
%macro MUL16FIXED 3
%if cpuflag(ssse3)
    ; dst, src, unused
    ; dst = ((dst * src) + (1<<14)) >> 15
    pmulhrsw   %1, %2
%elif cpuflag(mmxext)
    ; dst, src, temp
    ; dst = (dst * src) >> 15
    ; pmulhw cuts off the bottom bit, so we have to lshift by 1 and add it back
    ; in from the pmullw result.
    mova       %3, %1               ; keep a copy of dst for the low-half product
    pmulhw     %1, %2               ; %1 = bits 31..16 of the product
    pmullw     %3, %2               ; %3 = bits 15..0 of the product
    psrlw      %3, 15               ; isolate product bit 15 in bit 0 of each word
    psllw      %1, 1                ; make room for that bit
    por        %1, %3               ; %1 = bits 30..15 of the product
%endif
%endmacro
; APPLY_WINDOW_INT16 bitexact
; Emits one implementation of
;   void ff_apply_window_int16(int16_t *output, const int16_t *input,
;                              const int16_t *window, unsigned int len)
; for the instruction set chosen by the preceding INIT_MMX / INIT_XMM.
;   %1 != 0: declared as apply_window_int16. Products are rounded, either with
;            pmulhrsw (ssse3) or by widening to 32 bits and adding 16384
;            before the >> 15 (mmxext/sse2).
;   %1 == 0: declared as apply_window_int16_round. Without ssse3 this uses
;            MUL16FIXED, which does not round.
; The "4,5,6" passed to cglobal follows the x86inc convention (presumably
; 4 arguments, 5 general-purpose registers, 6 vector registers).
;
; Register roles:
;   offset  = 4th argument (len), used as a byte offset that walks upwards
;   offset2 = offset - mmsize, a byte offset that walks downwards
;   m5      = pb_revwords (ssse3 without atom) or pd_16384 (other %1 builds)
; Every iteration handles mmsize bytes at offset2 with the window vector as
; loaded from [window+offset2], and mmsize bytes at offset with that same
; vector word-reversed. The loop stops when offset2 - mmsize borrows.
; NOTE(review): the loop body always runs at least once and uses aligned
; accesses (mova); callers presumably guarantee a suitable non-zero len and
; aligned buffers -- confirm.
%macro APPLY_WINDOW_INT16 1 ; %1 bitexact version
%if %1
cglobal apply_window_int16, 4,5,6, output, input, window, offset, offset2
%else
cglobal apply_window_int16_round, 4,5,6, output, input, window, offset, offset2
%endif
    ; len arrives as a 32-bit unsigned int. The x86-64 calling conventions
    ; leave the upper half of its register unspecified, and offsetq is used
    ; for addressing before the first 32-bit write to it at the bottom of the
    ; loop. Zero-extend explicitly; on x86-32 this is a plain self-move.
    mov           offsetd, offsetd
    lea           offset2q, [offsetq-mmsize]
%if cpuflag(ssse3) && notcpuflag(atom)
    mova          m5, [pb_revwords]
    ALIGN 16                        ; aligns .loop for this variant only
%elif %1
    mova          m5, [pd_16384]
%endif
.loop:
%if cpuflag(ssse3)
    ; This version does the 16x16->16 multiplication in-place without expanding
    ; to 32-bit. The ssse3 version is bit-identical.
    mova          m0, [windowq+offset2q]
    mova          m1, [inputq+offset2q]
    pmulhrsw      m1, m0
    REVERSE_WORDS m0, m5            ; m5 is ignored by the atom variant
    pmulhrsw      m0, [inputq+offsetq]
    mova          [outputq+offset2q], m1
    mova          [outputq+offsetq], m0
%elif %1
    ; This version expands 16-bit to 32-bit, multiplies by the window,
    ; adds 16384 for rounding, right shifts 15, then repacks back to words to
    ; save to the output. The window is reversed for the second half.
    mova          m3, [windowq+offset2q]
    mova          m4, [inputq+offset2q]
    ; Interleaving the window with zero words means pmaddwd yields
    ; window*input per dword; whatever m1 held before lands in the word slots
    ; that get multiplied by zero, so m1 needs no clearing.
    pxor          m0, m0
    punpcklwd     m0, m3
    punpcklwd     m1, m4
    pmaddwd       m0, m1
    paddd         m0, m5
    psrad         m0, 15
    pxor          m2, m2
    punpckhwd     m2, m3
    punpckhwd     m1, m4
    pmaddwd       m2, m1
    paddd         m2, m5
    psrad         m2, 15
    packssdw      m0, m2
    mova          [outputq+offset2q], m0
    REVERSE_WORDS m3
    mova          m4, [inputq+offsetq]
    pxor          m0, m0
    punpcklwd     m0, m3
    punpcklwd     m1, m4
    pmaddwd       m0, m1
    paddd         m0, m5
    psrad         m0, 15
    pxor          m2, m2
    punpckhwd     m2, m3
    punpckhwd     m1, m4
    pmaddwd       m2, m1
    paddd         m2, m5
    psrad         m2, 15
    packssdw      m0, m2
    mova          [outputq+offsetq], m0
%else
    ; This version does the 16x16->16 multiplication in-place without expanding
    ; to 32-bit. The mmxext and sse2 versions do not use rounding, and
    ; therefore are not bit-identical to the C version.
    mova          m0, [windowq+offset2q]
    mova          m1, [inputq+offset2q]
    mova          m2, [inputq+offsetq]
    MUL16FIXED    m1, m0, m3
    REVERSE_WORDS m0
    MUL16FIXED    m2, m0, m3
    mova          [outputq+offset2q], m1
    mova          [outputq+offsetq], m2
%endif
    add           offsetd, mmsize
    sub           offset2d, mmsize
    jae .loop                       ; continue while the subtraction did not borrow
    REP_RET
%endmacro
; %1 = 0: non-bit-exact variants, declared by the macro as
; apply_window_int16_round
INIT_MMX mmxext
APPLY_WINDOW_INT16 0
INIT_XMM sse2
APPLY_WINDOW_INT16 0

; %1 = 1: bit-exact variants, declared by the macro as apply_window_int16
INIT_MMX mmxext
APPLY_WINDOW_INT16 1
INIT_XMM sse2
APPLY_WINDOW_INT16 1
INIT_XMM ssse3
APPLY_WINDOW_INT16 1
; ssse3 on atom: keeps pmulhrsw but REVERSE_WORDS uses the sse2 shuffles
; instead of pshufb
INIT_XMM ssse3, atom
APPLY_WINDOW_INT16 1
libavcodec/x86/dsputil.asm
View file @
57b5b84e
...
...
@@ -27,8 +27,6 @@ pb_zzzzzzzz77777777: times 8 db -1
; The labels below spell out their contents: in the pb_z* names each character
; stands for one byte, 'z' for -1 and a hex digit for that byte index.
; Their users are outside this excerpt, apart from pb_revwords and pd_16384.
pb_7:                times 8 db 7
pb_zzzz3333zzzzbbbb: db -1, -1, -1, -1, 3, 3, 3, 3, -1, -1, -1, -1, 11, 11, 11, 11
pb_zz11zz55zz99zzdd: db -1, -1, 1, 1, -1, -1, 5, 5, -1, -1, 9, 9, -1, -1, 13, 13
; Word indices 7..0: APPLY_WINDOW_INT16 loads this into m5 and REVERSE_WORDS
; passes it to pshufb (ssse3 without atom only). SHUFFLE_MASK_W is defined
; elsewhere; presumably it expands word indices to byte indices -- confirm.
pb_revwords:         SHUFFLE_MASK_W 7, 6, 5, 4, 3, 2, 1, 0
; 16384 = 1 << 14 in every dword: rounding term added before the >> 15 in the
; bit-exact mmxext/sse2 path of APPLY_WINDOW_INT16.
pd_16384:            times 4 dd 16384
; byte indices that reverse the byte order inside each group of four
pb_bswap32:          db 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12

SECTION_TEXT
...
...
@@ -205,134 +203,6 @@ SCALARPRODUCT_LOOP 0
RET
;-----------------------------------------------------------------------------
; void ff_apply_window_int16(int16_t *output, const int16_t *input,
; const int16_t *window, unsigned int len)
;-----------------------------------------------------------------------------
; REVERSE_WORDS reg[, mask]
; Reverses the order of the 16-bit words in %1, in place.
;   %1 = register to reverse (source and destination)
;   %2 = shuffle mask register for pshufb. Only read by the ssse3-without-atom
;        variant, where the result is a reversal only if the mask encodes one
;        (the caller in this file passes pb_revwords). The other variants
;        ignore %2, so it may be omitted there.
%macro REVERSE_WORDS 1-2
%if cpuflag(ssse3) && notcpuflag(atom)
    pshufb   %1, %2
%elif cpuflag(sse2)
    ; 0x1B selects words 3,2,1,0: reverse the four words inside the low half,
    ; then inside the high half ...
    pshuflw  %1, %1, 0x1B
    pshufhw  %1, %1, 0x1B
    ; ... and 0x4E selects dwords 2,3,0,1: swap the two 64-bit halves.
    pshufd   %1, %1, 0x4E
%elif cpuflag(mmxext)
    ; a 64-bit MMX register holds four words; 0x1B reverses them
    pshufw   %1, %1, 0x1B
%endif
%endmacro
; MUL16FIXED dst, src, tmp
; Multiplies the packed signed words in %1 by those in %2 and keeps the
; product shifted right by 15, leaving the result in %1.
;   %1 = multiplicand and destination
;   %2 = multiplier (not modified)
;   %3 = scratch register, clobbered by the mmxext variant, unused on ssse3
; The ssse3 variant rounds (pmulhrsw); the mmxext variant truncates.
%macro MUL16FIXED 3
%if cpuflag(ssse3)
    ; dst, src, unused
    ; dst = ((dst * src) + (1<<14)) >> 15
    pmulhrsw   %1, %2
%elif cpuflag(mmxext)
    ; dst, src, temp
    ; dst = (dst * src) >> 15
    ; pmulhw cuts off the bottom bit, so we have to lshift by 1 and add it back
    ; in from the pmullw result.
    mova       %3, %1               ; keep a copy of dst for the low-half product
    pmulhw     %1, %2               ; %1 = bits 31..16 of the product
    pmullw     %3, %2               ; %3 = bits 15..0 of the product
    psrlw      %3, 15               ; isolate product bit 15 in bit 0 of each word
    psllw      %1, 1                ; make room for that bit
    por        %1, %3               ; %1 = bits 30..15 of the product
%endif
%endmacro
; APPLY_WINDOW_INT16 bitexact
; Emits one implementation of
;   void ff_apply_window_int16(int16_t *output, const int16_t *input,
;                              const int16_t *window, unsigned int len)
; for the instruction set chosen by the preceding INIT_MMX / INIT_XMM.
;   %1 != 0: declared as apply_window_int16. Products are rounded, either with
;            pmulhrsw (ssse3) or by widening to 32 bits and adding 16384
;            before the >> 15 (mmxext/sse2).
;   %1 == 0: declared as apply_window_int16_round. Without ssse3 this uses
;            MUL16FIXED, which does not round.
; The "4,5,6" passed to cglobal follows the x86inc convention (presumably
; 4 arguments, 5 general-purpose registers, 6 vector registers).
;
; Register roles:
;   offset  = 4th argument (len), used as a byte offset that walks upwards
;   offset2 = offset - mmsize, a byte offset that walks downwards
;   m5      = pb_revwords (ssse3 without atom) or pd_16384 (other %1 builds)
; Every iteration handles mmsize bytes at offset2 with the window vector as
; loaded from [window+offset2], and mmsize bytes at offset with that same
; vector word-reversed. The loop stops when offset2 - mmsize borrows.
; NOTE(review): the loop body always runs at least once and uses aligned
; accesses (mova); callers presumably guarantee a suitable non-zero len and
; aligned buffers -- confirm.
%macro APPLY_WINDOW_INT16 1 ; %1 bitexact version
%if %1
cglobal apply_window_int16, 4,5,6, output, input, window, offset, offset2
%else
cglobal apply_window_int16_round, 4,5,6, output, input, window, offset, offset2
%endif
    ; len arrives as a 32-bit unsigned int. The x86-64 calling conventions
    ; leave the upper half of its register unspecified, and offsetq is used
    ; for addressing before the first 32-bit write to it at the bottom of the
    ; loop. Zero-extend explicitly; on x86-32 this is a plain self-move.
    mov           offsetd, offsetd
    lea           offset2q, [offsetq-mmsize]
%if cpuflag(ssse3) && notcpuflag(atom)
    mova          m5, [pb_revwords]
    ALIGN 16                        ; aligns .loop for this variant only
%elif %1
    mova          m5, [pd_16384]
%endif
.loop:
%if cpuflag(ssse3)
    ; This version does the 16x16->16 multiplication in-place without expanding
    ; to 32-bit. The ssse3 version is bit-identical.
    mova          m0, [windowq+offset2q]
    mova          m1, [inputq+offset2q]
    pmulhrsw      m1, m0
    REVERSE_WORDS m0, m5            ; m5 is ignored by the atom variant
    pmulhrsw      m0, [inputq+offsetq]
    mova          [outputq+offset2q], m1
    mova          [outputq+offsetq], m0
%elif %1
    ; This version expands 16-bit to 32-bit, multiplies by the window,
    ; adds 16384 for rounding, right shifts 15, then repacks back to words to
    ; save to the output. The window is reversed for the second half.
    mova          m3, [windowq+offset2q]
    mova          m4, [inputq+offset2q]
    ; Interleaving the window with zero words means pmaddwd yields
    ; window*input per dword; whatever m1 held before lands in the word slots
    ; that get multiplied by zero, so m1 needs no clearing.
    pxor          m0, m0
    punpcklwd     m0, m3
    punpcklwd     m1, m4
    pmaddwd       m0, m1
    paddd         m0, m5
    psrad         m0, 15
    pxor          m2, m2
    punpckhwd     m2, m3
    punpckhwd     m1, m4
    pmaddwd       m2, m1
    paddd         m2, m5
    psrad         m2, 15
    packssdw      m0, m2
    mova          [outputq+offset2q], m0
    REVERSE_WORDS m3
    mova          m4, [inputq+offsetq]
    pxor          m0, m0
    punpcklwd     m0, m3
    punpcklwd     m1, m4
    pmaddwd       m0, m1
    paddd         m0, m5
    psrad         m0, 15
    pxor          m2, m2
    punpckhwd     m2, m3
    punpckhwd     m1, m4
    pmaddwd       m2, m1
    paddd         m2, m5
    psrad         m2, 15
    packssdw      m0, m2
    mova          [outputq+offsetq], m0
%else
    ; This version does the 16x16->16 multiplication in-place without expanding
    ; to 32-bit. The mmxext and sse2 versions do not use rounding, and
    ; therefore are not bit-identical to the C version.
    mova          m0, [windowq+offset2q]
    mova          m1, [inputq+offset2q]
    mova          m2, [inputq+offsetq]
    MUL16FIXED    m1, m0, m3
    REVERSE_WORDS m0
    MUL16FIXED    m2, m0, m3
    mova          [outputq+offset2q], m1
    mova          [outputq+offsetq], m2
%endif
    add           offsetd, mmsize
    sub           offset2d, mmsize
    jae .loop                       ; continue while the subtraction did not borrow
    REP_RET
%endmacro
; %1 = 0: non-bit-exact variants, declared by the macro as
; apply_window_int16_round
INIT_MMX mmxext
APPLY_WINDOW_INT16 0
INIT_XMM sse2
APPLY_WINDOW_INT16 0

; %1 = 1: bit-exact variants, declared by the macro as apply_window_int16
INIT_MMX mmxext
APPLY_WINDOW_INT16 1
INIT_XMM sse2
APPLY_WINDOW_INT16 1
INIT_XMM ssse3
APPLY_WINDOW_INT16 1
; ssse3 on atom: keeps pmulhrsw but REVERSE_WORDS uses the sse2 shuffles
; instead of pshufb
INIT_XMM ssse3, atom
APPLY_WINDOW_INT16 1
; void ff_add_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *top,
; const uint8_t *diff, int w,
; int *left, int *left_top)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment