Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
5e9c6ef8
Commit
5e9c6ef8
authored
Jul 15, 2012
by
Diego Biurrun
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
x86: h264_weight_10bit: port to cpuflags
parent
83f9ed42
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
69 additions
and
63 deletions
+69
-63
h264_weight_10bit.asm
libavcodec/x86/h264_weight_10bit.asm
+69
-63
No files found.
libavcodec/x86/h264_weight_10bit.asm
View file @
5e9c6ef8
...
...
@@ -48,7 +48,7 @@ SECTION .text
movifnidn
r5d
,
r5m
%endmacro
%macro
WEIGHT_SETUP
1
%macro
WEIGHT_SETUP
0
mova
m0
,
[
pw_1
]
movd
m2
,
r3m
pslld
m0
,
m2
; 1<<log2_denom
...
...
@@ -59,19 +59,19 @@ SECTION .text
pshufd
m3
,
m3
,
0
mova
m4
,
[
pw_pixel_max
]
paddw
m2
,
[
sq_1
]
; log2_denom+1
%if
nidn
%1
,
sse4
%if
notcpuflag
(
sse4
)
pxor
m7
,
m7
%endif
%endmacro
%macro
WEIGHT_OP
2
-
3
%if
%0
==
2
mova
m5
,
[
r0
+
%
2
]
%macro
WEIGHT_OP
1
-
2
%if
%0
==
1
mova
m5
,
[
r0
+
%
1
]
punpckhwd
m6
,
m5
,
m0
punpcklwd
m5
,
m0
%else
movq
m5
,
[
r0
+
%
2
]
movq
m6
,
[
r0
+
%
3
]
movq
m5
,
[
r0
+
%
1
]
movq
m6
,
[
r0
+
%
2
]
punpcklwd
m5
,
m0
punpcklwd
m6
,
m0
%endif
...
...
@@ -79,7 +79,7 @@ SECTION .text
pmaddwd
m6
,
m3
psrad
m5
,
m2
psrad
m6
,
m2
%if
idn
%1
,
sse4
%if
cpuflag
(
sse4
)
packusdw
m5
,
m6
pminsw
m5
,
m4
%else
...
...
@@ -88,14 +88,14 @@ SECTION .text
%endif
%endmacro
%macro
WEIGHT_FUNC_DBL
1
cglobal
h264_weight_16_10
_
%1
%macro
WEIGHT_FUNC_DBL
0
cglobal
h264_weight_16_10
WEIGHT_PROLOGUE
WEIGHT_SETUP
%1
WEIGHT_SETUP
.
nextrow
:
WEIGHT_OP
%1
,
0
WEIGHT_OP
0
mova
[
r0
]
,
m5
WEIGHT_OP
%1
,
16
WEIGHT_OP
16
mova
[
r0
+
16
]
,
m5
add
r0
,
r1
dec
r2d
...
...
@@ -103,17 +103,18 @@ cglobal h264_weight_16_10_%1
REP_RET
%endmacro
INIT_XMM
WEIGHT_FUNC_DBL
sse2
WEIGHT_FUNC_DBL
sse4
INIT_XMM
sse2
WEIGHT_FUNC_DBL
INIT_XMM
sse4
WEIGHT_FUNC_DBL
%macro
WEIGHT_FUNC_MM
1
cglobal
h264_weight_8_10
_
%1
%macro
WEIGHT_FUNC_MM
0
cglobal
h264_weight_8_10
WEIGHT_PROLOGUE
WEIGHT_SETUP
%1
WEIGHT_SETUP
.
nextrow
:
WEIGHT_OP
%1
,
0
WEIGHT_OP
0
mova
[r0],
m5
add
r0
,
r1
dec
r2d
...
...
@@ -121,19 +122,20 @@ cglobal h264_weight_8_10_%1
REP_RET
%endmacro
INIT_XMM
WEIGHT_FUNC_MM
sse2
WEIGHT_FUNC_MM
sse4
INIT_XMM
sse2
WEIGHT_FUNC_MM
INIT_XMM
sse4
WEIGHT_FUNC_MM
%macro
WEIGHT_FUNC_HALF_MM
1
cglobal
h264_weight_4_10
_
%1
%macro
WEIGHT_FUNC_HALF_MM
0
cglobal
h264_weight_4_10
WEIGHT_PROLOGUE
sar
r2d
,
1
WEIGHT_SETUP
%1
WEIGHT_SETUP
lea
r3
,
[
r1
*
2
]
.
nextrow
:
WEIGHT_OP
%1
,
0
,
r1
WEIGHT_OP
0
,
r1
movh
[r0],
m5
movhps
[
r0
+
r1
]
,
m5
add
r0
,
r3
...
...
@@ -142,9 +144,10 @@ cglobal h264_weight_4_10_%1
REP_RET
%endmacro
INIT_XMM
WEIGHT_FUNC_HALF_MM
sse2
WEIGHT_FUNC_HALF_MM
sse4
INIT_XMM
sse2
WEIGHT_FUNC_HALF_MM
INIT_XMM
sse4
WEIGHT_FUNC_HALF_MM
;-----------------------------------------------------------------------------
...
...
@@ -168,7 +171,7 @@ DECLARE_REG_TMP 7
movifnidn
t0d
,
r7m
%endmacro
%macro
BIWEIGHT_SETUP
1
%macro
BIWEIGHT_SETUP
0
lea
t0
,
[
t0
*
4
+
1
]
; (offset<<2)+1
or
t0
,
1
shl
r6
,
16
...
...
@@ -182,23 +185,23 @@ DECLARE_REG_TMP 7
pshufd
m5
,
m5
,
0
mova
m3
,
[
pw_pixel_max
]
movifnidn
r3d
,
r3m
%if
nidn
%1
,
sse4
%if
notcpuflag
(
sse4
)
pxor
m7
,
m7
%endif
%endmacro
%macro
BIWEIGHT
2
-
3
%if
%0
==
2
mova
m0
,
[
r0
+
%
2
]
mova
m1
,
[
r1
+
%
2
]
%macro
BIWEIGHT
1
-
2
%if
%0
==
1
mova
m0
,
[
r0
+
%
1
]
mova
m1
,
[
r1
+
%
1
]
punpckhwd
m2
,
m0
,
m1
punpcklwd
m0
,
m1
%else
movq
m0
,
[
r0
+
%
2
]
movq
m1
,
[
r1
+
%
2
]
movq
m0
,
[
r0
+
%
1
]
movq
m1
,
[
r1
+
%
1
]
punpcklwd
m0
,
m1
movq
m2
,
[
r0
+
%
3
]
movq
m1
,
[
r1
+
%
3
]
movq
m2
,
[
r0
+
%
2
]
movq
m1
,
[
r1
+
%
2
]
punpcklwd
m2
,
m1
%endif
pmaddwd
m0
,
m4
...
...
@@ -207,7 +210,7 @@ DECLARE_REG_TMP 7
paddd
m2
,
m5
psrad
m0
,
m6
psrad
m2
,
m6
%if
idn
%1
,
sse4
%if
cpuflag
(
sse4
)
packusdw
m0
,
m2
pminsw
m0
,
m3
%else
...
...
@@ -216,14 +219,14 @@ DECLARE_REG_TMP 7
%endif
%endmacro
%macro
BIWEIGHT_FUNC_DBL
1
cglobal
h264_biweight_16_10
_
%1
%macro
BIWEIGHT_FUNC_DBL
0
cglobal
h264_biweight_16_10
BIWEIGHT_PROLOGUE
BIWEIGHT_SETUP
%1
BIWEIGHT_SETUP
.
nextrow
:
BIWEIGHT
%1
,
0
BIWEIGHT
0
mova
[
r0
]
,
m0
BIWEIGHT
%1
,
16
BIWEIGHT
16
mova
[
r0
+
16
]
,
m0
add
r0
,
r2
add
r1
,
r2
...
...
@@ -232,16 +235,17 @@ cglobal h264_biweight_16_10_%1
REP_RET
%endmacro
INIT_XMM
BIWEIGHT_FUNC_DBL
sse2
BIWEIGHT_FUNC_DBL
sse4
INIT_XMM
sse2
BIWEIGHT_FUNC_DBL
INIT_XMM
sse4
BIWEIGHT_FUNC_DBL
%macro
BIWEIGHT_FUNC
1
cglobal
h264_biweight_8_10
_
%1
%macro
BIWEIGHT_FUNC
0
cglobal
h264_biweight_8_10
BIWEIGHT_PROLOGUE
BIWEIGHT_SETUP
%1
BIWEIGHT_SETUP
.
nextrow
:
BIWEIGHT
%1
,
0
BIWEIGHT
0
mova
[r0],
m0
add
r0
,
r2
add
r1
,
r2
...
...
@@ -250,18 +254,19 @@ cglobal h264_biweight_8_10_%1
REP_RET
%endmacro
INIT_XMM
BIWEIGHT_FUNC
sse2
BIWEIGHT_FUNC
sse4
INIT_XMM
sse2
BIWEIGHT_FUNC
INIT_XMM
sse4
BIWEIGHT_FUNC
%macro
BIWEIGHT_FUNC_HALF
1
cglobal
h264_biweight_4_10
_
%1
%macro
BIWEIGHT_FUNC_HALF
0
cglobal
h264_biweight_4_10
BIWEIGHT_PROLOGUE
BIWEIGHT_SETUP
%1
BIWEIGHT_SETUP
sar
r3d
,
1
lea
r4
,
[
r2
*
2
]
.
nextrow
:
BIWEIGHT
%1
,
0
,
r2
BIWEIGHT
0
,
r2
movh
[
r0
]
,
m0
movhps
[
r0
+
r2
]
,
m0
add
r0
,
r4
...
...
@@ -271,6 +276,7 @@ cglobal h264_biweight_4_10_%1
REP_RET
%endmacro
INIT_XMM
BIWEIGHT_FUNC_HALF
sse2
BIWEIGHT_FUNC_HALF
sse4
INIT_XMM
sse2
BIWEIGHT_FUNC_HALF
INIT_XMM
sse4
BIWEIGHT_FUNC_HALF
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment