Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
a3bf7b86
Commit
a3bf7b86
authored
Jul 28, 2011
by
Jason Garrett-Glaser
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
H.264: tweak some other x86 asm for Atom
parent
5ef953e8
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
48 additions
and
51 deletions
+48
-51
dsputil_mmx.c
libavcodec/x86/dsputil_mmx.c
+2
-2
h264_chromamc.asm
libavcodec/x86/h264_chromamc.asm
+21
-23
h264_deblock.asm
libavcodec/x86/h264_deblock.asm
+9
-10
h264_idct.asm
libavcodec/x86/h264_idct.asm
+14
-14
x86util.asm
libavcodec/x86/x86util.asm
+2
-2
No files found.
libavcodec/x86/dsputil_mmx.c
View file @
a3bf7b86
...
...
@@ -456,12 +456,12 @@ static void put_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_si
"movdqu (%1,%3), %%xmm1
\n\t
"
"movdqu (%1,%3,2), %%xmm2
\n\t
"
"movdqu (%1,%4), %%xmm3
\n\t
"
"lea (%1,%3,4), %1
\n\t
"
"movdqa %%xmm0, (%2)
\n\t
"
"movdqa %%xmm1, (%2,%3)
\n\t
"
"movdqa %%xmm2, (%2,%3,2)
\n\t
"
"movdqa %%xmm3, (%2,%4)
\n\t
"
"subl $4, %0
\n\t
"
"lea (%1,%3,4), %1
\n\t
"
"lea (%2,%3,4), %2
\n\t
"
"jnz 1b
\n\t
"
:
"+g"
(
h
),
"+r"
(
pixels
),
"+r"
(
block
)
...
...
@@ -478,6 +478,7 @@ static void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_si
"movdqu (%1,%3), %%xmm1
\n\t
"
"movdqu (%1,%3,2), %%xmm2
\n\t
"
"movdqu (%1,%4), %%xmm3
\n\t
"
"lea (%1,%3,4), %1
\n\t
"
"pavgb (%2), %%xmm0
\n\t
"
"pavgb (%2,%3), %%xmm1
\n\t
"
"pavgb (%2,%3,2), %%xmm2
\n\t
"
...
...
@@ -487,7 +488,6 @@ static void avg_pixels16_sse2(uint8_t *block, const uint8_t *pixels, int line_si
"movdqa %%xmm2, (%2,%3,2)
\n\t
"
"movdqa %%xmm3, (%2,%4)
\n\t
"
"subl $4, %0
\n\t
"
"lea (%1,%3,4), %1
\n\t
"
"lea (%2,%3,4), %2
\n\t
"
"jnz 1b
\n\t
"
:
"+g"
(
h
),
"+r"
(
pixels
),
"+r"
(
block
)
...
...
libavcodec/x86/h264_chromamc.asm
View file @
a3bf7b86
...
...
@@ -72,17 +72,17 @@ SECTION .text
.
next4rows
movq
mm0
,
[
r1
]
movq
mm1
,
[
r1
+
r2
]
add
r1
,
r4
CHROMAMC_AVG
mm0
,
[
r0
]
CHROMAMC_AVG
mm1
,
[
r0
+
r2
]
movq
[
r0
]
,
mm0
movq
[
r0
+
r2
]
,
mm1
add
r0
,
r4
add
r1
,
r4
movq
mm0
,
[
r1
]
movq
mm1
,
[
r1
+
r2
]
add
r1
,
r4
CHROMAMC_AVG
mm0
,
[
r0
]
CHROMAMC_AVG
mm1
,
[
r0
+
r2
]
add
r1
,
r4
movq
[
r0
]
,
mm0
movq
[
r0
+
r2
]
,
mm1
add
r0
,
r4
...
...
@@ -472,8 +472,8 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 8
mov
r6d
,
r4d
shl
r4d
,
8
sub
r4
,
r6
add
r4
,
8
; x*255+8 = x<<8 | (8-x)
mov
r6
,
8
add
r4
,
8
; x*255+8 = x<<8 | (8-x)
sub
r6d
,
r5d
imul
r6
,
r4
; (8-y)*(x*255+8) = (8-y)*x<<8 | (8-y)*(8-x)
imul
r4d
,
r5d
; y *(x*255+8) = y *x<<8 | y *(8-x)
...
...
@@ -481,24 +481,23 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 8
movd
m7
,
r6d
movd
m6
,
r4d
movdqa
m5
,
[
rnd_2d_
%2
]
movq
m0
,
[
r1
]
movq
m1
,
[
r1
+
1
]
pshuflw
m7
,
m7
,
0
pshuflw
m6
,
m6
,
0
punpcklbw
m0
,
m1
movlhps
m7
,
m7
movlhps
m6
,
m6
movq
m0
,
[
r1
]
movq
m1
,
[
r1
+
1
]
punpcklbw
m0
,
m1
add
r1
,
r2
.
next2rows
movq
m1
,
[
r1
]
movq
m2
,
[
r1
+
1
]
movq
m3
,
[
r1
+
r2
]
movq
m4
,
[
r1
+
r2
+
1
]
movq
m1
,
[
r1
+
r2
*
1
]
movq
m2
,
[
r1
+
r2
*
1
+
1
]
movq
m3
,
[
r1
+
r2
*
2
]
movq
m4
,
[
r1
+
r2
*
2
+
1
]
lea
r1
,
[
r1
+
r2
*
2
]
punpcklbw
m1
,
m2
punpcklbw
m3
,
m4
movdqa
m2
,
m1
punpcklbw
m3
,
m4
movdqa
m4
,
m3
pmaddubsw
m0
,
m7
pmaddubsw
m1
,
m6
...
...
@@ -508,8 +507,8 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 8
paddw
m2
,
m5
paddw
m1
,
m0
paddw
m3
,
m2
movdqa
m0
,
m4
psrlw
m1
,
6
movdqa
m0
,
m4
psrlw
m3
,
6
%ifidn
%1
,
avg
movq
m2
,
[
r0
]
...
...
@@ -576,6 +575,7 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 8
movq
m1
,
[
r1
+
r2
]
movdqa
m2
,
m1
movq
m3
,
[
r1
+
r2
*
2
]
lea
r1
,
[
r1
+
r2
*
2
]
punpcklbw
m0
,
m1
punpcklbw
m2
,
m3
pmaddubsw
m0
,
m7
...
...
@@ -594,7 +594,6 @@ cglobal %1_%2_chroma_mc8_%3, 6, 7, 8
movhps
[
r0
+
r2
]
,
m0
sub
r3d
,
2
lea
r0
,
[
r0
+
r2
*
2
]
lea
r1
,
[
r1
+
r2
*
2
]
jg
.
next2yrows
REP_RET
%endmacro
...
...
@@ -607,8 +606,8 @@ cglobal %1_%2_chroma_mc4_%3, 6, 7, 0
mov
r6
,
r4
shl
r4d
,
8
sub
r4d
,
r6d
add
r4d
,
8
; x*255+8
mov
r6
,
8
add
r4d
,
8
; x*255+8
sub
r6d
,
r5d
imul
r6d
,
r4d
; (8-y)*(x*255+8) = (8-y)*x<<8 | (8-y)*(8-x)
imul
r4d
,
r5d
; y *(x*255+8) = y *x<<8 | y *(8-x)
...
...
@@ -616,17 +615,16 @@ cglobal %1_%2_chroma_mc4_%3, 6, 7, 0
movd
m7
,
r6d
movd
m6
,
r4d
movq
m5
,
[
pw_32
]
movd
m0
,
[
r1
]
pshufw
m7
,
m7
,
0
punpcklbw
m0
,
[
r1
+
1
]
pshufw
m6
,
m6
,
0
movd
m0
,
[
r1
]
punpcklbw
m0
,
[
r1
+
1
]
add
r1
,
r2
.
next2rows
movd
m1
,
[
r1
]
movd
m3
,
[
r1
+
r2
]
punpcklbw
m1
,
[
r1
+
1
]
punpcklbw
m3
,
[
r1
+
r2
+
1
]
movd
m1
,
[
r1
+
r2
*
1
]
movd
m3
,
[
r1
+
r2
*
2
]
punpcklbw
m1
,
[
r1
+
r2
*
1
+
1
]
punpcklbw
m3
,
[
r1
+
r2
*
2
+
1
]
lea
r1
,
[
r1
+
r2
*
2
]
movq
m2
,
m1
movq
m4
,
m3
...
...
@@ -638,8 +636,8 @@ cglobal %1_%2_chroma_mc4_%3, 6, 7, 0
paddw
m2
,
m5
paddw
m1
,
m0
paddw
m3
,
m2
movq
m0
,
m4
psrlw
m1
,
6
movq
m0
,
m4
psrlw
m3
,
6
packuswb
m1
,
m1
packuswb
m3
,
m3
...
...
libavcodec/x86/h264_deblock.asm
View file @
a3bf7b86
...
...
@@ -240,17 +240,17 @@ cextern pb_A1
; out: m1=p0' m2=q0'
; clobbers: m0,3-6
%macro
DEBLOCK_P0_Q0
0
pxor
m5
,
m1
,
m2
; p0^q0
pand
m5
,
[
pb_1
]
; (p0^q0)&1
pcmpeqb
m4
,
m4
pxor
m5
,
m1
,
m2
; p0^q0
pxor
m3
,
m4
pand
m5
,
[
pb_1
]
; (p0^q0)&1
pavgb
m3
,
m0
; (p1 - q1 + 256)>>1
pavgb
m3
,
[
pb_3
]
; (((p1 - q1 + 256)>>1)+4)>>1 = 64+2+(p1-q1)>>2
pxor
m4
,
m1
pavgb
m3
,
[
pb_3
]
; (((p1 - q1 + 256)>>1)+4)>>1 = 64+2+(p1-q1)>>2
pavgb
m4
,
m2
; (q0 - p0 + 256)>>1
pavgb
m3
,
m5
paddusb
m3
,
m4
; d+128+33
mova
m6
,
[
pb_A1
]
paddusb
m3
,
m4
; d+128+33
psubusb
m6
,
m3
psubusb
m3
,
[
pb_A1
]
pminub
m6
,
m7
...
...
@@ -411,16 +411,16 @@ cglobal deblock_%2_luma_8_%1, 5,5
LOAD_MASK
r2
,
r3
mov
r3
,
r4mp
pcmpeqb
m3
,
m3
movd
m4
,
[r3]
; tc0
punpcklbw
m4
,
m4
punpcklbw
m4
,
m4
; tc = 4x tc0[3], 4x tc0[2], 4x tc0[1], 4x tc0[0]
mova
[
esp
+
%3
]
,
m4
; tc
pcmpeqb
m3
,
m3
pcmpgtb
m4
,
m3
mova
m3
,
[r4]
; p2
pand
m4
,
m7
mova
[esp],
m4
; mask
mova
m3
,
[r4]
; p2
DIFF_GT2
m1
,
m3
,
m5
,
m6
,
m7
; |p2-p0| > beta-1
pand
m6
,
m4
pand
m4
,
[
esp
+
%3
]
; tc
...
...
@@ -430,11 +430,10 @@ cglobal deblock_%2_luma_8_%1, 5,5
mova
m4
,
[
r0
+
2
*
r1
]
; q2
DIFF_GT2
m2
,
m4
,
m5
,
m6
,
m3
; |q2-q0| > beta-1
mova
m5
,
[esp]
; mask
pand
m6
,
m5
pand
m6
,
[esp]
; mask
mova
m5
,
[
esp
+
%3
]
; tc
pand
m5
,
m6
psubb
m7
,
m6
pand
m5
,
m6
mova
m3
,
[
r0
+
r1
]
LUMA_Q1
m3
,
m4
,
[
r0
+
2
*
r1
]
,
[
r0
+
r1
]
,
m5
,
m6
...
...
@@ -482,10 +481,10 @@ cglobal deblock_h_luma_8_%1, 0,5
; transpose 16x4 -> original space (only the middle 4 rows were changed by the filter)
mov
r0
,
r0mp
sub
r0
,
2
lea
r1
,
[
r0
+
r4
]
movq
m0
,
[
pix_tmp
+
0x10
]
movq
m1
,
[
pix_tmp
+
0x20
]
lea
r1
,
[
r0
+
r4
]
movq
m2
,
[
pix_tmp
+
0x30
]
movq
m3
,
[
pix_tmp
+
0x40
]
TRANSPOSE8x4B_STORE
PASS8ROWS
(
r0
,
r1
,
r3
,
r4
)
...
...
libavcodec/x86/h264_idct.asm
View file @
a3bf7b86
...
...
@@ -82,10 +82,10 @@ cglobal h264_idct_add_8_mmx, 3, 3, 0
RET
%macro
IDCT8_1D
2
mova
m4
,
m5
mova
m0
,
m1
psraw
m4
,
1
psraw
m1
,
1
mova
m4
,
m5
psraw
m4
,
1
paddw
m4
,
m5
paddw
m1
,
m0
paddw
m4
,
m7
...
...
@@ -95,16 +95,16 @@ cglobal h264_idct_add_8_mmx, 3, 3, 0
psubw
m0
,
m3
psubw
m5
,
m3
psraw
m3
,
1
paddw
m0
,
m7
psubw
m5
,
m7
psraw
m3
,
1
psraw
m7
,
1
psubw
m0
,
m3
psubw
m5
,
m7
mova
m3
,
m4
mova
m7
,
m1
psraw
m1
,
2
mova
m3
,
m4
psraw
m3
,
2
paddw
m3
,
m0
psraw
m0
,
2
...
...
@@ -113,12 +113,12 @@ cglobal h264_idct_add_8_mmx, 3, 3, 0
psubw
m0
,
m4
psubw
m7
,
m5
mova
m4
,
m2
mova
m5
,
m6
psraw
m4
,
1
psraw
m6
,
1
psubw
m4
,
m5
mova
m4
,
m2
psraw
m4
,
1
paddw
m6
,
m2
psubw
m4
,
m5
mova
m2
,
%1
mova
m5
,
%2
...
...
@@ -337,7 +337,7 @@ cglobal h264_idct8_add4_8_mmx, 5, 7, 0
test
r6
,
r6
jz
.
skipblock
mov
r6d
,
dword
[
r1
+
r5
*
4
]
lea
r6
,
[
r0
+
r6
]
add
r6
,
r0
add
word
[r2],
32
IDCT8_ADD_MMX_START
r2
,
rsp
IDCT8_ADD_MMX_START
r2
+
8
,
rsp
+
64
...
...
@@ -391,7 +391,7 @@ cglobal h264_idct_add16_8_mmx2, 5, 7, 0
REP_RET
.
no_dc
mov
r6d
,
dword
[
r1
+
r5
*
4
]
lea
r6
,
[
r0
+
r6
]
add
r6
,
r0
IDCT4_ADD
r6
,
r2
,
r3
.
skipblock
inc
r5
...
...
@@ -414,7 +414,7 @@ cglobal h264_idct_add16intra_8_mmx, 5, 7, 0
test
r6
,
r6
jz
.
skipblock
mov
r6d
,
dword
[
r1
+
r5
*
4
]
lea
r6
,
[
r0
+
r6
]
add
r6
,
r0
IDCT4_ADD
r6
,
r2
,
r3
.
skipblock
inc
r5
...
...
@@ -456,7 +456,7 @@ cglobal h264_idct_add16intra_8_mmx2, 5, 7, 0
%define
dst_regd
r1d
%endif
mov
dst_regd
,
dword
[
r1
+
r5
*
4
]
lea
dst_reg
,
[
r0
+
dst_reg
]
add
dst_reg
,
r0
DC_ADD_MMX2_OP
movh
,
dst_reg
,
r3
,
r6
%ifndef
ARCH_X86_64
mov
r1
,
r1m
...
...
@@ -513,7 +513,7 @@ cglobal h264_idct8_add4_8_mmx2, 5, 7, 0
RET
.
no_dc
mov
r6d
,
dword
[
r1
+
r5
*
4
]
lea
r6
,
[
r0
+
r6
]
add
r6
,
r0
add
word
[r2],
32
IDCT8_ADD_MMX_START
r2
,
rsp
IDCT8_ADD_MMX_START
r2
+
8
,
rsp
+
64
...
...
@@ -558,7 +558,7 @@ INIT_MMX
%define
dst_regd
r1d
%endif
mov
dst_regd
,
dword
[
r1
+
r5
*
4
]
lea
dst_reg
,
[
r0
+
dst_reg
]
add
dst_reg
,
r0
DC_ADD_MMX2_OP
mova
,
dst_reg
,
r3
,
r6
lea
dst_reg
,
[
dst_reg
+
r3
*
4
]
DC_ADD_MMX2_OP
mova
,
dst_reg
,
r3
,
r6
...
...
@@ -573,7 +573,7 @@ INIT_MMX
.
no_dc
INIT_XMM
mov
dst_regd
,
dword
[
r1
+
r5
*
4
]
lea
dst_reg
,
[
r0
+
dst_reg
]
add
dst_reg
,
r0
IDCT8_ADD_SSE
dst_reg
,
r2
,
r3
,
r6
%ifndef
ARCH_X86_64
mov
r1
,
r1m
...
...
libavcodec/x86/x86util.asm
View file @
a3bf7b86
...
...
@@ -497,10 +497,10 @@
%macro
STORE_DIFFx2
8
; add1, add2, reg1, reg2, zero, shift, source, stride
movh
%3
,
[
%7
]
movh
%4
,
[
%7
+
%8
]
punpcklbw
%3
,
%5
punpcklbw
%4
,
%5
psraw
%1
,
%6
psraw
%2
,
%6
punpcklbw
%3
,
%5
punpcklbw
%4
,
%5
paddw
%3
,
%1
paddw
%4
,
%2
packuswb
%3
,
%5
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment