Linshizhi / ffmpeg.wasm-core · Commits

Commit 2b2617da
Authored Sep 20, 2011 by Yuriy Kaminskiy
Committed by Michael Niedermayer, Sep 20, 2011
libpostproc: mmx code uses stack below %esp, fix that
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>

Parent: 0bbb1cdc
Showing 1 changed file with 28 additions and 33 deletions:

libpostproc/postprocess_template.c (+28, -33)
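Every hunk below applies the same fix. The inline asm previously carved scratch space out of the stack below the stack pointer ("lea -40(%%"REG_SP")" followed by "and "ALIGN_MASK"", addressed through REG_c); memory below %esp can be clobbered asynchronously, for example by a signal handler, since 32-bit x86 has no red zone. The patch instead declares an aligned C-level temporary, DECLARE_ALIGNED(8, uint64_t, tmp)[N], and passes its address to the asm as an extra "r"(tmp) input operand. In doVertDefFilter() that buffer becomes operand %3; in dering() and do_a_deblock() it becomes %4, which is why the rewritten stores and loads reference (%3) and (%4). REG_c also disappears from the clobber lists of doVertDefFilter() and do_a_deblock(); dering() keeps it because %ecx is still used for the dering-threshold compare.

Below is a minimal sketch of that pattern (illustrative only, not the libpostproc code), assuming GCC or Clang on x86-64; the names scratch, in and out are made up for the demo:

    /* Pass a compiler-owned, aligned scratch buffer to inline asm via an
     * input operand instead of addressing memory below the stack pointer. */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
    #if defined(__GNUC__) && defined(__x86_64__)
        /* analogous to DECLARE_ALIGNED(8, uint64_t, tmp)[4] in the patch */
        uint64_t scratch[4] __attribute__((aligned(8)));
        uint64_t in  = 0x1122334455667788ULL;
        uint64_t out = 0;

        __asm__ volatile(
            "movq %2, %%rax       \n\t"
            "movq %%rax, (%1)     \n\t"  /* spill through the operand ...     */
            "movq (%1), %0        \n\t"  /* ... never below the stack pointer */
            : "=r" (out)
            : "r" (scratch), "r" (in)
            : "%rax", "memory");

        printf("spilled and reloaded: %016llx\n", (unsigned long long)out);
    #endif
        return 0;
    }

Because the buffer is an ordinary local array, the compiler accounts for it in the stack frame and the asm never touches memory it does not own.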
@@ -25,8 +25,6 @@
 #include "libavutil/x86_cpu.h"
 
-#define ALIGN_MASK "$-8"
-
 #undef REAL_PAVGB
 #undef PAVGB
 #undef PMINUB
@@ -767,11 +765,10 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
     }
 */
 #elif HAVE_MMX
+    DECLARE_ALIGNED(8, uint64_t, tmp)[4]; // make space for 4 8-byte vars
     src+= stride*4;
     __asm__ volatile(
         "pxor %%mm7, %%mm7                      \n\t"
-        "lea -40(%%"REG_SP"), %%"REG_c"         \n\t" // make space for 4 8-byte vars
-        "and "ALIGN_MASK", %%"REG_c"            \n\t" // align
 //      0       1       2       3       4       5       6       7
 //      %0      %0+%1   %0+2%1  eax+2%1 %0+4%1  eax+4%1 edx+%1  edx+2%1
 //      %0      eax     eax+%1  eax+2%1 %0+4%1  edx     edx+%1  edx+2%1
@@ -813,8 +810,8 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
         "psubw %%mm3, %%mm1                     \n\t" // 2H0 - 5H1 + 5H2 - H3
         "psubw %%mm2, %%mm0                     \n\t" // 2L0 - 5L1 + 5L2 - 2L3
         "psubw %%mm3, %%mm1                     \n\t" // 2H0 - 5H1 + 5H2 - 2H3
-        "movq %%mm0, (%%"REG_c")                \n\t" // 2L0 - 5L1 + 5L2 - 2L3
-        "movq %%mm1, 8(%%"REG_c")               \n\t" // 2H0 - 5H1 + 5H2 - 2H3
+        "movq %%mm0, (%3)                       \n\t" // 2L0 - 5L1 + 5L2 - 2L3
+        "movq %%mm1, 8(%3)                      \n\t" // 2H0 - 5H1 + 5H2 - 2H3
         "movq (%%"REG_a", %1, 2), %%mm0         \n\t"
         "movq %%mm0, %%mm1                      \n\t"
@@ -823,8 +820,8 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
         "psubw %%mm0, %%mm2                     \n\t" // L3 - L4
         "psubw %%mm1, %%mm3                     \n\t" // H3 - H4
-        "movq %%mm2, 16(%%"REG_c")              \n\t" // L3 - L4
-        "movq %%mm3, 24(%%"REG_c")              \n\t" // H3 - H4
+        "movq %%mm2, 16(%3)                     \n\t" // L3 - L4
+        "movq %%mm3, 24(%3)                     \n\t" // H3 - H4
         "paddw %%mm4, %%mm4                     \n\t" // 2L2
         "paddw %%mm5, %%mm5                     \n\t" // 2H2
         "psubw %%mm2, %%mm4                     \n\t" // 2L2 - L3 + L4
@@ -872,8 +869,8 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
         "psubw %%mm2, %%mm0                     \n\t" // 2L4 - 5L5 + 5L6 - 2L7
         "psubw %%mm3, %%mm1                     \n\t" // 2H4 - 5H5 + 5H6 - 2H7
-        "movq (%%"REG_c"), %%mm2                \n\t" // 2L0 - 5L1 + 5L2 - 2L3
-        "movq 8(%%"REG_c"), %%mm3               \n\t" // 2H0 - 5H1 + 5H2 - 2H3
+        "movq (%3), %%mm2                       \n\t" // 2L0 - 5L1 + 5L2 - 2L3
+        "movq 8(%3), %%mm3                      \n\t" // 2H0 - 5H1 + 5H2 - 2H3
 #if HAVE_MMX2
         "movq %%mm7, %%mm6                      \n\t" // 0
@@ -951,8 +948,8 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
         "psrlw $6, %%mm4                        \n\t"
         "psrlw $6, %%mm5                        \n\t"
-        "movq 16(%%"REG_c"), %%mm0              \n\t" // L3 - L4
-        "movq 24(%%"REG_c"), %%mm1              \n\t" // H3 - H4
+        "movq 16(%3), %%mm0                     \n\t" // L3 - L4
+        "movq 24(%3), %%mm1                     \n\t" // H3 - H4
         "pxor %%mm2, %%mm2                      \n\t"
         "pxor %%mm3, %%mm3                      \n\t"
@@ -995,8 +992,8 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
         "movq %%mm0, (%0, %1)                   \n\t"
 
         : "+r" (src)
-        : "r" ((x86_reg)stride), "m" (c->pQPb)
-        : "%"REG_a, "%"REG_c
+        : "r" ((x86_reg)stride), "m" (c->pQPb), "r"(tmp)
+        : "%"REG_a
     );
 #else //HAVE_MMX2 || HAVE_AMD3DNOW
     const int l1= stride;
@@ -1044,6 +1041,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
 static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c)
 {
 #if HAVE_MMX2 || HAVE_AMD3DNOW
+    DECLARE_ALIGNED(8, uint64_t, tmp)[3];
     __asm__ volatile(
         "pxor %%mm6, %%mm6                      \n\t"
         "pcmpeqb %%mm7, %%mm7                   \n\t"
@@ -1137,13 +1135,11 @@ FIND_MIN_MAX((%0, %1, 8))
         "movd %%mm6, %%ecx                      \n\t"
         "cmpb "MANGLE(deringThreshold)", %%cl   \n\t"
         " jb 1f                                 \n\t"
-        "lea -24(%%"REG_SP"), %%"REG_c"         \n\t"
-        "and "ALIGN_MASK", %%"REG_c"            \n\t"
         PAVGB(%%mm0, %%mm7)                           // a=(max + min)/2
         "punpcklbw %%mm7, %%mm7                 \n\t"
         "punpcklbw %%mm7, %%mm7                 \n\t"
         "punpcklbw %%mm7, %%mm7                 \n\t"
-        "movq %%mm7, (%%"REG_c")                \n\t"
+        "movq %%mm7, (%4)                       \n\t"
         "movq (%0), %%mm0                       \n\t" // L10
         "movq %%mm0, %%mm1                      \n\t" // L10
@@ -1207,8 +1203,8 @@ FIND_MIN_MAX((%0, %1, 8))
     PAVGB(t0, lx)                               /* (src[-1] + src[+1])/2 */\
     PAVGB(sx, lx)                               /* (src[-1] + 2src[0] + src[+1])/4 */\
     PAVGB(lx, pplx)                                                     \
-    "movq " #lx ", 8(%%"REG_c")                 \n\t"\
-    "movq (%%"REG_c"), " #lx "                  \n\t"\
+    "movq " #lx ", 8(%4)                        \n\t"\
+    "movq (%4), " #lx "                         \n\t"\
     "psubusb " #lx ", " #t1 "                   \n\t"\
     "psubusb " #lx ", " #t0 "                   \n\t"\
     "psubusb " #lx ", " #sx "                   \n\t"\
@@ -1235,7 +1231,7 @@ FIND_MIN_MAX((%0, %1, 8))
     "pandn " #dst ", " #ppsx "                  \n\t"\
     "por " #pplx ", " #ppsx "                   \n\t"\
     "movq " #ppsx ", " #dst "                   \n\t"\
-    "movq 8(%%"REG_c"), " #lx "                 \n\t"
+    "movq 8(%4), " #lx "                        \n\t"
 
 #define DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1) \
         REAL_DERING_CORE(dst,src,ppsx,psx,sx,pplx,plx,lx,t0,t1)
@@ -1265,7 +1261,7 @@ DERING_CORE((%%REGd, %1, 2),(%0, %1, 8) ,%%mm0,%%mm2,%%mm4,%%mm1,%%mm3,%%mm5,
 DERING_CORE((%0, %1, 8)    ,(%%REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5,%%mm1,%%mm6,%%mm7)
 
         "1:                        \n\t"
-        : : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb), "m"(c->pQPb2)
+        : : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb), "m"(c->pQPb2), "r"(tmp)
         : "%"REG_a, "%"REG_d, "%"REG_c
     );
 #else //HAVE_MMX2 || HAVE_AMD3DNOW
@@ -2762,10 +2758,9 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
     if(eq_mask != -1LL){
         uint8_t *temp_src= src;
+        DECLARE_ALIGNED(8, uint64_t, tmp)[4]; // make space for 4 8-byte vars
         __asm__ volatile(
             "pxor %%mm7, %%mm7                      \n\t"
-            "lea -40(%%"REG_SP"), %%"REG_c"         \n\t" // make space for 4 8-byte vars
-            "and "ALIGN_MASK", %%"REG_c"            \n\t" // align
 //      0       1       2       3       4       5       6       7       8       9
 //      %0      eax     eax+%1  eax+2%1 %0+4%1  ecx     ecx+%1  ecx+2%1 %1+8%1  ecx+4%1
@@ -2806,8 +2801,8 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
             "psubw %%mm3, %%mm1                     \n\t" // 2H0 - 5H1 + 5H2 - H3
             "psubw %%mm2, %%mm0                     \n\t" // 2L0 - 5L1 + 5L2 - 2L3
             "psubw %%mm3, %%mm1                     \n\t" // 2H0 - 5H1 + 5H2 - 2H3
-            "movq %%mm0, (%%"REG_c")                \n\t" // 2L0 - 5L1 + 5L2 - 2L3
-            "movq %%mm1, 8(%%"REG_c")               \n\t" // 2H0 - 5H1 + 5H2 - 2H3
+            "movq %%mm0, (%4)                       \n\t" // 2L0 - 5L1 + 5L2 - 2L3
+            "movq %%mm1, 8(%4)                      \n\t" // 2H0 - 5H1 + 5H2 - 2H3
             "movq (%%"REG_a", %1, 2), %%mm0         \n\t"
             "movq %%mm0, %%mm1                      \n\t"
@@ -2816,8 +2811,8 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
             "psubw %%mm0, %%mm2                     \n\t" // L3 - L4
             "psubw %%mm1, %%mm3                     \n\t" // H3 - H4
-            "movq %%mm2, 16(%%"REG_c")              \n\t" // L3 - L4
-            "movq %%mm3, 24(%%"REG_c")              \n\t" // H3 - H4
+            "movq %%mm2, 16(%4)                     \n\t" // L3 - L4
+            "movq %%mm3, 24(%4)                     \n\t" // H3 - H4
             "paddw %%mm4, %%mm4                     \n\t" // 2L2
             "paddw %%mm5, %%mm5                     \n\t" // 2H2
             "psubw %%mm2, %%mm4                     \n\t" // 2L2 - L3 + L4
@@ -2865,8 +2860,8 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
             "psubw %%mm2, %%mm0                     \n\t" // 2L4 - 5L5 + 5L6 - 2L7
             "psubw %%mm3, %%mm1                     \n\t" // 2H4 - 5H5 + 5H6 - 2H7
-            "movq (%%"REG_c"), %%mm2                \n\t" // 2L0 - 5L1 + 5L2 - 2L3
-            "movq 8(%%"REG_c"), %%mm3               \n\t" // 2H0 - 5H1 + 5H2 - 2H3
+            "movq (%4), %%mm2                       \n\t" // 2L0 - 5L1 + 5L2 - 2L3
+            "movq 8(%4), %%mm3                      \n\t" // 2H0 - 5H1 + 5H2 - 2H3
 #if HAVE_MMX2
             "movq %%mm7, %%mm6                      \n\t" // 0
@@ -2944,8 +2939,8 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
             "psrlw $6, %%mm4                        \n\t"
             "psrlw $6, %%mm5                        \n\t"
-            "movq 16(%%"REG_c"), %%mm0              \n\t" // L3 - L4
-            "movq 24(%%"REG_c"), %%mm1              \n\t" // H3 - H4
+            "movq 16(%4), %%mm0                     \n\t" // L3 - L4
+            "movq 24(%4), %%mm1                     \n\t" // H3 - H4
             "pxor %%mm2, %%mm2                      \n\t"
             "pxor %%mm3, %%mm3                      \n\t"
@@ -2990,8 +2985,8 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
             "movq %%mm0, (%0, %1)                   \n\t"
 
             : "+r" (temp_src)
-            : "r" ((x86_reg)step), "m" (c->pQPb), "m"(eq_mask)
-            : "%"REG_a, "%"REG_c
+            : "r" ((x86_reg)step), "m" (c->pQPb), "m"(eq_mask), "r"(tmp)
+            : "%"REG_a
             );
         }
 /*if(step==16){