Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
9eb3da2f
Commit
9eb3da2f
authored
Jun 27, 2016
by
Matthieu Bouron
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
asm: FF_-prefix internal macros used in inline assembly
See merge commit '
39d6d361
'.
parent
39d6d361
Show whitespace changes
Inline
Side-by-side
Showing
17 changed files
with
1113 additions
and
1113 deletions
+1113
-1113
cabac.h
libavcodec/x86/cabac.h
+10
-10
h264_i386.h
libavcodec/x86/h264_i386.h
+6
-6
hpeldsp_rnd_template.c
libavcodec/x86/hpeldsp_rnd_template.c
+28
-28
me_cmp_init.c
libavcodec/x86/me_cmp_init.c
+22
-22
mpegvideo.c
libavcodec/x86/mpegvideo.c
+44
-44
mpegvideoenc_template.c
libavcodec/x86/mpegvideoenc_template.c
+18
-18
rnd_template.c
libavcodec/x86/rnd_template.c
+22
-22
snowdsp.c
libavcodec/x86/snowdsp.c
+90
-90
vc1dsp_mmx.c
libavcodec/x86/vc1dsp_mmx.c
+3
-3
vf_noise.c
libavfilter/x86/vf_noise.c
+20
-20
asm.h
libavutil/x86/asm.h
+33
-33
cpu.c
libavutil/x86/cpu.c
+2
-2
postprocess_template.c
libpostproc/postprocess_template.c
+322
-322
hscale_fast_bilinear_simd.c
libswscale/x86/hscale_fast_bilinear_simd.c
+62
-62
rgb2rgb_template.c
libswscale/x86/rgb2rgb_template.c
+193
-193
swscale.c
libswscale/x86/swscale.c
+15
-15
swscale_template.c
libswscale/x86/swscale_template.c
+223
-223
No files found.
libavcodec/x86/cabac.h
View file @
9eb3da2f
...
...
@@ -45,7 +45,7 @@
#define END_CHECK(end) ""
#else
#define END_CHECK(end) \
"cmp "end" , %%"
REG_c"
\n\t"\
"cmp "end" , %%"
FF_REG_c"
\n\t"\
"jge 1f \n\t"
#endif
...
...
@@ -92,11 +92,11 @@
"mov "tmpbyte" , "statep" \n\t"\
"test "lowword" , "lowword" \n\t"\
"jnz 2f \n\t"\
"mov "byte" , %%"
REG_c"
\n\t"\
"mov "byte" , %%"
FF_REG_c"
\n\t"\
END_CHECK(end)\
"add"
OPSIZE" $2
, "byte" \n\t"\
"add"
FF_OPSIZE" $2
, "byte" \n\t"\
"1: \n\t"\
"movzwl (%%"
REG_c") , "tmp"
\n\t"\
"movzwl (%%"
FF_REG_c") , "tmp"
\n\t"\
"lea -1("low") , %%ecx \n\t"\
"xor "low" , %%ecx \n\t"\
"shr $15 , %%ecx \n\t"\
...
...
@@ -153,11 +153,11 @@
"mov "tmpbyte" , "statep" \n\t"\
"test "lowword" , "lowword" \n\t"\
" jnz 2f \n\t"\
"mov "byte" , %%"
REG_c"
\n\t"\
"mov "byte" , %%"
FF_REG_c"
\n\t"\
END_CHECK(end)\
"add"
OPSIZE" $2
, "byte" \n\t"\
"add"
FF_OPSIZE" $2
, "byte" \n\t"\
"1: \n\t"\
"movzwl (%%"
REG_c") , "tmp"
\n\t"\
"movzwl (%%"
FF_REG_c") , "tmp"
\n\t"\
"lea -1("low") , %%ecx \n\t"\
"xor "low" , %%ecx \n\t"\
"shr $15 , %%ecx \n\t"\
...
...
@@ -203,7 +203,7 @@ static av_always_inline int get_cabac_inline_x86(CABACContext *c,
"i"
(
offsetof
(
CABACContext
,
bytestream_end
))
TABLES_ARG
,
"1"
(
c
->
low
),
"2"
(
c
->
range
)
:
"%"
REG_c
,
"memory"
:
"%"
FF_
REG_c
,
"memory"
);
return
bit
&
1
;
}
...
...
@@ -240,7 +240,7 @@ static av_always_inline int get_cabac_bypass_sign_x86(CABACContext *c, int val)
"addl %%edx, %%eax
\n\t
"
"cmp %c5(%2), %1
\n\t
"
"jge 1f
\n\t
"
"add"
OPSIZE
"
$2, %c4(%2)
\n\t
"
"add"
FF_OPSIZE
"
$2, %c4(%2)
\n\t
"
#endif
"1:
\n\t
"
"movl %%eax, %c3(%2)
\n\t
"
...
...
@@ -281,7 +281,7 @@ static av_always_inline int get_cabac_bypass_x86(CABACContext *c)
"addl %%ecx, %%eax
\n\t
"
"cmp %c5(%2), %1
\n\t
"
"jge 1f
\n\t
"
"add"
OPSIZE
"
$2, %c4(%2)
\n\t
"
"add"
FF_OPSIZE
"
$2, %c4(%2)
\n\t
"
"1:
\n\t
"
"movl %%eax, %c3(%2)
\n\t
"
...
...
libavcodec/x86/h264_i386.h
View file @
9eb3da2f
...
...
@@ -91,13 +91,13 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
"sub %10, %1
\n\t
"
"mov %2, %0
\n\t
"
"movl %7, %%ecx
\n\t
"
"add %1, %%"
REG_c
"
\n\t
"
"add %1, %%"
FF_REG_c
"
\n\t
"
"movl %%ecx, (%0)
\n\t
"
"test $1, %4
\n\t
"
" jnz 5f
\n\t
"
"add"
OPSIZE
" $4, %2
\n\t
"
"add"
FF_OPSIZE
" $4, %2
\n\t
"
"4:
\n\t
"
"add $1, %1
\n\t
"
...
...
@@ -105,7 +105,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
" jb 3b
\n\t
"
"mov %2, %0
\n\t
"
"movl %7, %%ecx
\n\t
"
"add %1, %%"
REG_c
"
\n\t
"
"add %1, %%"
FF_REG_c
"
\n\t
"
"movl %%ecx, (%0)
\n\t
"
"5:
\n\t
"
"add %9, %k0
\n\t
"
...
...
@@ -116,7 +116,7 @@ static int decode_significance_x86(CABACContext *c, int max_coeff,
"i"
(
offsetof
(
CABACContext
,
bytestream
)),
"i"
(
offsetof
(
CABACContext
,
bytestream_end
))
TABLES_ARG
:
"%"
REG_c
,
"memory"
:
"%"
FF_
REG_c
,
"memory"
);
return
coeff_count
;
}
...
...
@@ -183,7 +183,7 @@ static int decode_significance_8x8_x86(CABACContext *c,
"test $1, %4
\n\t
"
" jnz 5f
\n\t
"
"add"
OPSIZE
" $4, %2
\n\t
"
"add"
FF_OPSIZE
" $4, %2
\n\t
"
"4:
\n\t
"
"add $1, %6
\n\t
"
...
...
@@ -202,7 +202,7 @@ static int decode_significance_8x8_x86(CABACContext *c,
"i"
(
offsetof
(
CABACContext
,
bytestream
)),
"i"
(
offsetof
(
CABACContext
,
bytestream_end
)),
"i"
(
H264_LAST_COEFF_FLAG_OFFSET_8x8_OFFSET
)
TABLES_ARG
:
"%"
REG_c
,
"memory"
:
"%"
FF_
REG_c
,
"memory"
);
return
coeff_count
;
}
...
...
libavcodec/x86/hpeldsp_rnd_template.c
View file @
9eb3da2f
...
...
@@ -32,7 +32,7 @@ av_unused static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels
{
MOVQ_BFE
(
mm6
);
__asm__
volatile
(
"lea (%3, %3), %%"
REG_a
"
\n\t
"
"lea (%3, %3), %%"
FF_REG_a
"
\n\t
"
".p2align 3
\n\t
"
"1:
\n\t
"
"movq (%1), %%mm0
\n\t
"
...
...
@@ -42,8 +42,8 @@ av_unused static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels
PAVGBP
(
%%
mm0
,
%%
mm1
,
%%
mm4
,
%%
mm2
,
%%
mm3
,
%%
mm5
)
"movq %%mm4, (%2)
\n\t
"
"movq %%mm5, (%2, %3)
\n\t
"
"add %%"
REG_a
", %1
\n\t
"
"add %%"
REG_a
", %2
\n\t
"
"add %%"
FF_REG_a
", %1
\n\t
"
"add %%"
FF_REG_a
", %2
\n\t
"
"movq (%1), %%mm0
\n\t
"
"movq 1(%1), %%mm1
\n\t
"
"movq (%1, %3), %%mm2
\n\t
"
...
...
@@ -51,20 +51,20 @@ av_unused static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels
PAVGBP
(
%%
mm0
,
%%
mm1
,
%%
mm4
,
%%
mm2
,
%%
mm3
,
%%
mm5
)
"movq %%mm4, (%2)
\n\t
"
"movq %%mm5, (%2, %3)
\n\t
"
"add %%"
REG_a
", %1
\n\t
"
"add %%"
REG_a
", %2
\n\t
"
"add %%"
FF_REG_a
", %1
\n\t
"
"add %%"
FF_REG_a
", %2
\n\t
"
"subl $4, %0
\n\t
"
"jnz 1b
\n\t
"
:
"+g"
(
h
),
"+S"
(
pixels
),
"+D"
(
block
)
:
"r"
((
x86_reg
)
line_size
)
:
REG_a
,
"memory"
);
:
FF_
REG_a
,
"memory"
);
}
av_unused
static
void
DEF
(
put
,
pixels16_x2
)(
uint8_t
*
block
,
const
uint8_t
*
pixels
,
ptrdiff_t
line_size
,
int
h
)
{
MOVQ_BFE
(
mm6
);
__asm__
volatile
(
"lea
(%3, %3), %%"
REG_a
"
\n\t
"
"lea
(%3, %3), %%"
FF_REG_a
"
\n\t
"
".p2align 3
\n\t
"
"1:
\n\t
"
"movq (%1), %%mm0
\n\t
"
...
...
@@ -81,8 +81,8 @@ av_unused static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixel
PAVGBP
(
%%
mm0
,
%%
mm1
,
%%
mm4
,
%%
mm2
,
%%
mm3
,
%%
mm5
)
"movq %%mm4, 8(%2)
\n\t
"
"movq %%mm5, 8(%2, %3)
\n\t
"
"add %%"
REG_a
", %1
\n\t
"
"add %%"
REG_a
", %2
\n\t
"
"add %%"
FF_REG_a
", %1
\n\t
"
"add %%"
FF_REG_a
", %2
\n\t
"
"movq (%1), %%mm0
\n\t
"
"movq 1(%1), %%mm1
\n\t
"
"movq (%1, %3), %%mm2
\n\t
"
...
...
@@ -97,42 +97,42 @@ av_unused static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixel
PAVGBP
(
%%
mm0
,
%%
mm1
,
%%
mm4
,
%%
mm2
,
%%
mm3
,
%%
mm5
)
"movq %%mm4, 8(%2)
\n\t
"
"movq %%mm5, 8(%2, %3)
\n\t
"
"add %%"
REG_a
", %1
\n\t
"
"add %%"
REG_a
", %2
\n\t
"
"add %%"
FF_REG_a
", %1
\n\t
"
"add %%"
FF_REG_a
", %2
\n\t
"
"subl $4, %0
\n\t
"
"jnz 1b
\n\t
"
:
"+g"
(
h
),
"+S"
(
pixels
),
"+D"
(
block
)
:
"r"
((
x86_reg
)
line_size
)
:
REG_a
,
"memory"
);
:
FF_
REG_a
,
"memory"
);
}
av_unused
static
void
DEF
(
put
,
pixels8_y2
)(
uint8_t
*
block
,
const
uint8_t
*
pixels
,
ptrdiff_t
line_size
,
int
h
)
{
MOVQ_BFE
(
mm6
);
__asm__
volatile
(
"lea (%3, %3), %%"
REG_a
"
\n\t
"
"lea (%3, %3), %%"
FF_REG_a
"
\n\t
"
"movq (%1), %%mm0
\n\t
"
".p2align 3
\n\t
"
"1:
\n\t
"
"movq (%1, %3), %%mm1
\n\t
"
"movq (%1, %%"
REG_a
"),%%mm2
\n\t
"
"movq (%1, %%"
FF_REG_a
"),%%mm2
\n\t
"
PAVGBP
(
%%
mm1
,
%%
mm0
,
%%
mm4
,
%%
mm2
,
%%
mm1
,
%%
mm5
)
"movq %%mm4, (%2)
\n\t
"
"movq %%mm5, (%2, %3)
\n\t
"
"add %%"
REG_a
", %1
\n\t
"
"add %%"
REG_a
", %2
\n\t
"
"add %%"
FF_REG_a
", %1
\n\t
"
"add %%"
FF_REG_a
", %2
\n\t
"
"movq (%1, %3), %%mm1
\n\t
"
"movq (%1, %%"
REG_a
"),%%mm0
\n\t
"
"movq (%1, %%"
FF_REG_a
"),%%mm0
\n\t
"
PAVGBP
(
%%
mm1
,
%%
mm2
,
%%
mm4
,
%%
mm0
,
%%
mm1
,
%%
mm5
)
"movq %%mm4, (%2)
\n\t
"
"movq %%mm5, (%2, %3)
\n\t
"
"add %%"
REG_a
", %1
\n\t
"
"add %%"
REG_a
", %2
\n\t
"
"add %%"
FF_REG_a
", %1
\n\t
"
"add %%"
FF_REG_a
", %2
\n\t
"
"subl $4, %0
\n\t
"
"jnz 1b
\n\t
"
:
"+g"
(
h
),
"+S"
(
pixels
),
"+D"
(
block
)
:
"r"
((
x86_reg
)
line_size
)
:
REG_a
,
"memory"
);
:
FF_
REG_a
,
"memory"
);
}
av_unused
static
void
DEF
(
avg
,
pixels16_x2
)(
uint8_t
*
block
,
const
uint8_t
*
pixels
,
ptrdiff_t
line_size
,
int
h
)
...
...
@@ -166,12 +166,12 @@ av_unused static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels
{
MOVQ_BFE
(
mm6
);
__asm__
volatile
(
"lea (%3, %3), %%"
REG_a
"
\n\t
"
"lea (%3, %3), %%"
FF_REG_a
"
\n\t
"
"movq (%1), %%mm0
\n\t
"
".p2align 3
\n\t
"
"1:
\n\t
"
"movq (%1, %3), %%mm1
\n\t
"
"movq (%1, %%"
REG_a
"), %%mm2
\n\t
"
"movq (%1, %%"
FF_REG_a
"), %%mm2
\n\t
"
PAVGBP
(
%%
mm1
,
%%
mm0
,
%%
mm4
,
%%
mm2
,
%%
mm1
,
%%
mm5
)
"movq (%2), %%mm3
\n\t
"
PAVGB_MMX
(
%%
mm3
,
%%
mm4
,
%%
mm0
,
%%
mm6
)
...
...
@@ -179,11 +179,11 @@ av_unused static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels
PAVGB_MMX
(
%%
mm3
,
%%
mm5
,
%%
mm1
,
%%
mm6
)
"movq %%mm0, (%2)
\n\t
"
"movq %%mm1, (%2, %3)
\n\t
"
"add %%"
REG_a
", %1
\n\t
"
"add %%"
REG_a
", %2
\n\t
"
"add %%"
FF_REG_a
", %1
\n\t
"
"add %%"
FF_REG_a
", %2
\n\t
"
"movq (%1, %3), %%mm1
\n\t
"
"movq (%1, %%"
REG_a
"), %%mm0
\n\t
"
"movq (%1, %%"
FF_REG_a
"), %%mm0
\n\t
"
PAVGBP
(
%%
mm1
,
%%
mm2
,
%%
mm4
,
%%
mm0
,
%%
mm1
,
%%
mm5
)
"movq (%2), %%mm3
\n\t
"
PAVGB_MMX
(
%%
mm3
,
%%
mm4
,
%%
mm2
,
%%
mm6
)
...
...
@@ -191,12 +191,12 @@ av_unused static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels
PAVGB_MMX
(
%%
mm3
,
%%
mm5
,
%%
mm1
,
%%
mm6
)
"movq %%mm2, (%2)
\n\t
"
"movq %%mm1, (%2, %3)
\n\t
"
"add %%"
REG_a
", %1
\n\t
"
"add %%"
REG_a
", %2
\n\t
"
"add %%"
FF_REG_a
", %1
\n\t
"
"add %%"
FF_REG_a
", %2
\n\t
"
"subl $4, %0
\n\t
"
"jnz 1b
\n\t
"
:
"+g"
(
h
),
"+S"
(
pixels
),
"+D"
(
block
)
:
"r"
((
x86_reg
)
line_size
)
:
REG_a
,
"memory"
);
:
FF_
REG_a
,
"memory"
);
}
libavcodec/x86/me_cmp_init.c
View file @
9eb3da2f
...
...
@@ -283,15 +283,15 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2,
__asm__
volatile
(
".p2align 4
\n\t
"
"1:
\n\t
"
"movq (%1, %%"
REG_a
"), %%mm0
\n\t
"
"movq (%2, %%"
REG_a
"), %%mm2
\n\t
"
"movq (%2, %%"
REG_a
"), %%mm4
\n\t
"
"add %3, %%"
REG_a
"
\n\t
"
"movq (%1, %%"
FF_REG_a
"), %%mm0
\n\t
"
"movq (%2, %%"
FF_REG_a
"), %%mm2
\n\t
"
"movq (%2, %%"
FF_REG_a
"), %%mm4
\n\t
"
"add %3, %%"
FF_REG_a
"
\n\t
"
"psubusb %%mm0, %%mm2
\n\t
"
"psubusb %%mm4, %%mm0
\n\t
"
"movq (%1, %%"
REG_a
"), %%mm1
\n\t
"
"movq (%2, %%"
REG_a
"), %%mm3
\n\t
"
"movq (%2, %%"
REG_a
"), %%mm5
\n\t
"
"movq (%1, %%"
FF_REG_a
"), %%mm1
\n\t
"
"movq (%2, %%"
FF_REG_a
"), %%mm3
\n\t
"
"movq (%2, %%"
FF_REG_a
"), %%mm5
\n\t
"
"psubusb %%mm1, %%mm3
\n\t
"
"psubusb %%mm5, %%mm1
\n\t
"
"por %%mm2, %%mm0
\n\t
"
...
...
@@ -306,7 +306,7 @@ static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2,
"paddw %%mm3, %%mm2
\n\t
"
"paddw %%mm2, %%mm0
\n\t
"
"paddw %%mm0, %%mm6
\n\t
"
"add %3, %%"
REG_a
"
\n\t
"
"add %3, %%"
FF_REG_a
"
\n\t
"
" js 1b
\n\t
"
:
"+a"
(
len
)
:
"r"
(
blk1
-
len
),
"r"
(
blk2
-
len
),
"r"
(
stride
));
...
...
@@ -319,18 +319,18 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2,
__asm__
volatile
(
".p2align 4
\n\t
"
"1:
\n\t
"
"movq (%1, %%"
REG_a
"), %%mm0
\n\t
"
"movq (%2, %%"
REG_a
"), %%mm1
\n\t
"
"movq (%1, %%"
REG_a
"), %%mm2
\n\t
"
"movq (%2, %%"
REG_a
"), %%mm3
\n\t
"
"movq (%1, %%"
FF_REG_a
"), %%mm0
\n\t
"
"movq (%2, %%"
FF_REG_a
"), %%mm1
\n\t
"
"movq (%1, %%"
FF_REG_a
"), %%mm2
\n\t
"
"movq (%2, %%"
FF_REG_a
"), %%mm3
\n\t
"
"punpcklbw %%mm7, %%mm0
\n\t
"
"punpcklbw %%mm7, %%mm1
\n\t
"
"punpckhbw %%mm7, %%mm2
\n\t
"
"punpckhbw %%mm7, %%mm3
\n\t
"
"paddw %%mm0, %%mm1
\n\t
"
"paddw %%mm2, %%mm3
\n\t
"
"movq (%3, %%"
REG_a
"), %%mm4
\n\t
"
"movq (%3, %%"
REG_a
"), %%mm2
\n\t
"
"movq (%3, %%"
FF_REG_a
"), %%mm4
\n\t
"
"movq (%3, %%"
FF_REG_a
"), %%mm2
\n\t
"
"paddw %%mm5, %%mm1
\n\t
"
"paddw %%mm5, %%mm3
\n\t
"
"psrlw $1, %%mm1
\n\t
"
...
...
@@ -344,7 +344,7 @@ static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2,
"punpckhbw %%mm7, %%mm1
\n\t
"
"paddw %%mm1, %%mm0
\n\t
"
"paddw %%mm0, %%mm6
\n\t
"
"add %4, %%"
REG_a
"
\n\t
"
"add %4, %%"
FF_REG_a
"
\n\t
"
" js 1b
\n\t
"
:
"+a"
(
len
)
:
"r"
(
blk1a
-
len
),
"r"
(
blk1b
-
len
),
"r"
(
blk2
-
len
),
...
...
@@ -356,8 +356,8 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2,
{
x86_reg
len
=
-
stride
*
h
;
__asm__
volatile
(
"movq (%1, %%"
REG_a
"), %%mm0
\n\t
"
"movq 1(%1, %%"
REG_a
"), %%mm2
\n\t
"
"movq (%1, %%"
FF_REG_a
"), %%mm0
\n\t
"
"movq 1(%1, %%"
FF_REG_a
"), %%mm2
\n\t
"
"movq %%mm0, %%mm1
\n\t
"
"movq %%mm2, %%mm3
\n\t
"
"punpcklbw %%mm7, %%mm0
\n\t
"
...
...
@@ -368,8 +368,8 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2,
"paddw %%mm3, %%mm1
\n\t
"
".p2align 4
\n\t
"
"1:
\n\t
"
"movq (%2, %%"
REG_a
"), %%mm2
\n\t
"
"movq 1(%2, %%"
REG_a
"), %%mm4
\n\t
"
"movq (%2, %%"
FF_REG_a
"), %%mm2
\n\t
"
"movq 1(%2, %%"
FF_REG_a
"), %%mm4
\n\t
"
"movq %%mm2, %%mm3
\n\t
"
"movq %%mm4, %%mm5
\n\t
"
"punpcklbw %%mm7, %%mm2
\n\t
"
...
...
@@ -383,8 +383,8 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2,
"paddw %%mm3, %%mm1
\n\t
"
"paddw %%mm5, %%mm0
\n\t
"
"paddw %%mm5, %%mm1
\n\t
"
"movq (%3, %%"
REG_a
"), %%mm4
\n\t
"
"movq (%3, %%"
REG_a
"), %%mm5
\n\t
"
"movq (%3, %%"
FF_REG_a
"), %%mm4
\n\t
"
"movq (%3, %%"
FF_REG_a
"), %%mm5
\n\t
"
"psrlw $2, %%mm0
\n\t
"
"psrlw $2, %%mm1
\n\t
"
"packuswb %%mm1, %%mm0
\n\t
"
...
...
@@ -398,7 +398,7 @@ static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2,
"paddw %%mm4, %%mm6
\n\t
"
"movq %%mm2, %%mm0
\n\t
"
"movq %%mm3, %%mm1
\n\t
"
"add %4, %%"
REG_a
"
\n\t
"
"add %4, %%"
FF_REG_a
"
\n\t
"
" js 1b
\n\t
"
:
"+a"
(
len
)
:
"r"
(
blk1
-
len
),
"r"
(
blk1
-
len
+
stride
),
"r"
(
blk2
-
len
),
...
...
libavcodec/x86/mpegvideo.c
View file @
9eb3da2f
...
...
@@ -188,13 +188,13 @@ __asm__ volatile(
"movd %2, %%mm6
\n\t
"
"packssdw %%mm6, %%mm6
\n\t
"
"packssdw %%mm6, %%mm6
\n\t
"
"mov %3, %%"
REG_a
"
\n\t
"
"mov %3, %%"
FF_REG_a
"
\n\t
"
".p2align 4
\n\t
"
"1:
\n\t
"
"movq (%0, %%"
REG_a
"), %%mm0
\n\t
"
"movq 8(%0, %%"
REG_a
"), %%mm1
\n\t
"
"movq (%1, %%"
REG_a
"), %%mm4
\n\t
"
"movq 8(%1, %%"
REG_a
"), %%mm5
\n\t
"
"movq (%0, %%"
FF_REG_a
"), %%mm0
\n\t
"
"movq 8(%0, %%"
FF_REG_a
"), %%mm1
\n\t
"
"movq (%1, %%"
FF_REG_a
"), %%mm4
\n\t
"
"movq 8(%1, %%"
FF_REG_a
"), %%mm5
\n\t
"
"pmullw %%mm6, %%mm4
\n\t
"
// q=qscale*quant_matrix[i]
"pmullw %%mm6, %%mm5
\n\t
"
// q=qscale*quant_matrix[i]
"pxor %%mm2, %%mm2
\n\t
"
...
...
@@ -209,8 +209,8 @@ __asm__ volatile(
"pmullw %%mm5, %%mm1
\n\t
"
// abs(block[i])*q
"pxor %%mm4, %%mm4
\n\t
"
"pxor %%mm5, %%mm5
\n\t
"
// FIXME slow
"pcmpeqw (%0, %%"
REG_a
"), %%mm4
\n\t
"
// block[i] == 0 ? -1 : 0
"pcmpeqw 8(%0, %%"
REG_a
"), %%mm5
\n\t
"
// block[i] == 0 ? -1 : 0
"pcmpeqw (%0, %%"
FF_
REG_a
"), %%mm4
\n\t
"
// block[i] == 0 ? -1 : 0
"pcmpeqw 8(%0, %%"
FF_
REG_a
"), %%mm5
\n\t
"
// block[i] == 0 ? -1 : 0
"psraw $3, %%mm0
\n\t
"
"psraw $3, %%mm1
\n\t
"
"psubw %%mm7, %%mm0
\n\t
"
...
...
@@ -223,13 +223,13 @@ __asm__ volatile(
"psubw %%mm3, %%mm1
\n\t
"
"pandn %%mm0, %%mm4
\n\t
"
"pandn %%mm1, %%mm5
\n\t
"
"movq %%mm4, (%0, %%"
REG_a
")
\n\t
"
"movq %%mm5, 8(%0, %%"
REG_a
")
\n\t
"
"movq %%mm4, (%0, %%"
FF_REG_a
")
\n\t
"
"movq %%mm5, 8(%0, %%"
FF_REG_a
")
\n\t
"
"add $16, %%"
REG_a
"
\n\t
"
"add $16, %%"
FF_REG_a
"
\n\t
"
"js 1b
\n\t
"
::
"r"
(
block
+
nCoeffs
),
"r"
(
quant_matrix
+
nCoeffs
),
"rm"
(
qscale
),
"g"
(
-
2
*
nCoeffs
)
:
"%"
REG_a
,
"memory"
:
"%"
FF_
REG_a
,
"memory"
);
block
[
0
]
=
block0
;
}
...
...
@@ -251,13 +251,13 @@ __asm__ volatile(
"movd %2, %%mm6
\n\t
"
"packssdw %%mm6, %%mm6
\n\t
"
"packssdw %%mm6, %%mm6
\n\t
"
"mov %3, %%"
REG_a
"
\n\t
"
"mov %3, %%"
FF_REG_a
"
\n\t
"
".p2align 4
\n\t
"
"1:
\n\t
"
"movq (%0, %%"
REG_a
"), %%mm0
\n\t
"
"movq 8(%0, %%"
REG_a
"), %%mm1
\n\t
"
"movq (%1, %%"
REG_a
"), %%mm4
\n\t
"
"movq 8(%1, %%"
REG_a
"), %%mm5
\n\t
"
"movq (%0, %%"
FF_REG_a
"), %%mm0
\n\t
"
"movq 8(%0, %%"
FF_REG_a
"), %%mm1
\n\t
"
"movq (%1, %%"
FF_REG_a
"), %%mm4
\n\t
"
"movq 8(%1, %%"
FF_REG_a
"), %%mm5
\n\t
"
"pmullw %%mm6, %%mm4
\n\t
"
// q=qscale*quant_matrix[i]
"pmullw %%mm6, %%mm5
\n\t
"
// q=qscale*quant_matrix[i]
"pxor %%mm2, %%mm2
\n\t
"
...
...
@@ -276,8 +276,8 @@ __asm__ volatile(
"pmullw %%mm5, %%mm1
\n\t
"
// (abs(block[i])*2 + 1)*q
"pxor %%mm4, %%mm4
\n\t
"
"pxor %%mm5, %%mm5
\n\t
"
// FIXME slow
"pcmpeqw (%0, %%"
REG_a
"), %%mm4
\n\t
"
// block[i] == 0 ? -1 : 0
"pcmpeqw 8(%0, %%"
REG_a
"), %%mm5
\n\t
"
// block[i] == 0 ? -1 : 0
"pcmpeqw (%0, %%"
FF_
REG_a
"), %%mm4
\n\t
"
// block[i] == 0 ? -1 : 0
"pcmpeqw 8(%0, %%"
FF_
REG_a
"), %%mm5
\n\t
"
// block[i] == 0 ? -1 : 0
"psraw $4, %%mm0
\n\t
"
"psraw $4, %%mm1
\n\t
"
"psubw %%mm7, %%mm0
\n\t
"
...
...
@@ -290,13 +290,13 @@ __asm__ volatile(
"psubw %%mm3, %%mm1
\n\t
"
"pandn %%mm0, %%mm4
\n\t
"
"pandn %%mm1, %%mm5
\n\t
"
"movq %%mm4, (%0, %%"
REG_a
")
\n\t
"
"movq %%mm5, 8(%0, %%"
REG_a
")
\n\t
"
"movq %%mm4, (%0, %%"
FF_REG_a
")
\n\t
"
"movq %%mm5, 8(%0, %%"
FF_REG_a
")
\n\t
"
"add $16, %%"
REG_a
"
\n\t
"
"add $16, %%"
FF_REG_a
"
\n\t
"
"js 1b
\n\t
"
::
"r"
(
block
+
nCoeffs
),
"r"
(
quant_matrix
+
nCoeffs
),
"rm"
(
qscale
),
"g"
(
-
2
*
nCoeffs
)
:
"%"
REG_a
,
"memory"
:
"%"
FF_
REG_a
,
"memory"
);
}
...
...
@@ -326,13 +326,13 @@ __asm__ volatile(
"movd %2, %%mm6
\n\t
"
"packssdw %%mm6, %%mm6
\n\t
"
"packssdw %%mm6, %%mm6
\n\t
"
"mov %3, %%"
REG_a
"
\n\t
"
"mov %3, %%"
FF_REG_a
"
\n\t
"
".p2align 4
\n\t
"
"1:
\n\t
"
"movq (%0, %%"
REG_a
"), %%mm0
\n\t
"
"movq 8(%0, %%"
REG_a
"), %%mm1
\n\t
"
"movq (%1, %%"
REG_a
"), %%mm4
\n\t
"
"movq 8(%1, %%"
REG_a
"), %%mm5
\n\t
"
"movq (%0, %%"
FF_REG_a
"), %%mm0
\n\t
"
"movq 8(%0, %%"
FF_REG_a
"), %%mm1
\n\t
"
"movq (%1, %%"
FF_REG_a
"), %%mm4
\n\t
"
"movq 8(%1, %%"
FF_REG_a
"), %%mm5
\n\t
"
"pmullw %%mm6, %%mm4
\n\t
"
// q=qscale*quant_matrix[i]
"pmullw %%mm6, %%mm5
\n\t
"
// q=qscale*quant_matrix[i]
"pxor %%mm2, %%mm2
\n\t
"
...
...
@@ -347,8 +347,8 @@ __asm__ volatile(
"pmullw %%mm5, %%mm1
\n\t
"
// abs(block[i])*q
"pxor %%mm4, %%mm4
\n\t
"
"pxor %%mm5, %%mm5
\n\t
"
// FIXME slow
"pcmpeqw (%0, %%"
REG_a
"), %%mm4
\n\t
"
// block[i] == 0 ? -1 : 0
"pcmpeqw 8(%0, %%"
REG_a
"), %%mm5
\n\t
"
// block[i] == 0 ? -1 : 0
"pcmpeqw (%0, %%"
FF_
REG_a
"), %%mm4
\n\t
"
// block[i] == 0 ? -1 : 0
"pcmpeqw 8(%0, %%"
FF_
REG_a
"), %%mm5
\n\t
"
// block[i] == 0 ? -1 : 0
"psraw $4, %%mm0
\n\t
"
"psraw $4, %%mm1
\n\t
"
"pxor %%mm2, %%mm0
\n\t
"
...
...
@@ -357,13 +357,13 @@ __asm__ volatile(
"psubw %%mm3, %%mm1
\n\t
"
"pandn %%mm0, %%mm4
\n\t
"
"pandn %%mm1, %%mm5
\n\t
"
"movq %%mm4, (%0, %%"
REG_a
")
\n\t
"
"movq %%mm5, 8(%0, %%"
REG_a
")
\n\t
"
"movq %%mm4, (%0, %%"
FF_REG_a
")
\n\t
"
"movq %%mm5, 8(%0, %%"
FF_REG_a
")
\n\t
"
"add $16, %%"
REG_a
"
\n\t
"
"add $16, %%"
FF_REG_a
"
\n\t
"
"jng 1b
\n\t
"
::
"r"
(
block
+
nCoeffs
),
"r"
(
quant_matrix
+
nCoeffs
),
"rm"
(
qscale
),
"g"
(
-
2
*
nCoeffs
)
:
"%"
REG_a
,
"memory"
:
"%"
FF_
REG_a
,
"memory"
);
block
[
0
]
=
block0
;
//Note, we do not do mismatch control for intra as errors cannot accumulate
...
...
@@ -390,13 +390,13 @@ __asm__ volatile(
"movd %2, %%mm6
\n\t
"
"packssdw %%mm6, %%mm6
\n\t
"
"packssdw %%mm6, %%mm6
\n\t
"
"mov %3, %%"
REG_a
"
\n\t
"
"mov %3, %%"
FF_REG_a
"
\n\t
"
".p2align 4
\n\t
"
"1:
\n\t
"
"movq (%0, %%"
REG_a
"), %%mm0
\n\t
"
"movq 8(%0, %%"
REG_a
"), %%mm1
\n\t
"
"movq (%1, %%"
REG_a
"), %%mm4
\n\t
"
"movq 8(%1, %%"
REG_a
"), %%mm5
\n\t
"
"movq (%0, %%"
FF_REG_a
"), %%mm0
\n\t
"
"movq 8(%0, %%"
FF_REG_a
"), %%mm1
\n\t
"
"movq (%1, %%"
FF_REG_a
"), %%mm4
\n\t
"
"movq 8(%1, %%"
FF_REG_a
"), %%mm5
\n\t
"
"pmullw %%mm6, %%mm4
\n\t
"
// q=qscale*quant_matrix[i]
"pmullw %%mm6, %%mm5
\n\t
"
// q=qscale*quant_matrix[i]
"pxor %%mm2, %%mm2
\n\t
"
...
...
@@ -415,8 +415,8 @@ __asm__ volatile(
"paddw %%mm5, %%mm1
\n\t
"
// (abs(block[i])*2 + 1)*q
"pxor %%mm4, %%mm4
\n\t
"
"pxor %%mm5, %%mm5
\n\t
"
// FIXME slow
"pcmpeqw (%0, %%"
REG_a
"), %%mm4
\n\t
"
// block[i] == 0 ? -1 : 0
"pcmpeqw 8(%0, %%"
REG_a
"), %%mm5
\n\t
"
// block[i] == 0 ? -1 : 0
"pcmpeqw (%0, %%"
FF_
REG_a
"), %%mm4
\n\t
"
// block[i] == 0 ? -1 : 0
"pcmpeqw 8(%0, %%"
FF_
REG_a
"), %%mm5
\n\t
"
// block[i] == 0 ? -1 : 0
"psrlw $5, %%mm0
\n\t
"
"psrlw $5, %%mm1
\n\t
"
"pxor %%mm2, %%mm0
\n\t
"
...
...
@@ -427,10 +427,10 @@ __asm__ volatile(
"pandn %%mm1, %%mm5
\n\t
"
"pxor %%mm4, %%mm7
\n\t
"
"pxor %%mm5, %%mm7
\n\t
"
"movq %%mm4, (%0, %%"
REG_a
")
\n\t
"
"movq %%mm5, 8(%0, %%"
REG_a
")
\n\t
"
"movq %%mm4, (%0, %%"
FF_REG_a
")
\n\t
"
"movq %%mm5, 8(%0, %%"
FF_REG_a
")
\n\t
"
"add $16, %%"
REG_a
"
\n\t
"
"add $16, %%"
FF_REG_a
"
\n\t
"
"jng 1b
\n\t
"
"movd 124(%0, %3), %%mm0
\n\t
"
"movq %%mm7, %%mm6
\n\t
"
...
...
@@ -445,7 +445,7 @@ __asm__ volatile(
"movd %%mm0, 124(%0, %3)
\n\t
"
::
"r"
(
block
+
nCoeffs
),
"r"
(
quant_matrix
+
nCoeffs
),
"rm"
(
qscale
),
"r"
(
-
2
*
nCoeffs
)
:
"%"
REG_a
,
"memory"
:
"%"
FF_
REG_a
,
"memory"
);
}
...
...
libavcodec/x86/mpegvideoenc_template.c
View file @
9eb3da2f
...
...
@@ -150,32 +150,32 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
if
((
s
->
out_format
==
FMT_H263
||
s
->
out_format
==
FMT_H261
)
&&
s
->
mpeg_quant
==
0
){
__asm__
volatile
(
"movd %%"
REG_a
", "
MM
"3
\n\t
"
// last_non_zero_p1
"movd %%"
FF_REG_a
", "
MM
"3
\n\t
"
// last_non_zero_p1
SPREADW
(
MM
"3"
)
"pxor "
MM
"7, "
MM
"7
\n\t
"
// 0
"pxor "
MM
"4, "
MM
"4
\n\t
"
// 0
MOVQ
" (%2), "
MM
"5
\n\t
"
// qmat[0]
"pxor "
MM
"6, "
MM
"6
\n\t
"
"psubw (%3), "
MM
"6
\n\t
"
// -bias[0]
"mov $-128, %%"
REG_a
"
\n\t
"
"mov $-128, %%"
FF_REG_a
"
\n\t
"
".p2align 4
\n\t
"
"1:
\n\t
"
MOVQ
" (%1, %%"
REG_a
"), "
MM
"0
\n\t
"
// block[i]
MOVQ
" (%1, %%"
FF_REG_a
"), "
MM
"0
\n\t
"
// block[i]
SAVE_SIGN
(
MM
"1"
,
MM
"0"
)
// ABS(block[i])
"psubusw "
MM
"6, "
MM
"0
\n\t
"
// ABS(block[i]) + bias[0]
"pmulhw "
MM
"5, "
MM
"0
\n\t
"
// (ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16
"por "
MM
"0, "
MM
"4
\n\t
"
RESTORE_SIGN
(
MM
"1"
,
MM
"0"
)
// out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
MOVQ
" "
MM
"0, (%5, %%"
REG_a
")
\n\t
"
MOVQ
" "
MM
"0, (%5, %%"
FF_REG_a
")
\n\t
"
"pcmpeqw "
MM
"7, "
MM
"0
\n\t
"
// out==0 ? 0xFF : 0x00
MOVQ
" (%4, %%"
REG_a
"), "
MM
"1
\n\t
"
MOVQ
" "
MM
"7, (%1, %%"
REG_a
")
\n\t
"
// 0
MOVQ
" (%4, %%"
FF_REG_a
"), "
MM
"1
\n\t
"
MOVQ
" "
MM
"7, (%1, %%"
FF_REG_a
")
\n\t
"
// 0
"pandn "
MM
"1, "
MM
"0
\n\t
"
PMAXW
(
MM
"0"
,
MM
"3"
)
"add $"
MMREG_WIDTH
", %%"
REG_a
"
\n\t
"
"add $"
MMREG_WIDTH
", %%"
FF_REG_a
"
\n\t
"
" js 1b
\n\t
"
PMAX
(
MM
"3"
,
MM
"0"
)
"movd "
MM
"3, %%"
REG_a
"
\n\t
"
"movd "
MM
"3, %%"
FF_REG_a
"
\n\t
"
"movzbl %%al, %%eax
\n\t
"
// last_non_zero_p1
:
"+a"
(
last_non_zero_p1
)
:
"r"
(
block
+
64
),
"r"
(
qmat
),
"r"
(
bias
),
...
...
@@ -185,31 +185,31 @@ static int RENAME(dct_quantize)(MpegEncContext *s,
);
}
else
{
// FMT_H263
__asm__
volatile
(
"movd %%"
REG_a
", "
MM
"3
\n\t
"
// last_non_zero_p1
"movd %%"
FF_REG_a
", "
MM
"3
\n\t
"
// last_non_zero_p1
SPREADW
(
MM
"3"
)
"pxor "
MM
"7, "
MM
"7
\n\t
"
// 0
"pxor "
MM
"4, "
MM
"4
\n\t
"
// 0
"mov $-128, %%"
REG_a
"
\n\t
"
"mov $-128, %%"
FF_REG_a
"
\n\t
"
".p2align 4
\n\t
"
"1:
\n\t
"
MOVQ
" (%1, %%"
REG_a
"), "
MM
"0
\n\t
"
// block[i]
MOVQ
" (%1, %%"
FF_REG_a
"), "
MM
"0
\n\t
"
// block[i]
SAVE_SIGN
(
MM
"1"
,
MM
"0"
)
// ABS(block[i])
MOVQ
" (%3, %%"
REG_a
"), "
MM
"6
\n\t
"
// bias[0]
MOVQ
" (%3, %%"
FF_REG_a
"), "
MM
"6
\n\t
"
// bias[0]
"paddusw "
MM
"6, "
MM
"0
\n\t
"
// ABS(block[i]) + bias[0]
MOVQ
" (%2, %%"
REG_a
"), "
MM
"5
\n\t
"
// qmat[i]
MOVQ
" (%2, %%"
FF_REG_a
"), "
MM
"5
\n\t
"
// qmat[i]
"pmulhw "
MM
"5, "
MM
"0
\n\t
"
// (ABS(block[i])*qmat[0] + bias[0]*qmat[0])>>16
"por "
MM
"0, "
MM
"4
\n\t
"
RESTORE_SIGN
(
MM
"1"
,
MM
"0"
)
// out=((ABS(block[i])*qmat[0] - bias[0]*qmat[0])>>16)*sign(block[i])
MOVQ
" "
MM
"0, (%5, %%"
REG_a
")
\n\t
"
MOVQ
" "
MM
"0, (%5, %%"
FF_REG_a
")
\n\t
"
"pcmpeqw "
MM
"7, "
MM
"0
\n\t
"
// out==0 ? 0xFF : 0x00
MOVQ
" (%4, %%"
REG_a
"), "
MM
"1
\n\t
"
MOVQ
" "
MM
"7, (%1, %%"
REG_a
")
\n\t
"
// 0
MOVQ
" (%4, %%"
FF_REG_a
"), "
MM
"1
\n\t
"
MOVQ
" "
MM
"7, (%1, %%"
FF_REG_a
")
\n\t
"
// 0
"pandn "
MM
"1, "
MM
"0
\n\t
"
PMAXW
(
MM
"0"
,
MM
"3"
)
"add $"
MMREG_WIDTH
", %%"
REG_a
"
\n\t
"
"add $"
MMREG_WIDTH
", %%"
FF_REG_a
"
\n\t
"
" js 1b
\n\t
"
PMAX
(
MM
"3"
,
MM
"0"
)
"movd "
MM
"3, %%"
REG_a
"
\n\t
"
"movd "
MM
"3, %%"
FF_REG_a
"
\n\t
"
"movzbl %%al, %%eax
\n\t
"
// last_non_zero_p1
:
"+a"
(
last_non_zero_p1
)
:
"r"
(
block
+
64
),
"r"
(
qmat
+
64
),
"r"
(
bias
+
64
),
...
...
libavcodec/x86/rnd_template.c
View file @
9eb3da2f
...
...
@@ -46,12 +46,12 @@ av_unused STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixel
"punpckhbw %%mm7, %%mm5
\n\t
"
"paddusw %%mm0, %%mm4
\n\t
"
"paddusw %%mm1, %%mm5
\n\t
"
"xor %%"
REG_a
", %%"
REG_a
"
\n\t
"
"xor %%"
FF_REG_a
", %%"
FF_REG_a
"
\n\t
"
"add %3, %1
\n\t
"
".p2align 3
\n\t
"
"1:
\n\t
"
"movq (%1, %%"
REG_a
"), %%mm0
\n\t
"
"movq 1(%1, %%"
REG_a
"), %%mm2
\n\t
"
"movq (%1, %%"
FF_
REG_a
"), %%mm0
\n\t
"
"movq 1(%1, %%"
FF_
REG_a
"), %%mm2
\n\t
"
"movq %%mm0, %%mm1
\n\t
"
"movq %%mm2, %%mm3
\n\t
"
"punpcklbw %%mm7, %%mm0
\n\t
"
...
...
@@ -67,11 +67,11 @@ av_unused STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixel
"psrlw $2, %%mm4
\n\t
"
"psrlw $2, %%mm5
\n\t
"
"packuswb %%mm5, %%mm4
\n\t
"
"movq %%mm4, (%2, %%"
REG_a
")
\n\t
"
"add %3, %%"
REG_a
"
\n\t
"
"movq %%mm4, (%2, %%"
FF_
REG_a
")
\n\t
"
"add %3, %%"
FF_
REG_a
"
\n\t
"
"movq (%1, %%"
REG_a
"), %%mm2
\n\t
"
// 0 <-> 2 1 <-> 3
"movq 1(%1, %%"
REG_a
"), %%mm4
\n\t
"
"movq (%1, %%"
FF_
REG_a
"), %%mm2
\n\t
"
// 0 <-> 2 1 <-> 3
"movq 1(%1, %%"
FF_
REG_a
"), %%mm4
\n\t
"
"movq %%mm2, %%mm3
\n\t
"
"movq %%mm4, %%mm5
\n\t
"
"punpcklbw %%mm7, %%mm2
\n\t
"
...
...
@@ -87,14 +87,14 @@ av_unused STATIC void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixel
"psrlw $2, %%mm0
\n\t
"
"psrlw $2, %%mm1
\n\t
"
"packuswb %%mm1, %%mm0
\n\t
"
"movq %%mm0, (%2, %%"
REG_a
")
\n\t
"
"add %3, %%"
REG_a
"
\n\t
"
"movq %%mm0, (%2, %%"
FF_
REG_a
")
\n\t
"
"add %3, %%"
FF_REG_a
"
\n\t
"
"subl $2, %0
\n\t
"
"jnz 1b
\n\t
"
:
"+g"
(
h
),
"+S"
(
pixels
)
:
"D"
(
block
),
"r"
((
x86_reg
)
line_size
)
:
REG_a
,
"memory"
);
:
FF_
REG_a
,
"memory"
);
}
// avg_pixels
...
...
@@ -115,12 +115,12 @@ av_unused STATIC void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixel
"punpckhbw %%mm7, %%mm5
\n\t
"
"paddusw %%mm0, %%mm4
\n\t
"
"paddusw %%mm1, %%mm5
\n\t
"
"xor %%"
REG_a
", %%"
REG_a
"
\n\t
"
"xor %%"
FF_REG_a
", %%"
FF_REG_a
"
\n\t
"
"add %3, %1
\n\t
"
".p2align 3
\n\t
"
"1:
\n\t
"
"movq (%1, %%"
REG_a
"), %%mm0
\n\t
"
"movq 1(%1, %%"
REG_a
"), %%mm2
\n\t
"
"movq (%1, %%"
FF_
REG_a
"), %%mm0
\n\t
"
"movq 1(%1, %%"
FF_
REG_a
"), %%mm2
\n\t
"
"movq %%mm0, %%mm1
\n\t
"
"movq %%mm2, %%mm3
\n\t
"
"punpcklbw %%mm7, %%mm0
\n\t
"
...
...
@@ -135,16 +135,16 @@ av_unused STATIC void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixel
"paddusw %%mm1, %%mm5
\n\t
"
"psrlw $2, %%mm4
\n\t
"
"psrlw $2, %%mm5
\n\t
"
"movq (%2, %%"
REG_a
"), %%mm3
\n\t
"
"movq (%2, %%"
FF_
REG_a
"), %%mm3
\n\t
"
"packuswb %%mm5, %%mm4
\n\t
"
"pcmpeqd %%mm2, %%mm2
\n\t
"
"paddb %%mm2, %%mm2
\n\t
"
PAVGB_MMX
(
%%
mm3
,
%%
mm4
,
%%
mm5
,
%%
mm2
)
"movq %%mm5, (%2, %%"
REG_a
")
\n\t
"
"add %3, %%"
REG_a
"
\n\t
"
"movq %%mm5, (%2, %%"
FF_
REG_a
")
\n\t
"
"add %3, %%"
FF_REG_a
"
\n\t
"
"movq (%1, %%"
REG_a
"), %%mm2
\n\t
"
// 0 <-> 2 1 <-> 3
"movq 1(%1, %%"
REG_a
"), %%mm4
\n\t
"
"movq (%1, %%"
FF_
REG_a
"), %%mm2
\n\t
"
// 0 <-> 2 1 <-> 3
"movq 1(%1, %%"
FF_
REG_a
"), %%mm4
\n\t
"
"movq %%mm2, %%mm3
\n\t
"
"movq %%mm4, %%mm5
\n\t
"
"punpcklbw %%mm7, %%mm2
\n\t
"
...
...
@@ -159,17 +159,17 @@ av_unused STATIC void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixel
"paddusw %%mm5, %%mm1
\n\t
"
"psrlw $2, %%mm0
\n\t
"
"psrlw $2, %%mm1
\n\t
"
"movq (%2, %%"
REG_a
"), %%mm3
\n\t
"
"movq (%2, %%"
FF_
REG_a
"), %%mm3
\n\t
"
"packuswb %%mm1, %%mm0
\n\t
"
"pcmpeqd %%mm2, %%mm2
\n\t
"
"paddb %%mm2, %%mm2
\n\t
"
PAVGB_MMX
(
%%
mm3
,
%%
mm0
,
%%
mm1
,
%%
mm2
)
"movq %%mm1, (%2, %%"
REG_a
")
\n\t
"
"add %3, %%"
REG_a
"
\n\t
"
"movq %%mm1, (%2, %%"
FF_
REG_a
")
\n\t
"
"add %3, %%"
FF_
REG_a
"
\n\t
"
"subl $2, %0
\n\t
"
"jnz 1b
\n\t
"
:
"+g"
(
h
),
"+S"
(
pixels
)
:
"D"
(
block
),
"r"
((
x86_reg
)
line_size
)
:
REG_a
,
"memory"
);
:
FF_
REG_a
,
"memory"
);
}
libavcodec/x86/snowdsp.c
View file @
9eb3da2f
...
...
@@ -390,10 +390,10 @@ static void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, IDWTELEM *temp, int w
#if HAVE_7REGS
#define snow_vertical_compose_sse2_load_add(op,r,t0,t1,t2,t3)\
""op" ("r",%%"REG_d"), %%"t0" \n\t"\
""op" 16("r",%%"REG_d"), %%"t1" \n\t"\
""op" 32("r",%%"REG_d"), %%"t2" \n\t"\
""op" 48("r",%%"REG_d"), %%"t3" \n\t"
""op" ("r",%%"
FF_
REG_d"), %%"t0" \n\t"\
""op" 16("r",%%"
FF_
REG_d"), %%"t1" \n\t"\
""op" 32("r",%%"
FF_
REG_d"), %%"t2" \n\t"\
""op" 48("r",%%"
FF_
REG_d"), %%"t3" \n\t"
#define snow_vertical_compose_sse2_load(r,t0,t1,t2,t3)\
snow_vertical_compose_sse2_load_add("movdqa",r,t0,t1,t2,t3)
...
...
@@ -408,10 +408,10 @@ static void ff_snow_horizontal_compose97i_mmx(IDWTELEM *b, IDWTELEM *temp, int w
"psubw %%"s3", %%"t3" \n\t"
#define snow_vertical_compose_sse2_store(w,s0,s1,s2,s3)\
"movdqa %%"s0", ("w",%%"
REG_d")
\n\t"\
"movdqa %%"s1", 16("w",%%"
REG_d")
\n\t"\
"movdqa %%"s2", 32("w",%%"
REG_d")
\n\t"\
"movdqa %%"s3", 48("w",%%"
REG_d")
\n\t"
"movdqa %%"s0", ("w",%%"
FF_REG_d")
\n\t"\
"movdqa %%"s1", 16("w",%%"
FF_REG_d")
\n\t"\
"movdqa %%"s2", 32("w",%%"
FF_REG_d")
\n\t"\
"movdqa %%"s3", 48("w",%%"
FF_REG_d")
\n\t"
#define snow_vertical_compose_sra(n,t0,t1,t2,t3)\
"psraw $"n", %%"t0" \n\t"\
...
...
@@ -477,14 +477,14 @@ static void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELE
"psrlw $13, %%xmm5
\n\t
"
"paddw %%xmm7, %%xmm5
\n\t
"
snow_vertical_compose_r2r_add
(
"xmm5"
,
"xmm5"
,
"xmm5"
,
"xmm5"
,
"xmm0"
,
"xmm2"
,
"xmm4"
,
"xmm6"
)
"movq (%2,%%"
REG_d
"), %%xmm1
\n\t
"
"movq 8(%2,%%"
REG_d
"), %%xmm3
\n\t
"
"movq (%2,%%"
FF_REG_d
"), %%xmm1
\n\t
"
"movq 8(%2,%%"
FF_REG_d
"), %%xmm3
\n\t
"
"paddw %%xmm7, %%xmm1
\n\t
"
"paddw %%xmm7, %%xmm3
\n\t
"
"pavgw %%xmm1, %%xmm0
\n\t
"
"pavgw %%xmm3, %%xmm2
\n\t
"
"movq 16(%2,%%"
REG_d
"), %%xmm1
\n\t
"
"movq 24(%2,%%"
REG_d
"), %%xmm3
\n\t
"
"movq 16(%2,%%"
FF_REG_d
"), %%xmm1
\n\t
"
"movq 24(%2,%%"
FF_REG_d
"), %%xmm3
\n\t
"
"paddw %%xmm7, %%xmm1
\n\t
"
"paddw %%xmm7, %%xmm3
\n\t
"
"pavgw %%xmm1, %%xmm4
\n\t
"
...
...
@@ -504,17 +504,17 @@ static void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELE
snow_vertical_compose_sse2_store
(
"%2"
,
"xmm0"
,
"xmm2"
,
"xmm4"
,
"xmm6"
)
"2:
\n\t
"
"sub $64, %%"
REG_d
"
\n\t
"
"sub $64, %%"
FF_REG_d
"
\n\t
"
"jge 1b
\n\t
"
:
"+d"
(
i
)
:
"r"
(
b0
),
"r"
(
b1
),
"r"
(
b2
),
"r"
(
b3
),
"r"
(
b4
),
"r"
(
b5
));
}
#define snow_vertical_compose_mmx_load_add(op,r,t0,t1,t2,t3)\
""op" ("r",%%"REG_d"), %%"t0" \n\t"\
""op" 8("r",%%"REG_d"), %%"t1" \n\t"\
""op" 16("r",%%"REG_d"), %%"t2" \n\t"\
""op" 24("r",%%"REG_d"), %%"t3" \n\t"
""op" ("r",%%"
FF_
REG_d"), %%"t0" \n\t"\
""op" 8("r",%%"
FF_
REG_d"), %%"t1" \n\t"\
""op" 16("r",%%"
FF_
REG_d"), %%"t2" \n\t"\
""op" 24("r",%%"
FF_
REG_d"), %%"t3" \n\t"
#define snow_vertical_compose_mmx_load(r,t0,t1,t2,t3)\
snow_vertical_compose_mmx_load_add("movq",r,t0,t1,t2,t3)
...
...
@@ -523,10 +523,10 @@ static void ff_snow_vertical_compose97i_sse2(IDWTELEM *b0, IDWTELEM *b1, IDWTELE
snow_vertical_compose_mmx_load_add("paddw",r,t0,t1,t2,t3)
#define snow_vertical_compose_mmx_store(w,s0,s1,s2,s3)\
"movq %%"s0", ("w",%%"REG_d") \n\t"\
"movq %%"s1", 8("w",%%"REG_d") \n\t"\
"movq %%"s2", 16("w",%%"REG_d") \n\t"\
"movq %%"s3", 24("w",%%"REG_d") \n\t"
"movq %%"s0", ("w",%%"
FF_
REG_d") \n\t"\
"movq %%"s1", 8("w",%%"
FF_
REG_d") \n\t"\
"movq %%"s2", 16("w",%%"
FF_
REG_d") \n\t"\
"movq %%"s3", 24("w",%%"
FF_
REG_d") \n\t"
#define snow_vertical_compose_mmx_move(s0,s1,s2,s3,t0,t1,t2,t3)\
"movq %%"s0", %%"t0" \n\t"\
...
...
@@ -571,14 +571,14 @@ static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM
"psrlw $13, %%mm5
\n\t
"
"paddw %%mm7, %%mm5
\n\t
"
snow_vertical_compose_r2r_add
(
"mm5"
,
"mm5"
,
"mm5"
,
"mm5"
,
"mm0"
,
"mm2"
,
"mm4"
,
"mm6"
)
"movq (%2,%%"
REG_d
"), %%mm1
\n\t
"
"movq 8(%2,%%"
REG_d
"), %%mm3
\n\t
"
"movq (%2,%%"
FF_REG_d
"), %%mm1
\n\t
"
"movq 8(%2,%%"
FF_REG_d
"), %%mm3
\n\t
"
"paddw %%mm7, %%mm1
\n\t
"
"paddw %%mm7, %%mm3
\n\t
"
"pavgw %%mm1, %%mm0
\n\t
"
"pavgw %%mm3, %%mm2
\n\t
"
"movq 16(%2,%%"
REG_d
"), %%mm1
\n\t
"
"movq 24(%2,%%"
REG_d
"), %%mm3
\n\t
"
"movq 16(%2,%%"
FF_REG_d
"), %%mm1
\n\t
"
"movq 24(%2,%%"
FF_REG_d
"), %%mm3
\n\t
"
"paddw %%mm7, %%mm1
\n\t
"
"paddw %%mm7, %%mm3
\n\t
"
"pavgw %%mm1, %%mm4
\n\t
"
...
...
@@ -598,7 +598,7 @@ static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM
snow_vertical_compose_mmx_store
(
"%2"
,
"mm0"
,
"mm2"
,
"mm4"
,
"mm6"
)
"2:
\n\t
"
"sub $32, %%"
REG_d
"
\n\t
"
"sub $32, %%"
FF_REG_d
"
\n\t
"
"jge 1b
\n\t
"
:
"+d"
(
i
)
:
"r"
(
b0
),
"r"
(
b1
),
"r"
(
b2
),
"r"
(
b3
),
"r"
(
b4
),
"r"
(
b5
));
...
...
@@ -610,39 +610,39 @@ static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM
IDWTELEM * * dst_array = sb->line + src_y;\
x86_reg tmp;\
__asm__ volatile(\
"mov %7, %%"
REG_c"
\n\t"\
"mov %7, %%"
FF_REG_c"
\n\t"\
"mov %6, %2 \n\t"\
"mov %4, %%"
REG_S"
\n\t"\
"mov %4, %%"
FF_REG_S"
\n\t"\
"pxor %%xmm7, %%xmm7 \n\t"
/* 0 */
\
"pcmpeqd %%xmm3, %%xmm3 \n\t"\
"psllw $15, %%xmm3 \n\t"\
"psrlw $12, %%xmm3 \n\t"
/* FRAC_BITS >> 1 */
\
"1: \n\t"\
"mov %1, %%"
REG_D"
\n\t"\
"mov (%%"
REG_D"), %%"REG_D"
\n\t"\
"add %3, %%"
REG_D"
\n\t"
"mov %1, %%"
FF_REG_D"
\n\t"\
"mov (%%"
FF_REG_D"), %%"FF_REG_D"
\n\t"\
"add %3, %%"
FF_REG_D"
\n\t"
#define snow_inner_add_yblock_sse2_start_8(out_reg1, out_reg2, ptr_offset, s_offset)\
"mov "
PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"
REG_d"; \n\t"\
"movq (%%"
REG_d"), %%"out_reg1"
\n\t"\
"movq (%%"
REG_d", %%"REG_c"), %%"out_reg2"
\n\t"\
"mov "
FF_PTR_SIZE"*"ptr_offset"(%%"FF_REG_a"), %%"FF_
REG_d"; \n\t"\
"movq (%%"
FF_REG_d"), %%"out_reg1"
\n\t"\
"movq (%%"
FF_REG_d", %%"FF_REG_c"), %%"out_reg2"
\n\t"\
"punpcklbw %%xmm7, %%"out_reg1" \n\t"\
"punpcklbw %%xmm7, %%"out_reg2" \n\t"\
"movq "s_offset"(%%"
REG_S"), %%xmm0
\n\t"\
"movq "s_offset"+16(%%"REG_S"), %%xmm4 \n\t"\
"movq "s_offset"(%%"
FF_REG_S"), %%xmm0
\n\t"\
"movq "s_offset"+16(%%"
FF_
REG_S"), %%xmm4 \n\t"\
"punpcklbw %%xmm7, %%xmm0 \n\t"\
"punpcklbw %%xmm7, %%xmm4 \n\t"\
"pmullw %%xmm0, %%"out_reg1" \n\t"\
"pmullw %%xmm4, %%"out_reg2" \n\t"
#define snow_inner_add_yblock_sse2_start_16(out_reg1, out_reg2, ptr_offset, s_offset)\
"mov "
PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"
REG_d"; \n\t"\
"movq (%%"
REG_d"), %%"out_reg1"
\n\t"\
"movq 8(%%"
REG_d"), %%"out_reg2"
\n\t"\
"mov "
FF_PTR_SIZE"*"ptr_offset"(%%"FF_REG_a"), %%"FF_
REG_d"; \n\t"\
"movq (%%"
FF_REG_d"), %%"out_reg1"
\n\t"\
"movq 8(%%"
FF_REG_d"), %%"out_reg2"
\n\t"\
"punpcklbw %%xmm7, %%"out_reg1" \n\t"\
"punpcklbw %%xmm7, %%"out_reg2" \n\t"\
"movq "s_offset"(%%"
REG_S"), %%xmm0
\n\t"\
"movq "s_offset"+8(%%"REG_S"), %%xmm4 \n\t"\
"movq "s_offset"(%%"
FF_REG_S"), %%xmm0
\n\t"\
"movq "s_offset"+8(%%"
FF_
REG_S"), %%xmm4 \n\t"\
"punpcklbw %%xmm7, %%xmm0 \n\t"\
"punpcklbw %%xmm7, %%xmm4 \n\t"\
"pmullw %%xmm0, %%"out_reg1" \n\t"\
...
...
@@ -659,12 +659,12 @@ static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM
"paddusw %%xmm6, %%xmm5 \n\t"
#define snow_inner_add_yblock_sse2_end_common1\
"add $32, %%"
REG_S"
\n\t"\
"add %%"
REG_c", %0
\n\t"\
"add %%"
REG_c", "PTR_SIZE"*3(%%"REG_a");
\n\t"\
"add %%"
REG_c", "PTR_SIZE"*2(%%"REG_a");
\n\t"\
"add %%"
REG_c", "PTR_SIZE"*1(%%"REG_a");
\n\t"\
"add %%"
REG_c", (%%"REG_a")
\n\t"
"add $32, %%"
FF_REG_S"
\n\t"\
"add %%"
FF_REG_c", %0
\n\t"\
"add %%"
FF_REG_c", "FF_PTR_SIZE"*3(%%"FF_REG_a");
\n\t"\
"add %%"
FF_REG_c", "FF_PTR_SIZE"*2(%%"FF_REG_a");
\n\t"\
"add %%"
FF_REG_c", "FF_PTR_SIZE"*1(%%"FF_REG_a");
\n\t"\
"add %%"
FF_REG_c", (%%"FF_REG_a")
\n\t"
#define snow_inner_add_yblock_sse2_end_common2\
"jnz 1b \n\t"\
...
...
@@ -672,18 +672,18 @@ static void ff_snow_vertical_compose97i_mmx(IDWTELEM *b0, IDWTELEM *b1, IDWTELEM
:\
"rm"((x86_reg)(src_x<<1)),"m"(obmc),"a"(block),"m"(b_h),"m"(src_stride):\
XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm4", "%xmm5", "%xmm6", "%xmm7", )\
"%"
REG_c"","%"REG_S"","%"REG_D"","%"
REG_d"");
"%"
FF_REG_c"","%"FF_REG_S"","%"FF_REG_D"","%"FF_
REG_d"");
#define snow_inner_add_yblock_sse2_end_8\
"sal $1, %%"
REG_c"
\n\t"\
"add"
OPSIZE" $"PTR_SIZE"*2, %1
\n\t"\
"sal $1, %%"
FF_REG_c"
\n\t"\
"add"
FF_OPSIZE" $"FF_PTR_SIZE"*2, %1
\n\t"\
snow_inner_add_yblock_sse2_end_common1\
"sar $1, %%"
REG_c"
\n\t"\
"sar $1, %%"
FF_REG_c"
\n\t"\
"sub $2, %2 \n\t"\
snow_inner_add_yblock_sse2_end_common2
#define snow_inner_add_yblock_sse2_end_16\
"add"
OPSIZE" $"PTR_SIZE"*1, %1
\n\t"\
"add"
FF_OPSIZE" $"FF_PTR_SIZE"*1, %1
\n\t"\
snow_inner_add_yblock_sse2_end_common1\
"dec %2 \n\t"\
snow_inner_add_yblock_sse2_end_common2
...
...
@@ -696,28 +696,28 @@ snow_inner_add_yblock_sse2_accum_8("2", "8")
snow_inner_add_yblock_sse2_accum_8
(
"1"
,
"128"
)
snow_inner_add_yblock_sse2_accum_8
(
"0"
,
"136"
)
"mov %0, %%"
REG_d
"
\n\t
"
"movdqa (%%"
REG_D
"), %%xmm0
\n\t
"
"mov %0, %%"
FF_REG_d
"
\n\t
"
"movdqa (%%"
FF_REG_D
"), %%xmm0
\n\t
"
"movdqa %%xmm1, %%xmm2
\n\t
"
"punpckhwd %%xmm7, %%xmm1
\n\t
"
"punpcklwd %%xmm7, %%xmm2
\n\t
"
"paddd %%xmm2, %%xmm0
\n\t
"
"movdqa 16(%%"
REG_D
"), %%xmm2
\n\t
"
"movdqa 16(%%"
FF_REG_D
"), %%xmm2
\n\t
"
"paddd %%xmm1, %%xmm2
\n\t
"
"paddd %%xmm3, %%xmm0
\n\t
"
"paddd %%xmm3, %%xmm2
\n\t
"
"mov %1, %%"
REG_D
"
\n\t
"
"mov "
PTR_SIZE
"(%%"
REG_D
"), %%"
REG_D
";
\n\t
"
"add %3, %%"
REG_D
"
\n\t
"
"mov %1, %%"
FF_REG_D
"
\n\t
"
"mov "
FF_PTR_SIZE
"(%%"
FF_REG_D
"), %%"
FF_REG_D
";
\n\t
"
"add %3, %%"
FF_REG_D
"
\n\t
"
"movdqa (%%"
REG_D
"), %%xmm4
\n\t
"
"movdqa (%%"
FF_REG_D
"), %%xmm4
\n\t
"
"movdqa %%xmm5, %%xmm6
\n\t
"
"punpckhwd %%xmm7, %%xmm5
\n\t
"
"punpcklwd %%xmm7, %%xmm6
\n\t
"
"paddd %%xmm6, %%xmm4
\n\t
"
"movdqa 16(%%"
REG_D
"), %%xmm6
\n\t
"
"movdqa 16(%%"
FF_REG_D
"), %%xmm6
\n\t
"
"paddd %%xmm5, %%xmm6
\n\t
"
"paddd %%xmm3, %%xmm4
\n\t
"
"paddd %%xmm3, %%xmm6
\n\t
"
...
...
@@ -726,13 +726,13 @@ snow_inner_add_yblock_sse2_accum_8("0", "136")
"psrad $8, %%xmm2
\n\t
"
/* FRAC_BITS. */
"packssdw %%xmm2, %%xmm0
\n\t
"
"packuswb %%xmm7, %%xmm0
\n\t
"
"movq %%xmm0, (%%"
REG_d
")
\n\t
"
"movq %%xmm0, (%%"
FF_REG_d
")
\n\t
"
"psrad $8, %%xmm4
\n\t
"
/* FRAC_BITS. */
"psrad $8, %%xmm6
\n\t
"
/* FRAC_BITS. */
"packssdw %%xmm6, %%xmm4
\n\t
"
"packuswb %%xmm7, %%xmm4
\n\t
"
"movq %%xmm4, (%%"
REG_d
",%%"
REG_c
");
\n\t
"
"movq %%xmm4, (%%"
FF_REG_d
",%%"
FF_REG_c
");
\n\t
"
snow_inner_add_yblock_sse2_end_8
}
...
...
@@ -744,18 +744,18 @@ snow_inner_add_yblock_sse2_accum_16("2", "16")
snow_inner_add_yblock_sse2_accum_16
(
"1"
,
"512"
)
snow_inner_add_yblock_sse2_accum_16
(
"0"
,
"528"
)
"mov %0, %%"
REG_d
"
\n\t
"
"mov %0, %%"
FF_REG_d
"
\n\t
"
"psrlw $4, %%xmm1
\n\t
"
"psrlw $4, %%xmm5
\n\t
"
"paddw (%%"
REG_D
"), %%xmm1
\n\t
"
"paddw 16(%%"
REG_D
"), %%xmm5
\n\t
"
"paddw (%%"
FF_REG_D
"), %%xmm1
\n\t
"
"paddw 16(%%"
FF_REG_D
"), %%xmm5
\n\t
"
"paddw %%xmm3, %%xmm1
\n\t
"
"paddw %%xmm3, %%xmm5
\n\t
"
"psraw $4, %%xmm1
\n\t
"
/* FRAC_BITS. */
"psraw $4, %%xmm5
\n\t
"
/* FRAC_BITS. */
"packuswb %%xmm5, %%xmm1
\n\t
"
"movdqu %%xmm1, (%%"
REG_d
")
\n\t
"
"movdqu %%xmm1, (%%"
FF_REG_d
")
\n\t
"
snow_inner_add_yblock_sse2_end_16
}
...
...
@@ -764,26 +764,26 @@ snow_inner_add_yblock_sse2_end_16
IDWTELEM * * dst_array = sb->line + src_y;\
x86_reg tmp;\
__asm__ volatile(\
"mov %7, %%"
REG_c"
\n\t"\
"mov %7, %%"
FF_REG_c"
\n\t"\
"mov %6, %2 \n\t"\
"mov %4, %%"
REG_S"
\n\t"\
"mov %4, %%"
FF_REG_S"
\n\t"\
"pxor %%mm7, %%mm7 \n\t"
/* 0 */
\
"pcmpeqd %%mm3, %%mm3 \n\t"\
"psllw $15, %%mm3 \n\t"\
"psrlw $12, %%mm3 \n\t"
/* FRAC_BITS >> 1 */
\
"1: \n\t"\
"mov %1, %%"
REG_D"
\n\t"\
"mov (%%"
REG_D"), %%"REG_D"
\n\t"\
"add %3, %%"
REG_D"
\n\t"
"mov %1, %%"
FF_REG_D"
\n\t"\
"mov (%%"
FF_REG_D"), %%"FF_REG_D"
\n\t"\
"add %3, %%"
FF_REG_D"
\n\t"
#define snow_inner_add_yblock_mmx_start(out_reg1, out_reg2, ptr_offset, s_offset, d_offset)\
"mov "
PTR_SIZE"*"ptr_offset"(%%"REG_a"), %%"
REG_d"; \n\t"\
"movd "d_offset"(%%"
REG_d"), %%"out_reg1"
\n\t"\
"movd "d_offset"+4(%%"
REG_d"), %%"out_reg2"
\n\t"\
"mov "
FF_PTR_SIZE"*"ptr_offset"(%%"FF_REG_a"), %%"FF_
REG_d"; \n\t"\
"movd "d_offset"(%%"
FF_REG_d"), %%"out_reg1"
\n\t"\
"movd "d_offset"+4(%%"
FF_REG_d"), %%"out_reg2"
\n\t"\
"punpcklbw %%mm7, %%"out_reg1" \n\t"\
"punpcklbw %%mm7, %%"out_reg2" \n\t"\
"movd "s_offset"(%%"
REG_S"), %%mm0
\n\t"\
"movd "s_offset"+4(%%"REG_S"), %%mm4 \n\t"\
"movd "s_offset"(%%"
FF_REG_S"), %%mm0
\n\t"\
"movd "s_offset"+4(%%"
FF_
REG_S"), %%mm4 \n\t"\
"punpcklbw %%mm7, %%mm0 \n\t"\
"punpcklbw %%mm7, %%mm4 \n\t"\
"pmullw %%mm0, %%"out_reg1" \n\t"\
...
...
@@ -795,32 +795,32 @@ snow_inner_add_yblock_sse2_end_16
"paddusw %%mm6, %%mm5 \n\t"
#define snow_inner_add_yblock_mmx_mix(read_offset, write_offset)\
"mov %0, %%"
REG_d"
\n\t"\
"mov %0, %%"
FF_REG_d"
\n\t"\
"psrlw $4, %%mm1 \n\t"\
"psrlw $4, %%mm5 \n\t"\
"paddw "read_offset"(%%"
REG_D"), %%mm1
\n\t"\
"paddw "read_offset"+8(%%"REG_D"), %%mm5 \n\t"\
"paddw "read_offset"(%%"
FF_REG_D"), %%mm1
\n\t"\
"paddw "read_offset"+8(%%"
FF_
REG_D"), %%mm5 \n\t"\
"paddw %%mm3, %%mm1 \n\t"\
"paddw %%mm3, %%mm5 \n\t"\
"psraw $4, %%mm1 \n\t"\
"psraw $4, %%mm5 \n\t"\
"packuswb %%mm5, %%mm1 \n\t"\
"movq %%mm1, "write_offset"(%%"REG_d") \n\t"
"movq %%mm1, "write_offset"(%%"
FF_
REG_d") \n\t"
#define snow_inner_add_yblock_mmx_end(s_step)\
"add $"s_step", %%"
REG_S"
\n\t"\
"add %%"
REG_c", "PTR_SIZE"*3(%%"REG_a");
\n\t"\
"add %%"
REG_c", "PTR_SIZE"*2(%%"REG_a");
\n\t"\
"add %%"
REG_c", "PTR_SIZE"*1(%%"REG_a");
\n\t"\
"add %%"
REG_c", (%%"REG_a")
\n\t"\
"add"
OPSIZE " $"PTR_SIZE"*1, %1
\n\t"\
"add %%"
REG_c", %0
\n\t"\
"add $"s_step", %%"
FF_REG_S"
\n\t"\
"add %%"
FF_REG_c", "FF_PTR_SIZE"*3(%%"FF_REG_a");
\n\t"\
"add %%"
FF_REG_c", "FF_PTR_SIZE"*2(%%"FF_REG_a");
\n\t"\
"add %%"
FF_REG_c", "FF_PTR_SIZE"*1(%%"FF_REG_a");
\n\t"\
"add %%"
FF_REG_c", (%%"FF_REG_a")
\n\t"\
"add"
FF_OPSIZE " $"FF_PTR_SIZE"*1, %1
\n\t"\
"add %%"
FF_REG_c", %0
\n\t"\
"dec %2 \n\t"\
"jnz 1b \n\t"\
:"+m"(dst8),"+m"(dst_array),"=&r"(tmp)\
:\
"rm"((x86_reg)(src_x<<1)),"m"(obmc),"a"(block),"m"(b_h),"m"(src_stride):\
"%"
REG_c"","%"REG_S"","%"REG_D"","%"
REG_d"");
"%"
FF_REG_c"","%"FF_REG_S"","%"FF_REG_D"","%"FF_
REG_d"");
static
void
inner_add_yblock_bw_8_obmc_16_mmx
(
const
uint8_t
*
obmc
,
const
x86_reg
obmc_stride
,
uint8_t
*
*
block
,
int
b_w
,
x86_reg
b_h
,
int
src_x
,
int
src_y
,
x86_reg
src_stride
,
slice_buffer
*
sb
,
int
add
,
uint8_t
*
dst8
){
...
...
libavcodec/x86/vc1dsp_mmx.c
View file @
9eb3da2f
...
...
@@ -84,7 +84,7 @@ static void OPNAME ## vc1_shift2_mmx(uint8_t *dst, const uint8_t *src,\
{\
rnd = 8-rnd;\
__asm__ volatile(\
"mov $8, %%"
REG_c"
\n\t"\
"mov $8, %%"
FF_REG_c"
\n\t"\
LOAD_ROUNDER_MMX("%5")\
"movq "MANGLE(ff_pw_9)", %%mm6\n\t"\
"1: \n\t"\
...
...
@@ -119,13 +119,13 @@ static void OPNAME ## vc1_shift2_mmx(uint8_t *dst, const uint8_t *src,\
"movq %%mm3, (%1) \n\t"\
"add %6, %0 \n\t"\
"add %4, %1 \n\t"\
"dec %%"
REG_c"
\n\t"\
"dec %%"
FF_REG_c"
\n\t"\
"jnz 1b \n\t"\
: "+r"(src), "+r"(dst)\
: "r"(offset), "r"(-2*offset), "g"(stride), "m"(rnd),\
"g"(stride-offset)\
NAMED_CONSTRAINTS_ADD(ff_pw_9)\
: "%"REG_c, "memory"\
: "%"
FF_
REG_c, "memory"\
);\
}
...
...
libavfilter/x86/vf_noise.c
View file @
9eb3da2f
...
...
@@ -32,22 +32,22 @@ static void line_noise_mmx(uint8_t *dst, const uint8_t *src,
noise
+=
shift
;
__asm__
volatile
(
"mov %3, %%"
REG_a
"
\n\t
"
"mov %3, %%"
FF_REG_a
"
\n\t
"
"pcmpeqb %%mm7, %%mm7
\n\t
"
"psllw $15, %%mm7
\n\t
"
"packsswb %%mm7, %%mm7
\n\t
"
".p2align 4
\n\t
"
"1:
\n\t
"
"movq (%0, %%"
REG_a
"), %%mm0
\n\t
"
"movq (%1, %%"
REG_a
"), %%mm1
\n\t
"
"movq (%0, %%"
FF_REG_a
"), %%mm0
\n\t
"
"movq (%1, %%"
FF_REG_a
"), %%mm1
\n\t
"
"pxor %%mm7, %%mm0
\n\t
"
"paddsb %%mm1, %%mm0
\n\t
"
"pxor %%mm7, %%mm0
\n\t
"
"movq %%mm0, (%2, %%"
REG_a
")
\n\t
"
"add $8, %%"
REG_a
"
\n\t
"
"movq %%mm0, (%2, %%"
FF_REG_a
")
\n\t
"
"add $8, %%"
FF_REG_a
"
\n\t
"
" js 1b
\n\t
"
::
"r"
(
src
+
mmx_len
),
"r"
(
noise
+
mmx_len
),
"r"
(
dst
+
mmx_len
),
"g"
(
-
mmx_len
)
:
"%"
REG_a
:
"%"
FF_
REG_a
);
if
(
mmx_len
!=
len
)
ff_line_noise_c
(
dst
+
mmx_len
,
src
+
mmx_len
,
noise
+
mmx_len
,
len
-
mmx_len
,
0
);
...
...
@@ -60,13 +60,13 @@ static void line_noise_avg_mmx(uint8_t *dst, const uint8_t *src,
x86_reg
mmx_len
=
len
&
(
~
7
);
__asm__
volatile
(
"mov %5, %%"
REG_a
"
\n\t
"
"mov %5, %%"
FF_REG_a
"
\n\t
"
".p2align 4
\n\t
"
"1:
\n\t
"
"movq (%1, %%"
REG_a
"), %%mm1
\n\t
"
"movq (%0, %%"
REG_a
"), %%mm0
\n\t
"
"paddb (%2, %%"
REG_a
"), %%mm1
\n\t
"
"paddb (%3, %%"
REG_a
"), %%mm1
\n\t
"
"movq (%1, %%"
FF_REG_a
"), %%mm1
\n\t
"
"movq (%0, %%"
FF_REG_a
"), %%mm0
\n\t
"
"paddb (%2, %%"
FF_REG_a
"), %%mm1
\n\t
"
"paddb (%3, %%"
FF_REG_a
"), %%mm1
\n\t
"
"movq %%mm0, %%mm2
\n\t
"
"movq %%mm1, %%mm3
\n\t
"
"punpcklbw %%mm0, %%mm0
\n\t
"
...
...
@@ -82,12 +82,12 @@ static void line_noise_avg_mmx(uint8_t *dst, const uint8_t *src,
"psrlw $8, %%mm1
\n\t
"
"psrlw $8, %%mm3
\n\t
"
"packuswb %%mm3, %%mm1
\n\t
"
"movq %%mm1, (%4, %%"
REG_a
")
\n\t
"
"add $8, %%"
REG_a
"
\n\t
"
"movq %%mm1, (%4, %%"
FF_REG_a
")
\n\t
"
"add $8, %%"
FF_REG_a
"
\n\t
"
" js 1b
\n\t
"
::
"r"
(
src
+
mmx_len
),
"r"
(
shift
[
0
]
+
mmx_len
),
"r"
(
shift
[
1
]
+
mmx_len
),
"r"
(
shift
[
2
]
+
mmx_len
),
"r"
(
dst
+
mmx_len
),
"g"
(
-
mmx_len
)
:
"%"
REG_a
:
"%"
FF_
REG_a
);
if
(
mmx_len
!=
len
){
...
...
@@ -104,22 +104,22 @@ static void line_noise_mmxext(uint8_t *dst, const uint8_t *src,
noise
+=
shift
;
__asm__
volatile
(
"mov %3, %%"
REG_a
"
\n\t
"
"mov %3, %%"
FF_REG_a
"
\n\t
"
"pcmpeqb %%mm7, %%mm7
\n\t
"
"psllw $15, %%mm7
\n\t
"
"packsswb %%mm7, %%mm7
\n\t
"
".p2align 4
\n\t
"
"1:
\n\t
"
"movq (%0, %%"
REG_a
"), %%mm0
\n\t
"
"movq (%1, %%"
REG_a
"), %%mm1
\n\t
"
"movq (%0, %%"
FF_REG_a
"), %%mm0
\n\t
"
"movq (%1, %%"
FF_REG_a
"), %%mm1
\n\t
"
"pxor %%mm7, %%mm0
\n\t
"
"paddsb %%mm1, %%mm0
\n\t
"
"pxor %%mm7, %%mm0
\n\t
"
"movntq %%mm0, (%2, %%"
REG_a
")
\n\t
"
"add $8, %%"
REG_a
"
\n\t
"
"movntq %%mm0, (%2, %%"
FF_REG_a
")
\n\t
"
"add $8, %%"
FF_REG_a
"
\n\t
"
" js 1b
\n\t
"
::
"r"
(
src
+
mmx_len
),
"r"
(
noise
+
mmx_len
),
"r"
(
dst
+
mmx_len
),
"g"
(
-
mmx_len
)
:
"%"
REG_a
:
"%"
FF_
REG_a
);
if
(
mmx_len
!=
len
)
ff_line_noise_c
(
dst
+
mmx_len
,
src
+
mmx_len
,
noise
+
mmx_len
,
len
-
mmx_len
,
0
);
...
...
libavutil/x86/asm.h
View file @
9eb3da2f
...
...
@@ -28,46 +28,46 @@ typedef struct xmm_reg { uint64_t a, b; } xmm_reg;
typedef
struct
ymm_reg
{
uint64_t
a
,
b
,
c
,
d
;
}
ymm_reg
;
#if ARCH_X86_64
# define OPSIZE "q"
# define REG_a "rax"
# define REG_b "rbx"
# define REG_c "rcx"
# define REG_d "rdx"
# define REG_D "rdi"
# define REG_S "rsi"
# define PTR_SIZE "8"
# define
FF_
OPSIZE "q"
# define
FF_
REG_a "rax"
# define
FF_
REG_b "rbx"
# define
FF_
REG_c "rcx"
# define
FF_
REG_d "rdx"
# define
FF_
REG_D "rdi"
# define
FF_
REG_S "rsi"
# define
FF_
PTR_SIZE "8"
typedef
int64_t
x86_reg
;
/*
REG_SP is defined in Solaris sys headers, so use
REG_sp */
# define REG_sp "rsp"
# define REG_BP "rbp"
# define REGBP rbp
# define REGa rax
# define REGb rbx
# define REGc rcx
# define REGd rdx
# define REGSP rsp
/*
FF_REG_SP is defined in Solaris sys headers, so use FF_
REG_sp */
# define
FF_
REG_sp "rsp"
# define
FF_
REG_BP "rbp"
# define
FF_
REGBP rbp
# define
FF_
REGa rax
# define
FF_
REGb rbx
# define
FF_
REGc rcx
# define
FF_
REGd rdx
# define
FF_
REGSP rsp
#elif ARCH_X86_32
# define OPSIZE "l"
# define REG_a "eax"
# define REG_b "ebx"
# define REG_c "ecx"
# define REG_d "edx"
# define REG_D "edi"
# define REG_S "esi"
# define PTR_SIZE "4"
# define
FF_
OPSIZE "l"
# define
FF_
REG_a "eax"
# define
FF_
REG_b "ebx"
# define
FF_
REG_c "ecx"
# define
FF_
REG_d "edx"
# define
FF_
REG_D "edi"
# define
FF_
REG_S "esi"
# define
FF_
PTR_SIZE "4"
typedef
int32_t
x86_reg
;
# define REG_sp "esp"
# define REG_BP "ebp"
# define REGBP ebp
# define REGa eax
# define REGb ebx
# define REGc ecx
# define REGd edx
# define REGSP esp
# define
FF_
REG_sp "esp"
# define
FF_
REG_BP "ebp"
# define
FF_
REGBP ebp
# define
FF_
REGa eax
# define
FF_
REGb ebx
# define
FF_
REGc ecx
# define
FF_
REGd edx
# define
FF_
REGSP esp
#else
typedef
int
x86_reg
;
#endif
...
...
libavutil/x86/cpu.c
View file @
9eb3da2f
...
...
@@ -41,9 +41,9 @@
/* ebx saving is necessary for PIC. gcc seems unable to see it alone */
#define cpuid(index, eax, ebx, ecx, edx) \
__asm__ volatile ( \
"mov %%"
REG_b", %%"REG_S" \n\t"
\
"mov %%"
FF_REG_b", %%"FF_REG_S" \n\t"
\
"cpuid \n\t" \
"xchg %%"
REG_b", %%"REG_S
\
"xchg %%"
FF_REG_b", %%"FF_REG_S
\
: "=a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx) \
: "0" (index), "2"(0))
...
...
libpostproc/postprocess_template.c
View file @
9eb3da2f
...
...
@@ -118,12 +118,12 @@ static inline int RENAME(vertClassify)(const uint8_t src[], int stride, PPContex
);
__asm__
volatile
(
"lea (%2, %3), %%"
REG_a
"
\n\t
"
"lea (%2, %3), %%"
FF_REG_a
"
\n\t
"
// 0 1 2 3 4 5 6 7 8 9
// %1 eax eax+%2 eax+2%2 %1+4%2 ecx ecx+%2 ecx+2%2 %1+8%2 ecx+4%2
"movq (%2), %%mm0
\n\t
"
"movq (%%"
REG_a
"), %%mm1
\n\t
"
"movq (%%"
FF_REG_a
"), %%mm1
\n\t
"
"movq %%mm0, %%mm3
\n\t
"
"movq %%mm0, %%mm4
\n\t
"
PMAXUB
(
%%
mm1
,
%%
mm4
)
...
...
@@ -132,7 +132,7 @@ static inline int RENAME(vertClassify)(const uint8_t src[], int stride, PPContex
"paddb %%mm7, %%mm0
\n\t
"
"pcmpgtb %%mm6, %%mm0
\n\t
"
"movq (%%"
REG_a
",%3), %%mm2
\n\t
"
"movq (%%"
FF_REG_a
",%3), %%mm2
\n\t
"
PMAXUB
(
%%
mm2
,
%%
mm4
)
PMINUB
(
%%
mm2
,
%%
mm3
,
%%
mm5
)
"psubb %%mm2, %%mm1
\n\t
"
...
...
@@ -140,7 +140,7 @@ static inline int RENAME(vertClassify)(const uint8_t src[], int stride, PPContex
"pcmpgtb %%mm6, %%mm1
\n\t
"
"paddb %%mm1, %%mm0
\n\t
"
"movq (%%"
REG_a
", %3, 2), %%mm1
\n\t
"
"movq (%%"
FF_REG_a
", %3, 2), %%mm1
\n\t
"
PMAXUB
(
%%
mm1
,
%%
mm4
)
PMINUB
(
%%
mm1
,
%%
mm3
,
%%
mm5
)
"psubb %%mm1, %%mm2
\n\t
"
...
...
@@ -148,7 +148,7 @@ static inline int RENAME(vertClassify)(const uint8_t src[], int stride, PPContex
"pcmpgtb %%mm6, %%mm2
\n\t
"
"paddb %%mm2, %%mm0
\n\t
"
"lea (%%"
REG_a
", %3, 4), %%"
REG_a
"
\n\t
"
"lea (%%"
FF_REG_a
", %3, 4), %%"
FF_REG_a
"
\n\t
"
"movq (%2, %3, 4), %%mm2
\n\t
"
PMAXUB
(
%%
mm2
,
%%
mm4
)
...
...
@@ -158,7 +158,7 @@ static inline int RENAME(vertClassify)(const uint8_t src[], int stride, PPContex
"pcmpgtb %%mm6, %%mm1
\n\t
"
"paddb %%mm1, %%mm0
\n\t
"
"movq (%%"
REG_a
"), %%mm1
\n\t
"
"movq (%%"
FF_REG_a
"), %%mm1
\n\t
"
PMAXUB
(
%%
mm1
,
%%
mm4
)
PMINUB
(
%%
mm1
,
%%
mm3
,
%%
mm5
)
"psubb %%mm1, %%mm2
\n\t
"
...
...
@@ -166,7 +166,7 @@ static inline int RENAME(vertClassify)(const uint8_t src[], int stride, PPContex
"pcmpgtb %%mm6, %%mm2
\n\t
"
"paddb %%mm2, %%mm0
\n\t
"
"movq (%%"
REG_a
", %3), %%mm2
\n\t
"
"movq (%%"
FF_REG_a
", %3), %%mm2
\n\t
"
PMAXUB
(
%%
mm2
,
%%
mm4
)
PMINUB
(
%%
mm2
,
%%
mm3
,
%%
mm5
)
"psubb %%mm2, %%mm1
\n\t
"
...
...
@@ -174,7 +174,7 @@ static inline int RENAME(vertClassify)(const uint8_t src[], int stride, PPContex
"pcmpgtb %%mm6, %%mm1
\n\t
"
"paddb %%mm1, %%mm0
\n\t
"
"movq (%%"
REG_a
", %3, 2), %%mm1
\n\t
"
"movq (%%"
FF_REG_a
", %3, 2), %%mm1
\n\t
"
PMAXUB
(
%%
mm1
,
%%
mm4
)
PMINUB
(
%%
mm1
,
%%
mm3
,
%%
mm5
)
"psubb %%mm1, %%mm2
\n\t
"
...
...
@@ -207,7 +207,7 @@ static inline int RENAME(vertClassify)(const uint8_t src[], int stride, PPContex
:
"=r"
(
numEq
),
"=r"
(
dcOk
)
:
"r"
(
src
),
"r"
((
x86_reg
)
stride
),
"m"
(
c
->
pQPb
)
:
"%"
REG_a
:
"%"
FF_
REG_a
);
numEq
=
(
-
numEq
)
&
0xFF
;
...
...
@@ -248,9 +248,9 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
"por %%mm2, %%mm6
\n\t
"
// First Line to Filter
"movq (%0, %1, 8), %%mm5
\n\t
"
"lea (%0, %1, 4), %%"
REG_a
"
\n\t
"
"lea (%0, %1, 8), %%"
REG_c
"
\n\t
"
"sub %1, %%"
REG_c
"
\n\t
"
"lea (%0, %1, 4), %%"
FF_REG_a
"
\n\t
"
"lea (%0, %1, 8), %%"
FF_REG_c
"
\n\t
"
"sub %1, %%"
FF_REG_c
"
\n\t
"
"add %1, %0
\n\t
"
// %0 points to line 1 not 0
"movq (%0, %1, 8), %%mm7
\n\t
"
"movq %%mm5, %%mm1
\n\t
"
...
...
@@ -279,7 +279,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
"movq (%0, %1, 4), %%mm2
\n\t
"
// 1
"movq %%mm2, %%mm5
\n\t
"
// 1
PAVGB
((
%%
REGa
),
%%
mm2
)
// 11 /2
PAVGB
((
%%
FF_REGa
),
%%
mm2
)
// 11 /2
PAVGB
((
%
0
,
%
1
,
2
),
%%
mm2
)
// 211 /4
"movq %%mm2, %%mm3
\n\t
"
// 211 /4
"movq (%0), %%mm4
\n\t
"
// 1
...
...
@@ -291,15 +291,15 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
PAVGB
(
%%
mm6
,
%%
mm0
)
//1 1 /2
"movq %%mm4, %%mm3
\n\t
"
// 1
PAVGB
((
%
0
,
%
1
,
2
),
%%
mm3
)
// 1 1 /2
PAVGB
((
%%
REGa
,
%
1
,
2
),
%%
mm5
)
// 11 /2
PAVGB
((
%%
REGa
),
%%
mm5
)
// 211 /4
PAVGB
((
%%
FF_REGa
,
%
1
,
2
),
%%
mm5
)
// 11 /2
PAVGB
((
%%
FF_REGa
),
%%
mm5
)
// 211 /4
PAVGB
(
%%
mm5
,
%%
mm3
)
// 2 2211 /8
PAVGB
(
%%
mm0
,
%%
mm3
)
//4242211 /16
"movq %%mm3, (%0,%1)
\n\t
"
// X
// mm1=2 mm2=3(211) mm4=1 mm5=4(211) mm6=0 mm7=9
PAVGB
(
%%
mm4
,
%%
mm6
)
//11 /2
"movq (%%"
REG_c
"), %%mm0
\n\t
"
// 1
PAVGB
((
%%
REGa
,
%
1
,
2
),
%%
mm0
)
// 11/2
"movq (%%"
FF_REG_c
"), %%mm0
\n\t
"
// 1
PAVGB
((
%%
FF_REGa
,
%
1
,
2
),
%%
mm0
)
// 11/2
"movq %%mm0, %%mm3
\n\t
"
// 11/2
PAVGB
(
%%
mm1
,
%%
mm0
)
// 2 11/4
PAVGB
(
%%
mm6
,
%%
mm0
)
//222 11/8
...
...
@@ -307,17 +307,17 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
"movq (%0, %1, 2), %%mm2
\n\t
"
// 1
"movq %%mm0, (%0, %1, 2)
\n\t
"
// X
// mm1=2 mm2=3 mm3=6(11) mm4=1 mm5=4(211) mm6=0(11) mm7=9
"movq (%%"
REG_a
", %1, 4), %%mm0
\n\t
"
// 1
PAVGB
((
%%
REGc
),
%%
mm0
)
// 11 /2
"movq (%%"
FF_REG_a
", %1, 4), %%mm0
\n\t
"
// 1
PAVGB
((
%%
FF_REGc
),
%%
mm0
)
// 11 /2
PAVGB
(
%%
mm0
,
%%
mm6
)
//11 11 /4
PAVGB
(
%%
mm1
,
%%
mm4
)
// 11 /2
PAVGB
(
%%
mm2
,
%%
mm1
)
// 11 /2
PAVGB
(
%%
mm1
,
%%
mm6
)
//1122 11 /8
PAVGB
(
%%
mm5
,
%%
mm6
)
//112242211 /16
"movq (%%"
REG_a
"), %%mm5
\n\t
"
// 1
"movq %%mm6, (%%"
REG_a
")
\n\t
"
// X
"movq (%%"
FF_REG_a
"), %%mm5
\n\t
"
// 1
"movq %%mm6, (%%"
FF_REG_a
")
\n\t
"
// X
// mm0=7(11) mm1=2(11) mm2=3 mm3=6(11) mm4=1(11) mm5=4 mm7=9
"movq (%%"
REG_a
", %1, 4), %%mm6
\n\t
"
// 1
"movq (%%"
FF_REG_a
", %1, 4), %%mm6
\n\t
"
// 1
PAVGB
(
%%
mm7
,
%%
mm6
)
// 11 /2
PAVGB
(
%%
mm4
,
%%
mm6
)
// 11 11 /4
PAVGB
(
%%
mm3
,
%%
mm6
)
// 11 2211 /8
...
...
@@ -330,29 +330,29 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c)
PAVGB
(
%%
mm7
,
%%
mm1
)
// 11 2 /4
PAVGB
(
%%
mm4
,
%%
mm5
)
// 11 /2
PAVGB
(
%%
mm5
,
%%
mm0
)
// 11 11 /4
"movq (%%"
REG_a
", %1, 2), %%mm6
\n\t
"
// 1
"movq (%%"
FF_REG_a
", %1, 2), %%mm6
\n\t
"
// 1
PAVGB
(
%%
mm6
,
%%
mm1
)
// 11 4 2 /8
PAVGB
(
%%
mm0
,
%%
mm1
)
// 11224222 /16
"movq %%mm1, (%%"
REG_a
", %1, 2)
\n\t
"
// X
"movq %%mm1, (%%"
FF_REG_a
", %1, 2)
\n\t
"
// X
// mm2=3(112) mm3=6(11) mm4=5 mm5=4(11) mm6=6 mm7=9
PAVGB
((
%%
REGc
),
%%
mm2
)
// 112 4 /8
"movq (%%"
REG_a
", %1, 4), %%mm0
\n\t
"
// 1
PAVGB
((
%%
FF_REGc
),
%%
mm2
)
// 112 4 /8
"movq (%%"
FF_REG_a
", %1, 4), %%mm0
\n\t
"
// 1
PAVGB
(
%%
mm0
,
%%
mm6
)
// 1 1 /2
PAVGB
(
%%
mm7
,
%%
mm6
)
// 1 12 /4
PAVGB
(
%%
mm2
,
%%
mm6
)
// 1122424 /4
"movq %%mm6, (%%"
REG_c
")
\n\t
"
// X
"movq %%mm6, (%%"
FF_REG_c
")
\n\t
"
// X
// mm0=8 mm3=6(11) mm4=5 mm5=4(11) mm7=9
PAVGB
(
%%
mm7
,
%%
mm5
)
// 11 2 /4
PAVGB
(
%%
mm7
,
%%
mm5
)
// 11 6 /8
PAVGB
(
%%
mm3
,
%%
mm0
)
// 112 /4
PAVGB
(
%%
mm0
,
%%
mm5
)
// 112246 /16
"movq %%mm5, (%%"
REG_a
", %1, 4)
\n\t
"
// X
"movq %%mm5, (%%"
FF_REG_a
", %1, 4)
\n\t
"
// X
"sub %1, %0
\n\t
"
:
:
"r"
(
src
),
"r"
((
x86_reg
)
stride
),
"m"
(
c
->
pQPb
)
:
"%"
REG_a
,
"%"
REG_c
:
"%"
FF_REG_a
,
"%"
FF_
REG_c
);
#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
const
int
l1
=
stride
;
...
...
@@ -411,18 +411,18 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
__asm__
volatile
(
"pxor %%mm7, %%mm7
\n\t
"
// 0
"lea (%0, %1), %%"
REG_a
"
\n\t
"
"lea (%%"
REG_a
", %1, 4), %%"
REG_c
"
\n\t
"
"lea (%0, %1), %%"
FF_REG_a
"
\n\t
"
"lea (%%"
FF_REG_a
", %1, 4), %%"
FF_REG_c
"
\n\t
"
// 0 1 2 3 4 5 6 7 8 9
// %0 eax eax+%1 eax+2%1 %0+4%1 ecx ecx+%1 ecx+2%1 %0+8%1 ecx+4%1
"movq (%%"
REG_a
", %1, 2), %%mm0
\n\t
"
// line 3
"movq (%%"
FF_REG_a
", %1, 2), %%mm0
\n\t
"
// line 3
"movq (%0, %1, 4), %%mm1
\n\t
"
// line 4
"movq %%mm1, %%mm2
\n\t
"
// line 4
"psubusb %%mm0, %%mm1
\n\t
"
"psubusb %%mm2, %%mm0
\n\t
"
"por %%mm1, %%mm0
\n\t
"
// |l2 - l3|
"movq (%%"
REG_c
"), %%mm3
\n\t
"
// line 5
"movq (%%"
REG_c
", %1), %%mm4
\n\t
"
// line 6
"movq (%%"
FF_REG_c
"), %%mm3
\n\t
"
// line 5
"movq (%%"
FF_REG_c
", %1), %%mm4
\n\t
"
// line 6
"movq %%mm3, %%mm5
\n\t
"
// line 5
"psubusb %%mm4, %%mm3
\n\t
"
"psubusb %%mm5, %%mm4
\n\t
"
...
...
@@ -454,44 +454,44 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co)
"pxor %%mm2, %%mm0
\n\t
"
"movq %%mm0, (%0, %1, 4)
\n\t
"
// line 4
"movq (%%"
REG_c
"), %%mm0
\n\t
"
// line 5
"movq (%%"
FF_REG_c
"), %%mm0
\n\t
"
// line 5
"pxor %%mm2, %%mm0
\n\t
"
//(l4 - l5) <= 0 ? -l5-1 : l5
"paddusb %%mm3, %%mm0
\n\t
"
"pxor %%mm2, %%mm0
\n\t
"
"movq %%mm0, (%%"
REG_c
")
\n\t
"
// line 5
"movq %%mm0, (%%"
FF_REG_c
")
\n\t
"
// line 5
PAVGB
(
%%
mm7
,
%%
mm1
)
// d/4
"movq (%%"
REG_a
", %1, 2), %%mm0
\n\t
"
// line 3
"movq (%%"
FF_REG_a
", %1, 2), %%mm0
\n\t
"
// line 3
"pxor %%mm2, %%mm0
\n\t
"
//(l4 - l5) <= 0 ? -l4-1 : l4
"psubusb %%mm1, %%mm0
\n\t
"
"pxor %%mm2, %%mm0
\n\t
"
"movq %%mm0, (%%"
REG_a
", %1, 2)
\n\t
"
// line 3
"movq %%mm0, (%%"
FF_REG_a
", %1, 2)
\n\t
"
// line 3
"movq (%%"
REG_c
", %1), %%mm0
\n\t
"
// line 6
"movq (%%"
FF_REG_c
", %1), %%mm0
\n\t
"
// line 6
"pxor %%mm2, %%mm0
\n\t
"
//(l4 - l5) <= 0 ? -l5-1 : l5
"paddusb %%mm1, %%mm0
\n\t
"
"pxor %%mm2, %%mm0
\n\t
"
"movq %%mm0, (%%"
REG_c
", %1)
\n\t
"
// line 6
"movq %%mm0, (%%"
FF_REG_c
", %1)
\n\t
"
// line 6
PAVGB
(
%%
mm7
,
%%
mm1
)
// d/8
"movq (%%"
REG_a
", %1), %%mm0
\n\t
"
// line 2
"movq (%%"
FF_REG_a
", %1), %%mm0
\n\t
"
// line 2
"pxor %%mm2, %%mm0
\n\t
"
//(l4 - l5) <= 0 ? -l2-1 : l2
"psubusb %%mm1, %%mm0
\n\t
"
"pxor %%mm2, %%mm0
\n\t
"
"movq %%mm0, (%%"
REG_a
", %1)
\n\t
"
// line 2
"movq %%mm0, (%%"
FF_REG_a
", %1)
\n\t
"
// line 2
"movq (%%"
REG_c
", %1, 2), %%mm0
\n\t
"
// line 7
"movq (%%"
FF_REG_c
", %1, 2), %%mm0
\n\t
"
// line 7
"pxor %%mm2, %%mm0
\n\t
"
//(l4 - l5) <= 0 ? -l7-1 : l7
"paddusb %%mm1, %%mm0
\n\t
"
"pxor %%mm2, %%mm0
\n\t
"
"movq %%mm0, (%%"
REG_c
", %1, 2)
\n\t
"
// line 7
"movq %%mm0, (%%"
FF_REG_c
", %1, 2)
\n\t
"
// line 7
:
:
"r"
(
src
),
"r"
((
x86_reg
)
stride
),
"m"
(
co
->
pQPb
)
NAMED_CONSTRAINTS_ADD
(
b01
)
:
"%"
REG_a
,
"%"
REG_c
:
"%"
FF_REG_a
,
"%"
FF_
REG_c
);
#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
...
...
@@ -553,8 +553,8 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
#if 0 //slightly more accurate and slightly slower
"pxor %%mm7, %%mm7 \n\t" // 0
"lea (%0, %1), %%"
REG_a"
\n\t"
"lea (%%"
REG_a", %1, 4), %%"REG_c"
\n\t"
"lea (%0, %1), %%"
FF_REG_a"
\n\t"
"lea (%%"
FF_REG_a", %1, 4), %%"FF_REG_c"
\n\t"
// 0 1 2 3 4 5 6 7
// %0 %0+%1 %0+2%1 eax+2%1 %0+4%1 eax+4%1 ecx+%1 ecx+2%1
// %0 eax eax+%1 eax+2%1 %0+4%1 ecx ecx+%1 ecx+2%1
...
...
@@ -567,8 +567,8 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
PAVGB(%%mm1, %%mm0) // ~(l2 + 2l0)/4
PAVGB(%%mm2, %%mm0) // ~(5l2 + 2l0)/8
"movq (%%"
REG_a"), %%mm1
\n\t" // l1
"movq (%%"
REG_a", %1, 2), %%mm3
\n\t" // l3
"movq (%%"
FF_REG_a"), %%mm1
\n\t" // l1
"movq (%%"
FF_REG_a", %1, 2), %%mm3
\n\t" // l3
"movq %%mm1, %%mm4 \n\t" // l1
PAVGB(%%mm7, %%mm1) // ~l1/2
PAVGB(%%mm3, %%mm1) // ~(l1 + 2l3)/4
...
...
@@ -586,7 +586,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
PAVGB(%%mm2, %%mm0) // ~(l4 + 2l2)/4
PAVGB(%%mm4, %%mm0) // ~(5l4 + 2l2)/8
"movq (%%"
REG_c"), %%mm2
\n\t" // l5
"movq (%%"
FF_REG_c"), %%mm2
\n\t" // l5
"movq %%mm3, %%mm5 \n\t" // l3
PAVGB(%%mm7, %%mm3) // ~l3/2
PAVGB(%%mm2, %%mm3) // ~(l3 + 2l5)/4
...
...
@@ -599,13 +599,13 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
"pcmpeqb %%mm7, %%mm0 \n\t" // SIGN(2l2 - 5l3 + 5l4 - 2l5)
// mm0= SIGN(menergy), mm1= |lenergy|, mm2= l5, mm3= |menergy|, mm4=l4, mm5= l3, mm7=0
"movq (%%"
REG_c", %1), %%mm6
\n\t" // l6
"movq (%%"
FF_REG_c", %1), %%mm6
\n\t" // l6
"movq %%mm6, %%mm5 \n\t" // l6
PAVGB(%%mm7, %%mm6) // ~l6/2
PAVGB(%%mm4, %%mm6) // ~(l6 + 2l4)/4
PAVGB(%%mm5, %%mm6) // ~(5l6 + 2l4)/8
"movq (%%"
REG_c", %1, 2), %%mm5
\n\t" // l7
"movq (%%"
FF_REG_c", %1, 2), %%mm5
\n\t" // l7
"movq %%mm2, %%mm4 \n\t" // l5
PAVGB(%%mm7, %%mm2) // ~l5/2
PAVGB(%%mm5, %%mm2) // ~(l5 + 2l7)/4
...
...
@@ -632,7 +632,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
"paddusb %%mm1, %%mm3 \n\t"
// "paddusb "MANGLE(b01)", %%mm3 \n\t"
"movq (%%"
REG_a", %1, 2), %%mm6
\n\t" //l3
"movq (%%"
FF_REG_a", %1, 2), %%mm6
\n\t" //l3
"movq (%0, %1, 4), %%mm5 \n\t" //l4
"movq (%0, %1, 4), %%mm4 \n\t" //l4
"psubusb %%mm6, %%mm5 \n\t"
...
...
@@ -646,7 +646,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
"psubusb "MANGLE(b01)", %%mm3 \n\t"
PAVGB(%%mm7, %%mm3)
"movq (%%"
REG_a", %1, 2), %%mm0
\n\t"
"movq (%%"
FF_REG_a", %1, 2), %%mm0
\n\t"
"movq (%0, %1, 4), %%mm2 \n\t"
"pxor %%mm6, %%mm0 \n\t"
"pxor %%mm6, %%mm2 \n\t"
...
...
@@ -654,36 +654,36 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
"paddb %%mm3, %%mm2 \n\t"
"pxor %%mm6, %%mm0 \n\t"
"pxor %%mm6, %%mm2 \n\t"
"movq %%mm0, (%%"
REG_a", %1, 2)
\n\t"
"movq %%mm0, (%%"
FF_REG_a", %1, 2)
\n\t"
"movq %%mm2, (%0, %1, 4) \n\t"
#endif //0
"lea (%0, %1), %%"
REG_a
"
\n\t
"
"lea (%0, %1), %%"
FF_REG_a
"
\n\t
"
"pcmpeqb %%mm6, %%mm6
\n\t
"
// -1
// 0 1 2 3 4 5 6 7
// %0 %0+%1 %0+2%1 eax+2%1 %0+4%1 eax+4%1 ecx+%1 ecx+2%1
// %0 eax eax+%1 eax+2%1 %0+4%1 ecx ecx+%1 ecx+2%1
"movq (%%"
REG_a
", %1, 2), %%mm1
\n\t
"
// l3
"movq (%%"
FF_REG_a
", %1, 2), %%mm1
\n\t
"
// l3
"movq (%0, %1, 4), %%mm0
\n\t
"
// l4
"pxor %%mm6, %%mm1
\n\t
"
// -l3-1
PAVGB
(
%%
mm1
,
%%
mm0
)
// -q+128 = (l4-l3+256)/2
// mm1=-l3-1, mm0=128-q
"movq (%%"
REG_a
", %1, 4), %%mm2
\n\t
"
// l5
"movq (%%"
REG_a
", %1), %%mm3
\n\t
"
// l2
"movq (%%"
FF_REG_a
", %1, 4), %%mm2
\n\t
"
// l5
"movq (%%"
FF_REG_a
", %1), %%mm3
\n\t
"
// l2
"pxor %%mm6, %%mm2
\n\t
"
// -l5-1
"movq %%mm2, %%mm5
\n\t
"
// -l5-1
"movq "
MANGLE
(
b80
)
", %%mm4
\n\t
"
// 128
"lea (%%"
REG_a
", %1, 4), %%"
REG_c
"
\n\t
"
"lea (%%"
FF_REG_a
", %1, 4), %%"
FF_REG_c
"
\n\t
"
PAVGB
(
%%
mm3
,
%%
mm2
)
// (l2-l5+256)/2
PAVGB
(
%%
mm0
,
%%
mm4
)
// ~(l4-l3)/4 + 128
PAVGB
(
%%
mm2
,
%%
mm4
)
// ~(l2-l5)/4 +(l4-l3)/8 + 128
PAVGB
(
%%
mm0
,
%%
mm4
)
// ~(l2-l5)/8 +5(l4-l3)/16 + 128
// mm1=-l3-1, mm0=128-q, mm3=l2, mm4=menergy/16 + 128, mm5= -l5-1
"movq (%%"
REG_a
"), %%mm2
\n\t
"
// l1
"movq (%%"
FF_REG_a
"), %%mm2
\n\t
"
// l1
"pxor %%mm6, %%mm2
\n\t
"
// -l1-1
PAVGB
(
%%
mm3
,
%%
mm2
)
// (l2-l1+256)/2
PAVGB
((
%
0
),
%%
mm1
)
// (l0-l3+256)/2
...
...
@@ -693,8 +693,8 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
PAVGB
(
%%
mm2
,
%%
mm3
)
// ~(l0-l3)/8 +5(l2-l1)/16 + 128
// mm0=128-q, mm3=lenergy/16 + 128, mm4= menergy/16 + 128, mm5= -l5-1
PAVGB
((
%%
REGc
,
%
1
),
%%
mm5
)
// (l6-l5+256)/2
"movq (%%"
REG_c
", %1, 2), %%mm1
\n\t
"
// l7
PAVGB
((
%%
FF_REGc
,
%
1
),
%%
mm5
)
// (l6-l5+256)/2
"movq (%%"
FF_REG_c
", %1, 2), %%mm1
\n\t
"
// l7
"pxor %%mm6, %%mm1
\n\t
"
// -l7-1
PAVGB
((
%
0
,
%
1
,
4
),
%%
mm1
)
// (l4-l7+256)/2
"movq "
MANGLE
(
b80
)
", %%mm2
\n\t
"
// 128
...
...
@@ -743,7 +743,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
"pxor %%mm1, %%mm7
\n\t
"
// SIGN(d*q)
"pand %%mm7, %%mm4
\n\t
"
"movq (%%"
REG_a
", %1, 2), %%mm0
\n\t
"
"movq (%%"
FF_REG_a
", %1, 2), %%mm0
\n\t
"
"movq (%0, %1, 4), %%mm2
\n\t
"
"pxor %%mm1, %%mm0
\n\t
"
"pxor %%mm1, %%mm2
\n\t
"
...
...
@@ -751,13 +751,13 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
"psubb %%mm4, %%mm2
\n\t
"
"pxor %%mm1, %%mm0
\n\t
"
"pxor %%mm1, %%mm2
\n\t
"
"movq %%mm0, (%%"
REG_a
", %1, 2)
\n\t
"
"movq %%mm0, (%%"
FF_REG_a
", %1, 2)
\n\t
"
"movq %%mm2, (%0, %1, 4)
\n\t
"
:
:
"r"
(
src
),
"r"
((
x86_reg
)
stride
),
"m"
(
c
->
pQPb
)
NAMED_CONSTRAINTS_ADD
(
b80
,
b00
,
b01
)
:
"%"
REG_a
,
"%"
REG_c
:
"%"
FF_REG_a
,
"%"
FF_
REG_c
);
/*
...
...
@@ -830,12 +830,12 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
"punpckhbw %%mm7, %%mm1
\n\t
"
// high part of line 0
"movq (%0, %1), %%mm2
\n\t
"
"lea (%0, %1, 2), %%"
REG_a
"
\n\t
"
"lea (%0, %1, 2), %%"
FF_REG_a
"
\n\t
"
"movq %%mm2, %%mm3
\n\t
"
"punpcklbw %%mm7, %%mm2
\n\t
"
// low part of line 1
"punpckhbw %%mm7, %%mm3
\n\t
"
// high part of line 1
"movq (%%"
REG_a
"), %%mm4
\n\t
"
"movq (%%"
FF_REG_a
"), %%mm4
\n\t
"
"movq %%mm4, %%mm5
\n\t
"
"punpcklbw %%mm7, %%mm4
\n\t
"
// low part of line 2
"punpckhbw %%mm7, %%mm5
\n\t
"
// high part of line 2
...
...
@@ -852,7 +852,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
"psubw %%mm2, %%mm0
\n\t
"
// 2L0 - 5L1 + 5L2
"psubw %%mm3, %%mm1
\n\t
"
// 2H0 - 5H1 + 5H2
"movq (%%"
REG_a
", %1), %%mm2
\n\t
"
"movq (%%"
FF_REG_a
", %1), %%mm2
\n\t
"
"movq %%mm2, %%mm3
\n\t
"
"punpcklbw %%mm7, %%mm2
\n\t
"
// L3
"punpckhbw %%mm7, %%mm3
\n\t
"
// H3
...
...
@@ -864,7 +864,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
"movq %%mm0, (%3)
\n\t
"
// 2L0 - 5L1 + 5L2 - 2L3
"movq %%mm1, 8(%3)
\n\t
"
// 2H0 - 5H1 + 5H2 - 2H3
"movq (%%"
REG_a
", %1, 2), %%mm0
\n\t
"
"movq (%%"
FF_REG_a
", %1, 2), %%mm0
\n\t
"
"movq %%mm0, %%mm1
\n\t
"
"punpcklbw %%mm7, %%mm0
\n\t
"
// L4
"punpckhbw %%mm7, %%mm1
\n\t
"
// H4
...
...
@@ -878,7 +878,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
"psubw %%mm2, %%mm4
\n\t
"
// 2L2 - L3 + L4
"psubw %%mm3, %%mm5
\n\t
"
// 2H2 - H3 + H4
"lea (%%"
REG_a
", %1), %0
\n\t
"
"lea (%%"
FF_REG_a
", %1), %0
\n\t
"
"psllw $2, %%mm2
\n\t
"
// 4L3 - 4L4
"psllw $2, %%mm3
\n\t
"
// 4H3 - 4H4
"psubw %%mm2, %%mm4
\n\t
"
// 2L2 - 5L3 + 5L4
...
...
@@ -893,10 +893,10 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
"psubw %%mm2, %%mm4
\n\t
"
// 2L2 - 5L3 + 5L4 - 2L5
"psubw %%mm3, %%mm5
\n\t
"
// 2H2 - 5H3 + 5H4 - 2H5
"movq (%%"
REG_a
", %1, 4), %%mm6
\n\t
"
"movq (%%"
FF_REG_a
", %1, 4), %%mm6
\n\t
"
"punpcklbw %%mm7, %%mm6
\n\t
"
// L6
"psubw %%mm6, %%mm2
\n\t
"
// L5 - L6
"movq (%%"
REG_a
", %1, 4), %%mm6
\n\t
"
"movq (%%"
FF_REG_a
", %1, 4), %%mm6
\n\t
"
"punpckhbw %%mm7, %%mm6
\n\t
"
// H6
"psubw %%mm6, %%mm3
\n\t
"
// H5 - H6
...
...
@@ -1045,7 +1045,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext
:
"+r"
(
src
)
:
"r"
((
x86_reg
)
stride
),
"m"
(
c
->
pQPb
),
"r"
(
tmp
)
NAMED_CONSTRAINTS_ADD
(
w05
,
w20
)
:
"%"
REG_a
:
"%"
FF_
REG_a
);
#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
const
int
l1
=
stride
;
...
...
@@ -1104,8 +1104,8 @@ static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c)
"packuswb %%mm0, %%mm0
\n\t
"
"movq %%mm0, %3
\n\t
"
"lea (%0, %1), %%"
REG_a
"
\n\t
"
"lea (%%"
REG_a
", %1, 4), %%"
REG_d
"
\n\t
"
"lea (%0, %1), %%"
FF_REG_a
"
\n\t
"
"lea (%%"
FF_REG_a
", %1, 4), %%"
FF_REG_d
"
\n\t
"
// 0 1 2 3 4 5 6 7 8 9
// %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1
...
...
@@ -1128,13 +1128,13 @@ static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c)
#endif
#define FIND_MIN_MAX(addr) REAL_FIND_MIN_MAX(addr)
FIND_MIN_MAX
((
%%
REGa
))
FIND_MIN_MAX
((
%%
REGa
,
%
1
))
FIND_MIN_MAX
((
%%
REGa
,
%
1
,
2
))
FIND_MIN_MAX
((
%%
FF_
REGa
))
FIND_MIN_MAX
((
%%
FF_
REGa
,
%
1
))
FIND_MIN_MAX
((
%%
FF_
REGa
,
%
1
,
2
))
FIND_MIN_MAX
((
%
0
,
%
1
,
4
))
FIND_MIN_MAX
((
%%
REGd
))
FIND_MIN_MAX
((
%%
REGd
,
%
1
))
FIND_MIN_MAX
((
%%
REGd
,
%
1
,
2
))
FIND_MIN_MAX
((
%%
FF_
REGd
))
FIND_MIN_MAX
((
%%
FF_
REGd
,
%
1
))
FIND_MIN_MAX
((
%%
FF_
REGd
,
%
1
,
2
))
FIND_MIN_MAX
((
%
0
,
%
1
,
8
))
"movq %%mm7, %%mm4
\n\t
"
...
...
@@ -1218,13 +1218,13 @@ FIND_MIN_MAX((%0, %1, 8))
"paddb %%mm2, %%mm0
\n\t
"
"paddb %%mm3, %%mm0
\n\t
"
"movq (%%"
REG_a
"), %%mm2
\n\t
"
// L11
"movq (%%"
FF_REG_a
"), %%mm2
\n\t
"
// L11
"movq %%mm2, %%mm3
\n\t
"
// L11
"movq %%mm2, %%mm4
\n\t
"
// L11
"psllq $8, %%mm3
\n\t
"
"psrlq $8, %%mm4
\n\t
"
"movd -4(%%"
REG_a
"), %%mm5
\n\t
"
"movd 8(%%"
REG_a
"), %%mm6
\n\t
"
"movd -4(%%"
FF_REG_a
"), %%mm5
\n\t
"
"movd 8(%%"
FF_REG_a
"), %%mm6
\n\t
"
"psrlq $24, %%mm5
\n\t
"
"psllq $56, %%mm6
\n\t
"
"por %%mm5, %%mm3
\n\t
"
// L01
...
...
@@ -1305,19 +1305,19 @@ FIND_MIN_MAX((%0, %1, 8))
*/
//DERING_CORE(dst ,src ,ppsx ,psx ,sx ,pplx ,plx ,lx ,t0 ,t1)
DERING_CORE
((
%%
REGa
)
,(
%%
REGa
,
%
1
)
,
%%
mm0
,
%%
mm2
,
%%
mm4
,
%%
mm1
,
%%
mm3
,
%%
mm5
,
%%
mm6
,
%%
mm7
)
DERING_CORE
((
%%
REGa
,
%
1
)
,(
%%
REGa
,
%
1
,
2
),
%%
mm2
,
%%
mm4
,
%%
mm0
,
%%
mm3
,
%%
mm5
,
%%
mm1
,
%%
mm6
,
%%
mm7
)
DERING_CORE
((
%%
REGa
,
%
1
,
2
),(
%
0
,
%
1
,
4
)
,
%%
mm4
,
%%
mm0
,
%%
mm2
,
%%
mm5
,
%%
mm1
,
%%
mm3
,
%%
mm6
,
%%
mm7
)
DERING_CORE
((
%
0
,
%
1
,
4
)
,(
%%
REGd
)
,
%%
mm0
,
%%
mm2
,
%%
mm4
,
%%
mm1
,
%%
mm3
,
%%
mm5
,
%%
mm6
,
%%
mm7
)
DERING_CORE
((
%%
REGd
)
,(
%%
REGd
,
%
1
)
,
%%
mm2
,
%%
mm4
,
%%
mm0
,
%%
mm3
,
%%
mm5
,
%%
mm1
,
%%
mm6
,
%%
mm7
)
DERING_CORE
((
%%
REGd
,
%
1
)
,(
%%
REGd
,
%
1
,
2
),
%%
mm4
,
%%
mm0
,
%%
mm2
,
%%
mm5
,
%%
mm1
,
%%
mm3
,
%%
mm6
,
%%
mm7
)
DERING_CORE
((
%%
REGd
,
%
1
,
2
),(
%
0
,
%
1
,
8
)
,
%%
mm0
,
%%
mm2
,
%%
mm4
,
%%
mm1
,
%%
mm3
,
%%
mm5
,
%%
mm6
,
%%
mm7
)
DERING_CORE
((
%
0
,
%
1
,
8
)
,(
%%
REGd
,
%
1
,
4
),
%%
mm2
,
%%
mm4
,
%%
mm0
,
%%
mm3
,
%%
mm5
,
%%
mm1
,
%%
mm6
,
%%
mm7
)
DERING_CORE
((
%%
FF_REGa
)
,(
%%
FF_
REGa
,
%
1
)
,
%%
mm0
,
%%
mm2
,
%%
mm4
,
%%
mm1
,
%%
mm3
,
%%
mm5
,
%%
mm6
,
%%
mm7
)
DERING_CORE
((
%%
FF_REGa
,
%
1
)
,(
%%
FF_
REGa
,
%
1
,
2
),
%%
mm2
,
%%
mm4
,
%%
mm0
,
%%
mm3
,
%%
mm5
,
%%
mm1
,
%%
mm6
,
%%
mm7
)
DERING_CORE
((
%%
FF_REGa
,
%
1
,
2
),(
%
0
,
%
1
,
4
)
,
%%
mm4
,
%%
mm0
,
%%
mm2
,
%%
mm5
,
%%
mm1
,
%%
mm3
,
%%
mm6
,
%%
mm7
)
DERING_CORE
((
%
0
,
%
1
,
4
)
,(
%%
FF_
REGd
)
,
%%
mm0
,
%%
mm2
,
%%
mm4
,
%%
mm1
,
%%
mm3
,
%%
mm5
,
%%
mm6
,
%%
mm7
)
DERING_CORE
((
%%
FF_REGd
)
,(
%%
FF_
REGd
,
%
1
)
,
%%
mm2
,
%%
mm4
,
%%
mm0
,
%%
mm3
,
%%
mm5
,
%%
mm1
,
%%
mm6
,
%%
mm7
)
DERING_CORE
((
%%
FF_REGd
,
%
1
)
,(
%%
FF_
REGd
,
%
1
,
2
),
%%
mm4
,
%%
mm0
,
%%
mm2
,
%%
mm5
,
%%
mm1
,
%%
mm3
,
%%
mm6
,
%%
mm7
)
DERING_CORE
((
%%
FF_REGd
,
%
1
,
2
),(
%
0
,
%
1
,
8
)
,
%%
mm0
,
%%
mm2
,
%%
mm4
,
%%
mm1
,
%%
mm3
,
%%
mm5
,
%%
mm6
,
%%
mm7
)
DERING_CORE
((
%
0
,
%
1
,
8
)
,(
%%
FF_
REGd
,
%
1
,
4
),
%%
mm2
,
%%
mm4
,
%%
mm0
,
%%
mm3
,
%%
mm5
,
%%
mm1
,
%%
mm6
,
%%
mm7
)
"1:
\n\t
"
:
:
"r"
(
src
),
"r"
((
x86_reg
)
stride
),
"m"
(
c
->
pQPb
),
"m"
(
c
->
pQPb2
),
"q"
(
tmp
)
NAMED_CONSTRAINTS_ADD
(
deringThreshold
,
b00
,
b02
,
b08
)
:
"%"
REG_a
,
"%"
REG_d
,
"%"
REG_sp
:
"%"
FF_REG_a
,
"%"
FF_REG_d
,
"%"
FF_
REG_sp
);
#else // HAVE_7REGS && (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW)
int
y
;
...
...
@@ -1452,27 +1452,27 @@ static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int strid
#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
src
+=
4
*
stride
;
__asm__
volatile
(
"lea (%0, %1), %%"
REG_a
"
\n\t
"
"lea (%%"
REG_a
", %1, 4), %%"
REG_c
"
\n\t
"
"lea (%0, %1), %%"
FF_REG_a
"
\n\t
"
"lea (%%"
FF_REG_a
", %1, 4), %%"
FF_REG_c
"
\n\t
"
// 0 1 2 3 4 5 6 7 8 9
// %0 eax eax+%1 eax+2%1 %0+4%1 ecx ecx+%1 ecx+2%1 %0+8%1 ecx+4%1
"movq (%0), %%mm0
\n\t
"
"movq (%%"
REG_a
", %1), %%mm1
\n\t
"
"movq (%%"
FF_REG_a
", %1), %%mm1
\n\t
"
PAVGB
(
%%
mm1
,
%%
mm0
)
"movq %%mm0, (%%"
REG_a
")
\n\t
"
"movq %%mm0, (%%"
FF_REG_a
")
\n\t
"
"movq (%0, %1, 4), %%mm0
\n\t
"
PAVGB
(
%%
mm0
,
%%
mm1
)
"movq %%mm1, (%%"
REG_a
", %1, 2)
\n\t
"
"movq (%%"
REG_c
", %1), %%mm1
\n\t
"
"movq %%mm1, (%%"
FF_REG_a
", %1, 2)
\n\t
"
"movq (%%"
FF_REG_c
", %1), %%mm1
\n\t
"
PAVGB
(
%%
mm1
,
%%
mm0
)
"movq %%mm0, (%%"
REG_c
")
\n\t
"
"movq %%mm0, (%%"
FF_REG_c
")
\n\t
"
"movq (%0, %1, 8), %%mm0
\n\t
"
PAVGB
(
%%
mm0
,
%%
mm1
)
"movq %%mm1, (%%"
REG_c
", %1, 2)
\n\t
"
"movq %%mm1, (%%"
FF_REG_c
", %1, 2)
\n\t
"
:
:
"r"
(
src
),
"r"
((
x86_reg
)
stride
)
:
"%"
REG_a
,
"%"
REG_c
:
"%"
FF_REG_a
,
"%"
FF_
REG_c
);
#else
int
a
,
b
,
x
;
...
...
@@ -1505,10 +1505,10 @@ static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride
#if TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
src
+=
stride
*
3
;
__asm__
volatile
(
"lea (%0, %1), %%"
REG_a
"
\n\t
"
"lea (%%"
REG_a
", %1, 4), %%"
REG_d
"
\n\t
"
"lea (%%"
REG_d
", %1, 4), %%"
REG_c
"
\n\t
"
"add %1, %%"
REG_c
"
\n\t
"
"lea (%0, %1), %%"
FF_REG_a
"
\n\t
"
"lea (%%"
FF_REG_a
", %1, 4), %%"
FF_REG_d
"
\n\t
"
"lea (%%"
FF_REG_d
", %1, 4), %%"
FF_REG_c
"
\n\t
"
"add %1, %%"
FF_REG_c
"
\n\t
"
#if TEMPLATE_PP_SSE2
"pxor %%xmm7, %%xmm7
\n\t
"
#define REAL_DEINT_CUBIC(a,b,c,d,e)\
...
...
@@ -1554,17 +1554,17 @@ static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride
#endif //TEMPLATE_PP_SSE2
#define DEINT_CUBIC(a,b,c,d,e) REAL_DEINT_CUBIC(a,b,c,d,e)
DEINT_CUBIC
((
%
0
)
,
(
%%
REGa
,
%
1
),
(
%%
REGa
,
%
1
,
2
),
(
%
0
,
%
1
,
4
)
,
(
%%
REGd
,
%
1
))
DEINT_CUBIC
((
%%
REGa
,
%
1
),
(
%
0
,
%
1
,
4
)
,
(
%%
REGd
)
,
(
%%
REGd
,
%
1
),
(
%
0
,
%
1
,
8
))
DEINT_CUBIC
((
%
0
,
%
1
,
4
)
,
(
%%
REGd
,
%
1
),
(
%%
REGd
,
%
1
,
2
),
(
%
0
,
%
1
,
8
)
,
(
%%
REGc
))
DEINT_CUBIC
((
%%
REGd
,
%
1
),
(
%
0
,
%
1
,
8
)
,
(
%%
REGd
,
%
1
,
4
),
(
%%
REGc
)
,
(
%%
REGc
,
%
1
,
2
))
DEINT_CUBIC
((
%
0
)
,
(
%%
FF_REGa
,
%
1
),
(
%%
FF_REGa
,
%
1
,
2
),
(
%
0
,
%
1
,
4
)
,
(
%%
FF_
REGd
,
%
1
))
DEINT_CUBIC
((
%%
FF_REGa
,
%
1
),
(
%
0
,
%
1
,
4
)
,
(
%%
FF_REGd
)
,
(
%%
FF_
REGd
,
%
1
),
(
%
0
,
%
1
,
8
))
DEINT_CUBIC
((
%
0
,
%
1
,
4
)
,
(
%%
FF_REGd
,
%
1
),
(
%%
FF_REGd
,
%
1
,
2
),
(
%
0
,
%
1
,
8
)
,
(
%%
FF_
REGc
))
DEINT_CUBIC
((
%%
FF_REGd
,
%
1
),
(
%
0
,
%
1
,
8
)
,
(
%%
FF_REGd
,
%
1
,
4
),
(
%%
FF_REGc
)
,
(
%%
FF_
REGc
,
%
1
,
2
))
:
:
"r"
(
src
),
"r"
((
x86_reg
)
stride
)
:
#if TEMPLATE_PP_SSE2
XMM_CLOBBERS
(
"%xmm0"
,
"%xmm1"
,
"%xmm2"
,
"%xmm3"
,
"%xmm7"
,)
#endif
"%"
REG_a
,
"%"
REG_d
,
"%"
REG_c
"%"
FF_REG_a
,
"%"
FF_REG_d
,
"%"
FF_
REG_c
);
#undef REAL_DEINT_CUBIC
#else //TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
...
...
@@ -1592,8 +1592,8 @@ static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp
#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
src
+=
stride
*
4
;
__asm__
volatile
(
"lea (%0, %1), %%"
REG_a
"
\n\t
"
"lea (%%"
REG_a
", %1, 4), %%"
REG_d
"
\n\t
"
"lea (%0, %1), %%"
FF_REG_a
"
\n\t
"
"lea (%%"
FF_REG_a
", %1, 4), %%"
FF_REG_d
"
\n\t
"
"pxor %%mm7, %%mm7
\n\t
"
"movq (%2), %%mm0
\n\t
"
// 0 1 2 3 4 5 6 7 8 9 10
...
...
@@ -1629,14 +1629,14 @@ static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp
#define DEINT_FF(a,b,c,d) REAL_DEINT_FF(a,b,c,d)
DEINT_FF
((
%
0
)
,
(
%%
REGa
)
,
(
%%
REGa
,
%
1
),
(
%%
REGa
,
%
1
,
2
))
DEINT_FF
((
%%
REGa
,
%
1
),
(
%%
REGa
,
%
1
,
2
),
(
%
0
,
%
1
,
4
)
,
(
%%
REGd
)
)
DEINT_FF
((
%
0
,
%
1
,
4
)
,
(
%%
REGd
)
,
(
%%
REGd
,
%
1
),
(
%%
REGd
,
%
1
,
2
))
DEINT_FF
((
%%
REGd
,
%
1
),
(
%%
REGd
,
%
1
,
2
),
(
%
0
,
%
1
,
8
)
,
(
%%
REGd
,
%
1
,
4
))
DEINT_FF
((
%
0
)
,
(
%%
FF_REGa
)
,
(
%%
FF_REGa
,
%
1
),
(
%%
FF_
REGa
,
%
1
,
2
))
DEINT_FF
((
%%
FF_REGa
,
%
1
),
(
%%
FF_REGa
,
%
1
,
2
),
(
%
0
,
%
1
,
4
)
,
(
%%
FF_
REGd
)
)
DEINT_FF
((
%
0
,
%
1
,
4
)
,
(
%%
FF_REGd
)
,
(
%%
FF_REGd
,
%
1
),
(
%%
FF_
REGd
,
%
1
,
2
))
DEINT_FF
((
%%
FF_REGd
,
%
1
),
(
%%
FF_REGd
,
%
1
,
2
),
(
%
0
,
%
1
,
8
)
,
(
%%
FF_
REGd
,
%
1
,
4
))
"movq %%mm0, (%2)
\n\t
"
:
:
"r"
(
src
),
"r"
((
x86_reg
)
stride
),
"r"
(
tmp
)
:
"%"
REG_a
,
"%"
REG_d
:
"%"
FF_REG_a
,
"%"
FF_
REG_d
);
#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
int
x
;
...
...
@@ -1671,8 +1671,8 @@ static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp
#if (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS
src
+=
stride
*
4
;
__asm__
volatile
(
"lea (%0, %1), %%"
REG_a
"
\n\t
"
"lea (%%"
REG_a
", %1, 4), %%"
REG_d
"
\n\t
"
"lea (%0, %1), %%"
FF_REG_a
"
\n\t
"
"lea (%%"
FF_REG_a
", %1, 4), %%"
FF_REG_d
"
\n\t
"
"pxor %%mm7, %%mm7
\n\t
"
"movq (%2), %%mm0
\n\t
"
"movq (%3), %%mm1
\n\t
"
...
...
@@ -1714,19 +1714,19 @@ static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp
#define DEINT_L5(t1,t2,a,b,c) REAL_DEINT_L5(t1,t2,a,b,c)
DEINT_L5
(
%%
mm0
,
%%
mm1
,
(
%
0
)
,
(
%%
REGa
)
,
(
%%
REGa
,
%
1
)
)
DEINT_L5
(
%%
mm1
,
%%
mm0
,
(
%%
REGa
)
,
(
%%
REGa
,
%
1
)
,
(
%%
REGa
,
%
1
,
2
))
DEINT_L5
(
%%
mm0
,
%%
mm1
,
(
%%
REGa
,
%
1
)
,
(
%%
REGa
,
%
1
,
2
),
(
%
0
,
%
1
,
4
)
)
DEINT_L5
(
%%
mm1
,
%%
mm0
,
(
%%
REGa
,
%
1
,
2
),
(
%
0
,
%
1
,
4
)
,
(
%%
REGd
)
)
DEINT_L5
(
%%
mm0
,
%%
mm1
,
(
%
0
,
%
1
,
4
)
,
(
%%
REGd
)
,
(
%%
REGd
,
%
1
)
)
DEINT_L5
(
%%
mm1
,
%%
mm0
,
(
%%
REGd
)
,
(
%%
REGd
,
%
1
)
,
(
%%
REGd
,
%
1
,
2
))
DEINT_L5
(
%%
mm0
,
%%
mm1
,
(
%%
REGd
,
%
1
)
,
(
%%
REGd
,
%
1
,
2
),
(
%
0
,
%
1
,
8
)
)
DEINT_L5
(
%%
mm1
,
%%
mm0
,
(
%%
REGd
,
%
1
,
2
),
(
%
0
,
%
1
,
8
)
,
(
%%
REGd
,
%
1
,
4
))
DEINT_L5
(
%%
mm0
,
%%
mm1
,
(
%
0
)
,
(
%%
FF_REGa
)
,
(
%%
FF_
REGa
,
%
1
)
)
DEINT_L5
(
%%
mm1
,
%%
mm0
,
(
%%
FF_REGa
)
,
(
%%
FF_REGa
,
%
1
)
,
(
%%
FF_
REGa
,
%
1
,
2
))
DEINT_L5
(
%%
mm0
,
%%
mm1
,
(
%%
FF_REGa
,
%
1
)
,
(
%%
FF_
REGa
,
%
1
,
2
),
(
%
0
,
%
1
,
4
)
)
DEINT_L5
(
%%
mm1
,
%%
mm0
,
(
%%
FF_REGa
,
%
1
,
2
),
(
%
0
,
%
1
,
4
)
,
(
%%
FF_
REGd
)
)
DEINT_L5
(
%%
mm0
,
%%
mm1
,
(
%
0
,
%
1
,
4
)
,
(
%%
FF_REGd
)
,
(
%%
FF_
REGd
,
%
1
)
)
DEINT_L5
(
%%
mm1
,
%%
mm0
,
(
%%
FF_REGd
)
,
(
%%
FF_REGd
,
%
1
)
,
(
%%
FF_
REGd
,
%
1
,
2
))
DEINT_L5
(
%%
mm0
,
%%
mm1
,
(
%%
FF_REGd
,
%
1
)
,
(
%%
FF_
REGd
,
%
1
,
2
),
(
%
0
,
%
1
,
8
)
)
DEINT_L5
(
%%
mm1
,
%%
mm0
,
(
%%
FF_REGd
,
%
1
,
2
),
(
%
0
,
%
1
,
8
)
,
(
%%
FF_
REGd
,
%
1
,
4
))
"movq %%mm0, (%2)
\n\t
"
"movq %%mm1, (%3)
\n\t
"
:
:
"r"
(
src
),
"r"
((
x86_reg
)
stride
),
"r"
(
tmp
),
"r"
(
tmp2
)
:
"%"
REG_a
,
"%"
REG_d
:
"%"
FF_REG_a
,
"%"
FF_
REG_d
);
#else //(TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS
int
x
;
...
...
@@ -1772,49 +1772,49 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin
#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
src
+=
4
*
stride
;
__asm__
volatile
(
"lea (%0, %1), %%"
REG_a
"
\n\t
"
"lea (%%"
REG_a
", %1, 4), %%"
REG_d
"
\n\t
"
"lea (%0, %1), %%"
FF_REG_a
"
\n\t
"
"lea (%%"
FF_REG_a
", %1, 4), %%"
FF_REG_d
"
\n\t
"
// 0 1 2 3 4 5 6 7 8 9
// %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1
"movq (%2), %%mm0
\n\t
"
// L0
"movq (%%"
REG_a
"), %%mm1
\n\t
"
// L2
"movq (%%"
FF_REG_a
"), %%mm1
\n\t
"
// L2
PAVGB
(
%%
mm1
,
%%
mm0
)
// L0+L2
"movq (%0), %%mm2
\n\t
"
// L1
PAVGB
(
%%
mm2
,
%%
mm0
)
"movq %%mm0, (%0)
\n\t
"
"movq (%%"
REG_a
", %1), %%mm0
\n\t
"
// L3
"movq (%%"
FF_REG_a
", %1), %%mm0
\n\t
"
// L3
PAVGB
(
%%
mm0
,
%%
mm2
)
// L1+L3
PAVGB
(
%%
mm1
,
%%
mm2
)
// 2L2 + L1 + L3
"movq %%mm2, (%%"
REG_a
")
\n\t
"
"movq (%%"
REG_a
", %1, 2), %%mm2
\n\t
"
// L4
"movq %%mm2, (%%"
FF_REG_a
")
\n\t
"
"movq (%%"
FF_REG_a
", %1, 2), %%mm2
\n\t
"
// L4
PAVGB
(
%%
mm2
,
%%
mm1
)
// L2+L4
PAVGB
(
%%
mm0
,
%%
mm1
)
// 2L3 + L2 + L4
"movq %%mm1, (%%"
REG_a
", %1)
\n\t
"
"movq %%mm1, (%%"
FF_REG_a
", %1)
\n\t
"
"movq (%0, %1, 4), %%mm1
\n\t
"
// L5
PAVGB
(
%%
mm1
,
%%
mm0
)
// L3+L5
PAVGB
(
%%
mm2
,
%%
mm0
)
// 2L4 + L3 + L5
"movq %%mm0, (%%"
REG_a
", %1, 2)
\n\t
"
"movq (%%"
REG_d
"), %%mm0
\n\t
"
// L6
"movq %%mm0, (%%"
FF_REG_a
", %1, 2)
\n\t
"
"movq (%%"
FF_REG_d
"), %%mm0
\n\t
"
// L6
PAVGB
(
%%
mm0
,
%%
mm2
)
// L4+L6
PAVGB
(
%%
mm1
,
%%
mm2
)
// 2L5 + L4 + L6
"movq %%mm2, (%0, %1, 4)
\n\t
"
"movq (%%"
REG_d
", %1), %%mm2
\n\t
"
// L7
"movq (%%"
FF_REG_d
", %1), %%mm2
\n\t
"
// L7
PAVGB
(
%%
mm2
,
%%
mm1
)
// L5+L7
PAVGB
(
%%
mm0
,
%%
mm1
)
// 2L6 + L5 + L7
"movq %%mm1, (%%"
REG_d
")
\n\t
"
"movq (%%"
REG_d
", %1, 2), %%mm1
\n\t
"
// L8
"movq %%mm1, (%%"
FF_REG_d
")
\n\t
"
"movq (%%"
FF_REG_d
", %1, 2), %%mm1
\n\t
"
// L8
PAVGB
(
%%
mm1
,
%%
mm0
)
// L6+L8
PAVGB
(
%%
mm2
,
%%
mm0
)
// 2L7 + L6 + L8
"movq %%mm0, (%%"
REG_d
", %1)
\n\t
"
"movq %%mm0, (%%"
FF_REG_d
", %1)
\n\t
"
"movq (%0, %1, 8), %%mm0
\n\t
"
// L9
PAVGB
(
%%
mm0
,
%%
mm2
)
// L7+L9
PAVGB
(
%%
mm1
,
%%
mm2
)
// 2L8 + L7 + L9
"movq %%mm2, (%%"
REG_d
", %1, 2)
\n\t
"
"movq %%mm2, (%%"
FF_REG_d
", %1, 2)
\n\t
"
"movq %%mm1, (%2)
\n\t
"
:
:
"r"
(
src
),
"r"
((
x86_reg
)
stride
),
"r"
(
tmp
)
:
"%"
REG_a
,
"%"
REG_d
:
"%"
FF_REG_a
,
"%"
FF_
REG_d
);
#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW
int
a
,
b
,
c
,
x
;
...
...
@@ -1874,57 +1874,57 @@ static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride)
src
+=
4
*
stride
;
#if TEMPLATE_PP_MMXEXT
__asm__
volatile
(
"lea (%0, %1), %%"
REG_a
"
\n\t
"
"lea (%%"
REG_a
", %1, 4), %%"
REG_d
"
\n\t
"
"lea (%0, %1), %%"
FF_REG_a
"
\n\t
"
"lea (%%"
FF_REG_a
", %1, 4), %%"
FF_REG_d
"
\n\t
"
// 0 1 2 3 4 5 6 7 8 9
// %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1
"movq (%0), %%mm0
\n\t
"
"movq (%%"
REG_a
", %1), %%mm2
\n\t
"
"movq (%%"
REG_a
"), %%mm1
\n\t
"
"movq (%%"
FF_REG_a
", %1), %%mm2
\n\t
"
"movq (%%"
FF_REG_a
"), %%mm1
\n\t
"
"movq %%mm0, %%mm3
\n\t
"
"pmaxub %%mm1, %%mm0
\n\t
"
"pminub %%mm3, %%mm1
\n\t
"
"pmaxub %%mm2, %%mm1
\n\t
"
"pminub %%mm1, %%mm0
\n\t
"
"movq %%mm0, (%%"
REG_a
")
\n\t
"
"movq %%mm0, (%%"
FF_REG_a
")
\n\t
"
"movq (%0, %1, 4), %%mm0
\n\t
"
"movq (%%"
REG_a
", %1, 2), %%mm1
\n\t
"
"movq (%%"
FF_REG_a
", %1, 2), %%mm1
\n\t
"
"movq %%mm2, %%mm3
\n\t
"
"pmaxub %%mm1, %%mm2
\n\t
"
"pminub %%mm3, %%mm1
\n\t
"
"pmaxub %%mm0, %%mm1
\n\t
"
"pminub %%mm1, %%mm2
\n\t
"
"movq %%mm2, (%%"
REG_a
", %1, 2)
\n\t
"
"movq %%mm2, (%%"
FF_REG_a
", %1, 2)
\n\t
"
"movq (%%"
REG_d
"), %%mm2
\n\t
"
"movq (%%"
REG_d
", %1), %%mm1
\n\t
"
"movq (%%"
FF_REG_d
"), %%mm2
\n\t
"
"movq (%%"
FF_REG_d
", %1), %%mm1
\n\t
"
"movq %%mm2, %%mm3
\n\t
"
"pmaxub %%mm0, %%mm2
\n\t
"
"pminub %%mm3, %%mm0
\n\t
"
"pmaxub %%mm1, %%mm0
\n\t
"
"pminub %%mm0, %%mm2
\n\t
"
"movq %%mm2, (%%"
REG_d
")
\n\t
"
"movq %%mm2, (%%"
FF_REG_d
")
\n\t
"
"movq (%%"
REG_d
", %1, 2), %%mm2
\n\t
"
"movq (%%"
FF_REG_d
", %1, 2), %%mm2
\n\t
"
"movq (%0, %1, 8), %%mm0
\n\t
"
"movq %%mm2, %%mm3
\n\t
"
"pmaxub %%mm0, %%mm2
\n\t
"
"pminub %%mm3, %%mm0
\n\t
"
"pmaxub %%mm1, %%mm0
\n\t
"
"pminub %%mm0, %%mm2
\n\t
"
"movq %%mm2, (%%"
REG_d
", %1, 2)
\n\t
"
"movq %%mm2, (%%"
FF_REG_d
", %1, 2)
\n\t
"
:
:
"r"
(
src
),
"r"
((
x86_reg
)
stride
)
:
"%"
REG_a
,
"%"
REG_d
:
"%"
FF_REG_a
,
"%"
FF_
REG_d
);
#else // MMX without MMX2
__asm__
volatile
(
"lea (%0, %1), %%"
REG_a
"
\n\t
"
"lea (%%"
REG_a
", %1, 4), %%"
REG_d
"
\n\t
"
"lea (%0, %1), %%"
FF_REG_a
"
\n\t
"
"lea (%%"
FF_REG_a
", %1, 4), %%"
FF_REG_d
"
\n\t
"
// 0 1 2 3 4 5 6 7 8 9
// %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1
"pxor %%mm7, %%mm7
\n\t
"
...
...
@@ -1954,13 +1954,13 @@ static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride)
"movq %%mm0, " #b " \n\t"
#define MEDIAN(a,b,c) REAL_MEDIAN(a,b,c)
MEDIAN
((
%
0
)
,
(
%%
REGa
)
,
(
%%
REGa
,
%
1
))
MEDIAN
((
%%
REGa
,
%
1
),
(
%%
REGa
,
%
1
,
2
),
(
%
0
,
%
1
,
4
))
MEDIAN
((
%
0
,
%
1
,
4
)
,
(
%%
REGd
)
,
(
%%
REGd
,
%
1
))
MEDIAN
((
%%
REGd
,
%
1
),
(
%%
REGd
,
%
1
,
2
),
(
%
0
,
%
1
,
8
))
MEDIAN
((
%
0
)
,
(
%%
FF_REGa
)
,
(
%%
FF_
REGa
,
%
1
))
MEDIAN
((
%%
FF_REGa
,
%
1
),
(
%%
FF_
REGa
,
%
1
,
2
),
(
%
0
,
%
1
,
4
))
MEDIAN
((
%
0
,
%
1
,
4
)
,
(
%%
FF_REGd
)
,
(
%%
FF_
REGd
,
%
1
))
MEDIAN
((
%%
FF_REGd
,
%
1
),
(
%%
FF_
REGd
,
%
1
,
2
),
(
%
0
,
%
1
,
8
))
:
:
"r"
(
src
),
"r"
((
x86_reg
)
stride
)
:
"%"
REG_a
,
"%"
REG_d
:
"%"
FF_REG_a
,
"%"
FF_
REG_d
);
#endif //TEMPLATE_PP_MMXEXT
#else //TEMPLATE_PP_MMX
...
...
@@ -1992,17 +1992,17 @@ MEDIAN((%%REGd, %1), (%%REGd, %1, 2), (%0, %1, 8))
static
inline
void
RENAME
(
transpose1
)(
uint8_t
*
dst1
,
uint8_t
*
dst2
,
const
uint8_t
*
src
,
int
srcStride
)
{
__asm__
(
"lea (%0, %1), %%"
REG_a
"
\n\t
"
"lea (%0, %1), %%"
FF_REG_a
"
\n\t
"
// 0 1 2 3 4 5 6 7 8 9
// %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1
"movq (%0), %%mm0
\n\t
"
// 12345678
"movq (%%"
REG_a
"), %%mm1
\n\t
"
// abcdefgh
"movq (%%"
FF_REG_a
"), %%mm1
\n\t
"
// abcdefgh
"movq %%mm0, %%mm2
\n\t
"
// 12345678
"punpcklbw %%mm1, %%mm0
\n\t
"
// 1a2b3c4d
"punpckhbw %%mm1, %%mm2
\n\t
"
// 5e6f7g8h
"movq (%%"
REG_a
", %1), %%mm1
\n\t
"
"movq (%%"
REG_a
", %1, 2), %%mm3
\n\t
"
"movq (%%"
FF_REG_a
", %1), %%mm1
\n\t
"
"movq (%%"
FF_REG_a
", %1, 2), %%mm3
\n\t
"
"movq %%mm1, %%mm4
\n\t
"
"punpcklbw %%mm3, %%mm1
\n\t
"
"punpckhbw %%mm3, %%mm4
\n\t
"
...
...
@@ -2029,16 +2029,16 @@ static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, const uint8_
"psrlq $32, %%mm1
\n\t
"
"movd %%mm1, 112(%3)
\n\t
"
"lea (%%"
REG_a
", %1, 4), %%"
REG_a
"
\n\t
"
"lea (%%"
FF_REG_a
", %1, 4), %%"
FF_REG_a
"
\n\t
"
"movq (%0, %1, 4), %%mm0
\n\t
"
// 12345678
"movq (%%"
REG_a
"), %%mm1
\n\t
"
// abcdefgh
"movq (%%"
FF_REG_a
"), %%mm1
\n\t
"
// abcdefgh
"movq %%mm0, %%mm2
\n\t
"
// 12345678
"punpcklbw %%mm1, %%mm0
\n\t
"
// 1a2b3c4d
"punpckhbw %%mm1, %%mm2
\n\t
"
// 5e6f7g8h
"movq (%%"
REG_a
", %1), %%mm1
\n\t
"
"movq (%%"
REG_a
", %1, 2), %%mm3
\n\t
"
"movq (%%"
FF_REG_a
", %1), %%mm1
\n\t
"
"movq (%%"
FF_REG_a
", %1, 2), %%mm3
\n\t
"
"movq %%mm1, %%mm4
\n\t
"
"punpcklbw %%mm3, %%mm1
\n\t
"
"punpckhbw %%mm3, %%mm4
\n\t
"
...
...
@@ -2067,7 +2067,7 @@ static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, const uint8_
::
"r"
(
src
),
"r"
((
x86_reg
)
srcStride
),
"r"
(
dst1
),
"r"
(
dst2
)
:
"%"
REG_a
:
"%"
FF_
REG_a
);
}
...
...
@@ -2077,8 +2077,8 @@ static inline void RENAME(transpose1)(uint8_t *dst1, uint8_t *dst2, const uint8_
static
inline
void
RENAME
(
transpose2
)(
uint8_t
*
dst
,
int
dstStride
,
const
uint8_t
*
src
)
{
__asm__
(
"lea (%0, %1), %%"
REG_a
"
\n\t
"
"lea (%%"
REG_a
",%1,4), %%"
REG_d
"
\n\t
"
"lea (%0, %1), %%"
FF_REG_a
"
\n\t
"
"lea (%%"
FF_REG_a
",%1,4), %%"
FF_REG_d
"
\n\t
"
// 0 1 2 3 4 5 6 7 8 9
// %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1
"movq (%2), %%mm0
\n\t
"
// 12345678
...
...
@@ -2102,16 +2102,16 @@ static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, const uint8_t
"movd %%mm0, (%0)
\n\t
"
"psrlq $32, %%mm0
\n\t
"
"movd %%mm0, (%%"
REG_a
")
\n\t
"
"movd %%mm3, (%%"
REG_a
", %1)
\n\t
"
"movd %%mm0, (%%"
FF_REG_a
")
\n\t
"
"movd %%mm3, (%%"
FF_REG_a
", %1)
\n\t
"
"psrlq $32, %%mm3
\n\t
"
"movd %%mm3, (%%"
REG_a
", %1, 2)
\n\t
"
"movd %%mm3, (%%"
FF_REG_a
", %1, 2)
\n\t
"
"movd %%mm2, (%0, %1, 4)
\n\t
"
"psrlq $32, %%mm2
\n\t
"
"movd %%mm2, (%%"
REG_d
")
\n\t
"
"movd %%mm1, (%%"
REG_d
", %1)
\n\t
"
"movd %%mm2, (%%"
FF_REG_d
")
\n\t
"
"movd %%mm1, (%%"
FF_REG_d
", %1)
\n\t
"
"psrlq $32, %%mm1
\n\t
"
"movd %%mm1, (%%"
REG_d
", %1, 2)
\n\t
"
"movd %%mm1, (%%"
FF_REG_d
", %1, 2)
\n\t
"
"movq 64(%2), %%mm0
\n\t
"
// 12345678
...
...
@@ -2135,19 +2135,19 @@ static inline void RENAME(transpose2)(uint8_t *dst, int dstStride, const uint8_t
"movd %%mm0, 4(%0)
\n\t
"
"psrlq $32, %%mm0
\n\t
"
"movd %%mm0, 4(%%"
REG_a
")
\n\t
"
"movd %%mm3, 4(%%"
REG_a
", %1)
\n\t
"
"movd %%mm0, 4(%%"
FF_REG_a
")
\n\t
"
"movd %%mm3, 4(%%"
FF_REG_a
", %1)
\n\t
"
"psrlq $32, %%mm3
\n\t
"
"movd %%mm3, 4(%%"
REG_a
", %1, 2)
\n\t
"
"movd %%mm3, 4(%%"
FF_REG_a
", %1, 2)
\n\t
"
"movd %%mm2, 4(%0, %1, 4)
\n\t
"
"psrlq $32, %%mm2
\n\t
"
"movd %%mm2, 4(%%"
REG_d
")
\n\t
"
"movd %%mm1, 4(%%"
REG_d
", %1)
\n\t
"
"movd %%mm2, 4(%%"
FF_REG_d
")
\n\t
"
"movd %%mm1, 4(%%"
FF_REG_d
", %1)
\n\t
"
"psrlq $32, %%mm1
\n\t
"
"movd %%mm1, 4(%%"
REG_d
", %1, 2)
\n\t
"
"movd %%mm1, 4(%%"
FF_REG_d
", %1, 2)
\n\t
"
::
"r"
(
dst
),
"r"
((
x86_reg
)
dstStride
),
"r"
(
src
)
:
"%"
REG_a
,
"%"
REG_d
:
"%"
FF_REG_a
,
"%"
FF_
REG_d
);
}
#endif //TEMPLATE_PP_MMX
...
...
@@ -2166,9 +2166,9 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
//#define L1_DIFF //u should change the thresholds too if u try that one
#if (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS
__asm__
volatile
(
"lea (%2, %2, 2), %%"
REG_a
"
\n\t
"
// 3*stride
"lea (%2, %2, 4), %%"
REG_d
"
\n\t
"
// 5*stride
"lea (%%"
REG_d
", %2, 2), %%"
REG_c
"
\n\t
"
// 7*stride
"lea (%2, %2, 2), %%"
FF_REG_a
"
\n\t
"
// 3*stride
"lea (%2, %2, 4), %%"
FF_REG_d
"
\n\t
"
// 5*stride
"lea (%%"
FF_REG_d
", %2, 2), %%"
FF_REG_c
"
\n\t
"
// 7*stride
// 0 1 2 3 4 5 6 7 8 9
// %x %x+%2 %x+2%2 %x+eax %x+4%2 %x+edx %x+2eax %x+ecx %x+8%2
//FIXME reorder?
...
...
@@ -2179,21 +2179,21 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
"psadbw (%1, %2), %%mm1
\n\t
"
// |L1-R1|
"movq (%0, %2, 2), %%mm2
\n\t
"
// L2
"psadbw (%1, %2, 2), %%mm2
\n\t
"
// |L2-R2|
"movq (%0, %%"
REG_a
"), %%mm3
\n\t
"
// L3
"psadbw (%1, %%"
REG_a
"), %%mm3
\n\t
"
// |L3-R3|
"movq (%0, %%"
FF_REG_a
"), %%mm3
\n\t
"
// L3
"psadbw (%1, %%"
FF_REG_a
"), %%mm3
\n\t
"
// |L3-R3|
"movq (%0, %2, 4), %%mm4
\n\t
"
// L4
"paddw %%mm1, %%mm0
\n\t
"
"psadbw (%1, %2, 4), %%mm4
\n\t
"
// |L4-R4|
"movq (%0, %%"
REG_d
"), %%mm5
\n\t
"
// L5
"movq (%0, %%"
FF_REG_d
"), %%mm5
\n\t
"
// L5
"paddw %%mm2, %%mm0
\n\t
"
"psadbw (%1, %%"
REG_d
"), %%mm5
\n\t
"
// |L5-R5|
"movq (%0, %%"
REG_a
", 2), %%mm6
\n\t
"
// L6
"psadbw (%1, %%"
FF_REG_d
"), %%mm5
\n\t
"
// |L5-R5|
"movq (%0, %%"
FF_REG_a
", 2), %%mm6
\n\t
"
// L6
"paddw %%mm3, %%mm0
\n\t
"
"psadbw (%1, %%"
REG_a
", 2), %%mm6
\n\t
"
// |L6-R6|
"movq (%0, %%"
REG_c
"), %%mm7
\n\t
"
// L7
"psadbw (%1, %%"
FF_REG_a
", 2), %%mm6
\n\t
"
// |L6-R6|
"movq (%0, %%"
FF_REG_c
"), %%mm7
\n\t
"
// L7
"paddw %%mm4, %%mm0
\n\t
"
"psadbw (%1, %%"
REG_c
"), %%mm7
\n\t
"
// |L7-R7|
"psadbw (%1, %%"
FF_REG_c
"), %%mm7
\n\t
"
// |L7-R7|
"paddw %%mm5, %%mm6
\n\t
"
"paddw %%mm7, %%mm6
\n\t
"
"paddw %%mm6, %%mm0
\n\t
"
...
...
@@ -2242,11 +2242,11 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride,
L2_DIFF_CORE
((
%
0
)
,
(
%
1
))
L2_DIFF_CORE
((
%
0
,
%
2
)
,
(
%
1
,
%
2
))
L2_DIFF_CORE
((
%
0
,
%
2
,
2
)
,
(
%
1
,
%
2
,
2
))
L2_DIFF_CORE
((
%
0
,
%%
REGa
)
,
(
%
1
,
%%
REGa
))
L2_DIFF_CORE
((
%
0
,
%%
FF_REGa
)
,
(
%
1
,
%%
FF_
REGa
))
L2_DIFF_CORE
((
%
0
,
%
2
,
4
)
,
(
%
1
,
%
2
,
4
))
L2_DIFF_CORE
((
%
0
,
%%
REGd
)
,
(
%
1
,
%%
REGd
))
L2_DIFF_CORE
((
%
0
,
%%
REGa
,
2
),
(
%
1
,
%%
REGa
,
2
))
L2_DIFF_CORE
((
%
0
,
%%
REGc
)
,
(
%
1
,
%%
REGc
))
L2_DIFF_CORE
((
%
0
,
%%
FF_REGd
)
,
(
%
1
,
%%
FF_
REGd
))
L2_DIFF_CORE
((
%
0
,
%%
FF_REGa
,
2
),
(
%
1
,
%%
FF_
REGa
,
2
))
L2_DIFF_CORE
((
%
0
,
%%
FF_REGc
)
,
(
%
1
,
%%
FF_
REGc
))
#endif //L1_DIFF
...
...
@@ -2255,94 +2255,94 @@ L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc))
"paddd %%mm0, %%mm4
\n\t
"
"movd %%mm4, %%ecx
\n\t
"
"shll $2, %%ecx
\n\t
"
"mov %3, %%"
REG_d
"
\n\t
"
"addl -4(%%"
REG_d
"), %%ecx
\n\t
"
"addl 4(%%"
REG_d
"), %%ecx
\n\t
"
"addl -1024(%%"
REG_d
"), %%ecx
\n\t
"
"mov %3, %%"
FF_REG_d
"
\n\t
"
"addl -4(%%"
FF_REG_d
"), %%ecx
\n\t
"
"addl 4(%%"
FF_REG_d
"), %%ecx
\n\t
"
"addl -1024(%%"
FF_REG_d
"), %%ecx
\n\t
"
"addl $4, %%ecx
\n\t
"
"addl 1024(%%"
REG_d
"), %%ecx
\n\t
"
"addl 1024(%%"
FF_REG_d
"), %%ecx
\n\t
"
"shrl $3, %%ecx
\n\t
"
"movl %%ecx, (%%"
REG_d
")
\n\t
"
"movl %%ecx, (%%"
FF_REG_d
")
\n\t
"
// "mov %3, %%"
REG_c"
\n\t"
// "mov %%"
REG_c", test
\n\t"
// "mov %3, %%"
FF_REG_c"
\n\t"
// "mov %%"
FF_REG_c", test
\n\t"
// "jmp 4f \n\t"
"cmpl 512(%%"
REG_d
"), %%ecx
\n\t
"
"cmpl 512(%%"
FF_REG_d
"), %%ecx
\n\t
"
" jb 2f
\n\t
"
"cmpl 516(%%"
REG_d
"), %%ecx
\n\t
"
"cmpl 516(%%"
FF_REG_d
"), %%ecx
\n\t
"
" jb 1f
\n\t
"
"lea (%%"
REG_a
", %2, 2), %%"
REG_d
"
\n\t
"
// 5*stride
"lea (%%"
REG_d
", %2, 2), %%"
REG_c
"
\n\t
"
// 7*stride
"lea (%%"
FF_REG_a
", %2, 2), %%"
FF_REG_d
"
\n\t
"
// 5*stride
"lea (%%"
FF_REG_d
", %2, 2), %%"
FF_REG_c
"
\n\t
"
// 7*stride
"movq (%0), %%mm0
\n\t
"
// L0
"movq (%0, %2), %%mm1
\n\t
"
// L1
"movq (%0, %2, 2), %%mm2
\n\t
"
// L2
"movq (%0, %%"
REG_a
"), %%mm3
\n\t
"
// L3
"movq (%0, %%"
FF_REG_a
"), %%mm3
\n\t
"
// L3
"movq (%0, %2, 4), %%mm4
\n\t
"
// L4
"movq (%0, %%"
REG_d
"), %%mm5
\n\t
"
// L5
"movq (%0, %%"
REG_a
", 2), %%mm6
\n\t
"
// L6
"movq (%0, %%"
REG_c
"), %%mm7
\n\t
"
// L7
"movq (%0, %%"
FF_REG_d
"), %%mm5
\n\t
"
// L5
"movq (%0, %%"
FF_REG_a
", 2), %%mm6
\n\t
"
// L6
"movq (%0, %%"
FF_REG_c
"), %%mm7
\n\t
"
// L7
"movq %%mm0, (%1)
\n\t
"
// L0
"movq %%mm1, (%1, %2)
\n\t
"
// L1
"movq %%mm2, (%1, %2, 2)
\n\t
"
// L2
"movq %%mm3, (%1, %%"
REG_a
")
\n\t
"
// L3
"movq %%mm3, (%1, %%"
FF_REG_a
")
\n\t
"
// L3
"movq %%mm4, (%1, %2, 4)
\n\t
"
// L4
"movq %%mm5, (%1, %%"
REG_d
")
\n\t
"
// L5
"movq %%mm6, (%1, %%"
REG_a
", 2)
\n\t
"
// L6
"movq %%mm7, (%1, %%"
REG_c
")
\n\t
"
// L7
"movq %%mm5, (%1, %%"
FF_REG_d
")
\n\t
"
// L5
"movq %%mm6, (%1, %%"
FF_REG_a
", 2)
\n\t
"
// L6
"movq %%mm7, (%1, %%"
FF_REG_c
")
\n\t
"
// L7
"jmp 4f
\n\t
"
"1:
\n\t
"
"lea (%%"
REG_a
", %2, 2), %%"
REG_d
"
\n\t
"
// 5*stride
"lea (%%"
REG_d
", %2, 2), %%"
REG_c
"
\n\t
"
// 7*stride
"lea (%%"
FF_REG_a
", %2, 2), %%"
FF_REG_d
"
\n\t
"
// 5*stride
"lea (%%"
FF_REG_d
", %2, 2), %%"
FF_REG_c
"
\n\t
"
// 7*stride
"movq (%0), %%mm0
\n\t
"
// L0
PAVGB
((
%
1
),
%%
mm0
)
// L0
"movq (%0, %2), %%mm1
\n\t
"
// L1
PAVGB
((
%
1
,
%
2
),
%%
mm1
)
// L1
"movq (%0, %2, 2), %%mm2
\n\t
"
// L2
PAVGB
((
%
1
,
%
2
,
2
),
%%
mm2
)
// L2
"movq (%0, %%"
REG_a
"), %%mm3
\n\t
"
// L3
PAVGB
((
%
1
,
%%
REGa
),
%%
mm3
)
// L3
"movq (%0, %%"
FF_REG_a
"), %%mm3
\n\t
"
// L3
PAVGB
((
%
1
,
%%
FF_REGa
),
%%
mm3
)
// L3
"movq (%0, %2, 4), %%mm4
\n\t
"
// L4
PAVGB
((
%
1
,
%
2
,
4
),
%%
mm4
)
// L4
"movq (%0, %%"
REG_d
"), %%mm5
\n\t
"
// L5
PAVGB
((
%
1
,
%%
REGd
),
%%
mm5
)
// L5
"movq (%0, %%"
REG_a
", 2), %%mm6
\n\t
"
// L6
PAVGB
((
%
1
,
%%
REGa
,
2
),
%%
mm6
)
// L6
"movq (%0, %%"
REG_c
"), %%mm7
\n\t
"
// L7
PAVGB
((
%
1
,
%%
REGc
),
%%
mm7
)
// L7
"movq (%0, %%"
FF_REG_d
"), %%mm5
\n\t
"
// L5
PAVGB
((
%
1
,
%%
FF_REGd
),
%%
mm5
)
// L5
"movq (%0, %%"
FF_REG_a
", 2), %%mm6
\n\t
"
// L6
PAVGB
((
%
1
,
%%
FF_REGa
,
2
),
%%
mm6
)
// L6
"movq (%0, %%"
FF_REG_c
"), %%mm7
\n\t
"
// L7
PAVGB
((
%
1
,
%%
FF_REGc
),
%%
mm7
)
// L7
"movq %%mm0, (%1)
\n\t
"
// R0
"movq %%mm1, (%1, %2)
\n\t
"
// R1
"movq %%mm2, (%1, %2, 2)
\n\t
"
// R2
"movq %%mm3, (%1, %%"
REG_a
")
\n\t
"
// R3
"movq %%mm3, (%1, %%"
FF_REG_a
")
\n\t
"
// R3
"movq %%mm4, (%1, %2, 4)
\n\t
"
// R4
"movq %%mm5, (%1, %%"
REG_d
")
\n\t
"
// R5
"movq %%mm6, (%1, %%"
REG_a
", 2)
\n\t
"
// R6
"movq %%mm7, (%1, %%"
REG_c
")
\n\t
"
// R7
"movq %%mm5, (%1, %%"
FF_REG_d
")
\n\t
"
// R5
"movq %%mm6, (%1, %%"
FF_REG_a
", 2)
\n\t
"
// R6
"movq %%mm7, (%1, %%"
FF_REG_c
")
\n\t
"
// R7
"movq %%mm0, (%0)
\n\t
"
// L0
"movq %%mm1, (%0, %2)
\n\t
"
// L1
"movq %%mm2, (%0, %2, 2)
\n\t
"
// L2
"movq %%mm3, (%0, %%"
REG_a
")
\n\t
"
// L3
"movq %%mm3, (%0, %%"
FF_REG_a
")
\n\t
"
// L3
"movq %%mm4, (%0, %2, 4)
\n\t
"
// L4
"movq %%mm5, (%0, %%"
REG_d
")
\n\t
"
// L5
"movq %%mm6, (%0, %%"
REG_a
", 2)
\n\t
"
// L6
"movq %%mm7, (%0, %%"
REG_c
")
\n\t
"
// L7
"movq %%mm5, (%0, %%"
FF_REG_d
")
\n\t
"
// L5
"movq %%mm6, (%0, %%"
FF_REG_a
", 2)
\n\t
"
// L6
"movq %%mm7, (%0, %%"
FF_REG_c
")
\n\t
"
// L7
"jmp 4f
\n\t
"
"2:
\n\t
"
"cmpl 508(%%"
REG_d
"), %%ecx
\n\t
"
"cmpl 508(%%"
FF_REG_d
"), %%ecx
\n\t
"
" jb 3f
\n\t
"
"lea (%%"
REG_a
", %2, 2), %%"
REG_d
"
\n\t
"
// 5*stride
"lea (%%"
REG_d
", %2, 2), %%"
REG_c
"
\n\t
"
// 7*stride
"lea (%%"
FF_REG_a
", %2, 2), %%"
FF_REG_d
"
\n\t
"
// 5*stride
"lea (%%"
FF_REG_d
", %2, 2), %%"
FF_REG_c
"
\n\t
"
// 7*stride
"movq (%0), %%mm0
\n\t
"
// L0
"movq (%0, %2), %%mm1
\n\t
"
// L1
"movq (%0, %2, 2), %%mm2
\n\t
"
// L2
"movq (%0, %%"
REG_a
"), %%mm3
\n\t
"
// L3
"movq (%0, %%"
FF_REG_a
"), %%mm3
\n\t
"
// L3
"movq (%1), %%mm4
\n\t
"
// R0
"movq (%1, %2), %%mm5
\n\t
"
// R1
"movq (%1, %2, 2), %%mm6
\n\t
"
// R2
"movq (%1, %%"
REG_a
"), %%mm7
\n\t
"
// R3
"movq (%1, %%"
FF_REG_a
"), %%mm7
\n\t
"
// R3
PAVGB
(
%%
mm4
,
%%
mm0
)
PAVGB
(
%%
mm5
,
%%
mm1
)
PAVGB
(
%%
mm6
,
%%
mm2
)
...
...
@@ -2354,20 +2354,20 @@ L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc))
"movq %%mm0, (%1)
\n\t
"
// R0
"movq %%mm1, (%1, %2)
\n\t
"
// R1
"movq %%mm2, (%1, %2, 2)
\n\t
"
// R2
"movq %%mm3, (%1, %%"
REG_a
")
\n\t
"
// R3
"movq %%mm3, (%1, %%"
FF_REG_a
")
\n\t
"
// R3
"movq %%mm0, (%0)
\n\t
"
// L0
"movq %%mm1, (%0, %2)
\n\t
"
// L1
"movq %%mm2, (%0, %2, 2)
\n\t
"
// L2
"movq %%mm3, (%0, %%"
REG_a
")
\n\t
"
// L3
"movq %%mm3, (%0, %%"
FF_REG_a
")
\n\t
"
// L3
"movq (%0, %2, 4), %%mm0
\n\t
"
// L4
"movq (%0, %%"
REG_d
"), %%mm1
\n\t
"
// L5
"movq (%0, %%"
REG_a
", 2), %%mm2
\n\t
"
// L6
"movq (%0, %%"
REG_c
"), %%mm3
\n\t
"
// L7
"movq (%0, %%"
FF_REG_d
"), %%mm1
\n\t
"
// L5
"movq (%0, %%"
FF_REG_a
", 2), %%mm2
\n\t
"
// L6
"movq (%0, %%"
FF_REG_c
"), %%mm3
\n\t
"
// L7
"movq (%1, %2, 4), %%mm4
\n\t
"
// R4
"movq (%1, %%"
REG_d
"), %%mm5
\n\t
"
// R5
"movq (%1, %%"
REG_a
", 2), %%mm6
\n\t
"
// R6
"movq (%1, %%"
REG_c
"), %%mm7
\n\t
"
// R7
"movq (%1, %%"
FF_REG_d
"), %%mm5
\n\t
"
// R5
"movq (%1, %%"
FF_REG_a
", 2), %%mm6
\n\t
"
// R6
"movq (%1, %%"
FF_REG_c
"), %%mm7
\n\t
"
// R7
PAVGB
(
%%
mm4
,
%%
mm0
)
PAVGB
(
%%
mm5
,
%%
mm1
)
PAVGB
(
%%
mm6
,
%%
mm2
)
...
...
@@ -2377,26 +2377,26 @@ L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc))
PAVGB
(
%%
mm6
,
%%
mm2
)
PAVGB
(
%%
mm7
,
%%
mm3
)
"movq %%mm0, (%1, %2, 4)
\n\t
"
// R4
"movq %%mm1, (%1, %%"
REG_d
")
\n\t
"
// R5
"movq %%mm2, (%1, %%"
REG_a
", 2)
\n\t
"
// R6
"movq %%mm3, (%1, %%"
REG_c
")
\n\t
"
// R7
"movq %%mm1, (%1, %%"
FF_REG_d
")
\n\t
"
// R5
"movq %%mm2, (%1, %%"
FF_REG_a
", 2)
\n\t
"
// R6
"movq %%mm3, (%1, %%"
FF_REG_c
")
\n\t
"
// R7
"movq %%mm0, (%0, %2, 4)
\n\t
"
// L4
"movq %%mm1, (%0, %%"
REG_d
")
\n\t
"
// L5
"movq %%mm2, (%0, %%"
REG_a
", 2)
\n\t
"
// L6
"movq %%mm3, (%0, %%"
REG_c
")
\n\t
"
// L7
"movq %%mm1, (%0, %%"
FF_REG_d
")
\n\t
"
// L5
"movq %%mm2, (%0, %%"
FF_REG_a
", 2)
\n\t
"
// L6
"movq %%mm3, (%0, %%"
FF_REG_c
")
\n\t
"
// L7
"jmp 4f
\n\t
"
"3:
\n\t
"
"lea (%%"
REG_a
", %2, 2), %%"
REG_d
"
\n\t
"
// 5*stride
"lea (%%"
REG_d
", %2, 2), %%"
REG_c
"
\n\t
"
// 7*stride
"lea (%%"
FF_REG_a
", %2, 2), %%"
FF_REG_d
"
\n\t
"
// 5*stride
"lea (%%"
FF_REG_d
", %2, 2), %%"
FF_REG_c
"
\n\t
"
// 7*stride
"movq (%0), %%mm0
\n\t
"
// L0
"movq (%0, %2), %%mm1
\n\t
"
// L1
"movq (%0, %2, 2), %%mm2
\n\t
"
// L2
"movq (%0, %%"
REG_a
"), %%mm3
\n\t
"
// L3
"movq (%0, %%"
FF_REG_a
"), %%mm3
\n\t
"
// L3
"movq (%1), %%mm4
\n\t
"
// R0
"movq (%1, %2), %%mm5
\n\t
"
// R1
"movq (%1, %2, 2), %%mm6
\n\t
"
// R2
"movq (%1, %%"
REG_a
"), %%mm7
\n\t
"
// R3
"movq (%1, %%"
FF_REG_a
"), %%mm7
\n\t
"
// R3
PAVGB
(
%%
mm4
,
%%
mm0
)
PAVGB
(
%%
mm5
,
%%
mm1
)
PAVGB
(
%%
mm6
,
%%
mm2
)
...
...
@@ -2412,20 +2412,20 @@ L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc))
"movq %%mm0, (%1)
\n\t
"
// R0
"movq %%mm1, (%1, %2)
\n\t
"
// R1
"movq %%mm2, (%1, %2, 2)
\n\t
"
// R2
"movq %%mm3, (%1, %%"
REG_a
")
\n\t
"
// R3
"movq %%mm3, (%1, %%"
FF_REG_a
")
\n\t
"
// R3
"movq %%mm0, (%0)
\n\t
"
// L0
"movq %%mm1, (%0, %2)
\n\t
"
// L1
"movq %%mm2, (%0, %2, 2)
\n\t
"
// L2
"movq %%mm3, (%0, %%"
REG_a
")
\n\t
"
// L3
"movq %%mm3, (%0, %%"
FF_REG_a
")
\n\t
"
// L3
"movq (%0, %2, 4), %%mm0
\n\t
"
// L4
"movq (%0, %%"
REG_d
"), %%mm1
\n\t
"
// L5
"movq (%0, %%"
REG_a
", 2), %%mm2
\n\t
"
// L6
"movq (%0, %%"
REG_c
"), %%mm3
\n\t
"
// L7
"movq (%0, %%"
FF_REG_d
"), %%mm1
\n\t
"
// L5
"movq (%0, %%"
FF_REG_a
", 2), %%mm2
\n\t
"
// L6
"movq (%0, %%"
FF_REG_c
"), %%mm3
\n\t
"
// L7
"movq (%1, %2, 4), %%mm4
\n\t
"
// R4
"movq (%1, %%"
REG_d
"), %%mm5
\n\t
"
// R5
"movq (%1, %%"
REG_a
", 2), %%mm6
\n\t
"
// R6
"movq (%1, %%"
REG_c
"), %%mm7
\n\t
"
// R7
"movq (%1, %%"
FF_REG_d
"), %%mm5
\n\t
"
// R5
"movq (%1, %%"
FF_REG_a
", 2), %%mm6
\n\t
"
// R6
"movq (%1, %%"
FF_REG_c
"), %%mm7
\n\t
"
// R7
PAVGB
(
%%
mm4
,
%%
mm0
)
PAVGB
(
%%
mm5
,
%%
mm1
)
PAVGB
(
%%
mm6
,
%%
mm2
)
...
...
@@ -2439,19 +2439,19 @@ L2_DIFF_CORE((%0, %%REGc) , (%1, %%REGc))
PAVGB
(
%%
mm6
,
%%
mm2
)
PAVGB
(
%%
mm7
,
%%
mm3
)
"movq %%mm0, (%1, %2, 4)
\n\t
"
// R4
"movq %%mm1, (%1, %%"
REG_d
")
\n\t
"
// R5
"movq %%mm2, (%1, %%"
REG_a
", 2)
\n\t
"
// R6
"movq %%mm3, (%1, %%"
REG_c
")
\n\t
"
// R7
"movq %%mm1, (%1, %%"
FF_REG_d
")
\n\t
"
// R5
"movq %%mm2, (%1, %%"
FF_REG_a
", 2)
\n\t
"
// R6
"movq %%mm3, (%1, %%"
FF_REG_c
")
\n\t
"
// R7
"movq %%mm0, (%0, %2, 4)
\n\t
"
// L4
"movq %%mm1, (%0, %%"
REG_d
")
\n\t
"
// L5
"movq %%mm2, (%0, %%"
REG_a
", 2)
\n\t
"
// L6
"movq %%mm3, (%0, %%"
REG_c
")
\n\t
"
// L7
"movq %%mm1, (%0, %%"
FF_REG_d
")
\n\t
"
// L5
"movq %%mm2, (%0, %%"
FF_REG_a
", 2)
\n\t
"
// L6
"movq %%mm3, (%0, %%"
FF_REG_c
")
\n\t
"
// L7
"4:
\n\t
"
::
"r"
(
src
),
"r"
(
tempBlurred
),
"r"
((
x86_reg
)
stride
),
"m"
(
tempBlurredPast
)
NAMED_CONSTRAINTS_ADD
(
b80
)
:
"%"
REG_a
,
"%"
REG_d
,
"%"
REG_c
,
"memory"
:
"%"
FF_REG_a
,
"%"
FF_REG_d
,
"%"
FF_
REG_c
,
"memory"
);
#else //(TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS
{
...
...
@@ -2556,19 +2556,19 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
);
__asm__
volatile
(
"lea (%2, %3), %%"
REG_a
"
\n\t
"
"lea (%2, %3), %%"
FF_REG_a
"
\n\t
"
// 0 1 2 3 4 5 6 7 8 9
// %1 eax eax+%2 eax+2%2 %1+4%2 ecx ecx+%2 ecx+2%2 %1+8%2 ecx+4%2
"movq (%2), %%mm0
\n\t
"
"movq (%%"
REG_a
"), %%mm1
\n\t
"
"movq (%%"
FF_REG_a
"), %%mm1
\n\t
"
"movq %%mm1, %%mm3
\n\t
"
"movq %%mm1, %%mm4
\n\t
"
"psubb %%mm1, %%mm0
\n\t
"
// mm0 = difference
"paddb %%mm7, %%mm0
\n\t
"
"pcmpgtb %%mm6, %%mm0
\n\t
"
"movq (%%"
REG_a
",%3), %%mm2
\n\t
"
"movq (%%"
FF_REG_a
",%3), %%mm2
\n\t
"
PMAXUB
(
%%
mm2
,
%%
mm4
)
PMINUB
(
%%
mm2
,
%%
mm3
,
%%
mm5
)
"psubb %%mm2, %%mm1
\n\t
"
...
...
@@ -2576,7 +2576,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
"pcmpgtb %%mm6, %%mm1
\n\t
"
"paddb %%mm1, %%mm0
\n\t
"
"movq (%%"
REG_a
", %3, 2), %%mm1
\n\t
"
"movq (%%"
FF_REG_a
", %3, 2), %%mm1
\n\t
"
PMAXUB
(
%%
mm1
,
%%
mm4
)
PMINUB
(
%%
mm1
,
%%
mm3
,
%%
mm5
)
"psubb %%mm1, %%mm2
\n\t
"
...
...
@@ -2584,7 +2584,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
"pcmpgtb %%mm6, %%mm2
\n\t
"
"paddb %%mm2, %%mm0
\n\t
"
"lea (%%"
REG_a
", %3, 4), %%"
REG_a
"
\n\t
"
"lea (%%"
FF_REG_a
", %3, 4), %%"
FF_REG_a
"
\n\t
"
"movq (%2, %3, 4), %%mm2
\n\t
"
PMAXUB
(
%%
mm2
,
%%
mm4
)
...
...
@@ -2594,7 +2594,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
"pcmpgtb %%mm6, %%mm1
\n\t
"
"paddb %%mm1, %%mm0
\n\t
"
"movq (%%"
REG_a
"), %%mm1
\n\t
"
"movq (%%"
FF_REG_a
"), %%mm1
\n\t
"
PMAXUB
(
%%
mm1
,
%%
mm4
)
PMINUB
(
%%
mm1
,
%%
mm3
,
%%
mm5
)
"psubb %%mm1, %%mm2
\n\t
"
...
...
@@ -2602,7 +2602,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
"pcmpgtb %%mm6, %%mm2
\n\t
"
"paddb %%mm2, %%mm0
\n\t
"
"movq (%%"
REG_a
", %3), %%mm2
\n\t
"
"movq (%%"
FF_REG_a
", %3), %%mm2
\n\t
"
PMAXUB
(
%%
mm2
,
%%
mm4
)
PMINUB
(
%%
mm2
,
%%
mm3
,
%%
mm5
)
"psubb %%mm2, %%mm1
\n\t
"
...
...
@@ -2610,7 +2610,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
"pcmpgtb %%mm6, %%mm1
\n\t
"
"paddb %%mm1, %%mm0
\n\t
"
"movq (%%"
REG_a
", %3, 2), %%mm1
\n\t
"
"movq (%%"
FF_REG_a
", %3, 2), %%mm1
\n\t
"
PMAXUB
(
%%
mm1
,
%%
mm4
)
PMINUB
(
%%
mm1
,
%%
mm3
,
%%
mm5
)
"psubb %%mm1, %%mm2
\n\t
"
...
...
@@ -2626,7 +2626,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
"pcmpgtb %%mm6, %%mm1
\n\t
"
"paddb %%mm1, %%mm0
\n\t
"
"movq (%%"
REG_a
", %3, 4), %%mm1
\n\t
"
"movq (%%"
FF_REG_a
", %3, 4), %%mm1
\n\t
"
"psubb %%mm1, %%mm2
\n\t
"
"paddb %%mm7, %%mm2
\n\t
"
"pcmpgtb %%mm6, %%mm2
\n\t
"
...
...
@@ -2651,7 +2651,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
:
"=m"
(
eq_mask
),
"=m"
(
dc_mask
)
:
"r"
(
src
),
"r"
((
x86_reg
)
step
),
"m"
(
c
->
pQPb
),
"m"
(
c
->
ppMode
.
flatnessThreshold
)
:
"%"
REG_a
:
"%"
FF_
REG_a
);
both_masks
=
dc_mask
&
eq_mask
;
...
...
@@ -2851,12 +2851,12 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
"punpckhbw %%mm7, %%mm1
\n\t
"
// high part of line 0
"movq (%0, %1), %%mm2
\n\t
"
"lea (%0, %1, 2), %%"
REG_a
"
\n\t
"
"lea (%0, %1, 2), %%"
FF_REG_a
"
\n\t
"
"movq %%mm2, %%mm3
\n\t
"
"punpcklbw %%mm7, %%mm2
\n\t
"
// low part of line 1
"punpckhbw %%mm7, %%mm3
\n\t
"
// high part of line 1
"movq (%%"
REG_a
"), %%mm4
\n\t
"
"movq (%%"
FF_REG_a
"), %%mm4
\n\t
"
"movq %%mm4, %%mm5
\n\t
"
"punpcklbw %%mm7, %%mm4
\n\t
"
// low part of line 2
"punpckhbw %%mm7, %%mm5
\n\t
"
// high part of line 2
...
...
@@ -2873,7 +2873,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
"psubw %%mm2, %%mm0
\n\t
"
// 2L0 - 5L1 + 5L2
"psubw %%mm3, %%mm1
\n\t
"
// 2H0 - 5H1 + 5H2
"movq (%%"
REG_a
", %1), %%mm2
\n\t
"
"movq (%%"
FF_REG_a
", %1), %%mm2
\n\t
"
"movq %%mm2, %%mm3
\n\t
"
"punpcklbw %%mm7, %%mm2
\n\t
"
// L3
"punpckhbw %%mm7, %%mm3
\n\t
"
// H3
...
...
@@ -2885,7 +2885,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
"movq %%mm0, (%4)
\n\t
"
// 2L0 - 5L1 + 5L2 - 2L3
"movq %%mm1, 8(%4)
\n\t
"
// 2H0 - 5H1 + 5H2 - 2H3
"movq (%%"
REG_a
", %1, 2), %%mm0
\n\t
"
"movq (%%"
FF_REG_a
", %1, 2), %%mm0
\n\t
"
"movq %%mm0, %%mm1
\n\t
"
"punpcklbw %%mm7, %%mm0
\n\t
"
// L4
"punpckhbw %%mm7, %%mm1
\n\t
"
// H4
...
...
@@ -2899,7 +2899,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
"psubw %%mm2, %%mm4
\n\t
"
// 2L2 - L3 + L4
"psubw %%mm3, %%mm5
\n\t
"
// 2H2 - H3 + H4
"lea (%%"
REG_a
", %1), %0
\n\t
"
"lea (%%"
FF_REG_a
", %1), %0
\n\t
"
"psllw $2, %%mm2
\n\t
"
// 4L3 - 4L4
"psllw $2, %%mm3
\n\t
"
// 4H3 - 4H4
"psubw %%mm2, %%mm4
\n\t
"
// 2L2 - 5L3 + 5L4
...
...
@@ -2914,10 +2914,10 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
"psubw %%mm2, %%mm4
\n\t
"
// 2L2 - 5L3 + 5L4 - 2L5
"psubw %%mm3, %%mm5
\n\t
"
// 2H2 - 5H3 + 5H4 - 2H5
"movq (%%"
REG_a
", %1, 4), %%mm6
\n\t
"
"movq (%%"
FF_REG_a
", %1, 4), %%mm6
\n\t
"
"punpcklbw %%mm7, %%mm6
\n\t
"
// L6
"psubw %%mm6, %%mm2
\n\t
"
// L5 - L6
"movq (%%"
REG_a
", %1, 4), %%mm6
\n\t
"
"movq (%%"
FF_REG_a
", %1, 4), %%mm6
\n\t
"
"punpckhbw %%mm7, %%mm6
\n\t
"
// H6
"psubw %%mm6, %%mm3
\n\t
"
// H5 - H6
...
...
@@ -3068,7 +3068,7 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st
:
"+r"
(
temp_src
)
:
"r"
((
x86_reg
)
step
),
"m"
(
c
->
pQPb
),
"m"
(
eq_mask
),
"r"
(
tmp
)
NAMED_CONSTRAINTS_ADD
(
w05
,
w20
)
:
"%"
REG_a
:
"%"
FF_
REG_a
);
}
/*if(step==16){
...
...
@@ -3099,10 +3099,10 @@ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t
if
(
levelFix
){
#if TEMPLATE_PP_MMX && HAVE_6REGS
__asm__
volatile
(
"movq (%%"
REG_a
"), %%mm2
\n\t
"
// packedYOffset
"movq 8(%%"
REG_a
"), %%mm3
\n\t
"
// packedYScale
"lea (%2,%4), %%"
REG_a
"
\n\t
"
"lea (%3,%5), %%"
REG_d
"
\n\t
"
"movq (%%"
FF_REG_a
"), %%mm2
\n\t
"
// packedYOffset
"movq 8(%%"
FF_REG_a
"), %%mm3
\n\t
"
// packedYScale
"lea (%2,%4), %%"
FF_REG_a
"
\n\t
"
"lea (%3,%5), %%"
FF_REG_d
"
\n\t
"
"pxor %%mm4, %%mm4
\n\t
"
#if TEMPLATE_PP_MMXEXT
#define REAL_SCALED_CPY(src1, src2, dst1, dst2) \
...
...
@@ -3159,11 +3159,11 @@ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t
REAL_SCALED_CPY(src1, src2, dst1, dst2)
SCALED_CPY
((
%
2
)
,
(
%
2
,
%
4
)
,
(
%
3
)
,
(
%
3
,
%
5
))
SCALED_CPY
((
%
2
,
%
4
,
2
),
(
%%
REGa
,
%
4
,
2
),
(
%
3
,
%
5
,
2
),
(
%%
REGd
,
%
5
,
2
))
SCALED_CPY
((
%
2
,
%
4
,
4
),
(
%%
REGa
,
%
4
,
4
),
(
%
3
,
%
5
,
4
),
(
%%
REGd
,
%
5
,
4
))
"lea (%%"
REG_a
",%4,4), %%"
REG_a
"
\n\t
"
"lea (%%"
REG_d
",%5,4), %%"
REG_d
"
\n\t
"
SCALED_CPY
((
%%
REGa
,
%
4
),
(
%%
REGa
,
%
4
,
2
),
(
%%
REGd
,
%
5
),
(
%%
REGd
,
%
5
,
2
))
SCALED_CPY
((
%
2
,
%
4
,
2
),
(
%%
FF_REGa
,
%
4
,
2
),
(
%
3
,
%
5
,
2
),
(
%%
FF_
REGd
,
%
5
,
2
))
SCALED_CPY
((
%
2
,
%
4
,
4
),
(
%%
FF_REGa
,
%
4
,
4
),
(
%
3
,
%
5
,
4
),
(
%%
FF_
REGd
,
%
5
,
4
))
"lea (%%"
FF_REG_a
",%4,4), %%"
FF_
REG_a
"
\n\t
"
"lea (%%"
FF_REG_d
",%5,4), %%"
FF_
REG_d
"
\n\t
"
SCALED_CPY
((
%%
FF_REGa
,
%
4
),
(
%%
FF_REGa
,
%
4
,
2
),
(
%%
FF_REGd
,
%
5
),
(
%%
FF_
REGd
,
%
5
,
2
))
:
"=&a"
(
packedOffsetAndScale
)
...
...
@@ -3172,7 +3172,7 @@ SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2))
"r"
(
dst
),
"r"
((
x86_reg
)
srcStride
),
"r"
((
x86_reg
)
dstStride
)
:
"%"
REG_d
:
"%"
FF_
REG_d
);
#else //TEMPLATE_PP_MMX && HAVE_6REGS
for
(
i
=
0
;
i
<
8
;
i
++
)
...
...
@@ -3182,8 +3182,8 @@ SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2))
}
else
{
#if TEMPLATE_PP_MMX && HAVE_6REGS
__asm__
volatile
(
"lea (%0,%2), %%"
REG_a
"
\n\t
"
"lea (%1,%3), %%"
REG_d
"
\n\t
"
"lea (%0,%2), %%"
FF_REG_a
"
\n\t
"
"lea (%1,%3), %%"
FF_REG_d
"
\n\t
"
#define REAL_SIMPLE_CPY(src1, src2, dst1, dst2) \
"movq " #src1 ", %%mm0 \n\t"\
...
...
@@ -3195,17 +3195,17 @@ SCALED_CPY((%%REGa, %4), (%%REGa, %4, 2), (%%REGd, %5), (%%REGd, %5, 2))
REAL_SIMPLE_CPY(src1, src2, dst1, dst2)
SIMPLE_CPY
((
%
0
)
,
(
%
0
,
%
2
)
,
(
%
1
)
,
(
%
1
,
%
3
))
SIMPLE_CPY
((
%
0
,
%
2
,
2
),
(
%%
REGa
,
%
2
,
2
),
(
%
1
,
%
3
,
2
),
(
%%
REGd
,
%
3
,
2
))
SIMPLE_CPY
((
%
0
,
%
2
,
4
),
(
%%
REGa
,
%
2
,
4
),
(
%
1
,
%
3
,
4
),
(
%%
REGd
,
%
3
,
4
))
"lea (%%"
REG_a
",%2,4), %%"
REG_a
"
\n\t
"
"lea (%%"
REG_d
",%3,4), %%"
REG_d
"
\n\t
"
SIMPLE_CPY
((
%%
REGa
,
%
2
),
(
%%
REGa
,
%
2
,
2
),
(
%%
REGd
,
%
3
),
(
%%
REGd
,
%
3
,
2
))
SIMPLE_CPY
((
%
0
,
%
2
,
2
),
(
%%
FF_REGa
,
%
2
,
2
),
(
%
1
,
%
3
,
2
),
(
%%
FF_
REGd
,
%
3
,
2
))
SIMPLE_CPY
((
%
0
,
%
2
,
4
),
(
%%
FF_REGa
,
%
2
,
4
),
(
%
1
,
%
3
,
4
),
(
%%
FF_
REGd
,
%
3
,
4
))
"lea (%%"
FF_REG_a
",%2,4), %%"
FF_
REG_a
"
\n\t
"
"lea (%%"
FF_REG_d
",%3,4), %%"
FF_
REG_d
"
\n\t
"
SIMPLE_CPY
((
%%
FF_REGa
,
%
2
),
(
%%
FF_REGa
,
%
2
,
2
),
(
%%
FF_REGd
,
%
3
),
(
%%
FF_
REGd
,
%
3
,
2
))
:
:
"r"
(
src
),
"r"
(
dst
),
"r"
((
x86_reg
)
srcStride
),
"r"
((
x86_reg
)
dstStride
)
:
"%"
REG_a
,
"%"
REG_d
:
"%"
FF_REG_a
,
"%"
FF_
REG_d
);
#else //TEMPLATE_PP_MMX && HAVE_6REGS
for
(
i
=
0
;
i
<
8
;
i
++
)
...
...
libswscale/x86/hscale_fast_bilinear_simd.c
View file @
9eb3da2f
...
...
@@ -55,9 +55,9 @@ av_cold int ff_init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode,
"jmp 9f
\n\t
"
// Begin
"0:
\n\t
"
"movq (%%"
REG_d
", %%"
REG_a
"), %%mm3
\n\t
"
"movd (%%"
REG_c
", %%"
REG_S
"), %%mm0
\n\t
"
"movd 1(%%"
REG_c
", %%"
REG_S
"), %%mm1
\n\t
"
"movq (%%"
FF_REG_d
", %%"
FF_REG_a
"), %%mm3
\n\t
"
"movd (%%"
FF_REG_c
", %%"
FF_REG_S
"), %%mm0
\n\t
"
"movd 1(%%"
FF_REG_c
", %%"
FF_REG_S
"), %%mm1
\n\t
"
"punpcklbw %%mm7, %%mm1
\n\t
"
"punpcklbw %%mm7, %%mm0
\n\t
"
"pshufw $0xFF, %%mm1, %%mm1
\n\t
"
...
...
@@ -65,14 +65,14 @@ av_cold int ff_init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode,
"pshufw $0xFF, %%mm0, %%mm0
\n\t
"
"2:
\n\t
"
"psubw %%mm1, %%mm0
\n\t
"
"movl 8(%%"
REG_b
", %%"
REG_a
"), %%esi
\n\t
"
"movl 8(%%"
FF_REG_b
", %%"
FF_REG_a
"), %%esi
\n\t
"
"pmullw %%mm3, %%mm0
\n\t
"
"psllw $7, %%mm1
\n\t
"
"paddw %%mm1, %%mm0
\n\t
"
"movq %%mm0, (%%"
REG_D
", %%"
REG_a
")
\n\t
"
"movq %%mm0, (%%"
FF_REG_D
", %%"
FF_
REG_a
")
\n\t
"
"add $8, %%"
REG_a
"
\n\t
"
"add $8, %%"
FF_REG_a
"
\n\t
"
// End
"9:
\n\t
"
"lea "
LOCAL_MANGLE
(
0
b
)
", %0
\n\t
"
...
...
@@ -94,22 +94,22 @@ av_cold int ff_init_hscaler_mmxext(int dstW, int xInc, uint8_t *filterCode,
"jmp 9f
\n\t
"
// Begin
"0:
\n\t
"
"movq (%%"
REG_d
", %%"
REG_a
"), %%mm3
\n\t
"
"movd (%%"
REG_c
", %%"
REG_S
"), %%mm0
\n\t
"
"movq (%%"
FF_REG_d
", %%"
FF_REG_a
"), %%mm3
\n\t
"
"movd (%%"
FF_REG_c
", %%"
FF_REG_S
"), %%mm0
\n\t
"
"punpcklbw %%mm7, %%mm0
\n\t
"
"pshufw $0xFF, %%mm0, %%mm1
\n\t
"
"1:
\n\t
"
"pshufw $0xFF, %%mm0, %%mm0
\n\t
"
"2:
\n\t
"
"psubw %%mm1, %%mm0
\n\t
"
"movl 8(%%"
REG_b
", %%"
REG_a
"), %%esi
\n\t
"
"movl 8(%%"
FF_REG_b
", %%"
FF_REG_a
"), %%esi
\n\t
"
"pmullw %%mm3, %%mm0
\n\t
"
"psllw $7, %%mm1
\n\t
"
"paddw %%mm1, %%mm0
\n\t
"
"movq %%mm0, (%%"
REG_D
", %%"
REG_a
")
\n\t
"
"movq %%mm0, (%%"
FF_REG_D
", %%"
FF_
REG_a
")
\n\t
"
"add $8, %%"
REG_a
"
\n\t
"
"add $8, %%"
FF_REG_a
"
\n\t
"
// End
"9:
\n\t
"
"lea "
LOCAL_MANGLE
(
0
b
)
", %0
\n\t
"
...
...
@@ -206,39 +206,39 @@ void ff_hyscale_fast_mmxext(SwsContext *c, int16_t *dst,
__asm__
volatile
(
#if ARCH_X86_64
"mov -8(%%rsp), %%"
REG_a
"
\n\t
"
"mov
%%"
REG_a
", %5
\n\t
"
// retsave
"mov -8(%%rsp), %%"
FF_REG_a
"
\n\t
"
"mov
%%"
FF_REG_a
", %5
\n\t
"
// retsave
#else
#if defined(PIC)
"mov
%%"
REG_b
", %5
\n\t
"
// ebxsave
"mov
%%"
FF_REG_b
", %5
\n\t
"
// ebxsave
#endif
#endif
"pxor %%mm7, %%mm7
\n\t
"
"mov %0, %%"
REG_c
"
\n\t
"
"mov %1, %%"
REG_D
"
\n\t
"
"mov %2, %%"
REG_d
"
\n\t
"
"mov %3, %%"
REG_b
"
\n\t
"
"xor
%%"
REG_a
", %%"
REG_a
"
\n\t
"
// i
PREFETCH
"
(%%"
REG_c
")
\n\t
"
PREFETCH
"
32(%%"
REG_c
")
\n\t
"
PREFETCH
"
64(%%"
REG_c
")
\n\t
"
"mov %0, %%"
FF_REG_c
"
\n\t
"
"mov %1, %%"
FF_REG_D
"
\n\t
"
"mov %2, %%"
FF_REG_d
"
\n\t
"
"mov %3, %%"
FF_REG_b
"
\n\t
"
"xor
%%"
FF_REG_a
", %%"
FF_REG_a
"
\n\t
"
// i
PREFETCH
"
(%%"
FF_REG_c
")
\n\t
"
PREFETCH
"
32(%%"
FF_REG_c
")
\n\t
"
PREFETCH
"
64(%%"
FF_REG_c
")
\n\t
"
#if ARCH_X86_64
#define CALL_MMXEXT_FILTER_CODE \
"movl
(%%"REG_b"), %%esi
\n\t"\
"movl
(%%"FF_REG_b"), %%esi
\n\t"\
"call *%4 \n\t"\
"movl (%%"
REG_b", %%"REG_a"), %%esi
\n\t"\
"add
%%"REG_S", %%"
REG_c" \n\t"\
"add
%%"REG_a", %%"
REG_D" \n\t"\
"xor
%%"REG_a", %%"
REG_a" \n\t"\
"movl (%%"
FF_REG_b", %%"FF_REG_a"), %%esi
\n\t"\
"add
%%"FF_REG_S", %%"FF_
REG_c" \n\t"\
"add
%%"FF_REG_a", %%"FF_
REG_D" \n\t"\
"xor
%%"FF_REG_a", %%"FF_
REG_a" \n\t"\
#else
#define CALL_MMXEXT_FILTER_CODE \
"movl
(%%"
REG_b"), %%esi \n\t"\
"movl
(%%"FF_
REG_b"), %%esi \n\t"\
"call *%4 \n\t"\
"addl (%%"
REG_b", %%"REG_a"), %%"
REG_c" \n\t"\
"add
%%"REG_a", %%"
REG_D" \n\t"\
"xor
%%"REG_a", %%"
REG_a" \n\t"\
"addl (%%"
FF_REG_b", %%"FF_REG_a"), %%"FF_
REG_c" \n\t"\
"add
%%"FF_REG_a", %%"FF_
REG_D" \n\t"\
"xor
%%"FF_REG_a", %%"FF_
REG_a" \n\t"\
#endif
/* ARCH_X86_64 */
...
...
@@ -252,11 +252,11 @@ void ff_hyscale_fast_mmxext(SwsContext *c, int16_t *dst,
CALL_MMXEXT_FILTER_CODE
#if ARCH_X86_64
"mov %5, %%"
REG_a
"
\n\t
"
"mov
%%"
REG_a
", -8(%%rsp)
\n\t
"
"mov %5, %%"
FF_
REG_a
"
\n\t
"
"mov
%%"
FF_REG_a
", -8(%%rsp)
\n\t
"
#else
#if defined(PIC)
"mov %5, %%"
REG_b
"
\n\t
"
"mov %5, %%"
FF_
REG_b
"
\n\t
"
#endif
#endif
::
"m"
(
src
),
"m"
(
dst
),
"m"
(
filter
),
"m"
(
filterPos
),
...
...
@@ -268,9 +268,9 @@ void ff_hyscale_fast_mmxext(SwsContext *c, int16_t *dst,
,
"m"
(
ebxsave
)
#endif
#endif
:
"%"
REG_a
,
"%"
REG_c
,
"%"
REG_d
,
"%"
REG_S
,
"%"
REG_D
:
"%"
FF_REG_a
,
"%"
FF_REG_c
,
"%"
FF_REG_d
,
"%"
FF_REG_S
,
"%"
FF_
REG_D
#if ARCH_X86_64 || !defined(PIC)
,
"%"
REG_b
,
"%"
FF_
REG_b
#endif
);
...
...
@@ -295,33 +295,33 @@ void ff_hcscale_fast_mmxext(SwsContext *c, int16_t *dst1, int16_t *dst2,
#endif
__asm__
volatile
(
#if ARCH_X86_64
"mov -8(%%rsp), %%"
REG_a
"
\n\t
"
"mov
%%"
REG_a
", %7
\n\t
"
// retsave
"mov -8(%%rsp), %%"
FF_REG_a
"
\n\t
"
"mov
%%"
FF_REG_a
", %7
\n\t
"
// retsave
#else
#if defined(PIC)
"mov
%%"
REG_b
", %7
\n\t
"
// ebxsave
"mov
%%"
FF_REG_b
", %7
\n\t
"
// ebxsave
#endif
#endif
"pxor %%mm7, %%mm7
\n\t
"
"mov %0, %%"
REG_c
"
\n\t
"
"mov %1, %%"
REG_D
"
\n\t
"
"mov %2, %%"
REG_d
"
\n\t
"
"mov %3, %%"
REG_b
"
\n\t
"
"xor %%"
REG_a
", %%"
REG_a
"
\n\t
"
// i
PREFETCH
" (%%"
REG_c
")
\n\t
"
PREFETCH
" 32(%%"
REG_c
")
\n\t
"
PREFETCH
" 64(%%"
REG_c
")
\n\t
"
"mov %0, %%"
FF_REG_c
"
\n\t
"
"mov %1, %%"
FF_REG_D
"
\n\t
"
"mov %2, %%"
FF_REG_d
"
\n\t
"
"mov %3, %%"
FF_REG_b
"
\n\t
"
"xor %%"
FF_REG_a
", %%"
FF_REG_a
"
\n\t
"
// i
PREFETCH
" (%%"
FF_REG_c
")
\n\t
"
PREFETCH
" 32(%%"
FF_REG_c
")
\n\t
"
PREFETCH
" 64(%%"
FF_REG_c
")
\n\t
"
CALL_MMXEXT_FILTER_CODE
CALL_MMXEXT_FILTER_CODE
CALL_MMXEXT_FILTER_CODE
CALL_MMXEXT_FILTER_CODE
"xor %%"
REG_a
", %%"
REG_a
"
\n\t
"
// i
"mov
%5, %%"
REG_c
"
\n\t
"
// src2
"mov
%6, %%"
REG_D
"
\n\t
"
// dst2
PREFETCH
" (%%"
REG_c
")
\n\t
"
PREFETCH
" 32(%%"
REG_c
")
\n\t
"
PREFETCH
" 64(%%"
REG_c
")
\n\t
"
"xor %%"
FF_REG_a
", %%"
FF_REG_a
"
\n\t
"
// i
"mov
%5, %%"
FF_REG_c
"
\n\t
"
// src2
"mov
%6, %%"
FF_REG_D
"
\n\t
"
// dst2
PREFETCH
" (%%"
FF_REG_c
")
\n\t
"
PREFETCH
" 32(%%"
FF_REG_c
")
\n\t
"
PREFETCH
" 64(%%"
FF_REG_c
")
\n\t
"
CALL_MMXEXT_FILTER_CODE
CALL_MMXEXT_FILTER_CODE
...
...
@@ -329,11 +329,11 @@ void ff_hcscale_fast_mmxext(SwsContext *c, int16_t *dst1, int16_t *dst2,
CALL_MMXEXT_FILTER_CODE
#if ARCH_X86_64
"mov
%7, %%"
REG_a
"
\n\t
"
"mov %%"
REG_a
", -8(%%rsp)
\n\t
"
"mov
%7, %%"
FF_REG_a
"
\n\t
"
"mov %%"
FF_REG_a
", -8(%%rsp)
\n\t
"
#else
#if defined(PIC)
"mov %7, %%"
REG_b
"
\n\t
"
"mov %7, %%"
FF_
REG_b
"
\n\t
"
#endif
#endif
::
"m"
(
src1
),
"m"
(
dst1
),
"m"
(
filter
),
"m"
(
filterPos
),
...
...
@@ -345,9 +345,9 @@ void ff_hcscale_fast_mmxext(SwsContext *c, int16_t *dst1, int16_t *dst2,
,
"m"
(
ebxsave
)
#endif
#endif
:
"%"
REG_a
,
"%"
REG_c
,
"%"
REG_d
,
"%"
REG_S
,
"%"
REG_D
:
"%"
FF_REG_a
,
"%"
FF_REG_c
,
"%"
FF_REG_d
,
"%"
FF_REG_S
,
"%"
FF_
REG_D
#if ARCH_X86_64 || !defined(PIC)
,
"%"
REG_b
,
"%"
FF_
REG_b
#endif
);
...
...
libswscale/x86/rgb2rgb_template.c
View file @
9eb3da2f
...
...
@@ -1101,43 +1101,43 @@ static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, int sr
unsigned
i
;
x86_reg
mmx_size
=
23
-
src_size
;
__asm__
volatile
(
"test %%"
REG_a
", %%"
REG_a
"
\n\t
"
"test %%"
FF_REG_a
", %%"
FF_REG_a
"
\n\t
"
"jns 2f
\n\t
"
"movq "
MANGLE
(
mask24r
)
", %%mm5
\n\t
"
"movq "
MANGLE
(
mask24g
)
", %%mm6
\n\t
"
"movq "
MANGLE
(
mask24b
)
", %%mm7
\n\t
"
".p2align 4
\n\t
"
"1:
\n\t
"
PREFETCH
" 32(%1, %%"
REG_a
")
\n\t
"
"movq
(%1, %%"
REG_a
"), %%mm0
\n\t
"
// BGR BGR BG
"movq
(%1, %%"
REG_a
"), %%mm1
\n\t
"
// BGR BGR BG
"movq
2(%1, %%"
REG_a
"), %%mm2
\n\t
"
// R BGR BGR B
PREFETCH
" 32(%1, %%"
FF_REG_a
")
\n\t
"
"movq
(%1, %%"
FF_
REG_a
"), %%mm0
\n\t
"
// BGR BGR BG
"movq
(%1, %%"
FF_
REG_a
"), %%mm1
\n\t
"
// BGR BGR BG
"movq
2(%1, %%"
FF_
REG_a
"), %%mm2
\n\t
"
// R BGR BGR B
"psllq $16, %%mm0
\n\t
"
// 00 BGR BGR
"pand %%mm5, %%mm0
\n\t
"
"pand %%mm6, %%mm1
\n\t
"
"pand %%mm7, %%mm2
\n\t
"
"por %%mm0, %%mm1
\n\t
"
"por %%mm2, %%mm1
\n\t
"
"movq
6(%1, %%"
REG_a
"), %%mm0
\n\t
"
// BGR BGR BG
MOVNTQ
" %%mm1,
(%2, %%"
REG_a
")
\n\t
"
// RGB RGB RG
"movq
8(%1, %%"
REG_a
"), %%mm1
\n\t
"
// R BGR BGR B
"movq
10(%1, %%"
REG_a
"), %%mm2
\n\t
"
// GR BGR BGR
"movq
6(%1, %%"
FF_
REG_a
"), %%mm0
\n\t
"
// BGR BGR BG
MOVNTQ
" %%mm1,
(%2, %%"
FF_
REG_a
")
\n\t
"
// RGB RGB RG
"movq
8(%1, %%"
FF_
REG_a
"), %%mm1
\n\t
"
// R BGR BGR B
"movq
10(%1, %%"
FF_
REG_a
"), %%mm2
\n\t
"
// GR BGR BGR
"pand %%mm7, %%mm0
\n\t
"
"pand %%mm5, %%mm1
\n\t
"
"pand %%mm6, %%mm2
\n\t
"
"por %%mm0, %%mm1
\n\t
"
"por %%mm2, %%mm1
\n\t
"
"movq
14(%1, %%"
REG_a
"), %%mm0
\n\t
"
// R BGR BGR B
MOVNTQ
" %%mm1,
8(%2, %%"
REG_a
")
\n\t
"
// B RGB RGB R
"movq
16(%1, %%"
REG_a
"), %%mm1
\n\t
"
// GR BGR BGR
"movq
18(%1, %%"
REG_a
"), %%mm2
\n\t
"
// BGR BGR BG
"movq
14(%1, %%"
FF_
REG_a
"), %%mm0
\n\t
"
// R BGR BGR B
MOVNTQ
" %%mm1,
8(%2, %%"
FF_REG_a
")
\n\t
"
// B RGB RGB R
"movq
16(%1, %%"
FF_
REG_a
"), %%mm1
\n\t
"
// GR BGR BGR
"movq
18(%1, %%"
FF_
REG_a
"), %%mm2
\n\t
"
// BGR BGR BG
"pand %%mm6, %%mm0
\n\t
"
"pand %%mm7, %%mm1
\n\t
"
"pand %%mm5, %%mm2
\n\t
"
"por %%mm0, %%mm1
\n\t
"
"por %%mm2, %%mm1
\n\t
"
MOVNTQ
" %%mm1, 16(%2, %%"
REG_a
")
\n\t
"
"add $24, %%"
REG_a
"
\n\t
"
MOVNTQ
" %%mm1, 16(%2, %%"
FF_REG_a
")
\n\t
"
"add $24, %%"
FF_REG_a
"
\n\t
"
" js 1b
\n\t
"
"2:
\n\t
"
:
"+a"
(
mmx_size
)
...
...
@@ -1173,20 +1173,20 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u
for
(
y
=
0
;
y
<
height
;
y
++
)
{
//FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
__asm__
volatile
(
"xor %%"
REG_a
", %%"
REG_a
"
\n\t
"
"xor %%"
FF_REG_a
", %%"
FF_REG_a
"
\n\t
"
".p2align 4
\n\t
"
"1:
\n\t
"
PREFETCH
"
32(%1, %%"
REG_a
", 2)
\n\t
"
PREFETCH
"
32(%2, %%"
REG_a
")
\n\t
"
PREFETCH
"
32(%3, %%"
REG_a
")
\n\t
"
"movq
(%2, %%"
REG_a
"), %%mm0
\n\t
"
// U(0)
PREFETCH
"
32(%1, %%"
FF_
REG_a
", 2)
\n\t
"
PREFETCH
"
32(%2, %%"
FF_
REG_a
")
\n\t
"
PREFETCH
"
32(%3, %%"
FF_
REG_a
")
\n\t
"
"movq
(%2, %%"
FF_
REG_a
"), %%mm0
\n\t
"
// U(0)
"movq %%mm0, %%mm2
\n\t
"
// U(0)
"movq
(%3, %%"
REG_a
"), %%mm1
\n\t
"
// V(0)
"movq
(%3, %%"
FF_
REG_a
"), %%mm1
\n\t
"
// V(0)
"punpcklbw %%mm1, %%mm0
\n\t
"
// UVUV UVUV(0)
"punpckhbw %%mm1, %%mm2
\n\t
"
// UVUV UVUV(8)
"movq
(%1, %%"
REG_a
",2), %%mm3
\n\t
"
// Y(0)
"movq
8(%1, %%"
REG_a
",2), %%mm5
\n\t
"
// Y(8)
"movq
(%1, %%"
FF_
REG_a
",2), %%mm3
\n\t
"
// Y(0)
"movq
8(%1, %%"
FF_
REG_a
",2), %%mm5
\n\t
"
// Y(8)
"movq %%mm3, %%mm4
\n\t
"
// Y(0)
"movq %%mm5, %%mm6
\n\t
"
// Y(8)
"punpcklbw %%mm0, %%mm3
\n\t
"
// YUYV YUYV(0)
...
...
@@ -1194,16 +1194,16 @@ static inline void RENAME(yuvPlanartoyuy2)(const uint8_t *ysrc, const uint8_t *u
"punpcklbw %%mm2, %%mm5
\n\t
"
// YUYV YUYV(8)
"punpckhbw %%mm2, %%mm6
\n\t
"
// YUYV YUYV(12)
MOVNTQ
" %%mm3, (%0, %%"
REG_a
", 4)
\n\t
"
MOVNTQ
" %%mm4, 8(%0, %%"
REG_a
", 4)
\n\t
"
MOVNTQ
" %%mm5, 16(%0, %%"
REG_a
", 4)
\n\t
"
MOVNTQ
" %%mm6, 24(%0, %%"
REG_a
", 4)
\n\t
"
MOVNTQ
" %%mm3, (%0, %%"
FF_
REG_a
", 4)
\n\t
"
MOVNTQ
" %%mm4, 8(%0, %%"
FF_
REG_a
", 4)
\n\t
"
MOVNTQ
" %%mm5, 16(%0, %%"
FF_
REG_a
", 4)
\n\t
"
MOVNTQ
" %%mm6, 24(%0, %%"
FF_
REG_a
", 4)
\n\t
"
"add $8, %%"
REG_a
"
\n\t
"
"cmp %4, %%"
REG_a
"
\n\t
"
"add $8, %%"
FF_REG_a
"
\n\t
"
"cmp %4, %%"
FF_REG_a
"
\n\t
"
" jb 1b
\n\t
"
::
"r"
(
dst
),
"r"
(
ysrc
),
"r"
(
usrc
),
"r"
(
vsrc
),
"g"
(
chromWidth
)
:
"%"
REG_a
:
"%"
FF_
REG_a
);
if
((
y
&
(
vertLumPerChroma
-
1
))
==
vertLumPerChroma
-
1
)
{
usrc
+=
chromStride
;
...
...
@@ -1238,20 +1238,20 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u
for
(
y
=
0
;
y
<
height
;
y
++
)
{
//FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
__asm__
volatile
(
"xor
%%"
REG_a
", %%"
REG_a
"
\n\t
"
"xor
%%"
FF_REG_a
", %%"
FF_REG_a
"
\n\t
"
".p2align 4
\n\t
"
"1:
\n\t
"
PREFETCH
"
32(%1, %%"
REG_a
", 2)
\n\t
"
PREFETCH
"
32(%2, %%"
REG_a
")
\n\t
"
PREFETCH
"
32(%3, %%"
REG_a
")
\n\t
"
"movq
(%2, %%"
REG_a
"), %%mm0
\n\t
"
// U(0)
PREFETCH
"
32(%1, %%"
FF_REG_a
", 2)
\n\t
"
PREFETCH
"
32(%2, %%"
FF_REG_a
")
\n\t
"
PREFETCH
"
32(%3, %%"
FF_REG_a
")
\n\t
"
"movq
(%2, %%"
FF_
REG_a
"), %%mm0
\n\t
"
// U(0)
"movq %%mm0, %%mm2
\n\t
"
// U(0)
"movq
(%3, %%"
REG_a
"), %%mm1
\n\t
"
// V(0)
"movq
(%3, %%"
FF_
REG_a
"), %%mm1
\n\t
"
// V(0)
"punpcklbw %%mm1, %%mm0
\n\t
"
// UVUV UVUV(0)
"punpckhbw %%mm1, %%mm2
\n\t
"
// UVUV UVUV(8)
"movq
(%1, %%"
REG_a
",2), %%mm3
\n\t
"
// Y(0)
"movq
8(%1, %%"
REG_a
",2), %%mm5
\n\t
"
// Y(8)
"movq
(%1, %%"
FF_
REG_a
",2), %%mm3
\n\t
"
// Y(0)
"movq
8(%1, %%"
FF_
REG_a
",2), %%mm5
\n\t
"
// Y(8)
"movq %%mm0, %%mm4
\n\t
"
// Y(0)
"movq %%mm2, %%mm6
\n\t
"
// Y(8)
"punpcklbw %%mm3, %%mm0
\n\t
"
// YUYV YUYV(0)
...
...
@@ -1259,16 +1259,16 @@ static inline void RENAME(yuvPlanartouyvy)(const uint8_t *ysrc, const uint8_t *u
"punpcklbw %%mm5, %%mm2
\n\t
"
// YUYV YUYV(8)
"punpckhbw %%mm5, %%mm6
\n\t
"
// YUYV YUYV(12)
MOVNTQ
" %%mm0, (%0, %%"
REG_a
", 4)
\n\t
"
MOVNTQ
" %%mm4, 8(%0, %%"
REG_a
", 4)
\n\t
"
MOVNTQ
" %%mm2, 16(%0, %%"
REG_a
", 4)
\n\t
"
MOVNTQ
" %%mm6, 24(%0, %%"
REG_a
", 4)
\n\t
"
MOVNTQ
" %%mm0, (%0, %%"
FF_
REG_a
", 4)
\n\t
"
MOVNTQ
" %%mm4, 8(%0, %%"
FF_
REG_a
", 4)
\n\t
"
MOVNTQ
" %%mm2, 16(%0, %%"
FF_
REG_a
", 4)
\n\t
"
MOVNTQ
" %%mm6, 24(%0, %%"
FF_
REG_a
", 4)
\n\t
"
"add $8, %%"
REG_a
"
\n\t
"
"cmp %4, %%"
REG_a
"
\n\t
"
"add $8, %%"
FF_REG_a
"
\n\t
"
"cmp %4, %%"
FF_REG_a
"
\n\t
"
" jb 1b
\n\t
"
::
"r"
(
dst
),
"r"
(
ysrc
),
"r"
(
usrc
),
"r"
(
vsrc
),
"g"
(
chromWidth
)
:
"%"
REG_a
:
"%"
FF_
REG_a
);
if
((
y
&
(
vertLumPerChroma
-
1
))
==
vertLumPerChroma
-
1
)
{
usrc
+=
chromStride
;
...
...
@@ -1326,14 +1326,14 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
const
x86_reg
chromWidth
=
width
>>
1
;
for
(
y
=
0
;
y
<
height
;
y
+=
2
)
{
__asm__
volatile
(
"xor
%%"
REG_a
", %%"
REG_a
"
\n\t
"
"xor
%%"
FF_REG_a
", %%"
FF_REG_a
"
\n\t
"
"pcmpeqw %%mm7, %%mm7
\n\t
"
"psrlw $8, %%mm7
\n\t
"
// FF,00,FF,00...
".p2align 4
\n\t
"
"1:
\n\t
"
PREFETCH
" 64(%0, %%"
REG_a
", 4)
\n\t
"
"movq
(%0, %%"
REG_a
", 4), %%mm0
\n\t
"
// YUYV YUYV(0)
"movq
8(%0, %%"
REG_a
", 4), %%mm1
\n\t
"
// YUYV YUYV(4)
PREFETCH
" 64(%0, %%"
FF_REG_a
", 4)
\n\t
"
"movq
(%0, %%"
FF_
REG_a
", 4), %%mm0
\n\t
"
// YUYV YUYV(0)
"movq
8(%0, %%"
FF_
REG_a
", 4), %%mm1
\n\t
"
// YUYV YUYV(4)
"movq %%mm0, %%mm2
\n\t
"
// YUYV YUYV(0)
"movq %%mm1, %%mm3
\n\t
"
// YUYV YUYV(4)
"psrlw $8, %%mm0
\n\t
"
// U0V0 U0V0(0)
...
...
@@ -1343,10 +1343,10 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
"packuswb %%mm1, %%mm0
\n\t
"
// UVUV UVUV(0)
"packuswb %%mm3, %%mm2
\n\t
"
// YYYY YYYY(0)
MOVNTQ
" %%mm2, (%1, %%"
REG_a
", 2)
\n\t
"
MOVNTQ
" %%mm2, (%1, %%"
FF_REG_a
", 2)
\n\t
"
"movq
16(%0, %%"
REG_a
", 4), %%mm1
\n\t
"
// YUYV YUYV(8)
"movq
24(%0, %%"
REG_a
", 4), %%mm2
\n\t
"
// YUYV YUYV(12)
"movq
16(%0, %%"
FF_
REG_a
", 4), %%mm1
\n\t
"
// YUYV YUYV(8)
"movq
24(%0, %%"
FF_
REG_a
", 4), %%mm2
\n\t
"
// YUYV YUYV(12)
"movq %%mm1, %%mm3
\n\t
"
// YUYV YUYV(8)
"movq %%mm2, %%mm4
\n\t
"
// YUYV YUYV(12)
"psrlw $8, %%mm1
\n\t
"
// U0V0 U0V0(8)
...
...
@@ -1356,7 +1356,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
"packuswb %%mm2, %%mm1
\n\t
"
// UVUV UVUV(8)
"packuswb %%mm4, %%mm3
\n\t
"
// YYYY YYYY(8)
MOVNTQ
" %%mm3, 8(%1, %%"
REG_a
", 2)
\n\t
"
MOVNTQ
" %%mm3, 8(%1, %%"
FF_
REG_a
", 2)
\n\t
"
"movq %%mm0, %%mm2
\n\t
"
// UVUV UVUV(0)
"movq %%mm1, %%mm3
\n\t
"
// UVUV UVUV(8)
...
...
@@ -1367,28 +1367,28 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
"packuswb %%mm1, %%mm0
\n\t
"
// VVVV VVVV(0)
"packuswb %%mm3, %%mm2
\n\t
"
// UUUU UUUU(0)
MOVNTQ
" %%mm0, (%3, %%"
REG_a
")
\n\t
"
MOVNTQ
" %%mm2, (%2, %%"
REG_a
")
\n\t
"
MOVNTQ
" %%mm0, (%3, %%"
FF_
REG_a
")
\n\t
"
MOVNTQ
" %%mm2, (%2, %%"
FF_
REG_a
")
\n\t
"
"add $8, %%"
REG_a
"
\n\t
"
"cmp %4, %%"
REG_a
"
\n\t
"
"add $8, %%"
FF_REG_a
"
\n\t
"
"cmp %4, %%"
FF_REG_a
"
\n\t
"
" jb 1b
\n\t
"
::
"r"
(
src
),
"r"
(
ydst
),
"r"
(
udst
),
"r"
(
vdst
),
"g"
(
chromWidth
)
:
"memory"
,
"%"
REG_a
:
"memory"
,
"%"
FF_
REG_a
);
ydst
+=
lumStride
;
src
+=
srcStride
;
__asm__
volatile
(
"xor
%%"
REG_a
", %%"
REG_a
"
\n\t
"
"xor
%%"
FF_REG_a
", %%"
FF_REG_a
"
\n\t
"
".p2align 4
\n\t
"
"1:
\n\t
"
PREFETCH
" 64(%0, %%"
REG_a
", 4)
\n\t
"
"movq
(%0, %%"
REG_a
", 4), %%mm0
\n\t
"
// YUYV YUYV(0)
"movq
8(%0, %%"
REG_a
", 4), %%mm1
\n\t
"
// YUYV YUYV(4)
"movq
16(%0, %%"
REG_a
", 4), %%mm2
\n\t
"
// YUYV YUYV(8)
"movq
24(%0, %%"
REG_a
", 4), %%mm3
\n\t
"
// YUYV YUYV(12)
PREFETCH
" 64(%0, %%"
FF_REG_a
", 4)
\n\t
"
"movq
(%0, %%"
FF_
REG_a
", 4), %%mm0
\n\t
"
// YUYV YUYV(0)
"movq
8(%0, %%"
FF_
REG_a
", 4), %%mm1
\n\t
"
// YUYV YUYV(4)
"movq
16(%0, %%"
FF_
REG_a
", 4), %%mm2
\n\t
"
// YUYV YUYV(8)
"movq
24(%0, %%"
FF_
REG_a
", 4), %%mm3
\n\t
"
// YUYV YUYV(12)
"pand %%mm7, %%mm0
\n\t
"
// Y0Y0 Y0Y0(0)
"pand %%mm7, %%mm1
\n\t
"
// Y0Y0 Y0Y0(4)
"pand %%mm7, %%mm2
\n\t
"
// Y0Y0 Y0Y0(8)
...
...
@@ -1396,15 +1396,15 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
"packuswb %%mm1, %%mm0
\n\t
"
// YYYY YYYY(0)
"packuswb %%mm3, %%mm2
\n\t
"
// YYYY YYYY(8)
MOVNTQ
" %%mm0, (%1, %%"
REG_a
", 2)
\n\t
"
MOVNTQ
" %%mm2, 8(%1, %%"
REG_a
", 2)
\n\t
"
MOVNTQ
" %%mm0, (%1, %%"
FF_
REG_a
", 2)
\n\t
"
MOVNTQ
" %%mm2, 8(%1, %%"
FF_
REG_a
", 2)
\n\t
"
"add $8, %%"
REG_a
"
\n\t
"
"cmp %4, %%"
REG_a
"
\n\t
"
"add $8, %%"
FF_REG_a
"
\n\t
"
"cmp %4, %%"
FF_REG_a
"
\n\t
"
" jb 1b
\n\t
"
::
"r"
(
src
),
"r"
(
ydst
),
"r"
(
udst
),
"r"
(
vdst
),
"g"
(
chromWidth
)
:
"memory"
,
"%"
REG_a
:
"memory"
,
"%"
FF_
REG_a
);
udst
+=
chromStride
;
vdst
+=
chromStride
;
...
...
@@ -1438,23 +1438,23 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWid
if
(
mmxSize
)
{
__asm__
volatile
(
"mov
%4, %%"
REG_a
"
\n\t
"
"mov
%4, %%"
FF_REG_a
"
\n\t
"
"movq "
MANGLE
(
mmx_ff
)
", %%mm0
\n\t
"
"movq
(%0, %%"
REG_a
"), %%mm4
\n\t
"
"movq
(%0, %%"
FF_
REG_a
"), %%mm4
\n\t
"
"movq %%mm4, %%mm2
\n\t
"
"psllq $8, %%mm4
\n\t
"
"pand %%mm0, %%mm2
\n\t
"
"por %%mm2, %%mm4
\n\t
"
"movq
(%1, %%"
REG_a
"), %%mm5
\n\t
"
"movq
(%1, %%"
FF_
REG_a
"), %%mm5
\n\t
"
"movq %%mm5, %%mm3
\n\t
"
"psllq $8, %%mm5
\n\t
"
"pand %%mm0, %%mm3
\n\t
"
"por %%mm3, %%mm5
\n\t
"
"1:
\n\t
"
"movq
(%0, %%"
REG_a
"), %%mm0
\n\t
"
"movq
(%1, %%"
REG_a
"), %%mm1
\n\t
"
"movq
1(%0, %%"
REG_a
"), %%mm2
\n\t
"
"movq
1(%1, %%"
REG_a
"), %%mm3
\n\t
"
"movq
(%0, %%"
FF_
REG_a
"), %%mm0
\n\t
"
"movq
(%1, %%"
FF_
REG_a
"), %%mm1
\n\t
"
"movq
1(%0, %%"
FF_
REG_a
"), %%mm2
\n\t
"
"movq
1(%1, %%"
FF_
REG_a
"), %%mm3
\n\t
"
PAVGB
" %%mm0, %%mm5
\n\t
"
PAVGB
" %%mm0, %%mm3
\n\t
"
PAVGB
" %%mm0, %%mm5
\n\t
"
...
...
@@ -1469,19 +1469,19 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWid
"punpckhbw %%mm3, %%mm7
\n\t
"
"punpcklbw %%mm2, %%mm4
\n\t
"
"punpckhbw %%mm2, %%mm6
\n\t
"
MOVNTQ
" %%mm5, (%2, %%"
REG_a
", 2)
\n\t
"
MOVNTQ
" %%mm7, 8(%2, %%"
REG_a
", 2)
\n\t
"
MOVNTQ
" %%mm4, (%3, %%"
REG_a
", 2)
\n\t
"
MOVNTQ
" %%mm6, 8(%3, %%"
REG_a
", 2)
\n\t
"
"add $8, %%"
REG_a
"
\n\t
"
"movq
-1(%0, %%"
REG_a
"), %%mm4
\n\t
"
"movq
-1(%1, %%"
REG_a
"), %%mm5
\n\t
"
MOVNTQ
" %%mm5, (%2, %%"
FF_
REG_a
", 2)
\n\t
"
MOVNTQ
" %%mm7, 8(%2, %%"
FF_
REG_a
", 2)
\n\t
"
MOVNTQ
" %%mm4, (%3, %%"
FF_
REG_a
", 2)
\n\t
"
MOVNTQ
" %%mm6, 8(%3, %%"
FF_
REG_a
", 2)
\n\t
"
"add $8, %%"
FF_
REG_a
"
\n\t
"
"movq
-1(%0, %%"
FF_
REG_a
"), %%mm4
\n\t
"
"movq
-1(%1, %%"
FF_
REG_a
"), %%mm5
\n\t
"
" js 1b
\n\t
"
::
"r"
(
src
+
mmxSize
),
"r"
(
src
+
srcStride
+
mmxSize
),
"r"
(
dst
+
mmxSize
*
2
),
"r"
(
dst
+
dstStride
+
mmxSize
*
2
),
"g"
(
-
mmxSize
)
NAMED_CONSTRAINTS_ADD
(
mmx_ff
)
:
"%"
REG_a
:
"%"
FF_
REG_a
);
}
else
{
mmxSize
=
1
;
...
...
@@ -1532,14 +1532,14 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
const
x86_reg
chromWidth
=
width
>>
1
;
for
(
y
=
0
;
y
<
height
;
y
+=
2
)
{
__asm__
volatile
(
"xor
%%"
REG_a
", %%"
REG_a
"
\n\t
"
"xor
%%"
FF_REG_a
", %%"
FF_REG_a
"
\n\t
"
"pcmpeqw %%mm7, %%mm7
\n\t
"
"psrlw $8, %%mm7
\n\t
"
// FF,00,FF,00...
".p2align 4
\n\t
"
"1:
\n\t
"
PREFETCH
" 64(%0, %%"
REG_a
", 4)
\n\t
"
"movq (%0, %%"
REG_a
", 4), %%mm0
\n\t
"
// UYVY UYVY(0)
"movq 8(%0, %%"
REG_a
", 4), %%mm1
\n\t
"
// UYVY UYVY(4)
PREFETCH
" 64(%0, %%"
FF_
REG_a
", 4)
\n\t
"
"movq (%0, %%"
FF_
REG_a
", 4), %%mm0
\n\t
"
// UYVY UYVY(0)
"movq 8(%0, %%"
FF_
REG_a
", 4), %%mm1
\n\t
"
// UYVY UYVY(4)
"movq %%mm0, %%mm2
\n\t
"
// UYVY UYVY(0)
"movq %%mm1, %%mm3
\n\t
"
// UYVY UYVY(4)
"pand %%mm7, %%mm0
\n\t
"
// U0V0 U0V0(0)
...
...
@@ -1549,10 +1549,10 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
"packuswb %%mm1, %%mm0
\n\t
"
// UVUV UVUV(0)
"packuswb %%mm3, %%mm2
\n\t
"
// YYYY YYYY(0)
MOVNTQ
" %%mm2, (%1, %%"
REG_a
", 2)
\n\t
"
MOVNTQ
" %%mm2, (%1, %%"
FF_
REG_a
", 2)
\n\t
"
"movq 16(%0, %%"
REG_a
", 4), %%mm1
\n\t
"
// UYVY UYVY(8)
"movq 24(%0, %%"
REG_a
", 4), %%mm2
\n\t
"
// UYVY UYVY(12)
"movq 16(%0, %%"
FF_
REG_a
", 4), %%mm1
\n\t
"
// UYVY UYVY(8)
"movq 24(%0, %%"
FF_
REG_a
", 4), %%mm2
\n\t
"
// UYVY UYVY(12)
"movq %%mm1, %%mm3
\n\t
"
// UYVY UYVY(8)
"movq %%mm2, %%mm4
\n\t
"
// UYVY UYVY(12)
"pand %%mm7, %%mm1
\n\t
"
// U0V0 U0V0(8)
...
...
@@ -1562,7 +1562,7 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
"packuswb %%mm2, %%mm1
\n\t
"
// UVUV UVUV(8)
"packuswb %%mm4, %%mm3
\n\t
"
// YYYY YYYY(8)
MOVNTQ
" %%mm3, 8(%1, %%"
REG_a
", 2)
\n\t
"
MOVNTQ
" %%mm3, 8(%1, %%"
FF_
REG_a
", 2)
\n\t
"
"movq %%mm0, %%mm2
\n\t
"
// UVUV UVUV(0)
"movq %%mm1, %%mm3
\n\t
"
// UVUV UVUV(8)
...
...
@@ -1573,28 +1573,28 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
"packuswb %%mm1, %%mm0
\n\t
"
// VVVV VVVV(0)
"packuswb %%mm3, %%mm2
\n\t
"
// UUUU UUUU(0)
MOVNTQ
" %%mm0, (%3, %%"
REG_a
")
\n\t
"
MOVNTQ
" %%mm2, (%2, %%"
REG_a
")
\n\t
"
MOVNTQ
" %%mm0, (%3, %%"
FF_
REG_a
")
\n\t
"
MOVNTQ
" %%mm2, (%2, %%"
FF_
REG_a
")
\n\t
"
"add $8, %%"
REG_a
"
\n\t
"
"cmp %4, %%"
REG_a
"
\n\t
"
"add $8, %%"
FF_REG_a
"
\n\t
"
"cmp %4, %%"
FF_REG_a
"
\n\t
"
" jb 1b
\n\t
"
::
"r"
(
src
),
"r"
(
ydst
),
"r"
(
udst
),
"r"
(
vdst
),
"g"
(
chromWidth
)
:
"memory"
,
"%"
REG_a
:
"memory"
,
"%"
FF_
REG_a
);
ydst
+=
lumStride
;
src
+=
srcStride
;
__asm__
volatile
(
"xor
%%"
REG_a
", %%"
REG_a
"
\n\t
"
"xor
%%"
FF_REG_a
", %%"
FF_REG_a
"
\n\t
"
".p2align 4
\n\t
"
"1:
\n\t
"
PREFETCH
" 64(%0, %%"
REG_a
", 4)
\n\t
"
"movq (%0, %%"
REG_a
", 4), %%mm0
\n\t
"
// YUYV YUYV(0)
"movq 8(%0, %%"
REG_a
", 4), %%mm1
\n\t
"
// YUYV YUYV(4)
"movq 16(%0, %%"
REG_a
", 4), %%mm2
\n\t
"
// YUYV YUYV(8)
"movq 24(%0, %%"
REG_a
", 4), %%mm3
\n\t
"
// YUYV YUYV(12)
PREFETCH
" 64(%0, %%"
FF_REG_a
", 4)
\n\t
"
"movq (%0, %%"
FF_REG_a
", 4), %%mm0
\n\t
"
// YUYV YUYV(0)
"movq 8(%0, %%"
FF_REG_a
", 4), %%mm1
\n\t
"
// YUYV YUYV(4)
"movq 16(%0, %%"
FF_REG_a
", 4), %%mm2
\n\t
"
// YUYV YUYV(8)
"movq 24(%0, %%"
FF_REG_a
", 4), %%mm3
\n\t
"
// YUYV YUYV(12)
"psrlw $8, %%mm0
\n\t
"
// Y0Y0 Y0Y0(0)
"psrlw $8, %%mm1
\n\t
"
// Y0Y0 Y0Y0(4)
"psrlw $8, %%mm2
\n\t
"
// Y0Y0 Y0Y0(8)
...
...
@@ -1602,15 +1602,15 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t
"packuswb %%mm1, %%mm0
\n\t
"
// YYYY YYYY(0)
"packuswb %%mm3, %%mm2
\n\t
"
// YYYY YYYY(8)
MOVNTQ
" %%mm0, (%1, %%"
REG_a
", 2)
\n\t
"
MOVNTQ
" %%mm2, 8(%1, %%"
REG_a
", 2)
\n\t
"
MOVNTQ
" %%mm0, (%1, %%"
FF_
REG_a
", 2)
\n\t
"
MOVNTQ
" %%mm2, 8(%1, %%"
FF_
REG_a
", 2)
\n\t
"
"add $8, %%"
REG_a
"
\n\t
"
"cmp %4, %%"
REG_a
"
\n\t
"
"add $8, %%"
FF_REG_a
"
\n\t
"
"cmp %4, %%"
FF_REG_a
"
\n\t
"
" jb 1b
\n\t
"
::
"r"
(
src
),
"r"
(
ydst
),
"r"
(
udst
),
"r"
(
vdst
),
"g"
(
chromWidth
)
:
"memory"
,
"%"
REG_a
:
"memory"
,
"%"
FF_
REG_a
);
udst
+=
chromStride
;
vdst
+=
chromStride
;
...
...
@@ -1655,20 +1655,20 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
int
i
;
for
(
i
=
0
;
i
<
2
;
i
++
)
{
__asm__
volatile
(
"mov %2, %%"
REG_a
"
\n\t
"
"mov %2, %%"
FF_REG_a
"
\n\t
"
"movq "
BGR2Y_IDX
"(%3), %%mm6
\n\t
"
"movq "
MANGLE
(
ff_w1111
)
", %%mm5
\n\t
"
"pxor %%mm7, %%mm7
\n\t
"
"lea (%%"
REG_a
", %%"
REG_a
", 2), %%"
REG_d
"
\n\t
"
"lea (%%"
FF_REG_a
", %%"
FF_REG_a
", 2), %%"
FF_REG_d
"
\n\t
"
".p2align 4
\n\t
"
"1:
\n\t
"
PREFETCH
"
64(%0, %%"
REG_d
")
\n\t
"
"movd
(%0, %%"
REG_d
"), %%mm0
\n\t
"
"movd
3(%0, %%"
REG_d
"), %%mm1
\n\t
"
PREFETCH
"
64(%0, %%"
FF_
REG_d
")
\n\t
"
"movd
(%0, %%"
FF_
REG_d
"), %%mm0
\n\t
"
"movd
3(%0, %%"
FF_
REG_d
"), %%mm1
\n\t
"
"punpcklbw %%mm7, %%mm0
\n\t
"
"punpcklbw %%mm7, %%mm1
\n\t
"
"movd
6(%0, %%"
REG_d
"), %%mm2
\n\t
"
"movd
9(%0, %%"
REG_d
"), %%mm3
\n\t
"
"movd
6(%0, %%"
FF_
REG_d
"), %%mm2
\n\t
"
"movd
9(%0, %%"
FF_
REG_d
"), %%mm3
\n\t
"
"punpcklbw %%mm7, %%mm2
\n\t
"
"punpcklbw %%mm7, %%mm3
\n\t
"
"pmaddwd %%mm6, %%mm0
\n\t
"
...
...
@@ -1686,12 +1686,12 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"packssdw %%mm2, %%mm0
\n\t
"
"psraw $7, %%mm0
\n\t
"
"movd
12(%0, %%"
REG_d
"), %%mm4
\n\t
"
"movd
15(%0, %%"
REG_d
"), %%mm1
\n\t
"
"movd
12(%0, %%"
FF_
REG_d
"), %%mm4
\n\t
"
"movd
15(%0, %%"
FF_
REG_d
"), %%mm1
\n\t
"
"punpcklbw %%mm7, %%mm4
\n\t
"
"punpcklbw %%mm7, %%mm1
\n\t
"
"movd
18(%0, %%"
REG_d
"), %%mm2
\n\t
"
"movd
21(%0, %%"
REG_d
"), %%mm3
\n\t
"
"movd
18(%0, %%"
FF_
REG_d
"), %%mm2
\n\t
"
"movd
21(%0, %%"
FF_
REG_d
"), %%mm3
\n\t
"
"punpcklbw %%mm7, %%mm2
\n\t
"
"punpcklbw %%mm7, %%mm3
\n\t
"
"pmaddwd %%mm6, %%mm4
\n\t
"
...
...
@@ -1706,40 +1706,40 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"packssdw %%mm3, %%mm2
\n\t
"
"pmaddwd %%mm5, %%mm4
\n\t
"
"pmaddwd %%mm5, %%mm2
\n\t
"
"add $24, %%"
REG_d
"
\n\t
"
"add $24, %%"
FF_REG_d
"
\n\t
"
"packssdw %%mm2, %%mm4
\n\t
"
"psraw $7, %%mm4
\n\t
"
"packuswb %%mm4, %%mm0
\n\t
"
"paddusb "
MANGLE
(
ff_bgr2YOffset
)
", %%mm0
\n\t
"
MOVNTQ
" %%mm0, (%1, %%"
REG_a
")
\n\t
"
"add $8, %%"
REG_a
"
\n\t
"
MOVNTQ
" %%mm0, (%1, %%"
FF_
REG_a
")
\n\t
"
"add $8, %%"
FF_
REG_a
"
\n\t
"
" js 1b
\n\t
"
:
:
"r"
(
src
+
width
*
3
),
"r"
(
ydst
+
width
),
"g"
((
x86_reg
)
-
width
),
"r"
(
rgb2yuv
)
NAMED_CONSTRAINTS_ADD
(
ff_w1111
,
ff_bgr2YOffset
)
:
"%"
REG_a
,
"%"
REG_d
:
"%"
FF_REG_a
,
"%"
FF_
REG_d
);
ydst
+=
lumStride
;
src
+=
srcStride
;
}
src
-=
srcStride
*
2
;
__asm__
volatile
(
"mov %4, %%"
REG_a
"
\n\t
"
"mov %4, %%"
FF_REG_a
"
\n\t
"
"movq "
MANGLE
(
ff_w1111
)
", %%mm5
\n\t
"
"movq "
BGR2U_IDX
"(%5), %%mm6
\n\t
"
"pxor %%mm7, %%mm7
\n\t
"
"lea (%%"
REG_a
", %%"
REG_a
", 2), %%"
REG_d
"
\n\t
"
"add
%%"
REG_d
", %%"
REG_d
"
\n\t
"
"lea (%%"
FF_REG_a
", %%"
FF_REG_a
", 2), %%"
FF_REG_d
"
\n\t
"
"add
%%"
FF_REG_d
", %%"
FF_REG_d
"
\n\t
"
".p2align 4
\n\t
"
"1:
\n\t
"
PREFETCH
"
64(%0, %%"
REG_d
")
\n\t
"
PREFETCH
"
64(%1, %%"
REG_d
")
\n\t
"
PREFETCH
"
64(%0, %%"
FF_
REG_d
")
\n\t
"
PREFETCH
"
64(%1, %%"
FF_
REG_d
")
\n\t
"
#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
"movq
(%0, %%"
REG_d
"), %%mm0
\n\t
"
"movq
(%1, %%"
REG_d
"), %%mm1
\n\t
"
"movq
6(%0, %%"
REG_d
"), %%mm2
\n\t
"
"movq
6(%1, %%"
REG_d
"), %%mm3
\n\t
"
"movq
(%0, %%"
FF_
REG_d
"), %%mm0
\n\t
"
"movq
(%1, %%"
FF_
REG_d
"), %%mm1
\n\t
"
"movq
6(%0, %%"
FF_
REG_d
"), %%mm2
\n\t
"
"movq
6(%1, %%"
FF_
REG_d
"), %%mm3
\n\t
"
PAVGB
" %%mm1, %%mm0
\n\t
"
PAVGB
" %%mm3, %%mm2
\n\t
"
"movq %%mm0, %%mm1
\n\t
"
...
...
@@ -1751,10 +1751,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"punpcklbw %%mm7, %%mm0
\n\t
"
"punpcklbw %%mm7, %%mm2
\n\t
"
#else
"movd
(%0, %%"
REG_d
"), %%mm0
\n\t
"
"movd
(%1, %%"
REG_d
"), %%mm1
\n\t
"
"movd
3(%0, %%"
REG_d
"), %%mm2
\n\t
"
"movd
3(%1, %%"
REG_d
"), %%mm3
\n\t
"
"movd
(%0, %%"
FF_
REG_d
"), %%mm0
\n\t
"
"movd
(%1, %%"
FF_
REG_d
"), %%mm1
\n\t
"
"movd
3(%0, %%"
FF_
REG_d
"), %%mm2
\n\t
"
"movd
3(%1, %%"
FF_
REG_d
"), %%mm3
\n\t
"
"punpcklbw %%mm7, %%mm0
\n\t
"
"punpcklbw %%mm7, %%mm1
\n\t
"
"punpcklbw %%mm7, %%mm2
\n\t
"
...
...
@@ -1762,10 +1762,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"paddw %%mm1, %%mm0
\n\t
"
"paddw %%mm3, %%mm2
\n\t
"
"paddw %%mm2, %%mm0
\n\t
"
"movd
6(%0, %%"
REG_d
"), %%mm4
\n\t
"
"movd
6(%1, %%"
REG_d
"), %%mm1
\n\t
"
"movd
9(%0, %%"
REG_d
"), %%mm2
\n\t
"
"movd
9(%1, %%"
REG_d
"), %%mm3
\n\t
"
"movd
6(%0, %%"
FF_
REG_d
"), %%mm4
\n\t
"
"movd
6(%1, %%"
FF_
REG_d
"), %%mm1
\n\t
"
"movd
9(%0, %%"
FF_
REG_d
"), %%mm2
\n\t
"
"movd
9(%1, %%"
FF_
REG_d
"), %%mm3
\n\t
"
"punpcklbw %%mm7, %%mm4
\n\t
"
"punpcklbw %%mm7, %%mm1
\n\t
"
"punpcklbw %%mm7, %%mm2
\n\t
"
...
...
@@ -1795,10 +1795,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"psraw $7, %%mm0
\n\t
"
#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW
"movq
12(%0, %%"
REG_d
"), %%mm4
\n\t
"
"movq
12(%1, %%"
REG_d
"), %%mm1
\n\t
"
"movq
18(%0, %%"
REG_d
"), %%mm2
\n\t
"
"movq
18(%1, %%"
REG_d
"), %%mm3
\n\t
"
"movq
12(%0, %%"
FF_
REG_d
"), %%mm4
\n\t
"
"movq
12(%1, %%"
FF_
REG_d
"), %%mm1
\n\t
"
"movq
18(%0, %%"
FF_
REG_d
"), %%mm2
\n\t
"
"movq
18(%1, %%"
FF_
REG_d
"), %%mm3
\n\t
"
PAVGB
" %%mm1, %%mm4
\n\t
"
PAVGB
" %%mm3, %%mm2
\n\t
"
"movq %%mm4, %%mm1
\n\t
"
...
...
@@ -1810,10 +1810,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"punpcklbw %%mm7, %%mm4
\n\t
"
"punpcklbw %%mm7, %%mm2
\n\t
"
#else
"movd
12(%0, %%"
REG_d
"), %%mm4
\n\t
"
"movd
12(%1, %%"
REG_d
"), %%mm1
\n\t
"
"movd
15(%0, %%"
REG_d
"), %%mm2
\n\t
"
"movd
15(%1, %%"
REG_d
"), %%mm3
\n\t
"
"movd
12(%0, %%"
FF_
REG_d
"), %%mm4
\n\t
"
"movd
12(%1, %%"
FF_
REG_d
"), %%mm1
\n\t
"
"movd
15(%0, %%"
FF_
REG_d
"), %%mm2
\n\t
"
"movd
15(%1, %%"
FF_
REG_d
"), %%mm3
\n\t
"
"punpcklbw %%mm7, %%mm4
\n\t
"
"punpcklbw %%mm7, %%mm1
\n\t
"
"punpcklbw %%mm7, %%mm2
\n\t
"
...
...
@@ -1821,10 +1821,10 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"paddw %%mm1, %%mm4
\n\t
"
"paddw %%mm3, %%mm2
\n\t
"
"paddw %%mm2, %%mm4
\n\t
"
"movd
18(%0, %%"
REG_d
"), %%mm5
\n\t
"
"movd
18(%1, %%"
REG_d
"), %%mm1
\n\t
"
"movd
21(%0, %%"
REG_d
"), %%mm2
\n\t
"
"movd
21(%1, %%"
REG_d
"), %%mm3
\n\t
"
"movd
18(%0, %%"
FF_
REG_d
"), %%mm5
\n\t
"
"movd
18(%1, %%"
FF_
REG_d
"), %%mm1
\n\t
"
"movd
21(%0, %%"
FF_
REG_d
"), %%mm2
\n\t
"
"movd
21(%1, %%"
FF_
REG_d
"), %%mm3
\n\t
"
"punpcklbw %%mm7, %%mm5
\n\t
"
"punpcklbw %%mm7, %%mm1
\n\t
"
"punpcklbw %%mm7, %%mm2
\n\t
"
...
...
@@ -1851,7 +1851,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"packssdw %%mm3, %%mm1
\n\t
"
"pmaddwd %%mm5, %%mm4
\n\t
"
"pmaddwd %%mm5, %%mm1
\n\t
"
"add $24, %%"
REG_d
"
\n\t
"
"add $24, %%"
FF_REG_d
"
\n\t
"
"packssdw %%mm1, %%mm4
\n\t
"
// V3 V2 U3 U2
"psraw $7, %%mm4
\n\t
"
...
...
@@ -1860,14 +1860,14 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_
"punpckhdq %%mm4, %%mm1
\n\t
"
"packsswb %%mm1, %%mm0
\n\t
"
"paddb "
MANGLE
(
ff_bgr2UVOffset
)
", %%mm0
\n\t
"
"movd %%mm0, (%2, %%"
REG_a
")
\n\t
"
"movd %%mm0, (%2, %%"
FF_
REG_a
")
\n\t
"
"punpckhdq %%mm0, %%mm0
\n\t
"
"movd %%mm0, (%3, %%"
REG_a
")
\n\t
"
"add $4, %%"
REG_a
"
\n\t
"
"movd %%mm0, (%3, %%"
FF_
REG_a
")
\n\t
"
"add $4, %%"
FF_
REG_a
"
\n\t
"
" js 1b
\n\t
"
:
:
"r"
(
src
+
chromWidth
*
6
),
"r"
(
src
+
srcStride
+
chromWidth
*
6
),
"r"
(
udst
+
chromWidth
),
"r"
(
vdst
+
chromWidth
),
"g"
(
-
chromWidth
),
"r"
(
rgb2yuv
)
NAMED_CONSTRAINTS_ADD
(
ff_w1111
,
ff_bgr2UVOffset
)
:
"%"
REG_a
,
"%"
REG_d
:
"%"
FF_REG_a
,
"%"
FF_
REG_d
);
udst
+=
chromStride
;
...
...
@@ -1898,49 +1898,49 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui
#if COMPILE_TEMPLATE_SSE2
if
(
!
((((
intptr_t
)
src1
)
|
((
intptr_t
)
src2
)
|
((
intptr_t
)
dest
))
&
15
))
{
__asm__
(
"xor %%"
REG_a
", %%"
REG_a
"
\n\t
"
"xor %%"
FF_REG_a
", %%"
FF_
REG_a
"
\n\t
"
"1:
\n\t
"
PREFETCH
" 64(%1, %%"
REG_a
")
\n\t
"
PREFETCH
" 64(%2, %%"
REG_a
")
\n\t
"
"movdqa
(%1, %%"
REG_a
"), %%xmm0
\n\t
"
"movdqa
(%1, %%"
REG_a
"), %%xmm1
\n\t
"
"movdqa
(%2, %%"
REG_a
"), %%xmm2
\n\t
"
PREFETCH
" 64(%1, %%"
FF_REG_a
")
\n\t
"
PREFETCH
" 64(%2, %%"
FF_REG_a
")
\n\t
"
"movdqa
(%1, %%"
FF_
REG_a
"), %%xmm0
\n\t
"
"movdqa
(%1, %%"
FF_
REG_a
"), %%xmm1
\n\t
"
"movdqa
(%2, %%"
FF_
REG_a
"), %%xmm2
\n\t
"
"punpcklbw %%xmm2, %%xmm0
\n\t
"
"punpckhbw %%xmm2, %%xmm1
\n\t
"
"movntdq %%xmm0, (%0, %%"
REG_a
", 2)
\n\t
"
"movntdq %%xmm1, 16(%0, %%"
REG_a
", 2)
\n\t
"
"add $16, %%"
REG_a
"
\n\t
"
"cmp %3, %%"
REG_a
"
\n\t
"
"movntdq %%xmm0, (%0, %%"
FF_REG_a
", 2)
\n\t
"
"movntdq %%xmm1, 16(%0, %%"
FF_REG_a
", 2)
\n\t
"
"add $16, %%"
FF_REG_a
"
\n\t
"
"cmp %3, %%"
FF_REG_a
"
\n\t
"
" jb 1b
\n\t
"
::
"r"
(
dest
),
"r"
(
src1
),
"r"
(
src2
),
"r"
((
x86_reg
)
width
-
15
)
:
"memory"
,
XMM_CLOBBERS
(
"xmm0"
,
"xmm1"
,
"xmm2"
,)
"%"
REG_a
:
"memory"
,
XMM_CLOBBERS
(
"xmm0"
,
"xmm1"
,
"xmm2"
,)
"%"
FF_
REG_a
);
}
else
#endif
__asm__
(
"xor %%"
REG_a
", %%"
REG_a
"
\n\t
"
"xor %%"
FF_REG_a
", %%"
FF_REG_a
"
\n\t
"
"1:
\n\t
"
PREFETCH
" 64(%1, %%"
REG_a
")
\n\t
"
PREFETCH
" 64(%2, %%"
REG_a
")
\n\t
"
"movq
(%1, %%"
REG_a
"), %%mm0
\n\t
"
"movq
8(%1, %%"
REG_a
"), %%mm2
\n\t
"
PREFETCH
" 64(%1, %%"
FF_REG_a
")
\n\t
"
PREFETCH
" 64(%2, %%"
FF_REG_a
")
\n\t
"
"movq
(%1, %%"
FF_
REG_a
"), %%mm0
\n\t
"
"movq
8(%1, %%"
FF_
REG_a
"), %%mm2
\n\t
"
"movq %%mm0, %%mm1
\n\t
"
"movq %%mm2, %%mm3
\n\t
"
"movq
(%2, %%"
REG_a
"), %%mm4
\n\t
"
"movq
8(%2, %%"
REG_a
"), %%mm5
\n\t
"
"movq
(%2, %%"
FF_
REG_a
"), %%mm4
\n\t
"
"movq
8(%2, %%"
FF_
REG_a
"), %%mm5
\n\t
"
"punpcklbw %%mm4, %%mm0
\n\t
"
"punpckhbw %%mm4, %%mm1
\n\t
"
"punpcklbw %%mm5, %%mm2
\n\t
"
"punpckhbw %%mm5, %%mm3
\n\t
"
MOVNTQ
" %%mm0, (%0, %%"
REG_a
", 2)
\n\t
"
MOVNTQ
" %%mm1, 8(%0, %%"
REG_a
", 2)
\n\t
"
MOVNTQ
" %%mm2, 16(%0, %%"
REG_a
", 2)
\n\t
"
MOVNTQ
" %%mm3, 24(%0, %%"
REG_a
", 2)
\n\t
"
"add $16, %%"
REG_a
"
\n\t
"
"cmp %3, %%"
REG_a
"
\n\t
"
MOVNTQ
" %%mm0, (%0, %%"
FF_REG_a
", 2)
\n\t
"
MOVNTQ
" %%mm1, 8(%0, %%"
FF_REG_a
", 2)
\n\t
"
MOVNTQ
" %%mm2, 16(%0, %%"
FF_REG_a
", 2)
\n\t
"
MOVNTQ
" %%mm3, 24(%0, %%"
FF_REG_a
", 2)
\n\t
"
"add $16, %%"
FF_REG_a
"
\n\t
"
"cmp %3, %%"
FF_REG_a
"
\n\t
"
" jb 1b
\n\t
"
::
"r"
(
dest
),
"r"
(
src1
),
"r"
(
src2
),
"r"
((
x86_reg
)
width
-
15
)
:
"memory"
,
"%"
REG_a
:
"memory"
,
"%"
FF_
REG_a
);
}
...
...
libswscale/x86/swscale.c
View file @
9eb3da2f
...
...
@@ -220,16 +220,16 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
"movdqa %%xmm3, %%xmm4 \n\t" \
"movdqa %%xmm3, %%xmm7 \n\t" \
"movl %3, %%ecx \n\t" \
"mov %0, %%"
REG_d"
\n\t"\
"mov (%%"
REG_d"), %%"REG_S"
\n\t"\
"mov %0, %%"
FF_REG_d"
\n\t"\
"mov (%%"
FF_REG_d"), %%"FF_REG_S"
\n\t"\
".p2align 4 \n\t"
/* FIXME Unroll? */
\
"1: \n\t"\
"movddup 8(%%"
REG_d"), %%xmm0
\n\t"
/* filterCoeff */
\
"movdqa (%%"
REG_S", %%"REG_c", 2), %%xmm2
\n\t"
/* srcData */
\
"movdqa 16(%%"
REG_S", %%"REG_c", 2), %%xmm5
\n\t"
/* srcData */
\
"add $16, %%"
REG_d"
\n\t"\
"mov (%%"
REG_d"), %%"REG_S"
\n\t"\
"test %%"
REG_S", %%"REG_S"
\n\t"\
"movddup 8(%%"
FF_REG_d"), %%xmm0
\n\t"
/* filterCoeff */
\
"movdqa (%%"
FF_REG_S", %%"FF_REG_c", 2), %%xmm2
\n\t"
/* srcData */
\
"movdqa 16(%%"
FF_REG_S", %%"FF_REG_c", 2), %%xmm5
\n\t"
/* srcData */
\
"add $16, %%"
FF_REG_d"
\n\t"\
"mov (%%"
FF_REG_d"), %%"FF_REG_S"
\n\t"\
"test %%"
FF_REG_S", %%"FF_REG_S"
\n\t"\
"pmulhw %%xmm0, %%xmm2 \n\t"\
"pmulhw %%xmm0, %%xmm5 \n\t"\
"paddw %%xmm2, %%xmm3 \n\t"\
...
...
@@ -238,13 +238,13 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
"psraw $3, %%xmm3 \n\t"\
"psraw $3, %%xmm4 \n\t"\
"packuswb %%xmm4, %%xmm3 \n\t"\
"movntdq %%xmm3, (%1, %%"
REG_c")
\n\t"\
"add $16, %%"
REG_c"
\n\t"\
"cmp %2, %%"
REG_c"
\n\t"\
"movntdq %%xmm3, (%1, %%"
FF_REG_c")
\n\t"\
"add $16, %%"
FF_REG_c"
\n\t"\
"cmp %2, %%"
FF_REG_c"
\n\t"\
"movdqa %%xmm7, %%xmm3 \n\t" \
"movdqa %%xmm7, %%xmm4 \n\t" \
"mov %0, %%"
REG_d"
\n\t"\
"mov (%%"
REG_d"), %%"REG_S"
\n\t"\
"mov %0, %%"
FF_REG_d"
\n\t"\
"mov (%%"
FF_REG_d"), %%"FF_REG_S"
\n\t"\
"jb 1b \n\t"
if
(
offset
)
{
...
...
@@ -259,7 +259,7 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
"r"
(
dest
-
offset
),
"g"
((
x86_reg
)(
dstW
+
offset
)),
"m"
(
offset
),
"m"
(
filterSize
),
"m"
(((
uint64_t
*
)
dither
)[
0
])
:
XMM_CLOBBERS
(
"%xmm0"
,
"%xmm1"
,
"%xmm2"
,
"%xmm3"
,
"%xmm4"
,
"%xmm5"
,
"%xmm7"
,)
"%"
REG_d
,
"%"
REG_S
,
"%"
REG_c
"%"
FF_REG_d
,
"%"
FF_REG_S
,
"%"
FF_
REG_c
);
}
else
{
__asm__
volatile
(
...
...
@@ -269,7 +269,7 @@ static void yuv2yuvX_sse3(const int16_t *filter, int filterSize,
"r"
(
dest
-
offset
),
"g"
((
x86_reg
)(
dstW
+
offset
)),
"m"
(
offset
),
"m"
(
filterSize
),
"m"
(((
uint64_t
*
)
dither
)[
0
])
:
XMM_CLOBBERS
(
"%xmm0"
,
"%xmm1"
,
"%xmm2"
,
"%xmm3"
,
"%xmm4"
,
"%xmm5"
,
"%xmm7"
,)
"%"
REG_d
,
"%"
REG_S
,
"%"
REG_c
"%"
FF_REG_d
,
"%"
FF_REG_S
,
"%"
FF_
REG_c
);
}
}
...
...
libswscale/x86/swscale_template.c
View file @
9eb3da2f
...
...
@@ -88,16 +88,16 @@ static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize,
"movq %%mm3, %%mm6
\n\t
"
"movq %%mm4, %%mm7
\n\t
"
"movl %3, %%ecx
\n\t
"
"mov %0, %%"
REG_d
"
\n\t
"
\
"mov (%%"
REG_d
"), %%"
REG_S
"
\n\t
"
\
"mov %0, %%"
FF_REG_d
"
\n\t
"
\
"mov (%%"
FF_REG_d
"), %%"
FF_REG_S
"
\n\t
"
\
".p2align 4
\n\t
"
/* FIXME Unroll? */
\
"1:
\n\t
"
\
"movq 8(%%"
REG_d
"), %%mm0
\n\t
"
/* filterCoeff */
\
"movq (%%"
REG_S
", %%"
REG_c
", 2), %%mm2
\n\t
"
/* srcData */
\
"movq 8(%%"
REG_S
", %%"
REG_c
", 2), %%mm5
\n\t
"
/* srcData */
\
"add $16, %%"
REG_d
"
\n\t
"
\
"mov (%%"
REG_d
"), %%"
REG_S
"
\n\t
"
\
"test %%"
REG_S
", %%"
REG_S
"
\n\t
"
\
"movq 8(%%"
FF_REG_d
"), %%mm0
\n\t
"
/* filterCoeff */
\
"movq (%%"
FF_REG_S
", %%"
FF_REG_c
", 2), %%mm2
\n\t
"
/* srcData */
\
"movq 8(%%"
FF_REG_S
", %%"
FF_REG_c
", 2), %%mm5
\n\t
"
/* srcData */
\
"add $16, %%"
FF_REG_d
"
\n\t
"
\
"mov (%%"
FF_REG_d
"), %%"
FF_REG_S
"
\n\t
"
\
"test %%"
FF_REG_S
", %%"
FF_REG_S
"
\n\t
"
\
"pmulhw %%mm0, %%mm2
\n\t
"
\
"pmulhw %%mm0, %%mm5
\n\t
"
\
"paddw %%mm2, %%mm3
\n\t
"
\
...
...
@@ -106,62 +106,62 @@ static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize,
"psraw $3, %%mm3
\n\t
"
\
"psraw $3, %%mm4
\n\t
"
\
"packuswb %%mm4, %%mm3
\n\t
"
MOVNTQ2
" %%mm3, (%1, %%"
REG_c
")
\n\t
"
"add $8, %%"
REG_c
"
\n\t
"
\
"cmp %2, %%"
REG_c
"
\n\t
"
\
MOVNTQ2
" %%mm3, (%1, %%"
FF_
REG_c
")
\n\t
"
"add $8, %%"
FF_REG_c
"
\n\t
"
\
"cmp %2, %%"
FF_REG_c
"
\n\t
"
\
"movq %%mm6, %%mm3
\n\t
"
"movq %%mm7, %%mm4
\n\t
"
"mov %0, %%"
REG_d
"
\n\t
"
\
"mov (%%"
REG_d
"), %%"
REG_S
"
\n\t
"
\
"mov %0, %%"
FF_REG_d
"
\n\t
"
\
"mov (%%"
FF_REG_d
"), %%"
FF_
REG_S
"
\n\t
"
\
"jb 1b
\n\t
"
\
::
"g"
(
filter
),
"r"
(
dest
-
offset
),
"g"
((
x86_reg
)(
dstW
+
offset
)),
"m"
(
offset
)
:
"%"
REG_d
,
"%"
REG_S
,
"%"
REG_c
:
"%"
FF_REG_d
,
"%"
FF_REG_S
,
"%"
FF_
REG_c
);
}
#define YSCALEYUV2PACKEDX_UV \
__asm__ volatile(\
"xor
%%"REG_a", %%"REG_a"
\n\t"\
"xor
%%"FF_REG_a", %%"FF_REG_a"
\n\t"\
".p2align 4 \n\t"\
"nop \n\t"\
"1: \n\t"\
"lea "CHR_MMX_FILTER_OFFSET"(%0), %%"
REG_d"
\n\t"\
"mov
(%%"REG_d"), %%"REG_S"
\n\t"\
"lea "CHR_MMX_FILTER_OFFSET"(%0), %%"
FF_REG_d"
\n\t"\
"mov
(%%"FF_REG_d"), %%"FF_REG_S"
\n\t"\
"movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
"movq %%mm3, %%mm4 \n\t"\
".p2align 4 \n\t"\
"2: \n\t"\
"movq
8(%%"
REG_d"), %%mm0 \n\t"
/* filterCoeff */
\
"movq
(%%"REG_S", %%"REG_a"), %%mm2
\n\t"
/* UsrcData */
\
"add %6, %%"
REG_S"
\n\t" \
"movq
(%%"REG_S", %%"REG_a"), %%mm5
\n\t"
/* VsrcData */
\
"add $16, %%"
REG_d"
\n\t"\
"mov
(%%"REG_d"), %%"REG_S"
\n\t"\
"movq
8(%%"FF_
REG_d"), %%mm0 \n\t"
/* filterCoeff */
\
"movq
(%%"FF_REG_S", %%"FF_REG_a"), %%mm2
\n\t"
/* UsrcData */
\
"add %6, %%"
FF_REG_S"
\n\t" \
"movq
(%%"FF_REG_S", %%"FF_REG_a"), %%mm5
\n\t"
/* VsrcData */
\
"add $16, %%"
FF_REG_d"
\n\t"\
"mov
(%%"FF_REG_d"), %%"FF_REG_S"
\n\t"\
"pmulhw %%mm0, %%mm2 \n\t"\
"pmulhw %%mm0, %%mm5 \n\t"\
"paddw %%mm2, %%mm3 \n\t"\
"paddw %%mm5, %%mm4 \n\t"\
"test
%%"REG_S", %%"REG_S"
\n\t"\
"test
%%"FF_REG_S", %%"FF_REG_S"
\n\t"\
" jnz 2b \n\t"\
#define YSCALEYUV2PACKEDX_YA(offset,coeff,src1,src2,dst1,dst2) \
"lea "offset"(%0), %%"
REG_d"
\n\t"\
"mov
(%%"REG_d"), %%"REG_S"
\n\t"\
"lea "offset"(%0), %%"
FF_REG_d"
\n\t"\
"mov
(%%"FF_REG_d"), %%"FF_REG_S"
\n\t"\
"movq "VROUNDER_OFFSET"(%0), "#dst1" \n\t"\
"movq "#dst1", "#dst2" \n\t"\
".p2align 4 \n\t"\
"2: \n\t"\
"movq
8(%%"
REG_d"), "#coeff" \n\t"
/* filterCoeff */
\
"movq (%%"
REG_S", %%"REG_a", 2), "#src1"
\n\t"
/* Y1srcData */
\
"movq 8(%%"
REG_S", %%"REG_a", 2), "#src2"
\n\t"
/* Y2srcData */
\
"add $16, %%"
REG_d"
\n\t"\
"mov
(%%"REG_d"), %%"REG_S"
\n\t"\
"movq
8(%%"FF_
REG_d"), "#coeff" \n\t"
/* filterCoeff */
\
"movq (%%"
FF_REG_S", %%"FF_REG_a", 2), "#src1"
\n\t"
/* Y1srcData */
\
"movq 8(%%"
FF_REG_S", %%"FF_REG_a", 2), "#src2"
\n\t"
/* Y2srcData */
\
"add $16, %%"
FF_REG_d"
\n\t"\
"mov
(%%"FF_REG_d"), %%"FF_REG_S"
\n\t"\
"pmulhw "#coeff", "#src1" \n\t"\
"pmulhw "#coeff", "#src2" \n\t"\
"paddw "#src1", "#dst1" \n\t"\
"paddw "#src2", "#dst2" \n\t"\
"test
%%"REG_S", %%"REG_S"
\n\t"\
"test
%%"FF_REG_S", %%"FF_REG_S"
\n\t"\
" jnz 2b \n\t"\
#define YSCALEYUV2PACKEDX \
...
...
@@ -173,41 +173,41 @@ static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize,
"m" (dummy), "m" (dummy), "m" (dummy),\
"r" (dest), "m" (dstW_reg), "m"(uv_off) \
NAMED_CONSTRAINTS_ADD(bF8,bFC) \
: "%"
REG_a, "%"REG_d, "%"
REG_S \
: "%"
FF_REG_a, "%"FF_REG_d, "%"FF_
REG_S \
);
#define YSCALEYUV2PACKEDX_ACCURATE_UV \
__asm__ volatile(\
"xor %%"
REG_a", %%"REG_a"
\n\t"\
"xor %%"
FF_REG_a", %%"FF_REG_a"
\n\t"\
".p2align 4 \n\t"\
"nop \n\t"\
"1: \n\t"\
"lea "CHR_MMX_FILTER_OFFSET"(%0), %%"
REG_d"
\n\t"\
"mov
(%%"REG_d"), %%"REG_S"
\n\t"\
"lea "CHR_MMX_FILTER_OFFSET"(%0), %%"
FF_REG_d"
\n\t"\
"mov
(%%"FF_REG_d"), %%"FF_REG_S"
\n\t"\
"pxor %%mm4, %%mm4 \n\t"\
"pxor %%mm5, %%mm5 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"pxor %%mm7, %%mm7 \n\t"\
".p2align 4 \n\t"\
"2: \n\t"\
"movq
(%%"REG_S", %%"REG_a"), %%mm0
\n\t"
/* UsrcData */
\
"add %6, %%"
REG_S"
\n\t" \
"movq
(%%"REG_S", %%"REG_a"), %%mm2
\n\t"
/* VsrcData */
\
"mov "STR(APCK_PTR2)"(%%"
REG_d"), %%"REG_S"
\n\t"\
"movq
(%%"REG_S", %%"REG_a"), %%mm1
\n\t"
/* UsrcData */
\
"movq
(%%"FF_REG_S", %%"FF_REG_a"), %%mm0
\n\t"
/* UsrcData */
\
"add %6, %%"
FF_REG_S"
\n\t" \
"movq
(%%"FF_REG_S", %%"FF_REG_a"), %%mm2
\n\t"
/* VsrcData */
\
"mov "STR(APCK_PTR2)"(%%"
FF_REG_d"), %%"FF_REG_S"
\n\t"\
"movq
(%%"FF_REG_S", %%"FF_REG_a"), %%mm1
\n\t"
/* UsrcData */
\
"movq %%mm0, %%mm3 \n\t"\
"punpcklwd %%mm1, %%mm0 \n\t"\
"punpckhwd %%mm1, %%mm3 \n\t"\
"movq "STR(APCK_COEF)"(%%"
REG_d"),%%mm1
\n\t"
/* filterCoeff */
\
"movq "STR(APCK_COEF)"(%%"
FF_REG_d"),%%mm1
\n\t"
/* filterCoeff */
\
"pmaddwd %%mm1, %%mm0 \n\t"\
"pmaddwd %%mm1, %%mm3 \n\t"\
"paddd %%mm0, %%mm4 \n\t"\
"paddd %%mm3, %%mm5 \n\t"\
"add %6, %%"
REG_S"
\n\t" \
"movq
(%%"REG_S", %%"REG_a"), %%mm3
\n\t"
/* VsrcData */
\
"mov "STR(APCK_SIZE)"(%%"
REG_d"), %%"REG_S"
\n\t"\
"add $"STR(APCK_SIZE)", %%"
REG_d"
\n\t"\
"test
%%"REG_S", %%"REG_S"
\n\t"\
"add %6, %%"
FF_REG_S"
\n\t" \
"movq
(%%"FF_REG_S", %%"FF_REG_a"), %%mm3
\n\t"
/* VsrcData */
\
"mov "STR(APCK_SIZE)"(%%"
FF_REG_d"), %%"FF_REG_S"
\n\t"\
"add $"STR(APCK_SIZE)", %%"
FF_REG_d"
\n\t"\
"test
%%"FF_REG_S", %%"FF_REG_S"
\n\t"\
"movq %%mm2, %%mm0 \n\t"\
"punpcklwd %%mm3, %%mm2 \n\t"\
"punpckhwd %%mm3, %%mm0 \n\t"\
...
...
@@ -229,30 +229,30 @@ static void RENAME(yuv2yuvX)(const int16_t *filter, int filterSize,
"movq %%mm6, "V_TEMP"(%0) \n\t"\
#define YSCALEYUV2PACKEDX_ACCURATE_YA(offset) \
"lea "offset"(%0), %%"
REG_d"
\n\t"\
"mov (%%"
REG_d"), %%"REG_S"
\n\t"\
"lea "offset"(%0), %%"
FF_REG_d"
\n\t"\
"mov (%%"
FF_REG_d"), %%"FF_REG_S"
\n\t"\
"pxor %%mm1, %%mm1 \n\t"\
"pxor %%mm5, %%mm5 \n\t"\
"pxor %%mm7, %%mm7 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
".p2align 4 \n\t"\
"2: \n\t"\
"movq (%%"
REG_S", %%"REG_a", 2), %%mm0
\n\t"
/* Y1srcData */
\
"movq 8(%%"
REG_S", %%"REG_a", 2), %%mm2
\n\t"
/* Y2srcData */
\
"mov "STR(APCK_PTR2)"(%%"
REG_d"), %%"REG_S"
\n\t"\
"movq (%%"
REG_S", %%"REG_a", 2), %%mm4
\n\t"
/* Y1srcData */
\
"movq (%%"
FF_REG_S", %%"FF_REG_a", 2), %%mm0
\n\t"
/* Y1srcData */
\
"movq 8(%%"
FF_REG_S", %%"FF_REG_a", 2), %%mm2
\n\t"
/* Y2srcData */
\
"mov "STR(APCK_PTR2)"(%%"
FF_REG_d"), %%"FF_REG_S"
\n\t"\
"movq (%%"
FF_REG_S", %%"FF_REG_a", 2), %%mm4
\n\t"
/* Y1srcData */
\
"movq %%mm0, %%mm3 \n\t"\
"punpcklwd %%mm4, %%mm0 \n\t"\
"punpckhwd %%mm4, %%mm3 \n\t"\
"movq "STR(APCK_COEF)"(%%"
REG_d"), %%mm4
\n\t"
/* filterCoeff */
\
"movq "STR(APCK_COEF)"(%%"
FF_REG_d"), %%mm4
\n\t"
/* filterCoeff */
\
"pmaddwd %%mm4, %%mm0 \n\t"\
"pmaddwd %%mm4, %%mm3 \n\t"\
"paddd %%mm0, %%mm1 \n\t"\
"paddd %%mm3, %%mm5 \n\t"\
"movq 8(%%"
REG_S", %%"REG_a", 2), %%mm3
\n\t"
/* Y2srcData */
\
"mov "STR(APCK_SIZE)"(%%"
REG_d"), %%"REG_S"
\n\t"\
"add $"STR(APCK_SIZE)", %%"
REG_d"
\n\t"\
"test
%%"REG_S", %%"REG_S"
\n\t"\
"movq 8(%%"
FF_REG_S", %%"FF_REG_a", 2), %%mm3
\n\t"
/* Y2srcData */
\
"mov "STR(APCK_SIZE)"(%%"
FF_REG_d"), %%"FF_REG_S"
\n\t"\
"add $"STR(APCK_SIZE)", %%"
FF_REG_d"
\n\t"\
"test
%%"FF_REG_S", %%"FF_REG_S"
\n\t"\
"movq %%mm2, %%mm0 \n\t"\
"punpcklwd %%mm3, %%mm2 \n\t"\
"punpckhwd %%mm3, %%mm0 \n\t"\
...
...
@@ -359,13 +359,13 @@ static void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilter,
"psraw $3, %%mm1
\n\t
"
"psraw $3, %%mm7
\n\t
"
"packuswb %%mm7, %%mm1
\n\t
"
WRITEBGR32
(
%
4
,
"%5"
,
%%
REGa
,
%%
mm3
,
%%
mm4
,
%%
mm5
,
%%
mm1
,
%%
mm0
,
%%
mm7
,
%%
mm2
,
%%
mm6
)
WRITEBGR32
(
%
4
,
"%5"
,
%%
FF_
REGa
,
%%
mm3
,
%%
mm4
,
%%
mm5
,
%%
mm1
,
%%
mm0
,
%%
mm7
,
%%
mm2
,
%%
mm6
)
YSCALEYUV2PACKEDX_END
}
else
{
YSCALEYUV2PACKEDX_ACCURATE
YSCALEYUV2RGBX
"pcmpeqd %%mm7, %%mm7
\n\t
"
WRITEBGR32
(
%
4
,
"%5"
,
%%
REGa
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm7
,
%%
mm0
,
%%
mm1
,
%%
mm3
,
%%
mm6
)
WRITEBGR32
(
%
4
,
"%5"
,
%%
FF_
REGa
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm7
,
%%
mm0
,
%%
mm1
,
%%
mm3
,
%%
mm6
)
YSCALEYUV2PACKEDX_END
}
}
...
...
@@ -388,13 +388,13 @@ static void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
"psraw $3, %%mm1
\n\t
"
"psraw $3, %%mm7
\n\t
"
"packuswb %%mm7, %%mm1
\n\t
"
WRITEBGR32
(
%
4
,
"%5"
,
%%
REGa
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm1
,
%%
mm0
,
%%
mm7
,
%%
mm3
,
%%
mm6
)
WRITEBGR32
(
%
4
,
"%5"
,
%%
FF_
REGa
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm1
,
%%
mm0
,
%%
mm7
,
%%
mm3
,
%%
mm6
)
YSCALEYUV2PACKEDX_END
}
else
{
YSCALEYUV2PACKEDX
YSCALEYUV2RGBX
"pcmpeqd %%mm7, %%mm7
\n\t
"
WRITEBGR32
(
%
4
,
"%5"
,
%%
REGa
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm7
,
%%
mm0
,
%%
mm1
,
%%
mm3
,
%%
mm6
)
WRITEBGR32
(
%
4
,
"%5"
,
%%
FF_
REGa
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm7
,
%%
mm0
,
%%
mm1
,
%%
mm3
,
%%
mm6
)
YSCALEYUV2PACKEDX_END
}
}
...
...
@@ -417,13 +417,13 @@ static void RENAME(yuv2bgr32_X)(SwsContext *c, const int16_t *lumFilter,
"psraw $3, %%mm1
\n\t
"
"psraw $3, %%mm7
\n\t
"
"packuswb %%mm7, %%mm1
\n\t
"
WRITEBGR32
(
%
4
,
"%5"
,
%%
REGa
,
%%
mm5
,
%%
mm4
,
%%
mm2
,
%%
mm1
,
%%
mm0
,
%%
mm7
,
%%
mm3
,
%%
mm6
)
WRITEBGR32
(
%
4
,
"%5"
,
%%
FF_
REGa
,
%%
mm5
,
%%
mm4
,
%%
mm2
,
%%
mm1
,
%%
mm0
,
%%
mm7
,
%%
mm3
,
%%
mm6
)
YSCALEYUV2PACKEDX_END
}
else
{
YSCALEYUV2PACKEDX
YSCALEYUV2RGBX
"pcmpeqd %%mm7, %%mm7
\n\t
"
WRITEBGR32
(
%
4
,
"%5"
,
%%
REGa
,
%%
mm5
,
%%
mm4
,
%%
mm2
,
%%
mm7
,
%%
mm0
,
%%
mm1
,
%%
mm3
,
%%
mm6
)
WRITEBGR32
(
%
4
,
"%5"
,
%%
FF_
REGa
,
%%
mm5
,
%%
mm4
,
%%
mm2
,
%%
mm7
,
%%
mm0
,
%%
mm1
,
%%
mm3
,
%%
mm6
)
YSCALEYUV2PACKEDX_END
}
}
...
...
@@ -476,7 +476,7 @@ static void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilter,
"paddusb "
GREEN_DITHER
"(%0), %%mm4
\n\t
"
"paddusb "
RED_DITHER
"(%0), %%mm5
\n\t
"
#endif
WRITERGB16
(
%
4
,
"%5"
,
%%
REGa
)
WRITERGB16
(
%
4
,
"%5"
,
%%
FF_
REGa
)
YSCALEYUV2PACKEDX_END
}
...
...
@@ -500,7 +500,7 @@ static void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
"paddusb "
GREEN_DITHER
"(%0), %%mm4
\n\t
"
"paddusb "
RED_DITHER
"(%0), %%mm5
\n\t
"
#endif
WRITERGB16
(
%
4
,
"%5"
,
%%
REGa
)
WRITERGB16
(
%
4
,
"%5"
,
%%
FF_
REGa
)
YSCALEYUV2PACKEDX_END
}
...
...
@@ -553,7 +553,7 @@ static void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilter,
"paddusb "
GREEN_DITHER
"(%0), %%mm4
\n\t
"
"paddusb "
RED_DITHER
"(%0), %%mm5
\n\t
"
#endif
WRITERGB15
(
%
4
,
"%5"
,
%%
REGa
)
WRITERGB15
(
%
4
,
"%5"
,
%%
FF_
REGa
)
YSCALEYUV2PACKEDX_END
}
...
...
@@ -577,7 +577,7 @@ static void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
"paddusb "
GREEN_DITHER
"(%0), %%mm4
\n\t
"
"paddusb "
RED_DITHER
"(%0), %%mm5
\n\t
"
#endif
WRITERGB15
(
%
4
,
"%5"
,
%%
REGa
)
WRITERGB15
(
%
4
,
"%5"
,
%%
FF_
REGa
)
YSCALEYUV2PACKEDX_END
}
...
...
@@ -705,14 +705,14 @@ static void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilter,
YSCALEYUV2PACKEDX_ACCURATE
YSCALEYUV2RGBX
"pxor %%mm7, %%mm7
\n\t
"
"lea (%%"
REG_a
", %%"
REG_a
", 2), %%"
REG_c
"
\n\t
"
//FIXME optimize
"add %4, %%"
REG_c
"
\n\t
"
WRITEBGR24
(
%%
REGc
,
"%5"
,
%%
REGa
)
"lea (%%"
FF_REG_a
", %%"
FF_REG_a
", 2), %%"
FF_
REG_c
"
\n\t
"
//FIXME optimize
"add %4, %%"
FF_
REG_c
"
\n\t
"
WRITEBGR24
(
%%
FF_REGc
,
"%5"
,
%%
FF_
REGa
)
::
"r"
(
&
c
->
redDither
),
"m"
(
dummy
),
"m"
(
dummy
),
"m"
(
dummy
),
"r"
(
dest
),
"m"
(
dstW_reg
),
"m"
(
uv_off
)
NAMED_CONSTRAINTS_ADD
(
ff_M24A
,
ff_M24C
,
ff_M24B
)
:
"%"
REG_a
,
"%"
REG_c
,
"%"
REG_d
,
"%"
REG_S
:
"%"
FF_REG_a
,
"%"
FF_REG_c
,
"%"
FF_REG_d
,
"%"
FF_
REG_S
);
}
...
...
@@ -730,14 +730,14 @@ static void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
YSCALEYUV2PACKEDX
YSCALEYUV2RGBX
"pxor %%mm7, %%mm7
\n\t
"
"lea (%%"
REG_a
", %%"
REG_a
", 2), %%"
REG_c
"
\n\t
"
//FIXME optimize
"add %4, %%"
REG_c
"
\n\t
"
WRITEBGR24
(
%%
REGc
,
"%5"
,
%%
REGa
)
"lea (%%"
FF_REG_a
", %%"
FF_REG_a
", 2), %%"
FF_REG_c
"
\n\t
"
//FIXME optimize
"add %4, %%"
FF_REG_c
"
\n\t
"
WRITEBGR24
(
%%
FF_REGc
,
"%5"
,
%%
FF_
REGa
)
::
"r"
(
&
c
->
redDither
),
"m"
(
dummy
),
"m"
(
dummy
),
"m"
(
dummy
),
"r"
(
dest
),
"m"
(
dstW_reg
),
"m"
(
uv_off
)
NAMED_CONSTRAINTS_ADD
(
ff_M24A
,
ff_M24C
,
ff_M24B
)
:
"%"
REG_a
,
"%"
REG_c
,
"%"
REG_d
,
"%"
REG_S
:
"%"
FF_REG_a
,
"%"
FF_REG_c
,
"%"
FF_REG_d
,
"%"
FF_
REG_S
);
}
#endif
/* HAVE_6REGS */
...
...
@@ -776,7 +776,7 @@ static void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFilter,
"psraw $3, %%mm4
\n\t
"
"psraw $3, %%mm1
\n\t
"
"psraw $3, %%mm7
\n\t
"
WRITEYUY2
(
%
4
,
"%5"
,
%%
REGa
)
WRITEYUY2
(
%
4
,
"%5"
,
%%
FF_
REGa
)
YSCALEYUV2PACKEDX_END
}
...
...
@@ -797,7 +797,7 @@ static void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter,
"psraw $3, %%mm4
\n\t
"
"psraw $3, %%mm1
\n\t
"
"psraw $3, %%mm7
\n\t
"
WRITEYUY2
(
%
4
,
"%5"
,
%%
REGa
)
WRITEYUY2
(
%
4
,
"%5"
,
%%
FF_
REGa
)
YSCALEYUV2PACKEDX_END
}
...
...
@@ -908,37 +908,37 @@ static void RENAME(yuv2rgb32_2)(SwsContext *c, const int16_t *buf[2],
c
->
u_temp
=
(
intptr_t
)
abuf0
;
c
->
v_temp
=
(
intptr_t
)
abuf1
;
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB
(
%%
REGBP
,
%
5
)
"mov %%"
FF_REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
FF_REG_b
"
\n\t
"
"push %%"
FF_REG_BP
"
\n\t
"
YSCALEYUV2RGB
(
%%
FF_
REGBP
,
%
5
)
"push %0
\n\t
"
"push %1
\n\t
"
"mov "
U_TEMP
"(%5), %0
\n\t
"
"mov "
V_TEMP
"(%5), %1
\n\t
"
YSCALEYUV2RGB_YA
(
%%
REGBP
,
%
5
,
%
0
,
%
1
)
YSCALEYUV2RGB_YA
(
%%
FF_
REGBP
,
%
5
,
%
0
,
%
1
)
"psraw $3, %%mm1
\n\t
"
/* abuf0[eax] - abuf1[eax] >>7*/
"psraw $3, %%mm7
\n\t
"
/* abuf0[eax] - abuf1[eax] >>7*/
"packuswb %%mm7, %%mm1
\n\t
"
"pop %1
\n\t
"
"pop %0
\n\t
"
WRITEBGR32
(
%%
REGb
,
DSTW_OFFSET
"(%5)"
,
%%
REGBP
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm1
,
%%
mm0
,
%%
mm7
,
%%
mm3
,
%%
mm6
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
WRITEBGR32
(
%%
FF_REGb
,
DSTW_OFFSET
"(%5)"
,
%%
FF_
REGBP
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm1
,
%%
mm0
,
%%
mm7
,
%%
mm3
,
%%
mm6
)
"pop %%"
FF_REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
FF_REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
);
#endif
}
else
{
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB
(
%%
REGBP
,
%
5
)
"mov %%"
FF_REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
FF_REG_b
"
\n\t
"
"push %%"
FF_REG_BP
"
\n\t
"
YSCALEYUV2RGB
(
%%
FF_
REGBP
,
%
5
)
"pcmpeqd %%mm7, %%mm7
\n\t
"
WRITEBGR32
(
%%
REGb
,
DSTW_OFFSET
"(%5)"
,
%%
REGBP
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm7
,
%%
mm0
,
%%
mm1
,
%%
mm3
,
%%
mm6
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
WRITEBGR32
(
%%
FF_REGb
,
DSTW_OFFSET
"(%5)"
,
%%
FF_
REGBP
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm7
,
%%
mm0
,
%%
mm1
,
%%
mm3
,
%%
mm6
)
"pop %%"
FF_REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
FF_REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
);
...
...
@@ -954,14 +954,14 @@ static void RENAME(yuv2bgr24_2)(SwsContext *c, const int16_t *buf[2],
*
ubuf0
=
ubuf
[
0
],
*
ubuf1
=
ubuf
[
1
];
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB
(
%%
REGBP
,
%
5
)
"mov %%"
FF_REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
FF_REG_b
"
\n\t
"
"push %%"
FF_REG_BP
"
\n\t
"
YSCALEYUV2RGB
(
%%
FF_
REGBP
,
%
5
)
"pxor %%mm7, %%mm7
\n\t
"
WRITEBGR24
(
%%
REGb
,
DSTW_OFFSET
"(%5)"
,
%%
REGBP
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
WRITEBGR24
(
%%
FF_REGb
,
DSTW_OFFSET
"(%5)"
,
%%
FF_
REGBP
)
"pop %%"
FF_REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
FF_REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
NAMED_CONSTRAINTS_ADD
(
ff_M24A
,
ff_M24C
,
ff_M24B
)
...
...
@@ -977,10 +977,10 @@ static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2],
*
ubuf0
=
ubuf
[
0
],
*
ubuf1
=
ubuf
[
1
];
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB
(
%%
REGBP
,
%
5
)
"mov %%"
FF_REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
FF_REG_b
"
\n\t
"
"push %%"
FF_REG_BP
"
\n\t
"
YSCALEYUV2RGB
(
%%
FF_
REGBP
,
%
5
)
"pxor %%mm7, %%mm7
\n\t
"
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
...
...
@@ -988,9 +988,9 @@ static void RENAME(yuv2rgb555_2)(SwsContext *c, const int16_t *buf[2],
"paddusb "
GREEN_DITHER
"(%5), %%mm4
\n\t
"
"paddusb "
RED_DITHER
"(%5), %%mm5
\n\t
"
#endif
WRITERGB15
(
%%
REGb
,
DSTW_OFFSET
"(%5)"
,
%%
REGBP
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
WRITERGB15
(
%%
FF_REGb
,
DSTW_OFFSET
"(%5)"
,
%%
FF_
REGBP
)
"pop %%"
FF_REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
FF_REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
NAMED_CONSTRAINTS_ADD
(
bF8
)
...
...
@@ -1006,10 +1006,10 @@ static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2],
*
ubuf0
=
ubuf
[
0
],
*
ubuf1
=
ubuf
[
1
];
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB
(
%%
REGBP
,
%
5
)
"mov %%"
FF_REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
FF_REG_b
"
\n\t
"
"push %%"
FF_REG_BP
"
\n\t
"
YSCALEYUV2RGB
(
%%
FF_
REGBP
,
%
5
)
"pxor %%mm7, %%mm7
\n\t
"
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
...
...
@@ -1017,9 +1017,9 @@ static void RENAME(yuv2rgb565_2)(SwsContext *c, const int16_t *buf[2],
"paddusb "
GREEN_DITHER
"(%5), %%mm4
\n\t
"
"paddusb "
RED_DITHER
"(%5), %%mm5
\n\t
"
#endif
WRITERGB16
(
%%
REGb
,
DSTW_OFFSET
"(%5)"
,
%%
REGBP
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
WRITERGB16
(
%%
FF_REGb
,
DSTW_OFFSET
"(%5)"
,
%%
FF_
REGBP
)
"pop %%"
FF_REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
FF_REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
NAMED_CONSTRAINTS_ADD
(
bF8
,
bFC
)
...
...
@@ -1075,13 +1075,13 @@ static void RENAME(yuv2yuyv422_2)(SwsContext *c, const int16_t *buf[2],
*
ubuf0
=
ubuf
[
0
],
*
ubuf1
=
ubuf
[
1
];
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2PACKED
(
%%
REGBP
,
%
5
)
WRITEYUY2
(
%%
REGb
,
DSTW_OFFSET
"(%5)"
,
%%
REGBP
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
"mov %%"
FF_REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
FF_REG_b
"
\n\t
"
"push %%"
FF_REG_BP
"
\n\t
"
YSCALEYUV2PACKED
(
%%
FF_
REGBP
,
%
5
)
WRITEYUY2
(
%%
FF_REGb
,
DSTW_OFFSET
"(%5)"
,
%%
FF_
REGBP
)
"pop %%"
FF_REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
FF_REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
);
...
...
@@ -1217,27 +1217,27 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
const
int16_t
*
ubuf1
=
ubuf
[
0
];
if
(
CONFIG_SWSCALE_ALPHA
&&
c
->
needAlpha
)
{
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB1
(
%%
REGBP
,
%
5
)
YSCALEYUV2RGB1_ALPHA
(
%%
REGBP
)
WRITEBGR32
(
%%
REGb
,
DSTW_OFFSET
"(%5)"
,
%%
REGBP
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm7
,
%%
mm0
,
%%
mm1
,
%%
mm3
,
%%
mm6
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
"mov %%"
FF_REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
FF_REG_b
"
\n\t
"
"push %%"
FF_REG_BP
"
\n\t
"
YSCALEYUV2RGB1
(
%%
FF_
REGBP
,
%
5
)
YSCALEYUV2RGB1_ALPHA
(
%%
FF_
REGBP
)
WRITEBGR32
(
%%
FF_REGb
,
DSTW_OFFSET
"(%5)"
,
%%
FF_
REGBP
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm7
,
%%
mm0
,
%%
mm1
,
%%
mm3
,
%%
mm6
)
"pop %%"
FF_REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
FF_REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
abuf0
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
);
}
else
{
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB1
(
%%
REGBP
,
%
5
)
"mov %%"
FF_REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
FF_REG_b
"
\n\t
"
"push %%"
FF_REG_BP
"
\n\t
"
YSCALEYUV2RGB1
(
%%
FF_
REGBP
,
%
5
)
"pcmpeqd %%mm7, %%mm7
\n\t
"
WRITEBGR32
(
%%
REGb
,
DSTW_OFFSET
"(%5)"
,
%%
REGBP
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm7
,
%%
mm0
,
%%
mm1
,
%%
mm3
,
%%
mm6
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
WRITEBGR32
(
%%
FF_REGb
,
DSTW_OFFSET
"(%5)"
,
%%
FF_
REGBP
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm7
,
%%
mm0
,
%%
mm1
,
%%
mm3
,
%%
mm6
)
"pop %%"
FF_REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
FF_REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
);
...
...
@@ -1246,27 +1246,27 @@ static void RENAME(yuv2rgb32_1)(SwsContext *c, const int16_t *buf0,
const
int16_t
*
ubuf1
=
ubuf
[
1
];
if
(
CONFIG_SWSCALE_ALPHA
&&
c
->
needAlpha
)
{
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB1b
(
%%
REGBP
,
%
5
)
YSCALEYUV2RGB1_ALPHA
(
%%
REGBP
)
WRITEBGR32
(
%%
REGb
,
DSTW_OFFSET
"(%5)"
,
%%
REGBP
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm7
,
%%
mm0
,
%%
mm1
,
%%
mm3
,
%%
mm6
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
"mov %%"
FF_REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
FF_REG_b
"
\n\t
"
"push %%"
FF_REG_BP
"
\n\t
"
YSCALEYUV2RGB1b
(
%%
FF_
REGBP
,
%
5
)
YSCALEYUV2RGB1_ALPHA
(
%%
FF_
REGBP
)
WRITEBGR32
(
%%
FF_REGb
,
DSTW_OFFSET
"(%5)"
,
%%
FF_
REGBP
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm7
,
%%
mm0
,
%%
mm1
,
%%
mm3
,
%%
mm6
)
"pop %%"
FF_REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
FF_REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
abuf0
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
);
}
else
{
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB1b
(
%%
REGBP
,
%
5
)
"mov %%"
FF_REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
FF_REG_b
"
\n\t
"
"push %%"
FF_REG_BP
"
\n\t
"
YSCALEYUV2RGB1b
(
%%
FF_
REGBP
,
%
5
)
"pcmpeqd %%mm7, %%mm7
\n\t
"
WRITEBGR32
(
%%
REGb
,
DSTW_OFFSET
"(%5)"
,
%%
REGBP
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm7
,
%%
mm0
,
%%
mm1
,
%%
mm3
,
%%
mm6
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
WRITEBGR32
(
%%
FF_REGb
,
DSTW_OFFSET
"(%5)"
,
%%
FF_
REGBP
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm7
,
%%
mm0
,
%%
mm1
,
%%
mm3
,
%%
mm6
)
"pop %%"
FF_REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
FF_REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
);
...
...
@@ -1285,14 +1285,14 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
if
(
uvalpha
<
2048
)
{
// note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
const
int16_t
*
ubuf1
=
ubuf
[
0
];
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB1
(
%%
REGBP
,
%
5
)
"mov %%"
FF_REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
FF_REG_b
"
\n\t
"
"push %%"
FF_REG_BP
"
\n\t
"
YSCALEYUV2RGB1
(
%%
FF_
REGBP
,
%
5
)
"pxor %%mm7, %%mm7
\n\t
"
WRITEBGR24
(
%%
REGb
,
DSTW_OFFSET
"(%5)"
,
%%
REGBP
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
WRITEBGR24
(
%%
FF_REGb
,
DSTW_OFFSET
"(%5)"
,
%%
FF_
REGBP
)
"pop %%"
FF_REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
FF_REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
NAMED_CONSTRAINTS_ADD
(
ff_M24A
,
ff_M24C
,
ff_M24B
)
...
...
@@ -1300,14 +1300,14 @@ static void RENAME(yuv2bgr24_1)(SwsContext *c, const int16_t *buf0,
}
else
{
const
int16_t
*
ubuf1
=
ubuf
[
1
];
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB1b
(
%%
REGBP
,
%
5
)
"mov %%"
FF_REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
FF_REG_b
"
\n\t
"
"push %%"
FF_REG_BP
"
\n\t
"
YSCALEYUV2RGB1b
(
%%
FF_
REGBP
,
%
5
)
"pxor %%mm7, %%mm7
\n\t
"
WRITEBGR24
(
%%
REGb
,
DSTW_OFFSET
"(%5)"
,
%%
REGBP
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
WRITEBGR24
(
%%
FF_REGb
,
DSTW_OFFSET
"(%5)"
,
%%
FF_
REGBP
)
"pop %%"
FF_REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
FF_REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
NAMED_CONSTRAINTS_ADD
(
ff_M24A
,
ff_M24C
,
ff_M24B
)
...
...
@@ -1326,10 +1326,10 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
if
(
uvalpha
<
2048
)
{
// note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
const
int16_t
*
ubuf1
=
ubuf
[
0
];
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB1
(
%%
REGBP
,
%
5
)
"mov %%"
FF_REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
FF_REG_b
"
\n\t
"
"push %%"
FF_REG_BP
"
\n\t
"
YSCALEYUV2RGB1
(
%%
FF_
REGBP
,
%
5
)
"pxor %%mm7, %%mm7
\n\t
"
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
...
...
@@ -1337,9 +1337,9 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
"paddusb "
GREEN_DITHER
"(%5), %%mm4
\n\t
"
"paddusb "
RED_DITHER
"(%5), %%mm5
\n\t
"
#endif
WRITERGB15
(
%%
REGb
,
DSTW_OFFSET
"(%5)"
,
%%
REGBP
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
WRITERGB15
(
%%
FF_REGb
,
DSTW_OFFSET
"(%5)"
,
%%
FF_
REGBP
)
"pop %%"
FF_REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
FF_REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
NAMED_CONSTRAINTS_ADD
(
bF8
)
...
...
@@ -1347,10 +1347,10 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
}
else
{
const
int16_t
*
ubuf1
=
ubuf
[
1
];
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB1b
(
%%
REGBP
,
%
5
)
"mov %%"
FF_REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
FF_REG_b
"
\n\t
"
"push %%"
FF_REG_BP
"
\n\t
"
YSCALEYUV2RGB1b
(
%%
FF_
REGBP
,
%
5
)
"pxor %%mm7, %%mm7
\n\t
"
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
...
...
@@ -1358,9 +1358,9 @@ static void RENAME(yuv2rgb555_1)(SwsContext *c, const int16_t *buf0,
"paddusb "
GREEN_DITHER
"(%5), %%mm4
\n\t
"
"paddusb "
RED_DITHER
"(%5), %%mm5
\n\t
"
#endif
WRITERGB15
(
%%
REGb
,
DSTW_OFFSET
"(%5)"
,
%%
REGBP
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
WRITERGB15
(
%%
FF_REGb
,
DSTW_OFFSET
"(%5)"
,
%%
FF_
REGBP
)
"pop %%"
FF_REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
FF_REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
NAMED_CONSTRAINTS_ADD
(
bF8
)
...
...
@@ -1379,10 +1379,10 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
if
(
uvalpha
<
2048
)
{
// note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
const
int16_t
*
ubuf1
=
ubuf
[
0
];
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB1
(
%%
REGBP
,
%
5
)
"mov %%"
FF_REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
FF_REG_b
"
\n\t
"
"push %%"
FF_REG_BP
"
\n\t
"
YSCALEYUV2RGB1
(
%%
FF_
REGBP
,
%
5
)
"pxor %%mm7, %%mm7
\n\t
"
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
...
...
@@ -1390,9 +1390,9 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
"paddusb "
GREEN_DITHER
"(%5), %%mm4
\n\t
"
"paddusb "
RED_DITHER
"(%5), %%mm5
\n\t
"
#endif
WRITERGB16
(
%%
REGb
,
DSTW_OFFSET
"(%5)"
,
%%
REGBP
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
WRITERGB16
(
%%
FF_REGb
,
DSTW_OFFSET
"(%5)"
,
%%
FF_
REGBP
)
"pop %%"
FF_REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
FF_REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
NAMED_CONSTRAINTS_ADD
(
bF8
,
bFC
)
...
...
@@ -1400,10 +1400,10 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
}
else
{
const
int16_t
*
ubuf1
=
ubuf
[
1
];
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB1b
(
%%
REGBP
,
%
5
)
"mov %%"
FF_REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
FF_REG_b
"
\n\t
"
"push %%"
FF_REG_BP
"
\n\t
"
YSCALEYUV2RGB1b
(
%%
FF_
REGBP
,
%
5
)
"pxor %%mm7, %%mm7
\n\t
"
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
...
...
@@ -1411,9 +1411,9 @@ static void RENAME(yuv2rgb565_1)(SwsContext *c, const int16_t *buf0,
"paddusb "
GREEN_DITHER
"(%5), %%mm4
\n\t
"
"paddusb "
RED_DITHER
"(%5), %%mm5
\n\t
"
#endif
WRITERGB16
(
%%
REGb
,
DSTW_OFFSET
"(%5)"
,
%%
REGBP
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
WRITERGB16
(
%%
FF_REGb
,
DSTW_OFFSET
"(%5)"
,
%%
FF_
REGBP
)
"pop %%"
FF_REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
FF_REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
NAMED_CONSTRAINTS_ADD
(
bF8
,
bFC
)
...
...
@@ -1469,26 +1469,26 @@ static void RENAME(yuv2yuyv422_1)(SwsContext *c, const int16_t *buf0,
if
(
uvalpha
<
2048
)
{
// note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
const
int16_t
*
ubuf1
=
ubuf
[
0
];
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2PACKED1
(
%%
REGBP
,
%
5
)
WRITEYUY2
(
%%
REGb
,
DSTW_OFFSET
"(%5)"
,
%%
REGBP
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
"mov %%"
FF_REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
FF_REG_b
"
\n\t
"
"push %%"
FF_REG_BP
"
\n\t
"
YSCALEYUV2PACKED1
(
%%
FF_
REGBP
,
%
5
)
WRITEYUY2
(
%%
FF_REGb
,
DSTW_OFFSET
"(%5)"
,
%%
FF_
REGBP
)
"pop %%"
FF_REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
FF_REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
);
}
else
{
const
int16_t
*
ubuf1
=
ubuf
[
1
];
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2PACKED1b
(
%%
REGBP
,
%
5
)
WRITEYUY2
(
%%
REGb
,
DSTW_OFFSET
"(%5)"
,
%%
REGBP
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
"mov %%"
FF_REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov
%4, %%"
FF_REG_b
"
\n\t
"
"push %%"
FF_REG_BP
"
\n\t
"
YSCALEYUV2PACKED1b
(
%%
FF_
REGBP
,
%
5
)
WRITEYUY2
(
%%
FF_REGb
,
DSTW_OFFSET
"(%5)"
,
%%
FF_
REGBP
)
"pop %%"
FF_REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
FF_REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment