Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
009f829d
Commit
009f829d
authored
May 27, 2011
by
Ronald S. Bultje
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
swscale: fix crash in bilinear scaling.
parent
88aa2159
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
66 additions
and
78 deletions
+66
-78
swscale_internal.h
libswscale/swscale_internal.h
+2
-0
utils.c
libswscale/utils.c
+1
-0
swscale_template.c
libswscale/x86/swscale_template.c
+63
-78
No files found.
libswscale/swscale_internal.h
View file @
009f829d
...
@@ -194,6 +194,7 @@ typedef struct SwsContext {
...
@@ -194,6 +194,7 @@ typedef struct SwsContext {
#define Y_TEMP "11*8+4*4*256*2+40"
#define Y_TEMP "11*8+4*4*256*2+40"
#define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48"
#define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48"
#define UV_OFF "11*8+4*4*256*3+48"
#define UV_OFF "11*8+4*4*256*3+48"
#define UV_OFFx2 "11*8+4*4*256*3+56"
DECLARE_ALIGNED
(
8
,
uint64_t
,
redDither
);
DECLARE_ALIGNED
(
8
,
uint64_t
,
redDither
);
DECLARE_ALIGNED
(
8
,
uint64_t
,
greenDither
);
DECLARE_ALIGNED
(
8
,
uint64_t
,
greenDither
);
...
@@ -217,6 +218,7 @@ typedef struct SwsContext {
...
@@ -217,6 +218,7 @@ typedef struct SwsContext {
DECLARE_ALIGNED
(
8
,
uint64_t
,
y_temp
);
DECLARE_ALIGNED
(
8
,
uint64_t
,
y_temp
);
int32_t
alpMmxFilter
[
4
*
MAX_FILTER_SIZE
];
int32_t
alpMmxFilter
[
4
*
MAX_FILTER_SIZE
];
DECLARE_ALIGNED
(
8
,
ptrdiff_t
,
uv_off
);
///< offset (in pixels) between u and v planes
DECLARE_ALIGNED
(
8
,
ptrdiff_t
,
uv_off
);
///< offset (in pixels) between u and v planes
DECLARE_ALIGNED
(
8
,
ptrdiff_t
,
uv_offx2
);
///< offset (in bytes) between u and v planes
#if HAVE_ALTIVEC
#if HAVE_ALTIVEC
vector
signed
short
CY
;
vector
signed
short
CY
;
...
...
libswscale/utils.c
View file @
009f829d
...
@@ -1001,6 +1001,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
...
@@ -1001,6 +1001,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
c
->
lumPixBuf
[
i
]
=
c
->
lumPixBuf
[
i
+
c
->
vLumBufSize
];
c
->
lumPixBuf
[
i
]
=
c
->
lumPixBuf
[
i
+
c
->
vLumBufSize
];
}
}
c
->
uv_off
=
dst_stride_px
;
c
->
uv_off
=
dst_stride_px
;
c
->
uv_offx2
=
dst_stride
;
for
(
i
=
0
;
i
<
c
->
vChrBufSize
;
i
++
)
{
for
(
i
=
0
;
i
<
c
->
vChrBufSize
;
i
++
)
{
FF_ALLOC_OR_GOTO
(
c
,
c
->
chrUPixBuf
[
i
+
c
->
vChrBufSize
],
dst_stride
*
2
+
1
,
fail
);
FF_ALLOC_OR_GOTO
(
c
,
c
->
chrUPixBuf
[
i
+
c
->
vChrBufSize
],
dst_stride
*
2
+
1
,
fail
);
c
->
chrUPixBuf
[
i
]
=
c
->
chrUPixBuf
[
i
+
c
->
vChrBufSize
];
c
->
chrUPixBuf
[
i
]
=
c
->
chrUPixBuf
[
i
+
c
->
vChrBufSize
];
...
...
libswscale/x86/swscale_template.c
View file @
009f829d
...
@@ -897,16 +897,16 @@ static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter
...
@@ -897,16 +897,16 @@ static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter
YSCALEYUV2PACKEDX_END
YSCALEYUV2PACKEDX_END
}
}
#define REAL_YSCALEYUV2RGB_UV(index, c
, uv_off
) \
#define REAL_YSCALEYUV2RGB_UV(index, c) \
"xor "#index", "#index" \n\t"\
"xor "#index", "#index" \n\t"\
".p2align 4 \n\t"\
".p2align 4 \n\t"\
"1: \n\t"\
"1: \n\t"\
"movq (%2, "#index"), %%mm2 \n\t"
/* uvbuf0[eax]*/
\
"movq (%2, "#index"), %%mm2 \n\t"
/* uvbuf0[eax]*/
\
"movq (%3, "#index"), %%mm3 \n\t"
/* uvbuf1[eax]*/
\
"movq (%3, "#index"), %%mm3 \n\t"
/* uvbuf1[eax]*/
\
"add "
#uv_off"
, "#index" \n\t" \
"add "
UV_OFFx2"("#c")
, "#index" \n\t" \
"movq (%2, "#index"), %%mm5 \n\t"
/* uvbuf0[eax+2048]*/
\
"movq (%2, "#index"), %%mm5 \n\t"
/* uvbuf0[eax+2048]*/
\
"movq (%3, "#index"), %%mm4 \n\t"
/* uvbuf1[eax+2048]*/
\
"movq (%3, "#index"), %%mm4 \n\t"
/* uvbuf1[eax+2048]*/
\
"sub "
#uv_off"
, "#index" \n\t" \
"sub "
UV_OFFx2"("#c")
, "#index" \n\t" \
"psubw %%mm3, %%mm2 \n\t"
/* uvbuf0[eax] - uvbuf1[eax]*/
\
"psubw %%mm3, %%mm2 \n\t"
/* uvbuf0[eax] - uvbuf1[eax]*/
\
"psubw %%mm4, %%mm5 \n\t"
/* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/
\
"psubw %%mm4, %%mm5 \n\t"
/* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/
\
"movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
"movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
...
@@ -969,8 +969,8 @@ static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter
...
@@ -969,8 +969,8 @@ static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter
#define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)
#define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)
#define YSCALEYUV2RGB(index, c
, uv_off
) \
#define YSCALEYUV2RGB(index, c) \
REAL_YSCALEYUV2RGB_UV(index, c
, uv_off
) \
REAL_YSCALEYUV2RGB_UV(index, c) \
REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \
REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \
REAL_YSCALEYUV2RGB_COEFF(c)
REAL_YSCALEYUV2RGB_COEFF(c)
...
@@ -984,12 +984,10 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
...
@@ -984,12 +984,10 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
const
uint16_t
*
abuf1
,
uint8_t
*
dest
,
const
uint16_t
*
abuf1
,
uint8_t
*
dest
,
int
dstW
,
int
yalpha
,
int
uvalpha
,
int
y
)
int
dstW
,
int
yalpha
,
int
uvalpha
,
int
y
)
{
{
x86_reg
uv_off
=
c
->
uv_off
<<
1
;
if
(
CONFIG_SWSCALE_ALPHA
&&
c
->
alpPixBuf
)
{
if
(
CONFIG_SWSCALE_ALPHA
&&
c
->
alpPixBuf
)
{
#if ARCH_X86_64
#if ARCH_X86_64
__asm__
volatile
(
__asm__
volatile
(
YSCALEYUV2RGB
(
%%
r8
,
%
5
,
%
8
)
YSCALEYUV2RGB
(
%%
r8
,
%
5
)
YSCALEYUV2RGB_YA
(
%%
r8
,
%
5
,
%
6
,
%
7
)
YSCALEYUV2RGB_YA
(
%%
r8
,
%
5
,
%
6
,
%
7
)
"psraw $3, %%mm1
\n\t
"
/* abuf0[eax] - abuf1[eax] >>7*/
"psraw $3, %%mm1
\n\t
"
/* abuf0[eax] - abuf1[eax] >>7*/
"psraw $3, %%mm7
\n\t
"
/* abuf0[eax] - abuf1[eax] >>7*/
"psraw $3, %%mm7
\n\t
"
/* abuf0[eax] - abuf1[eax] >>7*/
...
@@ -997,7 +995,7 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
...
@@ -997,7 +995,7 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
WRITEBGR32
(
%
4
,
8280
(
%
5
),
%%
r8
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm1
,
%%
mm0
,
%%
mm7
,
%%
mm3
,
%%
mm6
)
WRITEBGR32
(
%
4
,
8280
(
%
5
),
%%
r8
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm1
,
%%
mm0
,
%%
mm7
,
%%
mm3
,
%%
mm6
)
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"r"
(
dest
),
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"r"
(
dest
),
"a"
(
&
c
->
redDither
),
"a"
(
&
c
->
redDither
),
"r"
(
abuf0
),
"r"
(
abuf1
)
,
"m"
(
uv_off
)
"r"
(
abuf0
),
"r"
(
abuf1
)
:
"%r8"
:
"%r8"
);
);
#else
#else
...
@@ -1007,7 +1005,7 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
...
@@ -1007,7 +1005,7 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB
(
%%
REGBP
,
%
5
,
%
6
)
YSCALEYUV2RGB
(
%%
REGBP
,
%
5
)
"push %0
\n\t
"
"push %0
\n\t
"
"push %1
\n\t
"
"push %1
\n\t
"
"mov "
U_TEMP
"(%5), %0
\n\t
"
"mov "
U_TEMP
"(%5), %0
\n\t
"
...
@@ -1022,7 +1020,7 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
...
@@ -1022,7 +1020,7 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
"pop %%"
REG_BP
"
\n\t
"
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
,
"m"
(
uv_off
)
"a"
(
&
c
->
redDither
)
);
);
#endif
#endif
}
else
{
}
else
{
...
@@ -1030,13 +1028,13 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
...
@@ -1030,13 +1028,13 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB
(
%%
REGBP
,
%
5
,
%
6
)
YSCALEYUV2RGB
(
%%
REGBP
,
%
5
)
"pcmpeqd %%mm7, %%mm7
\n\t
"
"pcmpeqd %%mm7, %%mm7
\n\t
"
WRITEBGR32
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm7
,
%%
mm0
,
%%
mm1
,
%%
mm3
,
%%
mm6
)
WRITEBGR32
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm7
,
%%
mm0
,
%%
mm1
,
%%
mm3
,
%%
mm6
)
"pop %%"
REG_BP
"
\n\t
"
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
,
"m"
(
uv_off
)
"a"
(
&
c
->
redDither
)
);
);
}
}
}
}
...
@@ -1048,20 +1046,18 @@ static inline void RENAME(yuv2bgr24_2)(SwsContext *c, const uint16_t *buf0,
...
@@ -1048,20 +1046,18 @@ static inline void RENAME(yuv2bgr24_2)(SwsContext *c, const uint16_t *buf0,
const
uint16_t
*
abuf1
,
uint8_t
*
dest
,
const
uint16_t
*
abuf1
,
uint8_t
*
dest
,
int
dstW
,
int
yalpha
,
int
uvalpha
,
int
y
)
int
dstW
,
int
yalpha
,
int
uvalpha
,
int
y
)
{
{
x86_reg
uv_off
=
c
->
uv_off
<<
1
;
//Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
//Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
__asm__
volatile
(
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB
(
%%
REGBP
,
%
5
,
%
6
)
YSCALEYUV2RGB
(
%%
REGBP
,
%
5
)
"pxor %%mm7, %%mm7
\n\t
"
"pxor %%mm7, %%mm7
\n\t
"
WRITEBGR24
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
)
WRITEBGR24
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
)
"pop %%"
REG_BP
"
\n\t
"
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
,
"m"
(
uv_off
)
"a"
(
&
c
->
redDither
)
);
);
}
}
...
@@ -1072,14 +1068,12 @@ static inline void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0,
...
@@ -1072,14 +1068,12 @@ static inline void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0,
const
uint16_t
*
abuf1
,
uint8_t
*
dest
,
const
uint16_t
*
abuf1
,
uint8_t
*
dest
,
int
dstW
,
int
yalpha
,
int
uvalpha
,
int
y
)
int
dstW
,
int
yalpha
,
int
uvalpha
,
int
y
)
{
{
x86_reg
uv_off
=
c
->
uv_off
<<
1
;
//Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
//Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
__asm__
volatile
(
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB
(
%%
REGBP
,
%
5
,
%
6
)
YSCALEYUV2RGB
(
%%
REGBP
,
%
5
)
"pxor %%mm7, %%mm7
\n\t
"
"pxor %%mm7, %%mm7
\n\t
"
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
#ifdef DITHER1XBPP
...
@@ -1091,7 +1085,7 @@ static inline void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0,
...
@@ -1091,7 +1085,7 @@ static inline void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0,
"pop %%"
REG_BP
"
\n\t
"
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
,
"m"
(
uv_off
)
"a"
(
&
c
->
redDither
)
);
);
}
}
...
@@ -1102,14 +1096,12 @@ static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
...
@@ -1102,14 +1096,12 @@ static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
const
uint16_t
*
abuf1
,
uint8_t
*
dest
,
const
uint16_t
*
abuf1
,
uint8_t
*
dest
,
int
dstW
,
int
yalpha
,
int
uvalpha
,
int
y
)
int
dstW
,
int
yalpha
,
int
uvalpha
,
int
y
)
{
{
x86_reg
uv_off
=
c
->
uv_off
<<
1
;
//Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
//Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
__asm__
volatile
(
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB
(
%%
REGBP
,
%
5
,
%
6
)
YSCALEYUV2RGB
(
%%
REGBP
,
%
5
)
"pxor %%mm7, %%mm7
\n\t
"
"pxor %%mm7, %%mm7
\n\t
"
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
#ifdef DITHER1XBPP
...
@@ -1121,11 +1113,11 @@ static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
...
@@ -1121,11 +1113,11 @@ static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
"pop %%"
REG_BP
"
\n\t
"
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
,
"m"
(
uv_off
)
"a"
(
&
c
->
redDither
)
);
);
}
}
#define REAL_YSCALEYUV2PACKED(index, c
, uv_off
) \
#define REAL_YSCALEYUV2PACKED(index, c) \
"movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
"movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
"movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1 \n\t"\
"movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1 \n\t"\
"psraw $3, %%mm0 \n\t"\
"psraw $3, %%mm0 \n\t"\
...
@@ -1137,10 +1129,10 @@ static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
...
@@ -1137,10 +1129,10 @@ static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
"1: \n\t"\
"1: \n\t"\
"movq (%2, "#index"), %%mm2 \n\t"
/* uvbuf0[eax]*/
\
"movq (%2, "#index"), %%mm2 \n\t"
/* uvbuf0[eax]*/
\
"movq (%3, "#index"), %%mm3 \n\t"
/* uvbuf1[eax]*/
\
"movq (%3, "#index"), %%mm3 \n\t"
/* uvbuf1[eax]*/
\
"add "
#uv_off"
, "#index" \n\t" \
"add "
UV_OFFx2"("#c")
, "#index" \n\t" \
"movq (%2, "#index"), %%mm5 \n\t"
/* uvbuf0[eax+2048]*/
\
"movq (%2, "#index"), %%mm5 \n\t"
/* uvbuf0[eax+2048]*/
\
"movq (%3, "#index"), %%mm4 \n\t"
/* uvbuf1[eax+2048]*/
\
"movq (%3, "#index"), %%mm4 \n\t"
/* uvbuf1[eax+2048]*/
\
"sub "
#uv_off"
, "#index" \n\t" \
"sub "
UV_OFFx2"("#c")
, "#index" \n\t" \
"psubw %%mm3, %%mm2 \n\t"
/* uvbuf0[eax] - uvbuf1[eax]*/
\
"psubw %%mm3, %%mm2 \n\t"
/* uvbuf0[eax] - uvbuf1[eax]*/
\
"psubw %%mm4, %%mm5 \n\t"
/* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/
\
"psubw %%mm4, %%mm5 \n\t"
/* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/
\
"movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
"movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
...
@@ -1163,7 +1155,7 @@ static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
...
@@ -1163,7 +1155,7 @@ static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
"paddw %%mm0, %%mm1 \n\t"
/* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/
\
"paddw %%mm0, %%mm1 \n\t"
/* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/
\
"paddw %%mm6, %%mm7 \n\t"
/* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/
\
"paddw %%mm6, %%mm7 \n\t"
/* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/
\
#define YSCALEYUV2PACKED(index, c
, uv_off) REAL_YSCALEYUV2PACKED(index, c, uv_off
)
#define YSCALEYUV2PACKED(index, c
) REAL_YSCALEYUV2PACKED(index, c
)
static
inline
void
RENAME
(
yuv2yuyv422_2
)(
SwsContext
*
c
,
const
uint16_t
*
buf0
,
static
inline
void
RENAME
(
yuv2yuyv422_2
)(
SwsContext
*
c
,
const
uint16_t
*
buf0
,
const
uint16_t
*
buf1
,
const
uint16_t
*
ubuf0
,
const
uint16_t
*
buf1
,
const
uint16_t
*
ubuf0
,
...
@@ -1172,30 +1164,28 @@ static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
...
@@ -1172,30 +1164,28 @@ static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
const
uint16_t
*
abuf1
,
uint8_t
*
dest
,
const
uint16_t
*
abuf1
,
uint8_t
*
dest
,
int
dstW
,
int
yalpha
,
int
uvalpha
,
int
y
)
int
dstW
,
int
yalpha
,
int
uvalpha
,
int
y
)
{
{
x86_reg
uv_off
=
c
->
uv_off
<<
1
;
//Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
//Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
__asm__
volatile
(
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2PACKED
(
%%
REGBP
,
%
5
,
%
6
)
YSCALEYUV2PACKED
(
%%
REGBP
,
%
5
)
WRITEYUY2
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
)
WRITEYUY2
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
)
"pop %%"
REG_BP
"
\n\t
"
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
,
"m"
(
uv_off
)
"a"
(
&
c
->
redDither
)
);
);
}
}
#define REAL_YSCALEYUV2RGB1(index, c
, uv_off
) \
#define REAL_YSCALEYUV2RGB1(index, c) \
"xor "#index", "#index" \n\t"\
"xor "#index", "#index" \n\t"\
".p2align 4 \n\t"\
".p2align 4 \n\t"\
"1: \n\t"\
"1: \n\t"\
"movq (%2, "#index"), %%mm3 \n\t"
/* uvbuf0[eax]*/
\
"movq (%2, "#index"), %%mm3 \n\t"
/* uvbuf0[eax]*/
\
"add "
#uv_off"
, "#index" \n\t" \
"add "
UV_OFFx2"("#c")
, "#index" \n\t" \
"movq (%2, "#index"), %%mm4 \n\t"
/* uvbuf0[eax+2048]*/
\
"movq (%2, "#index"), %%mm4 \n\t"
/* uvbuf0[eax+2048]*/
\
"sub "
#uv_off"
, "#index" \n\t" \
"sub "
UV_OFFx2"("#c")
, "#index" \n\t" \
"psraw $4, %%mm3 \n\t"
/* uvbuf0[eax] - uvbuf1[eax] >>4*/
\
"psraw $4, %%mm3 \n\t"
/* uvbuf0[eax] - uvbuf1[eax] >>4*/
\
"psraw $4, %%mm4 \n\t"
/* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/
\
"psraw $4, %%mm4 \n\t"
/* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/
\
"psubw "U_OFFSET"("#c"), %%mm3 \n\t"
/* (U-128)8*/
\
"psubw "U_OFFSET"("#c"), %%mm3 \n\t"
/* (U-128)8*/
\
...
@@ -1237,19 +1227,19 @@ static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
...
@@ -1237,19 +1227,19 @@ static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
"packuswb %%mm6, %%mm5 \n\t"\
"packuswb %%mm6, %%mm5 \n\t"\
"packuswb %%mm3, %%mm4 \n\t"\
"packuswb %%mm3, %%mm4 \n\t"\
#define YSCALEYUV2RGB1(index, c
, uv_off) REAL_YSCALEYUV2RGB1(index, c, uv_off
)
#define YSCALEYUV2RGB1(index, c
) REAL_YSCALEYUV2RGB1(index, c
)
// do vertical chrominance interpolation
// do vertical chrominance interpolation
#define REAL_YSCALEYUV2RGB1b(index, c
, uv_off
) \
#define REAL_YSCALEYUV2RGB1b(index, c) \
"xor "#index", "#index" \n\t"\
"xor "#index", "#index" \n\t"\
".p2align 4 \n\t"\
".p2align 4 \n\t"\
"1: \n\t"\
"1: \n\t"\
"movq (%2, "#index"), %%mm2 \n\t"
/* uvbuf0[eax]*/
\
"movq (%2, "#index"), %%mm2 \n\t"
/* uvbuf0[eax]*/
\
"movq (%3, "#index"), %%mm3 \n\t"
/* uvbuf1[eax]*/
\
"movq (%3, "#index"), %%mm3 \n\t"
/* uvbuf1[eax]*/
\
"add "
#uv_off"
, "#index" \n\t" \
"add "
UV_OFFx2"("#c")
, "#index" \n\t" \
"movq (%2, "#index"), %%mm5 \n\t"
/* uvbuf0[eax+2048]*/
\
"movq (%2, "#index"), %%mm5 \n\t"
/* uvbuf0[eax+2048]*/
\
"movq (%3, "#index"), %%mm4 \n\t"
/* uvbuf1[eax+2048]*/
\
"movq (%3, "#index"), %%mm4 \n\t"
/* uvbuf1[eax+2048]*/
\
"sub "
#uv_off"
, "#index" \n\t" \
"sub "
UV_OFFx2"("#c")
, "#index" \n\t" \
"paddw %%mm2, %%mm3 \n\t"
/* uvbuf0[eax] + uvbuf1[eax]*/
\
"paddw %%mm2, %%mm3 \n\t"
/* uvbuf0[eax] + uvbuf1[eax]*/
\
"paddw %%mm5, %%mm4 \n\t"
/* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/
\
"paddw %%mm5, %%mm4 \n\t"
/* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/
\
"psrlw $5, %%mm3 \n\t"
/*FIXME might overflow*/
\
"psrlw $5, %%mm3 \n\t"
/*FIXME might overflow*/
\
...
@@ -1293,7 +1283,7 @@ static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
...
@@ -1293,7 +1283,7 @@ static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
"packuswb %%mm6, %%mm5 \n\t"\
"packuswb %%mm6, %%mm5 \n\t"\
"packuswb %%mm3, %%mm4 \n\t"\
"packuswb %%mm3, %%mm4 \n\t"\
#define YSCALEYUV2RGB1b(index, c
, uv_off) REAL_YSCALEYUV2RGB1b(index, c, uv_off
)
#define YSCALEYUV2RGB1b(index, c
) REAL_YSCALEYUV2RGB1b(index, c
)
#define REAL_YSCALEYUV2RGB1_ALPHA(index) \
#define REAL_YSCALEYUV2RGB1_ALPHA(index) \
"movq (%1, "#index", 2), %%mm7 \n\t"
/* abuf0[index ] */
\
"movq (%1, "#index", 2), %%mm7 \n\t"
/* abuf0[index ] */
\
...
@@ -1313,7 +1303,6 @@ static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
...
@@ -1313,7 +1303,6 @@ static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
int
dstW
,
int
uvalpha
,
enum
PixelFormat
dstFormat
,
int
dstW
,
int
uvalpha
,
enum
PixelFormat
dstFormat
,
int
flags
,
int
y
)
int
flags
,
int
y
)
{
{
x86_reg
uv_off
=
c
->
uv_off
<<
1
;
const
uint16_t
*
buf1
=
buf0
;
//FIXME needed for RGB1/BGR1
const
uint16_t
*
buf1
=
buf0
;
//FIXME needed for RGB1/BGR1
if
(
uvalpha
<
2048
)
{
// note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
if
(
uvalpha
<
2048
)
{
// note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
...
@@ -1322,26 +1311,26 @@ static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
...
@@ -1322,26 +1311,26 @@ static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB1
(
%%
REGBP
,
%
5
,
%
6
)
YSCALEYUV2RGB1
(
%%
REGBP
,
%
5
)
YSCALEYUV2RGB1_ALPHA
(
%%
REGBP
)
YSCALEYUV2RGB1_ALPHA
(
%%
REGBP
)
WRITEBGR32
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm7
,
%%
mm0
,
%%
mm1
,
%%
mm3
,
%%
mm6
)
WRITEBGR32
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm7
,
%%
mm0
,
%%
mm1
,
%%
mm3
,
%%
mm6
)
"pop %%"
REG_BP
"
\n\t
"
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
abuf0
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
::
"c"
(
buf0
),
"d"
(
abuf0
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
,
"m"
(
uv_off
)
"a"
(
&
c
->
redDither
)
);
);
}
else
{
}
else
{
__asm__
volatile
(
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB1
(
%%
REGBP
,
%
5
,
%
6
)
YSCALEYUV2RGB1
(
%%
REGBP
,
%
5
)
"pcmpeqd %%mm7, %%mm7
\n\t
"
"pcmpeqd %%mm7, %%mm7
\n\t
"
WRITEBGR32
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm7
,
%%
mm0
,
%%
mm1
,
%%
mm3
,
%%
mm6
)
WRITEBGR32
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm7
,
%%
mm0
,
%%
mm1
,
%%
mm3
,
%%
mm6
)
"pop %%"
REG_BP
"
\n\t
"
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
,
"m"
(
uv_off
)
"a"
(
&
c
->
redDither
)
);
);
}
}
}
else
{
}
else
{
...
@@ -1350,26 +1339,26 @@ static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
...
@@ -1350,26 +1339,26 @@ static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB1b
(
%%
REGBP
,
%
5
,
%
6
)
YSCALEYUV2RGB1b
(
%%
REGBP
,
%
5
)
YSCALEYUV2RGB1_ALPHA
(
%%
REGBP
)
YSCALEYUV2RGB1_ALPHA
(
%%
REGBP
)
WRITEBGR32
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm7
,
%%
mm0
,
%%
mm1
,
%%
mm3
,
%%
mm6
)
WRITEBGR32
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm7
,
%%
mm0
,
%%
mm1
,
%%
mm3
,
%%
mm6
)
"pop %%"
REG_BP
"
\n\t
"
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
abuf0
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
::
"c"
(
buf0
),
"d"
(
abuf0
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
,
"m"
(
uv_off
)
"a"
(
&
c
->
redDither
)
);
);
}
else
{
}
else
{
__asm__
volatile
(
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB1b
(
%%
REGBP
,
%
5
,
%
6
)
YSCALEYUV2RGB1b
(
%%
REGBP
,
%
5
)
"pcmpeqd %%mm7, %%mm7
\n\t
"
"pcmpeqd %%mm7, %%mm7
\n\t
"
WRITEBGR32
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm7
,
%%
mm0
,
%%
mm1
,
%%
mm3
,
%%
mm6
)
WRITEBGR32
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm7
,
%%
mm0
,
%%
mm1
,
%%
mm3
,
%%
mm6
)
"pop %%"
REG_BP
"
\n\t
"
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
,
"m"
(
uv_off
)
"a"
(
&
c
->
redDither
)
);
);
}
}
}
}
...
@@ -1382,7 +1371,6 @@ static inline void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0,
...
@@ -1382,7 +1371,6 @@ static inline void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0,
int
dstW
,
int
uvalpha
,
enum
PixelFormat
dstFormat
,
int
dstW
,
int
uvalpha
,
enum
PixelFormat
dstFormat
,
int
flags
,
int
y
)
int
flags
,
int
y
)
{
{
x86_reg
uv_off
=
c
->
uv_off
<<
1
;
const
uint16_t
*
buf1
=
buf0
;
//FIXME needed for RGB1/BGR1
const
uint16_t
*
buf1
=
buf0
;
//FIXME needed for RGB1/BGR1
if
(
uvalpha
<
2048
)
{
// note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
if
(
uvalpha
<
2048
)
{
// note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
...
@@ -1390,26 +1378,26 @@ static inline void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0,
...
@@ -1390,26 +1378,26 @@ static inline void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0,
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB1
(
%%
REGBP
,
%
5
,
%
6
)
YSCALEYUV2RGB1
(
%%
REGBP
,
%
5
)
"pxor %%mm7, %%mm7
\n\t
"
"pxor %%mm7, %%mm7
\n\t
"
WRITEBGR24
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
)
WRITEBGR24
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
)
"pop %%"
REG_BP
"
\n\t
"
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
,
"m"
(
uv_off
)
"a"
(
&
c
->
redDither
)
);
);
}
else
{
}
else
{
__asm__
volatile
(
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB1b
(
%%
REGBP
,
%
5
,
%
6
)
YSCALEYUV2RGB1b
(
%%
REGBP
,
%
5
)
"pxor %%mm7, %%mm7
\n\t
"
"pxor %%mm7, %%mm7
\n\t
"
WRITEBGR24
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
)
WRITEBGR24
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
)
"pop %%"
REG_BP
"
\n\t
"
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
,
"m"
(
uv_off
)
"a"
(
&
c
->
redDither
)
);
);
}
}
}
}
...
@@ -1421,7 +1409,6 @@ static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
...
@@ -1421,7 +1409,6 @@ static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
int
dstW
,
int
uvalpha
,
enum
PixelFormat
dstFormat
,
int
dstW
,
int
uvalpha
,
enum
PixelFormat
dstFormat
,
int
flags
,
int
y
)
int
flags
,
int
y
)
{
{
x86_reg
uv_off
=
c
->
uv_off
<<
1
;
const
uint16_t
*
buf1
=
buf0
;
//FIXME needed for RGB1/BGR1
const
uint16_t
*
buf1
=
buf0
;
//FIXME needed for RGB1/BGR1
if
(
uvalpha
<
2048
)
{
// note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
if
(
uvalpha
<
2048
)
{
// note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
...
@@ -1429,7 +1416,7 @@ static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
...
@@ -1429,7 +1416,7 @@ static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB1
(
%%
REGBP
,
%
5
,
%
6
)
YSCALEYUV2RGB1
(
%%
REGBP
,
%
5
)
"pxor %%mm7, %%mm7
\n\t
"
"pxor %%mm7, %%mm7
\n\t
"
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
#ifdef DITHER1XBPP
...
@@ -1441,14 +1428,14 @@ static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
...
@@ -1441,14 +1428,14 @@ static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
"pop %%"
REG_BP
"
\n\t
"
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
,
"m"
(
uv_off
)
"a"
(
&
c
->
redDither
)
);
);
}
else
{
}
else
{
__asm__
volatile
(
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB1b
(
%%
REGBP
,
%
5
,
%
6
)
YSCALEYUV2RGB1b
(
%%
REGBP
,
%
5
)
"pxor %%mm7, %%mm7
\n\t
"
"pxor %%mm7, %%mm7
\n\t
"
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
#ifdef DITHER1XBPP
...
@@ -1460,7 +1447,7 @@ static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
...
@@ -1460,7 +1447,7 @@ static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
"pop %%"
REG_BP
"
\n\t
"
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
,
"m"
(
uv_off
)
"a"
(
&
c
->
redDither
)
);
);
}
}
}
}
...
@@ -1472,7 +1459,6 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
...
@@ -1472,7 +1459,6 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
int
dstW
,
int
uvalpha
,
enum
PixelFormat
dstFormat
,
int
dstW
,
int
uvalpha
,
enum
PixelFormat
dstFormat
,
int
flags
,
int
y
)
int
flags
,
int
y
)
{
{
x86_reg
uv_off
=
c
->
uv_off
<<
1
;
const
uint16_t
*
buf1
=
buf0
;
//FIXME needed for RGB1/BGR1
const
uint16_t
*
buf1
=
buf0
;
//FIXME needed for RGB1/BGR1
if
(
uvalpha
<
2048
)
{
// note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
if
(
uvalpha
<
2048
)
{
// note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
...
@@ -1480,7 +1466,7 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
...
@@ -1480,7 +1466,7 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB1
(
%%
REGBP
,
%
5
,
%
6
)
YSCALEYUV2RGB1
(
%%
REGBP
,
%
5
)
"pxor %%mm7, %%mm7
\n\t
"
"pxor %%mm7, %%mm7
\n\t
"
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
#ifdef DITHER1XBPP
...
@@ -1492,14 +1478,14 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
...
@@ -1492,14 +1478,14 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
"pop %%"
REG_BP
"
\n\t
"
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
,
"m"
(
uv_off
)
"a"
(
&
c
->
redDither
)
);
);
}
else
{
}
else
{
__asm__
volatile
(
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB1b
(
%%
REGBP
,
%
5
,
%
6
)
YSCALEYUV2RGB1b
(
%%
REGBP
,
%
5
)
"pxor %%mm7, %%mm7
\n\t
"
"pxor %%mm7, %%mm7
\n\t
"
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
#ifdef DITHER1XBPP
...
@@ -1511,19 +1497,19 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
...
@@ -1511,19 +1497,19 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
"pop %%"
REG_BP
"
\n\t
"
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
,
"m"
(
uv_off
)
"a"
(
&
c
->
redDither
)
);
);
}
}
}
}
#define REAL_YSCALEYUV2PACKED1(index, c
, uv_off
) \
#define REAL_YSCALEYUV2PACKED1(index, c) \
"xor "#index", "#index" \n\t"\
"xor "#index", "#index" \n\t"\
".p2align 4 \n\t"\
".p2align 4 \n\t"\
"1: \n\t"\
"1: \n\t"\
"movq (%2, "#index"), %%mm3 \n\t"
/* uvbuf0[eax]*/
\
"movq (%2, "#index"), %%mm3 \n\t"
/* uvbuf0[eax]*/
\
"add "
#uv_off"
, "#index" \n\t" \
"add "
UV_OFFx2"("#c")
, "#index" \n\t" \
"movq (%2, "#index"), %%mm4 \n\t"
/* uvbuf0[eax+2048]*/
\
"movq (%2, "#index"), %%mm4 \n\t"
/* uvbuf0[eax+2048]*/
\
"sub "
#uv_off"
, "#index" \n\t" \
"sub "
UV_OFFx2"("#c")
, "#index" \n\t" \
"psraw $7, %%mm3 \n\t" \
"psraw $7, %%mm3 \n\t" \
"psraw $7, %%mm4 \n\t" \
"psraw $7, %%mm4 \n\t" \
"movq (%0, "#index", 2), %%mm1 \n\t"
/*buf0[eax]*/
\
"movq (%0, "#index", 2), %%mm1 \n\t"
/*buf0[eax]*/
\
...
@@ -1531,18 +1517,18 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
...
@@ -1531,18 +1517,18 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
"psraw $7, %%mm1 \n\t" \
"psraw $7, %%mm1 \n\t" \
"psraw $7, %%mm7 \n\t" \
"psraw $7, %%mm7 \n\t" \
#define YSCALEYUV2PACKED1(index, c
, uv_off) REAL_YSCALEYUV2PACKED1(index, c, uv_off
)
#define YSCALEYUV2PACKED1(index, c
) REAL_YSCALEYUV2PACKED1(index, c
)
#define REAL_YSCALEYUV2PACKED1b(index, c
, uv_off
) \
#define REAL_YSCALEYUV2PACKED1b(index, c) \
"xor "#index", "#index" \n\t"\
"xor "#index", "#index" \n\t"\
".p2align 4 \n\t"\
".p2align 4 \n\t"\
"1: \n\t"\
"1: \n\t"\
"movq (%2, "#index"), %%mm2 \n\t"
/* uvbuf0[eax]*/
\
"movq (%2, "#index"), %%mm2 \n\t"
/* uvbuf0[eax]*/
\
"movq (%3, "#index"), %%mm3 \n\t"
/* uvbuf1[eax]*/
\
"movq (%3, "#index"), %%mm3 \n\t"
/* uvbuf1[eax]*/
\
"add "
#uv_off"
, "#index" \n\t" \
"add "
UV_OFFx2"("#c")
, "#index" \n\t" \
"movq (%2, "#index"), %%mm5 \n\t"
/* uvbuf0[eax+2048]*/
\
"movq (%2, "#index"), %%mm5 \n\t"
/* uvbuf0[eax+2048]*/
\
"movq (%3, "#index"), %%mm4 \n\t"
/* uvbuf1[eax+2048]*/
\
"movq (%3, "#index"), %%mm4 \n\t"
/* uvbuf1[eax+2048]*/
\
"sub "
#uv_off"
, "#index" \n\t" \
"sub "
UV_OFFx2"("#c")
, "#index" \n\t" \
"paddw %%mm2, %%mm3 \n\t"
/* uvbuf0[eax] + uvbuf1[eax]*/
\
"paddw %%mm2, %%mm3 \n\t"
/* uvbuf0[eax] + uvbuf1[eax]*/
\
"paddw %%mm5, %%mm4 \n\t"
/* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/
\
"paddw %%mm5, %%mm4 \n\t"
/* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/
\
"psrlw $8, %%mm3 \n\t" \
"psrlw $8, %%mm3 \n\t" \
...
@@ -1551,7 +1537,7 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
...
@@ -1551,7 +1537,7 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
"movq 8(%0, "#index", 2), %%mm7 \n\t"
/*buf0[eax]*/
\
"movq 8(%0, "#index", 2), %%mm7 \n\t"
/*buf0[eax]*/
\
"psraw $7, %%mm1 \n\t" \
"psraw $7, %%mm1 \n\t" \
"psraw $7, %%mm7 \n\t"
"psraw $7, %%mm7 \n\t"
#define YSCALEYUV2PACKED1b(index, c
, uv_off) REAL_YSCALEYUV2PACKED1b(index, c, uv_off
)
#define YSCALEYUV2PACKED1b(index, c
) REAL_YSCALEYUV2PACKED1b(index, c
)
static
inline
void
RENAME
(
yuv2yuyv422_1
)(
SwsContext
*
c
,
const
uint16_t
*
buf0
,
static
inline
void
RENAME
(
yuv2yuyv422_1
)(
SwsContext
*
c
,
const
uint16_t
*
buf0
,
const
uint16_t
*
ubuf0
,
const
uint16_t
*
ubuf1
,
const
uint16_t
*
ubuf0
,
const
uint16_t
*
ubuf1
,
...
@@ -1560,7 +1546,6 @@ static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0,
...
@@ -1560,7 +1546,6 @@ static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0,
int
dstW
,
int
uvalpha
,
enum
PixelFormat
dstFormat
,
int
dstW
,
int
uvalpha
,
enum
PixelFormat
dstFormat
,
int
flags
,
int
y
)
int
flags
,
int
y
)
{
{
x86_reg
uv_off
=
c
->
uv_off
<<
1
;
const
uint16_t
*
buf1
=
buf0
;
//FIXME needed for RGB1/BGR1
const
uint16_t
*
buf1
=
buf0
;
//FIXME needed for RGB1/BGR1
if
(
uvalpha
<
2048
)
{
// note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
if
(
uvalpha
<
2048
)
{
// note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
...
@@ -1568,24 +1553,24 @@ static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0,
...
@@ -1568,24 +1553,24 @@ static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0,
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2PACKED1
(
%%
REGBP
,
%
5
,
%
6
)
YSCALEYUV2PACKED1
(
%%
REGBP
,
%
5
)
WRITEYUY2
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
)
WRITEYUY2
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
)
"pop %%"
REG_BP
"
\n\t
"
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
,
"m"
(
uv_off
)
"a"
(
&
c
->
redDither
)
);
);
}
else
{
}
else
{
__asm__
volatile
(
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2PACKED1b
(
%%
REGBP
,
%
5
,
%
6
)
YSCALEYUV2PACKED1b
(
%%
REGBP
,
%
5
)
WRITEYUY2
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
)
WRITEYUY2
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
)
"pop %%"
REG_BP
"
\n\t
"
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
ubuf0
),
"D"
(
ubuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
,
"m"
(
uv_off
)
"a"
(
&
c
->
redDither
)
);
);
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment