Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
b4a224c5
Commit
b4a224c5
authored
May 25, 2011
by
Ronald S. Bultje
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
swscale: split chroma buffers into separate U/V planes.
Preparatory step to implement support for sizes > VOFW.
parent
69645c02
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
359 additions
and
265 deletions
+359
-265
swscale.c
libswscale/swscale.c
+38
-30
swscale_internal.h
libswscale/swscale_internal.h
+18
-9
swscale_template.c
libswscale/swscale_template.c
+71
-59
utils.c
libswscale/utils.c
+12
-7
swscale_template.c
libswscale/x86/swscale_template.c
+220
-160
No files found.
libswscale/swscale.c
View file @
b4a224c5
...
...
@@ -207,7 +207,8 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
#endif
static
av_always_inline
void
yuv2yuvX16inC_template
(
const
int16_t
*
lumFilter
,
const
int16_t
**
lumSrc
,
int
lumFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrSrc
,
int
chrFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrUSrc
,
const
int16_t
**
chrVSrc
,
int
chrFilterSize
,
const
int16_t
**
alpSrc
,
uint16_t
*
dest
,
uint16_t
*
uDest
,
uint16_t
*
vDest
,
uint16_t
*
aDest
,
int
dstW
,
int
chrDstW
,
int
big_endian
,
int
output_bits
)
{
...
...
@@ -246,8 +247,8 @@ static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, co
int
j
;
for
(
j
=
0
;
j
<
chrFilterSize
;
j
++
)
{
u
+=
chr
Src
[
j
][
i
]
*
chrFilter
[
j
];
v
+=
chr
Src
[
j
][
i
+
VOFW
]
*
chrFilter
[
j
];
u
+=
chr
USrc
[
j
][
i
]
*
chrFilter
[
j
];
v
+=
chr
VSrc
[
j
][
i
]
*
chrFilter
[
j
];
}
output_pixel
(
&
uDest
[
i
],
u
);
...
...
@@ -271,13 +272,14 @@ static av_always_inline void yuv2yuvX16inC_template(const int16_t *lumFilter, co
#define yuv2NBPS(bits, BE_LE, is_be) \
static void yuv2yuvX ## bits ## BE_LE ## _c(const int16_t *lumFilter, \
const int16_t **lumSrc, int lumFilterSize, \
const int16_t *chrFilter, const int16_t **chrSrc, \
const int16_t *chrFilter, const int16_t **chrUSrc, \
const int16_t **chrVSrc, \
int chrFilterSize, const int16_t **alpSrc, \
uint16_t *dest, uint16_t *uDest, uint16_t *vDest, \
uint16_t *aDest, int dstW, int chrDstW) \
{ \
yuv2yuvX16inC_template(lumFilter, lumSrc, lumFilterSize, \
chrFilter, chrSrc, chrFilterSize, \
chrFilter, chr
USrc, chrV
Src, chrFilterSize, \
alpSrc, \
dest, uDest, vDest, aDest, \
dstW, chrDstW, is_be, bits); \
...
...
@@ -290,20 +292,20 @@ yuv2NBPS(16, BE, 1);
yuv2NBPS
(
16
,
LE
,
0
);
static
inline
void
yuv2yuvX16inC
(
const
int16_t
*
lumFilter
,
const
int16_t
**
lumSrc
,
int
lumFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrSrc
,
int
chrFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chr
USrc
,
const
int16_t
**
chrV
Src
,
int
chrFilterSize
,
const
int16_t
**
alpSrc
,
uint16_t
*
dest
,
uint16_t
*
uDest
,
uint16_t
*
vDest
,
uint16_t
*
aDest
,
int
dstW
,
int
chrDstW
,
enum
PixelFormat
dstFormat
)
{
#define conv16(bits) \
if (isBE(dstFormat)) { \
yuv2yuvX ## bits ## BE_c(lumFilter, lumSrc, lumFilterSize, \
chrFilter, chrSrc, chrFilterSize, \
chrFilter, chr
USrc, chrV
Src, chrFilterSize, \
alpSrc, \
dest, uDest, vDest, aDest, \
dstW, chrDstW); \
} else { \
yuv2yuvX ## bits ## LE_c(lumFilter, lumSrc, lumFilterSize, \
chrFilter, chrSrc, chrFilterSize, \
chrFilter, chr
USrc, chrV
Src, chrFilterSize, \
alpSrc, \
dest, uDest, vDest, aDest, \
dstW, chrDstW); \
...
...
@@ -319,7 +321,8 @@ static inline void yuv2yuvX16inC(const int16_t *lumFilter, const int16_t **lumSr
}
static
inline
void
yuv2yuvXinC
(
const
int16_t
*
lumFilter
,
const
int16_t
**
lumSrc
,
int
lumFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrSrc
,
int
chrFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrUSrc
,
const
int16_t
**
chrVSrc
,
int
chrFilterSize
,
const
int16_t
**
alpSrc
,
uint8_t
*
dest
,
uint8_t
*
uDest
,
uint8_t
*
vDest
,
uint8_t
*
aDest
,
int
dstW
,
int
chrDstW
)
{
//FIXME Optimize (just quickly written not optimized..)
...
...
@@ -339,8 +342,8 @@ static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc,
int
v
=
1
<<
18
;
int
j
;
for
(
j
=
0
;
j
<
chrFilterSize
;
j
++
)
{
u
+=
chrSrc
[
j
][
i
]
*
chrFilter
[
j
];
v
+=
chr
Src
[
j
][
i
+
VOFW
]
*
chrFilter
[
j
];
u
+=
chr
U
Src
[
j
][
i
]
*
chrFilter
[
j
];
v
+=
chr
VSrc
[
j
][
i
]
*
chrFilter
[
j
];
}
uDest
[
i
]
=
av_clip_uint8
(
u
>>
19
);
...
...
@@ -360,7 +363,8 @@ static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc,
}
static
inline
void
yuv2nv12XinC
(
const
int16_t
*
lumFilter
,
const
int16_t
**
lumSrc
,
int
lumFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrSrc
,
int
chrFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrUSrc
,
const
int16_t
**
chrVSrc
,
int
chrFilterSize
,
uint8_t
*
dest
,
uint8_t
*
uDest
,
int
dstW
,
int
chrDstW
,
int
dstFormat
)
{
//FIXME Optimize (just quickly written not optimized..)
...
...
@@ -383,8 +387,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
int
v
=
1
<<
18
;
int
j
;
for
(
j
=
0
;
j
<
chrFilterSize
;
j
++
)
{
u
+=
chrSrc
[
j
][
i
]
*
chrFilter
[
j
];
v
+=
chr
Src
[
j
][
i
+
VOFW
]
*
chrFilter
[
j
];
u
+=
chr
U
Src
[
j
][
i
]
*
chrFilter
[
j
];
v
+=
chr
VSrc
[
j
][
i
]
*
chrFilter
[
j
];
}
uDest
[
2
*
i
]
=
av_clip_uint8
(
u
>>
19
);
...
...
@@ -396,8 +400,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
int
v
=
1
<<
18
;
int
j
;
for
(
j
=
0
;
j
<
chrFilterSize
;
j
++
)
{
u
+=
chrSrc
[
j
][
i
]
*
chrFilter
[
j
];
v
+=
chr
Src
[
j
][
i
+
VOFW
]
*
chrFilter
[
j
];
u
+=
chr
U
Src
[
j
][
i
]
*
chrFilter
[
j
];
v
+=
chr
VSrc
[
j
][
i
]
*
chrFilter
[
j
];
}
uDest
[
2
*
i
]
=
av_clip_uint8
(
v
>>
19
);
...
...
@@ -421,8 +425,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
Y2 += lumSrc[j][i2+1] * lumFilter[j];\
}\
for (j=0; j<chrFilterSize; j++) {\
U += chrSrc[j][i] * chrFilter[j];\
V += chr
Src[j][i+VOFW
] * chrFilter[j];\
U += chr
U
Src[j][i] * chrFilter[j];\
V += chr
VSrc[j][i
] * chrFilter[j];\
}\
Y1>>=19;\
Y2>>=19;\
...
...
@@ -469,8 +473,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
Y += lumSrc[j][i ] * lumFilter[j];\
}\
for (j=0; j<chrFilterSize; j++) {\
U += chr
Src[j][i
] * chrFilter[j];\
V += chr
Src[j][i+VOFW
] * chrFilter[j];\
U += chr
USrc[j][i
] * chrFilter[j];\
V += chr
VSrc[j][i
] * chrFilter[j];\
}\
Y >>=10;\
U >>=10;\
...
...
@@ -535,8 +539,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
const int i2= 2*i; \
int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>19; \
int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19; \
int U= (u
vbuf0[i ]*uvalpha1+uvbuf1[i ]*uvalpha)>>19;
\
int V= (
uvbuf0[i+VOFW]*uvalpha1+uvbuf1[i+VOFW]*uvalpha)>>19;
\
int U= (u
buf0[i]*uvalpha1+ubuf1[i]*uvalpha)>>19;
\
int V= (
vbuf0[i]*uvalpha1+vbuf1[i]*uvalpha)>>19;
\
type av_unused *r, *b, *g; \
int av_unused A1, A2; \
if (alpha) {\
...
...
@@ -561,8 +565,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
const int i2= 2*i;\
int Y1= buf0[i2 ]>>7;\
int Y2= buf0[i2+1]>>7;\
int U= (u
vbuf1[i
])>>7;\
int V= (
uvbuf1[i+VOFW
])>>7;\
int U= (u
buf1[i
])>>7;\
int V= (
vbuf1[i
])>>7;\
type av_unused *r, *b, *g;\
int av_unused A1, A2;\
if (alpha) {\
...
...
@@ -587,8 +591,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
const int i2= 2*i;\
int Y1= buf0[i2 ]>>7;\
int Y2= buf0[i2+1]>>7;\
int U= (u
vbuf0[i ] + uvbuf1[i
])>>8;\
int V= (
uvbuf0[i+VOFW] + uvbuf1[i+VOFW
])>>8;\
int U= (u
buf0[i] + ubuf1[i
])>>8;\
int V= (
vbuf0[i] + vbuf1[i
])>>8;\
type av_unused *r, *b, *g;\
int av_unused A1, A2;\
if (alpha) {\
...
...
@@ -870,16 +874,20 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
break;\
}
static
inline
void
yuv2packedXinC
(
SwsContext
*
c
,
const
int16_t
*
lumFilter
,
const
int16_t
**
lumSrc
,
int
lumFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrSrc
,
int
chrFilterSize
,
static
inline
void
yuv2packedXinC
(
SwsContext
*
c
,
const
int16_t
*
lumFilter
,
const
int16_t
**
lumSrc
,
int
lumFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrUSrc
,
const
int16_t
**
chrVSrc
,
int
chrFilterSize
,
const
int16_t
**
alpSrc
,
uint8_t
*
dest
,
int
dstW
,
int
y
)
{
int
i
;
YSCALE_YUV_2_ANYRGB_C
(
YSCALE_YUV_2_RGBX_C
,
YSCALE_YUV_2_PACKEDX_C
(
void
,
0
),
YSCALE_YUV_2_GRAY16_C
,
YSCALE_YUV_2_MONOX_C
)
}
static
inline
void
yuv2rgbXinC_full
(
SwsContext
*
c
,
const
int16_t
*
lumFilter
,
const
int16_t
**
lumSrc
,
int
lumFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrSrc
,
int
chrFilterSize
,
static
inline
void
yuv2rgbXinC_full
(
SwsContext
*
c
,
const
int16_t
*
lumFilter
,
const
int16_t
**
lumSrc
,
int
lumFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrUSrc
,
const
int16_t
**
chrVSrc
,
int
chrFilterSize
,
const
int16_t
**
alpSrc
,
uint8_t
*
dest
,
int
dstW
,
int
y
)
{
int
i
;
...
...
libswscale/swscale_internal.h
View file @
b4a224c5
...
...
@@ -112,7 +112,8 @@ typedef struct SwsContext {
*/
//@{
int16_t
**
lumPixBuf
;
///< Ring buffer for scaled horizontal luma plane lines to be fed to the vertical scaler.
int16_t
**
chrPixBuf
;
///< Ring buffer for scaled horizontal chroma plane lines to be fed to the vertical scaler.
int16_t
**
chrUPixBuf
;
///< Ring buffer for scaled horizontal chroma U plane lines to be fed to the vertical scaler.
int16_t
**
chrVPixBuf
;
///< Ring buffer for scaled horizontal chroma V plane lines to be fed to the vertical scaler.
int16_t
**
alpPixBuf
;
///< Ring buffer for scaled horizontal alpha plane lines to be fed to the vertical scaler.
int
vLumBufSize
;
///< Number of vertical luma/alpha lines allocated in the ring buffer.
int
vChrBufSize
;
///< Number of vertical chroma lines allocated in the ring buffer.
...
...
@@ -200,6 +201,7 @@ typedef struct SwsContext {
#define V_TEMP "11*8+4*4*256*2+32"
#define Y_TEMP "11*8+4*4*256*2+40"
#define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48"
#define UV_OFF "11*8+4*4*256*3+48"
DECLARE_ALIGNED
(
8
,
uint64_t
,
redDither
);
DECLARE_ALIGNED
(
8
,
uint64_t
,
greenDither
);
...
...
@@ -222,6 +224,7 @@ typedef struct SwsContext {
DECLARE_ALIGNED
(
8
,
uint64_t
,
v_temp
);
DECLARE_ALIGNED
(
8
,
uint64_t
,
y_temp
);
int32_t
alpMmxFilter
[
4
*
MAX_FILTER_SIZE
];
DECLARE_ALIGNED
(
8
,
ptrdiff_t
,
uv_off
);
///< offset (in pixels) between u and v planes
#if HAVE_ALTIVEC
vector
signed
short
CY
;
...
...
@@ -255,36 +258,42 @@ typedef struct SwsContext {
/* function pointers for swScale() */
void
(
*
yuv2nv12X
)(
struct
SwsContext
*
c
,
const
int16_t
*
lumFilter
,
const
int16_t
**
lumSrc
,
int
lumFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrSrc
,
int
chrFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrUSrc
,
const
int16_t
**
chrVSrc
,
int
chrFilterSize
,
uint8_t
*
dest
,
uint8_t
*
uDest
,
int
dstW
,
int
chrDstW
,
int
dstFormat
);
void
(
*
yuv2yuv1
)(
struct
SwsContext
*
c
,
const
int16_t
*
lumSrc
,
const
int16_t
*
chrSrc
,
const
int16_t
*
alpSrc
,
const
int16_t
*
lumSrc
,
const
int16_t
*
chrUSrc
,
const
int16_t
*
chrVSrc
,
const
int16_t
*
alpSrc
,
uint8_t
*
dest
,
uint8_t
*
uDest
,
uint8_t
*
vDest
,
uint8_t
*
aDest
,
long
dstW
,
long
chrDstW
);
void
(
*
yuv2yuvX
)(
struct
SwsContext
*
c
,
const
int16_t
*
lumFilter
,
const
int16_t
**
lumSrc
,
int
lumFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrSrc
,
int
chrFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrUSrc
,
const
int16_t
**
chrVSrc
,
int
chrFilterSize
,
const
int16_t
**
alpSrc
,
uint8_t
*
dest
,
uint8_t
*
uDest
,
uint8_t
*
vDest
,
uint8_t
*
aDest
,
long
dstW
,
long
chrDstW
);
void
(
*
yuv2packed1
)(
struct
SwsContext
*
c
,
const
uint16_t
*
buf0
,
const
uint16_t
*
uvbuf0
,
const
uint16_t
*
uvbuf1
,
const
uint16_t
*
ubuf0
,
const
uint16_t
*
ubuf1
,
const
uint16_t
*
vbuf0
,
const
uint16_t
*
vbuf1
,
const
uint16_t
*
abuf0
,
uint8_t
*
dest
,
int
dstW
,
int
uvalpha
,
int
dstFormat
,
int
flags
,
int
y
);
void
(
*
yuv2packed2
)(
struct
SwsContext
*
c
,
const
uint16_t
*
buf0
,
const
uint16_t
*
buf1
,
const
uint16_t
*
uvbuf0
,
const
uint16_t
*
uvbuf1
,
const
uint16_t
*
ubuf0
,
const
uint16_t
*
ubuf1
,
const
uint16_t
*
vbuf0
,
const
uint16_t
*
vbuf1
,
const
uint16_t
*
abuf0
,
const
uint16_t
*
abuf1
,
uint8_t
*
dest
,
int
dstW
,
int
yalpha
,
int
uvalpha
,
int
y
);
void
(
*
yuv2packedX
)(
struct
SwsContext
*
c
,
const
int16_t
*
lumFilter
,
const
int16_t
**
lumSrc
,
int
lumFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrSrc
,
int
chrFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrUSrc
,
const
int16_t
**
chrVSrc
,
int
chrFilterSize
,
const
int16_t
**
alpSrc
,
uint8_t
*
dest
,
long
dstW
,
long
dstY
);
...
...
@@ -299,7 +308,7 @@ typedef struct SwsContext {
int16_t
*
dst
,
long
dstWidth
,
const
uint8_t
*
src
,
int
srcW
,
int
xInc
);
void
(
*
hcscale_fast
)(
struct
SwsContext
*
c
,
int16_t
*
dst
,
long
dstWidth
,
int16_t
*
dst
1
,
int16_t
*
dst2
,
long
dstWidth
,
const
uint8_t
*
src1
,
const
uint8_t
*
src2
,
int
srcW
,
int
xInc
);
...
...
@@ -308,7 +317,7 @@ typedef struct SwsContext {
long
filterSize
);
void
(
*
lumConvertRange
)(
uint16_t
*
dst
,
int
width
);
///< Color range conversion function for luma plane if needed.
void
(
*
chrConvertRange
)(
uint16_t
*
dst
,
int
width
);
///< Color range conversion function for chroma planes if needed.
void
(
*
chrConvertRange
)(
uint16_t
*
dst
1
,
uint16_t
*
dst2
,
int
width
);
///< Color range conversion function for chroma planes if needed.
int
lumSrcOffset
;
///< Offset given to luma src pointers passed to horizontal input functions.
int
chrSrcOffset
;
///< Offset given to chroma src pointers passed to horizontal input functions.
...
...
libswscale/swscale_template.c
View file @
b4a224c5
...
...
@@ -20,29 +20,32 @@
static
inline
void
yuv2yuvX_c
(
SwsContext
*
c
,
const
int16_t
*
lumFilter
,
const
int16_t
**
lumSrc
,
int
lumFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrSrc
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrUSrc
,
const
int16_t
**
chrVSrc
,
int
chrFilterSize
,
const
int16_t
**
alpSrc
,
uint8_t
*
dest
,
uint8_t
*
uDest
,
uint8_t
*
vDest
,
uint8_t
*
aDest
,
long
dstW
,
long
chrDstW
)
{
yuv2yuvXinC
(
lumFilter
,
lumSrc
,
lumFilterSize
,
chrFilter
,
chrSrc
,
chrFilterSize
,
chrFilter
,
chr
USrc
,
chrV
Src
,
chrFilterSize
,
alpSrc
,
dest
,
uDest
,
vDest
,
aDest
,
dstW
,
chrDstW
);
}
static
inline
void
yuv2nv12X_c
(
SwsContext
*
c
,
const
int16_t
*
lumFilter
,
const
int16_t
**
lumSrc
,
int
lumFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrSrc
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrUSrc
,
const
int16_t
**
chrVSrc
,
int
chrFilterSize
,
uint8_t
*
dest
,
uint8_t
*
uDest
,
int
dstW
,
int
chrDstW
,
enum
PixelFormat
dstFormat
)
{
yuv2nv12XinC
(
lumFilter
,
lumSrc
,
lumFilterSize
,
chrFilter
,
chrSrc
,
chrFilterSize
,
chrFilter
,
chr
USrc
,
chrV
Src
,
chrFilterSize
,
dest
,
uDest
,
dstW
,
chrDstW
,
dstFormat
);
}
static
inline
void
yuv2yuv1_c
(
SwsContext
*
c
,
const
int16_t
*
lumSrc
,
const
int16_t
*
chrSrc
,
const
int16_t
*
alpSrc
,
const
int16_t
*
chrUSrc
,
const
int16_t
*
chrVSrc
,
const
int16_t
*
alpSrc
,
uint8_t
*
dest
,
uint8_t
*
uDest
,
uint8_t
*
vDest
,
uint8_t
*
aDest
,
long
dstW
,
long
chrDstW
)
{
...
...
@@ -60,8 +63,8 @@ static inline void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
if
(
uDest
)
for
(
i
=
0
;
i
<
chrDstW
;
i
++
)
{
int
u
=
(
chr
Src
[
i
]
+
64
)
>>
7
;
int
v
=
(
chr
Src
[
i
+
VOFW
]
+
64
)
>>
7
;
int
u
=
(
chr
USrc
[
i
]
+
64
)
>>
7
;
int
v
=
(
chr
VSrc
[
i
]
+
64
)
>>
7
;
if
((
u
|
v
)
&
256
)
{
if
(
u
<
0
)
u
=
0
;
...
...
@@ -87,12 +90,13 @@ static inline void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
*/
static
inline
void
yuv2packedX_c
(
SwsContext
*
c
,
const
int16_t
*
lumFilter
,
const
int16_t
**
lumSrc
,
int
lumFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrSrc
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrUSrc
,
const
int16_t
**
chrVSrc
,
int
chrFilterSize
,
const
int16_t
**
alpSrc
,
uint8_t
*
dest
,
long
dstW
,
long
dstY
)
{
yuv2packedXinC
(
c
,
lumFilter
,
lumSrc
,
lumFilterSize
,
chrFilter
,
chrSrc
,
chrFilterSize
,
chrFilter
,
chr
USrc
,
chrV
Src
,
chrFilterSize
,
alpSrc
,
dest
,
dstW
,
dstY
);
}
...
...
@@ -100,8 +104,9 @@ static inline void yuv2packedX_c(SwsContext *c, const int16_t *lumFilter,
* vertical bilinear scale YV12 to RGB
*/
static
inline
void
yuv2packed2_c
(
SwsContext
*
c
,
const
uint16_t
*
buf0
,
const
uint16_t
*
buf1
,
const
uint16_t
*
uvbuf0
,
const
uint16_t
*
uvbuf1
,
const
uint16_t
*
abuf0
,
const
uint16_t
*
buf1
,
const
uint16_t
*
ubuf0
,
const
uint16_t
*
ubuf1
,
const
uint16_t
*
vbuf0
,
const
uint16_t
*
vbuf1
,
const
uint16_t
*
abuf0
,
const
uint16_t
*
abuf1
,
uint8_t
*
dest
,
int
dstW
,
int
yalpha
,
int
uvalpha
,
int
y
)
{
...
...
@@ -116,7 +121,8 @@ static inline void yuv2packed2_c(SwsContext *c, const uint16_t *buf0,
* YV12 to RGB without scaling or interpolating
*/
static
inline
void
yuv2packed1_c
(
SwsContext
*
c
,
const
uint16_t
*
buf0
,
const
uint16_t
*
uvbuf0
,
const
uint16_t
*
uvbuf1
,
const
uint16_t
*
ubuf0
,
const
uint16_t
*
ubuf1
,
const
uint16_t
*
vbuf0
,
const
uint16_t
*
vbuf1
,
const
uint16_t
*
abuf0
,
uint8_t
*
dest
,
int
dstW
,
int
uvalpha
,
enum
PixelFormat
dstFormat
,
int
flags
,
int
y
)
...
...
@@ -359,20 +365,20 @@ static inline void hScale_c(int16_t *dst, int dstW, const uint8_t *src,
//FIXME all pal and rgb srcFormats could do this conversion as well
//FIXME all scalers more complex than bilinear could do half of this transform
static
void
chrRangeToJpeg_c
(
uint16_t
*
dst
,
int
width
)
static
void
chrRangeToJpeg_c
(
uint16_t
*
dst
U
,
uint16_t
*
dstV
,
int
width
)
{
int
i
;
for
(
i
=
0
;
i
<
width
;
i
++
)
{
dst
[
i
]
=
(
FFMIN
(
dst
[
i
],
30775
)
*
4663
-
9289992
)
>>
12
;
//-264
dst
[
i
+
VOFW
]
=
(
FFMIN
(
dst
[
i
+
VOFW
],
30775
)
*
4663
-
9289992
)
>>
12
;
//-264
dst
U
[
i
]
=
(
FFMIN
(
dstU
[
i
],
30775
)
*
4663
-
9289992
)
>>
12
;
//-264
dst
V
[
i
]
=
(
FFMIN
(
dstV
[
i
],
30775
)
*
4663
-
9289992
)
>>
12
;
//-264
}
}
static
void
chrRangeFromJpeg_c
(
uint16_t
*
dst
,
int
width
)
static
void
chrRangeFromJpeg_c
(
uint16_t
*
dst
U
,
uint16_t
*
dstV
,
int
width
)
{
int
i
;
for
(
i
=
0
;
i
<
width
;
i
++
)
{
dst
[
i
]
=
(
dst
[
i
]
*
1799
+
4081085
)
>>
11
;
//1469
dst
[
i
+
VOFW
]
=
(
dst
[
i
+
VOFW
]
*
1799
+
4081085
)
>>
11
;
//1469
dst
U
[
i
]
=
(
dstU
[
i
]
*
1799
+
4081085
)
>>
11
;
//1469
dst
V
[
i
]
=
(
dstV
[
i
]
*
1799
+
4081085
)
>>
11
;
//1469
}
}
static
void
lumRangeToJpeg_c
(
uint16_t
*
dst
,
int
width
)
...
...
@@ -429,7 +435,7 @@ static inline void hyscale_c(SwsContext *c, uint16_t *dst, long dstWidth,
convertRange
(
dst
,
dstWidth
);
}
static
inline
void
hcscale_fast_c
(
SwsContext
*
c
,
int16_t
*
dst
,
static
inline
void
hcscale_fast_c
(
SwsContext
*
c
,
int16_t
*
dst
1
,
int16_t
*
dst2
,
long
dstWidth
,
const
uint8_t
*
src1
,
const
uint8_t
*
src2
,
int
srcW
,
int
xInc
)
{
...
...
@@ -438,17 +444,13 @@ static inline void hcscale_fast_c(SwsContext *c, int16_t *dst,
for
(
i
=
0
;
i
<
dstWidth
;
i
++
)
{
register
unsigned
int
xx
=
xpos
>>
16
;
register
unsigned
int
xalpha
=
(
xpos
&
0xFFFF
)
>>
9
;
dst
[
i
]
=
(
src1
[
xx
]
*
(
xalpha
^
127
)
+
src1
[
xx
+
1
]
*
xalpha
);
dst
[
i
+
VOFW
]
=
(
src2
[
xx
]
*
(
xalpha
^
127
)
+
src2
[
xx
+
1
]
*
xalpha
);
/* slower
dst[i]= (src1[xx]<<7) + (src1[xx+1] - src1[xx])*xalpha;
dst[i+VOFW]=(src2[xx]<<7) + (src2[xx+1] - src2[xx])*xalpha;
*/
dst1
[
i
]
=
(
src1
[
xx
]
*
(
xalpha
^
127
)
+
src1
[
xx
+
1
]
*
xalpha
);
dst2
[
i
]
=
(
src2
[
xx
]
*
(
xalpha
^
127
)
+
src2
[
xx
+
1
]
*
xalpha
);
xpos
+=
xInc
;
}
}
inline
static
void
hcscale_c
(
SwsContext
*
c
,
uint16_t
*
dst
,
long
dstWidth
,
inline
static
void
hcscale_c
(
SwsContext
*
c
,
uint16_t
*
dst
1
,
uint16_t
*
dst2
,
long
dstWidth
,
const
uint8_t
*
src1
,
const
uint8_t
*
src2
,
int
srcW
,
int
xInc
,
const
int16_t
*
hChrFilter
,
const
int16_t
*
hChrFilterPos
,
int
hChrFilterSize
,
...
...
@@ -466,14 +468,14 @@ inline static void hcscale_c(SwsContext *c, uint16_t *dst, long dstWidth,
}
if
(
!
c
->
hcscale_fast
)
{
c
->
hScale
(
dst
,
dstWidth
,
src1
,
srcW
,
xInc
,
hChrFilter
,
hChrFilterPos
,
hChrFilterSize
);
c
->
hScale
(
dst
+
VOFW
,
dstWidth
,
src2
,
srcW
,
xInc
,
hChrFilter
,
hChrFilterPos
,
hChrFilterSize
);
c
->
hScale
(
dst
1
,
dstWidth
,
src1
,
srcW
,
xInc
,
hChrFilter
,
hChrFilterPos
,
hChrFilterSize
);
c
->
hScale
(
dst
2
,
dstWidth
,
src2
,
srcW
,
xInc
,
hChrFilter
,
hChrFilterPos
,
hChrFilterSize
);
}
else
{
// fast bilinear upscale / crap downscale
c
->
hcscale_fast
(
c
,
dst
,
dstWidth
,
src1
,
src2
,
srcW
,
xInc
);
c
->
hcscale_fast
(
c
,
dst
1
,
dst2
,
dstWidth
,
src1
,
src2
,
srcW
,
xInc
);
}
if
(
c
->
chrConvertRange
)
c
->
chrConvertRange
(
dst
,
dstWidth
);
c
->
chrConvertRange
(
dst
1
,
dst2
,
dstWidth
);
}
#define DEBUG_SWSCALE_BUFFERS 0
...
...
@@ -513,7 +515,8 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
const
int
hLumFilterSize
=
c
->
hLumFilterSize
;
const
int
hChrFilterSize
=
c
->
hChrFilterSize
;
int16_t
**
lumPixBuf
=
c
->
lumPixBuf
;
int16_t
**
chrPixBuf
=
c
->
chrPixBuf
;
int16_t
**
chrUPixBuf
=
c
->
chrUPixBuf
;
int16_t
**
chrVPixBuf
=
c
->
chrVPixBuf
;
int16_t
**
alpPixBuf
=
c
->
alpPixBuf
;
const
int
vLumBufSize
=
c
->
vLumBufSize
;
const
int
vChrBufSize
=
c
->
vChrBufSize
;
...
...
@@ -641,10 +644,10 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
//FIXME replace parameters through context struct (some at least)
if
(
c
->
needs_hcscale
)
hcscale_c
(
c
,
chr
PixBuf
[
chrBufIndex
],
chrDstW
,
src1
,
src2
,
chrSrcW
,
chrXInc
,
hChrFilter
,
hChrFilterPos
,
hChrFilterSize
,
formatConvBuffer
,
pal
);
hcscale_c
(
c
,
chr
UPixBuf
[
chrBufIndex
],
chrVPixBuf
[
chrBufIndex
]
,
chrDstW
,
src1
,
src2
,
chrSrcW
,
chrXInc
,
hChrFilter
,
hChrFilterPos
,
hChrFilterSize
,
formatConvBuffer
,
pal
);
lastInChrBuf
++
;
DEBUG_BUFFERS
(
"
\t\t
chrBufIndex %d: lastInChrBuf: %d
\n
"
,
chrBufIndex
,
lastInChrBuf
);
...
...
@@ -660,47 +663,54 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
#endif
if
(
dstY
<
dstH
-
2
)
{
const
int16_t
**
lumSrcPtr
=
(
const
int16_t
**
)
lumPixBuf
+
lumBufIndex
+
firstLumSrcY
-
lastInLumBuf
+
vLumBufSize
;
const
int16_t
**
chrSrcPtr
=
(
const
int16_t
**
)
chrPixBuf
+
chrBufIndex
+
firstChrSrcY
-
lastInChrBuf
+
vChrBufSize
;
const
int16_t
**
chrUSrcPtr
=
(
const
int16_t
**
)
chrUPixBuf
+
chrBufIndex
+
firstChrSrcY
-
lastInChrBuf
+
vChrBufSize
;
const
int16_t
**
chrVSrcPtr
=
(
const
int16_t
**
)
chrVPixBuf
+
chrBufIndex
+
firstChrSrcY
-
lastInChrBuf
+
vChrBufSize
;
const
int16_t
**
alpSrcPtr
=
(
CONFIG_SWSCALE_ALPHA
&&
alpPixBuf
)
?
(
const
int16_t
**
)
alpPixBuf
+
lumBufIndex
+
firstLumSrcY
-
lastInLumBuf
+
vLumBufSize
:
NULL
;
if
(
dstFormat
==
PIX_FMT_NV12
||
dstFormat
==
PIX_FMT_NV21
)
{
const
int
chrSkipMask
=
(
1
<<
c
->
chrDstVSubSample
)
-
1
;
if
(
dstY
&
chrSkipMask
)
uDest
=
NULL
;
//FIXME split functions in lumi / chromi
c
->
yuv2nv12X
(
c
,
vLumFilter
+
dstY
*
vLumFilterSize
,
lumSrcPtr
,
vLumFilterSize
,
vChrFilter
+
chrDstY
*
vChrFilterSize
,
chrSrcPtr
,
vChrFilterSize
,
vChrFilter
+
chrDstY
*
vChrFilterSize
,
chr
USrcPtr
,
chrV
SrcPtr
,
vChrFilterSize
,
dest
,
uDest
,
dstW
,
chrDstW
,
dstFormat
);
}
else
if
(
isPlanarYUV
(
dstFormat
)
||
dstFormat
==
PIX_FMT_GRAY8
)
{
//YV12 like
const
int
chrSkipMask
=
(
1
<<
c
->
chrDstVSubSample
)
-
1
;
if
((
dstY
&
chrSkipMask
)
||
isGray
(
dstFormat
))
uDest
=
vDest
=
NULL
;
//FIXME split functions in lumi / chromi
if
(
is16BPS
(
dstFormat
)
||
is9_OR_10BPS
(
dstFormat
))
{
yuv2yuvX16inC
(
vLumFilter
+
dstY
*
vLumFilterSize
,
lumSrcPtr
,
vLumFilterSize
,
vChrFilter
+
chrDstY
*
vChrFilterSize
,
chrSrcPtr
,
vChrFilterSize
,
alpSrcPtr
,
(
uint16_t
*
)
dest
,
(
uint16_t
*
)
uDest
,
(
uint16_t
*
)
vDest
,
(
uint16_t
*
)
aDest
,
dstW
,
chrDstW
,
yuv2yuvX16inC
(
vLumFilter
+
dstY
*
vLumFilterSize
,
lumSrcPtr
,
vLumFilterSize
,
vChrFilter
+
chrDstY
*
vChrFilterSize
,
chrUSrcPtr
,
chrVSrcPtr
,
vChrFilterSize
,
alpSrcPtr
,
(
uint16_t
*
)
dest
,
(
uint16_t
*
)
uDest
,
(
uint16_t
*
)
vDest
,
(
uint16_t
*
)
aDest
,
dstW
,
chrDstW
,
dstFormat
);
}
else
if
(
vLumFilterSize
==
1
&&
vChrFilterSize
==
1
)
{
// unscaled YV12
const
int16_t
*
lumBuf
=
lumSrcPtr
[
0
];
const
int16_t
*
chrBuf
=
chrSrcPtr
[
0
];
const
int16_t
*
chrUBuf
=
chrUSrcPtr
[
0
];
const
int16_t
*
chrVBuf
=
chrVSrcPtr
[
0
];
const
int16_t
*
alpBuf
=
(
CONFIG_SWSCALE_ALPHA
&&
alpPixBuf
)
?
alpSrcPtr
[
0
]
:
NULL
;
c
->
yuv2yuv1
(
c
,
lumBuf
,
chrBuf
,
alpBuf
,
dest
,
uDest
,
vDest
,
aDest
,
dstW
,
chrDstW
);
c
->
yuv2yuv1
(
c
,
lumBuf
,
chrUBuf
,
chrVBuf
,
alpBuf
,
dest
,
uDest
,
vDest
,
aDest
,
dstW
,
chrDstW
);
}
else
{
//General YV12
c
->
yuv2yuvX
(
c
,
vLumFilter
+
dstY
*
vLumFilterSize
,
lumSrcPtr
,
vLumFilterSize
,
vChrFilter
+
chrDstY
*
vChrFilterSize
,
chrSrcPtr
,
vChrFilterSize
,
vChrFilter
+
chrDstY
*
vChrFilterSize
,
chrUSrcPtr
,
chrVSrcPtr
,
vChrFilterSize
,
alpSrcPtr
,
dest
,
uDest
,
vDest
,
aDest
,
dstW
,
chrDstW
);
}
}
else
{
assert
(
lumSrcPtr
+
vLumFilterSize
-
1
<
lumPixBuf
+
vLumBufSize
*
2
);
assert
(
chr
SrcPtr
+
vChrFilterSize
-
1
<
chr
PixBuf
+
vChrBufSize
*
2
);
assert
(
lumSrcPtr
+
vLumFilterSize
-
1
<
lumPixBuf
+
vLumBufSize
*
2
);
assert
(
chr
USrcPtr
+
vChrFilterSize
-
1
<
chrU
PixBuf
+
vChrBufSize
*
2
);
if
(
vLumFilterSize
==
1
&&
vChrFilterSize
==
2
)
{
//unscaled RGB
int
chrAlpha
=
vChrFilter
[
2
*
dstY
+
1
];
if
(
flags
&
SWS_FULL_CHR_H_INT
)
{
yuv2rgbXinC_full
(
c
,
//FIXME write a packed1_full function
vLumFilter
+
dstY
*
vLumFilterSize
,
lumSrcPtr
,
vLumFilterSize
,
vChrFilter
+
dstY
*
vChrFilterSize
,
chrSrcPtr
,
vChrFilterSize
,
vChrFilter
+
dstY
*
vChrFilterSize
,
chrUSrcPtr
,
chrVSrcPtr
,
vChrFilterSize
,
alpSrcPtr
,
dest
,
dstW
,
dstY
);
}
else
{
c
->
yuv2packed1
(
c
,
*
lumSrcPtr
,
*
chrSrcPtr
,
*
(
chrSrcPtr
+
1
),
c
->
yuv2packed1
(
c
,
*
lumSrcPtr
,
*
chrUSrcPtr
,
*
(
chrUSrcPtr
+
1
),
*
chrVSrcPtr
,
*
(
chrVSrcPtr
+
1
),
alpPixBuf
?
*
alpSrcPtr
:
NULL
,
dest
,
dstW
,
chrAlpha
,
dstFormat
,
flags
,
dstY
);
}
...
...
@@ -714,10 +724,11 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
if
(
flags
&
SWS_FULL_CHR_H_INT
)
{
yuv2rgbXinC_full
(
c
,
//FIXME write a packed2_full function
vLumFilter
+
dstY
*
vLumFilterSize
,
lumSrcPtr
,
vLumFilterSize
,
vChrFilter
+
dstY
*
vChrFilterSize
,
chrSrcPtr
,
vChrFilterSize
,
vChrFilter
+
dstY
*
vChrFilterSize
,
chr
USrcPtr
,
chrV
SrcPtr
,
vChrFilterSize
,
alpSrcPtr
,
dest
,
dstW
,
dstY
);
}
else
{
c
->
yuv2packed2
(
c
,
*
lumSrcPtr
,
*
(
lumSrcPtr
+
1
),
*
chrSrcPtr
,
*
(
chrSrcPtr
+
1
),
c
->
yuv2packed2
(
c
,
*
lumSrcPtr
,
*
(
lumSrcPtr
+
1
),
*
chrUSrcPtr
,
*
(
chrUSrcPtr
+
1
),
*
chrVSrcPtr
,
*
(
chrVSrcPtr
+
1
),
alpPixBuf
?
*
alpSrcPtr
:
NULL
,
alpPixBuf
?
*
(
alpSrcPtr
+
1
)
:
NULL
,
dest
,
dstW
,
lumAlpha
,
chrAlpha
,
dstY
);
}
...
...
@@ -725,26 +736,27 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
if
(
flags
&
SWS_FULL_CHR_H_INT
)
{
yuv2rgbXinC_full
(
c
,
vLumFilter
+
dstY
*
vLumFilterSize
,
lumSrcPtr
,
vLumFilterSize
,
vChrFilter
+
dstY
*
vChrFilterSize
,
chrSrcPtr
,
vChrFilterSize
,
vChrFilter
+
dstY
*
vChrFilterSize
,
chr
USrcPtr
,
chrV
SrcPtr
,
vChrFilterSize
,
alpSrcPtr
,
dest
,
dstW
,
dstY
);
}
else
{
c
->
yuv2packedX
(
c
,
vLumFilter
+
dstY
*
vLumFilterSize
,
lumSrcPtr
,
vLumFilterSize
,
vChrFilter
+
dstY
*
vChrFilterSize
,
chrSrcPtr
,
vChrFilterSize
,
vChrFilter
+
dstY
*
vChrFilterSize
,
chr
USrcPtr
,
chrV
SrcPtr
,
vChrFilterSize
,
alpSrcPtr
,
dest
,
dstW
,
dstY
);
}
}
}
}
else
{
// hmm looks like we can't use MMX here without overwriting this array's tail
const
int16_t
**
lumSrcPtr
=
(
const
int16_t
**
)
lumPixBuf
+
lumBufIndex
+
firstLumSrcY
-
lastInLumBuf
+
vLumBufSize
;
const
int16_t
**
chrSrcPtr
=
(
const
int16_t
**
)
chrPixBuf
+
chrBufIndex
+
firstChrSrcY
-
lastInChrBuf
+
vChrBufSize
;
const
int16_t
**
chrUSrcPtr
=
(
const
int16_t
**
)
chrUPixBuf
+
chrBufIndex
+
firstChrSrcY
-
lastInChrBuf
+
vChrBufSize
;
const
int16_t
**
chrVSrcPtr
=
(
const
int16_t
**
)
chrVPixBuf
+
chrBufIndex
+
firstChrSrcY
-
lastInChrBuf
+
vChrBufSize
;
const
int16_t
**
alpSrcPtr
=
(
CONFIG_SWSCALE_ALPHA
&&
alpPixBuf
)
?
(
const
int16_t
**
)
alpPixBuf
+
lumBufIndex
+
firstLumSrcY
-
lastInLumBuf
+
vLumBufSize
:
NULL
;
if
(
dstFormat
==
PIX_FMT_NV12
||
dstFormat
==
PIX_FMT_NV21
)
{
const
int
chrSkipMask
=
(
1
<<
c
->
chrDstVSubSample
)
-
1
;
if
(
dstY
&
chrSkipMask
)
uDest
=
NULL
;
//FIXME split functions in lumi / chromi
yuv2nv12XinC
(
vLumFilter
+
dstY
*
vLumFilterSize
,
lumSrcPtr
,
vLumFilterSize
,
vChrFilter
+
chrDstY
*
vChrFilterSize
,
chrSrcPtr
,
vChrFilterSize
,
vChrFilter
+
chrDstY
*
vChrFilterSize
,
chr
USrcPtr
,
chrV
SrcPtr
,
vChrFilterSize
,
dest
,
uDest
,
dstW
,
chrDstW
,
dstFormat
);
}
else
if
(
isPlanarYUV
(
dstFormat
)
||
dstFormat
==
PIX_FMT_GRAY8
)
{
//YV12
const
int
chrSkipMask
=
(
1
<<
c
->
chrDstVSubSample
)
-
1
;
...
...
@@ -752,27 +764,27 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
if
(
is16BPS
(
dstFormat
)
||
is9_OR_10BPS
(
dstFormat
))
{
yuv2yuvX16inC
(
vLumFilter
+
dstY
*
vLumFilterSize
,
lumSrcPtr
,
vLumFilterSize
,
vChrFilter
+
chrDstY
*
vChrFilterSize
,
chrSrcPtr
,
vChrFilterSize
,
vChrFilter
+
chrDstY
*
vChrFilterSize
,
chr
USrcPtr
,
chrV
SrcPtr
,
vChrFilterSize
,
alpSrcPtr
,
(
uint16_t
*
)
dest
,
(
uint16_t
*
)
uDest
,
(
uint16_t
*
)
vDest
,
(
uint16_t
*
)
aDest
,
dstW
,
chrDstW
,
dstFormat
);
}
else
{
yuv2yuvXinC
(
vLumFilter
+
dstY
*
vLumFilterSize
,
lumSrcPtr
,
vLumFilterSize
,
vChrFilter
+
chrDstY
*
vChrFilterSize
,
chrSrcPtr
,
vChrFilterSize
,
vChrFilter
+
chrDstY
*
vChrFilterSize
,
chr
USrcPtr
,
chrV
SrcPtr
,
vChrFilterSize
,
alpSrcPtr
,
dest
,
uDest
,
vDest
,
aDest
,
dstW
,
chrDstW
);
}
}
else
{
assert
(
lumSrcPtr
+
vLumFilterSize
-
1
<
lumPixBuf
+
vLumBufSize
*
2
);
assert
(
chr
SrcPtr
+
vChrFilterSize
-
1
<
chr
PixBuf
+
vChrBufSize
*
2
);
assert
(
chr
USrcPtr
+
vChrFilterSize
-
1
<
chrU
PixBuf
+
vChrBufSize
*
2
);
if
(
flags
&
SWS_FULL_CHR_H_INT
)
{
yuv2rgbXinC_full
(
c
,
vLumFilter
+
dstY
*
vLumFilterSize
,
lumSrcPtr
,
vLumFilterSize
,
vChrFilter
+
dstY
*
vChrFilterSize
,
chrSrcPtr
,
vChrFilterSize
,
vChrFilter
+
dstY
*
vChrFilterSize
,
chr
USrcPtr
,
chrV
SrcPtr
,
vChrFilterSize
,
alpSrcPtr
,
dest
,
dstW
,
dstY
);
}
else
{
yuv2packedXinC
(
c
,
vLumFilter
+
dstY
*
vLumFilterSize
,
lumSrcPtr
,
vLumFilterSize
,
vChrFilter
+
dstY
*
vChrFilterSize
,
chrSrcPtr
,
vChrFilterSize
,
vChrFilter
+
dstY
*
vChrFilterSize
,
chr
USrcPtr
,
chrV
SrcPtr
,
vChrFilterSize
,
alpSrcPtr
,
dest
,
dstW
,
dstY
);
}
}
...
...
libswscale/utils.c
View file @
b4a224c5
...
...
@@ -989,7 +989,8 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
// allocate pixbufs (we use dynamic allocation because otherwise we would need to
// allocate several megabytes to handle all possible cases)
FF_ALLOC_OR_GOTO
(
c
,
c
->
lumPixBuf
,
c
->
vLumBufSize
*
2
*
sizeof
(
int16_t
*
),
fail
);
FF_ALLOC_OR_GOTO
(
c
,
c
->
chrPixBuf
,
c
->
vChrBufSize
*
2
*
sizeof
(
int16_t
*
),
fail
);
FF_ALLOC_OR_GOTO
(
c
,
c
->
chrUPixBuf
,
c
->
vChrBufSize
*
2
*
sizeof
(
int16_t
*
),
fail
);
FF_ALLOC_OR_GOTO
(
c
,
c
->
chrVPixBuf
,
c
->
vChrBufSize
*
2
*
sizeof
(
int16_t
*
),
fail
);
if
(
CONFIG_SWSCALE_ALPHA
&&
isALPHA
(
c
->
srcFormat
)
&&
isALPHA
(
c
->
dstFormat
))
FF_ALLOCZ_OR_GOTO
(
c
,
c
->
alpPixBuf
,
c
->
vLumBufSize
*
2
*
sizeof
(
int16_t
*
),
fail
);
//Note we need at least one pixel more at the end because of the MMX code (just in case someone wanna replace the 4000/8000)
...
...
@@ -998,9 +999,11 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
FF_ALLOCZ_OR_GOTO
(
c
,
c
->
lumPixBuf
[
i
+
c
->
vLumBufSize
],
VOF
+
1
,
fail
);
c
->
lumPixBuf
[
i
]
=
c
->
lumPixBuf
[
i
+
c
->
vLumBufSize
];
}
c
->
uv_off
=
VOFW
;
for
(
i
=
0
;
i
<
c
->
vChrBufSize
;
i
++
)
{
FF_ALLOC_OR_GOTO
(
c
,
c
->
chrPixBuf
[
i
+
c
->
vChrBufSize
],
(
VOF
+
1
)
*
2
,
fail
);
c
->
chrPixBuf
[
i
]
=
c
->
chrPixBuf
[
i
+
c
->
vChrBufSize
];
FF_ALLOC_OR_GOTO
(
c
,
c
->
chrUPixBuf
[
i
+
c
->
vChrBufSize
],
VOF
*
2
+
1
,
fail
);
c
->
chrUPixBuf
[
i
]
=
c
->
chrUPixBuf
[
i
+
c
->
vChrBufSize
];
c
->
chrVPixBuf
[
i
]
=
c
->
chrVPixBuf
[
i
+
c
->
vChrBufSize
]
=
c
->
chrUPixBuf
[
i
]
+
VOFW
;
}
if
(
CONFIG_SWSCALE_ALPHA
&&
c
->
alpPixBuf
)
for
(
i
=
0
;
i
<
c
->
vLumBufSize
;
i
++
)
{
...
...
@@ -1009,7 +1012,8 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
}
//try to avoid drawing green stuff between the right end and the stride end
for
(
i
=
0
;
i
<
c
->
vChrBufSize
;
i
++
)
memset
(
c
->
chrPixBuf
[
i
],
64
,
(
VOF
+
1
)
*
2
);
for
(
i
=
0
;
i
<
c
->
vChrBufSize
;
i
++
)
memset
(
c
->
chrUPixBuf
[
i
],
64
,
VOF
*
2
+
1
);
assert
(
2
*
VOFW
==
VOF
);
...
...
@@ -1462,10 +1466,11 @@ void sws_freeContext(SwsContext *c)
av_freep
(
&
c
->
lumPixBuf
);
}
if
(
c
->
chrPixBuf
)
{
if
(
c
->
chr
U
PixBuf
)
{
for
(
i
=
0
;
i
<
c
->
vChrBufSize
;
i
++
)
av_freep
(
&
c
->
chrPixBuf
[
i
]);
av_freep
(
&
c
->
chrPixBuf
);
av_freep
(
&
c
->
chrUPixBuf
[
i
]);
av_freep
(
&
c
->
chrUPixBuf
);
av_freep
(
&
c
->
chrVPixBuf
);
}
if
(
CONFIG_SWSCALE_ALPHA
&&
c
->
alpPixBuf
)
{
...
...
libswscale/x86/swscale_template.c
View file @
b4a224c5
...
...
@@ -37,9 +37,8 @@
#endif
#define MOVNTQ(a,b) REAL_MOVNTQ(a,b)
#define YSCALEYUV2YV12X(
x, offset, dest, width
) \
#define YSCALEYUV2YV12X(
offset, dest, end, pos
) \
__asm__ volatile(\
"xor %%"REG_a", %%"REG_a" \n\t"\
"movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
"movq %%mm3, %%mm4 \n\t"\
"lea " offset "(%0), %%"REG_d" \n\t"\
...
...
@@ -47,8 +46,8 @@
".p2align 4 \n\t"
/* FIXME Unroll? */
\
"1: \n\t"\
"movq 8(%%"REG_d"), %%mm0 \n\t"
/* filterCoeff */
\
"movq
" x "(%%"REG_S", %%"REG_a"
, 2), %%mm2 \n\t"
/* srcData */
\
"movq
8+" x "(%%"REG_S", %%"REG_a"
, 2), %%mm5 \n\t"
/* srcData */
\
"movq
(%%"REG_S", %3
, 2), %%mm2 \n\t"
/* srcData */
\
"movq
8(%%"REG_S", %3
, 2), %%mm5 \n\t"
/* srcData */
\
"add $16, %%"REG_d" \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
"test %%"REG_S", %%"REG_S" \n\t"\
...
...
@@ -61,40 +60,40 @@
"psraw $3, %%mm4 \n\t"\
"packuswb %%mm4, %%mm3 \n\t"\
MOVNTQ(%%mm3, (%1, %%REGa))\
"add $8, %
%"REG_a"
\n\t"\
"cmp %2, %
%"REG_a"
\n\t"\
"add $8, %
3
\n\t"\
"cmp %2, %
3
\n\t"\
"movq "VROUNDER_OFFSET"(%0), %%mm3 \n\t"\
"movq %%mm3, %%mm4 \n\t"\
"lea " offset "(%0), %%"REG_d" \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
"jb 1b \n\t"\
:: "r" (&c->redDither),\
"r" (dest), "g" ((x86_reg)width
)\
: "%"REG_
a, "%"REG_
d, "%"REG_S\
"r" (dest), "g" ((x86_reg)(end)), "r"((x86_reg)(pos)
)\
: "%"REG_d, "%"REG_S\
);
static
inline
void
RENAME
(
yuv2yuvX
)(
SwsContext
*
c
,
const
int16_t
*
lumFilter
,
const
int16_t
**
lumSrc
,
int
lumFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrSrc
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrUSrc
,
const
int16_t
**
chrVSrc
,
int
chrFilterSize
,
const
int16_t
**
alpSrc
,
uint8_t
*
dest
,
uint8_t
*
uDest
,
uint8_t
*
vDest
,
uint8_t
*
aDest
,
long
dstW
,
long
chrDstW
)
{
if
(
uDest
)
{
YSCALEYUV2YV12X
(
"0"
,
CHR_MMX_FILTER_OFFSET
,
uDest
,
chrDstW
)
YSCALEYUV2YV12X
(
AV_STRINGIFY
(
VOF
),
CHR_MMX_FILTER_OFFSET
,
vDest
,
chrDstW
)
YSCALEYUV2YV12X
(
CHR_MMX_FILTER_OFFSET
,
uDest
,
chrDstW
,
0
)
YSCALEYUV2YV12X
(
CHR_MMX_FILTER_OFFSET
,
vDest
,
chrDstW
+
c
->
uv_off
,
c
->
uv_off
)
}
if
(
CONFIG_SWSCALE_ALPHA
&&
aDest
)
{
YSCALEYUV2YV12X
(
"0"
,
ALP_MMX_FILTER_OFFSET
,
aDest
,
dstW
)
YSCALEYUV2YV12X
(
ALP_MMX_FILTER_OFFSET
,
aDest
,
dstW
,
0
)
}
YSCALEYUV2YV12X
(
"0"
,
LUM_MMX_FILTER_OFFSET
,
dest
,
dstW
)
YSCALEYUV2YV12X
(
LUM_MMX_FILTER_OFFSET
,
dest
,
dstW
,
0
)
}
#define YSCALEYUV2YV12X_ACCURATE(
x, offset, dest, width
) \
#define YSCALEYUV2YV12X_ACCURATE(
offset, dest, end, pos
) \
__asm__ volatile(\
"lea " offset "(%0), %%"REG_d" \n\t"\
"xor %%"REG_a", %%"REG_a" \n\t"\
"pxor %%mm4, %%mm4 \n\t"\
"pxor %%mm5, %%mm5 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
...
...
@@ -102,10 +101,10 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
"mov (%%"REG_d"), %%"REG_S" \n\t"\
".p2align 4 \n\t"\
"1: \n\t"\
"movq
" x "(%%"REG_S", %%"REG_a"
, 2), %%mm0 \n\t"
/* srcData */
\
"movq
8+" x "(%%"REG_S", %%"REG_a"
, 2), %%mm2 \n\t"
/* srcData */
\
"movq
(%%"REG_S", %3
, 2), %%mm0 \n\t"
/* srcData */
\
"movq
8(%%"REG_S", %3
, 2), %%mm2 \n\t"
/* srcData */
\
"mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
"movq
" x "(%%"REG_S", %%"REG_a"
, 2), %%mm1 \n\t"
/* srcData */
\
"movq
(%%"REG_S", %3
, 2), %%mm1 \n\t"
/* srcData */
\
"movq %%mm0, %%mm3 \n\t"\
"punpcklwd %%mm1, %%mm0 \n\t"\
"punpckhwd %%mm1, %%mm3 \n\t"\
...
...
@@ -114,7 +113,7 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
"pmaddwd %%mm1, %%mm3 \n\t"\
"paddd %%mm0, %%mm4 \n\t"\
"paddd %%mm3, %%mm5 \n\t"\
"movq
8+" x "(%%"REG_S", %%"REG_a"
, 2), %%mm3 \n\t"
/* srcData */
\
"movq
8(%%"REG_S", %3
, 2), %%mm3 \n\t"
/* srcData */
\
"mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
"add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
"test %%"REG_S", %%"REG_S" \n\t"\
...
...
@@ -139,8 +138,8 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
"psraw $3, %%mm6 \n\t"\
"packuswb %%mm6, %%mm4 \n\t"\
MOVNTQ(%%mm4, (%1, %%REGa))\
"add $8, %
%"REG_a"
\n\t"\
"cmp %2, %
%"REG_a"
\n\t"\
"add $8, %
3
\n\t"\
"cmp %2, %
3
\n\t"\
"lea " offset "(%0), %%"REG_d" \n\t"\
"pxor %%mm4, %%mm4 \n\t"\
"pxor %%mm5, %%mm5 \n\t"\
...
...
@@ -149,26 +148,27 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
"mov (%%"REG_d"), %%"REG_S" \n\t"\
"jb 1b \n\t"\
:: "r" (&c->redDither),\
"r" (dest), "g" ((x86_reg)
width
)\
"r" (dest), "g" ((x86_reg)
(end)), "r"((x86_reg)(pos)
)\
: "%"REG_a, "%"REG_d, "%"REG_S\
);
static
inline
void
RENAME
(
yuv2yuvX_ar
)(
SwsContext
*
c
,
const
int16_t
*
lumFilter
,
const
int16_t
**
lumSrc
,
int
lumFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrSrc
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrUSrc
,
const
int16_t
**
chrVSrc
,
int
chrFilterSize
,
const
int16_t
**
alpSrc
,
uint8_t
*
dest
,
uint8_t
*
uDest
,
uint8_t
*
vDest
,
uint8_t
*
aDest
,
long
dstW
,
long
chrDstW
)
{
if
(
uDest
)
{
YSCALEYUV2YV12X_ACCURATE
(
"0"
,
CHR_MMX_FILTER_OFFSET
,
uDest
,
chrDstW
)
YSCALEYUV2YV12X_ACCURATE
(
AV_STRINGIFY
(
VOF
),
CHR_MMX_FILTER_OFFSET
,
vDest
,
chrDstW
)
YSCALEYUV2YV12X_ACCURATE
(
CHR_MMX_FILTER_OFFSET
,
uDest
,
chrDstW
,
0
)
YSCALEYUV2YV12X_ACCURATE
(
CHR_MMX_FILTER_OFFSET
,
vDest
,
chrDstW
+
c
->
uv_off
,
c
->
uv_off
)
}
if
(
CONFIG_SWSCALE_ALPHA
&&
aDest
)
{
YSCALEYUV2YV12X_ACCURATE
(
"0"
,
ALP_MMX_FILTER_OFFSET
,
aDest
,
dstW
)
YSCALEYUV2YV12X_ACCURATE
(
ALP_MMX_FILTER_OFFSET
,
aDest
,
dstW
,
0
)
}
YSCALEYUV2YV12X_ACCURATE
(
"0"
,
LUM_MMX_FILTER_OFFSET
,
dest
,
dstW
)
YSCALEYUV2YV12X_ACCURATE
(
LUM_MMX_FILTER_OFFSET
,
dest
,
dstW
,
0
)
}
#define YSCALEYUV2YV121 \
...
...
@@ -185,12 +185,13 @@ static inline void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
"jnc 1b \n\t"
static
inline
void
RENAME
(
yuv2yuv1
)(
SwsContext
*
c
,
const
int16_t
*
lumSrc
,
const
int16_t
*
chrSrc
,
const
int16_t
*
alpSrc
,
const
int16_t
*
chrUSrc
,
const
int16_t
*
chrVSrc
,
const
int16_t
*
alpSrc
,
uint8_t
*
dest
,
uint8_t
*
uDest
,
uint8_t
*
vDest
,
uint8_t
*
aDest
,
long
dstW
,
long
chrDstW
)
{
long
p
=
4
;
const
uint8_t
*
src
[
4
]
=
{
alpSrc
+
dstW
,
lumSrc
+
dstW
,
chr
Src
+
chrDstW
,
chrSrc
+
VOFW
+
chrDstW
};
const
uint8_t
*
src
[
4
]
=
{
alpSrc
+
dstW
,
lumSrc
+
dstW
,
chr
USrc
+
chrDstW
,
chrVSrc
+
chrDstW
};
uint8_t
*
dst
[
4
]
=
{
aDest
,
dest
,
uDest
,
vDest
};
x86_reg
counter
[
4
]
=
{
dstW
,
dstW
,
chrDstW
,
chrDstW
};
...
...
@@ -225,12 +226,13 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
"jnc 1b \n\t"
static
inline
void
RENAME
(
yuv2yuv1_ar
)(
SwsContext
*
c
,
const
int16_t
*
lumSrc
,
const
int16_t
*
chrSrc
,
const
int16_t
*
alpSrc
,
const
int16_t
*
chrUSrc
,
const
int16_t
*
chrVSrc
,
const
int16_t
*
alpSrc
,
uint8_t
*
dest
,
uint8_t
*
uDest
,
uint8_t
*
vDest
,
uint8_t
*
aDest
,
long
dstW
,
long
chrDstW
)
{
long
p
=
4
;
const
uint8_t
*
src
[
4
]
=
{
alpSrc
+
dstW
,
lumSrc
+
dstW
,
chr
Src
+
chrDstW
,
chrSrc
+
VOFW
+
chrDstW
};
const
uint8_t
*
src
[
4
]
=
{
alpSrc
+
dstW
,
lumSrc
+
dstW
,
chr
USrc
+
chrDstW
,
chrVSrc
+
chrDstW
};
uint8_t
*
dst
[
4
]
=
{
aDest
,
dest
,
uDest
,
vDest
};
x86_reg
counter
[
4
]
=
{
dstW
,
dstW
,
chrDstW
,
chrDstW
};
...
...
@@ -260,7 +262,8 @@ static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
"2: \n\t"\
"movq 8(%%"REG_d"), %%mm0 \n\t"
/* filterCoeff */
\
"movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t"
/* UsrcData */
\
"movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm5 \n\t"
/* VsrcData */
\
"add %6, %%"REG_S" \n\t" \
"movq (%%"REG_S", %%"REG_a"), %%mm5 \n\t"
/* VsrcData */
\
"add $16, %%"REG_d" \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
"pmulhw %%mm0, %%mm2 \n\t"\
...
...
@@ -296,7 +299,7 @@ static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
#define YSCALEYUV2PACKEDX_END \
:: "r" (&c->redDither), \
"m" (dummy), "m" (dummy), "m" (dummy),\
"r" (dest), "m" (dstW_reg)
\
"r" (dest), "m" (dstW_reg)
, "m"(uv_off)
\
: "%"REG_a, "%"REG_d, "%"REG_S \
);
...
...
@@ -315,7 +318,8 @@ static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
".p2align 4 \n\t"\
"2: \n\t"\
"movq (%%"REG_S", %%"REG_a"), %%mm0 \n\t"
/* UsrcData */
\
"movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm2 \n\t"
/* VsrcData */
\
"add %6, %%"REG_S" \n\t" \
"movq (%%"REG_S", %%"REG_a"), %%mm2 \n\t"
/* VsrcData */
\
"mov "STR(APCK_PTR2)"(%%"REG_d"), %%"REG_S" \n\t"\
"movq (%%"REG_S", %%"REG_a"), %%mm1 \n\t"
/* UsrcData */
\
"movq %%mm0, %%mm3 \n\t"\
...
...
@@ -326,7 +330,8 @@ static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
"pmaddwd %%mm1, %%mm3 \n\t"\
"paddd %%mm0, %%mm4 \n\t"\
"paddd %%mm3, %%mm5 \n\t"\
"movq "AV_STRINGIFY(VOF)"(%%"REG_S", %%"REG_a"), %%mm3 \n\t"
/* VsrcData */
\
"add %6, %%"REG_S" \n\t" \
"movq (%%"REG_S", %%"REG_a"), %%mm3 \n\t"
/* VsrcData */
\
"mov "STR(APCK_SIZE)"(%%"REG_d"), %%"REG_S" \n\t"\
"add $"STR(APCK_SIZE)", %%"REG_d" \n\t"\
"test %%"REG_S", %%"REG_S" \n\t"\
...
...
@@ -461,12 +466,14 @@ static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
static
inline
void
RENAME
(
yuv2rgb32_X_ar
)(
SwsContext
*
c
,
const
int16_t
*
lumFilter
,
const
int16_t
**
lumSrc
,
int
lumFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrSrc
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrUSrc
,
const
int16_t
**
chrVSrc
,
int
chrFilterSize
,
const
int16_t
**
alpSrc
,
uint8_t
*
dest
,
long
dstW
,
long
dstY
)
{
x86_reg
dummy
=
0
;
x86_reg
dstW_reg
=
dstW
;
x86_reg
uv_off
=
c
->
uv_off
<<
1
;
if
(
CONFIG_SWSCALE_ALPHA
&&
c
->
alpPixBuf
)
{
YSCALEYUV2PACKEDX_ACCURATE
...
...
@@ -492,12 +499,14 @@ static inline void RENAME(yuv2rgb32_X_ar)(SwsContext *c, const int16_t *lumFilte
static
inline
void
RENAME
(
yuv2rgb32_X
)(
SwsContext
*
c
,
const
int16_t
*
lumFilter
,
const
int16_t
**
lumSrc
,
int
lumFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrSrc
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrUSrc
,
const
int16_t
**
chrVSrc
,
int
chrFilterSize
,
const
int16_t
**
alpSrc
,
uint8_t
*
dest
,
long
dstW
,
long
dstY
)
{
x86_reg
dummy
=
0
;
x86_reg
dstW_reg
=
dstW
;
x86_reg
uv_off
=
c
->
uv_off
<<
1
;
if
(
CONFIG_SWSCALE_ALPHA
&&
c
->
alpPixBuf
)
{
YSCALEYUV2PACKEDX
...
...
@@ -547,12 +556,14 @@ static inline void RENAME(yuv2rgb32_X)(SwsContext *c, const int16_t *lumFilter,
static
inline
void
RENAME
(
yuv2rgb565_X_ar
)(
SwsContext
*
c
,
const
int16_t
*
lumFilter
,
const
int16_t
**
lumSrc
,
int
lumFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrSrc
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrUSrc
,
const
int16_t
**
chrVSrc
,
int
chrFilterSize
,
const
int16_t
**
alpSrc
,
uint8_t
*
dest
,
long
dstW
,
long
dstY
)
{
x86_reg
dummy
=
0
;
x86_reg
dstW_reg
=
dstW
;
x86_reg
uv_off
=
c
->
uv_off
<<
1
;
YSCALEYUV2PACKEDX_ACCURATE
YSCALEYUV2RGBX
...
...
@@ -569,12 +580,14 @@ static inline void RENAME(yuv2rgb565_X_ar)(SwsContext *c, const int16_t *lumFilt
static
inline
void
RENAME
(
yuv2rgb565_X
)(
SwsContext
*
c
,
const
int16_t
*
lumFilter
,
const
int16_t
**
lumSrc
,
int
lumFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrSrc
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrUSrc
,
const
int16_t
**
chrVSrc
,
int
chrFilterSize
,
const
int16_t
**
alpSrc
,
uint8_t
*
dest
,
long
dstW
,
long
dstY
)
{
x86_reg
dummy
=
0
;
x86_reg
dstW_reg
=
dstW
;
x86_reg
uv_off
=
c
->
uv_off
<<
1
;
YSCALEYUV2PACKEDX
YSCALEYUV2RGBX
...
...
@@ -620,12 +633,14 @@ static inline void RENAME(yuv2rgb565_X)(SwsContext *c, const int16_t *lumFilter,
static
inline
void
RENAME
(
yuv2rgb555_X_ar
)(
SwsContext
*
c
,
const
int16_t
*
lumFilter
,
const
int16_t
**
lumSrc
,
int
lumFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrSrc
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrUSrc
,
const
int16_t
**
chrVSrc
,
int
chrFilterSize
,
const
int16_t
**
alpSrc
,
uint8_t
*
dest
,
long
dstW
,
long
dstY
)
{
x86_reg
dummy
=
0
;
x86_reg
dstW_reg
=
dstW
;
x86_reg
uv_off
=
c
->
uv_off
<<
1
;
YSCALEYUV2PACKEDX_ACCURATE
YSCALEYUV2RGBX
...
...
@@ -642,12 +657,14 @@ static inline void RENAME(yuv2rgb555_X_ar)(SwsContext *c, const int16_t *lumFilt
static
inline
void
RENAME
(
yuv2rgb555_X
)(
SwsContext
*
c
,
const
int16_t
*
lumFilter
,
const
int16_t
**
lumSrc
,
int
lumFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrSrc
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrUSrc
,
const
int16_t
**
chrVSrc
,
int
chrFilterSize
,
const
int16_t
**
alpSrc
,
uint8_t
*
dest
,
long
dstW
,
long
dstY
)
{
x86_reg
dummy
=
0
;
x86_reg
dstW_reg
=
dstW
;
x86_reg
uv_off
=
c
->
uv_off
<<
1
;
YSCALEYUV2PACKEDX
YSCALEYUV2RGBX
...
...
@@ -773,12 +790,14 @@ static inline void RENAME(yuv2rgb555_X)(SwsContext *c, const int16_t *lumFilter,
static
inline
void
RENAME
(
yuv2bgr24_X_ar
)(
SwsContext
*
c
,
const
int16_t
*
lumFilter
,
const
int16_t
**
lumSrc
,
int
lumFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrSrc
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrUSrc
,
const
int16_t
**
chrVSrc
,
int
chrFilterSize
,
const
int16_t
**
alpSrc
,
uint8_t
*
dest
,
long
dstW
,
long
dstY
)
{
x86_reg
dummy
=
0
;
x86_reg
dstW_reg
=
dstW
;
x86_reg
uv_off
=
c
->
uv_off
<<
1
;
YSCALEYUV2PACKEDX_ACCURATE
YSCALEYUV2RGBX
...
...
@@ -788,19 +807,21 @@ static inline void RENAME(yuv2bgr24_X_ar)(SwsContext *c, const int16_t *lumFilte
WRITEBGR24
(
%%
REGc
,
%
5
,
%%
REGa
)
::
"r"
(
&
c
->
redDither
),
"m"
(
dummy
),
"m"
(
dummy
),
"m"
(
dummy
),
"r"
(
dest
),
"m"
(
dstW_reg
)
"r"
(
dest
),
"m"
(
dstW_reg
)
,
"m"
(
uv_off
)
:
"%"
REG_a
,
"%"
REG_c
,
"%"
REG_d
,
"%"
REG_S
);
}
static
inline
void
RENAME
(
yuv2bgr24_X
)(
SwsContext
*
c
,
const
int16_t
*
lumFilter
,
const
int16_t
**
lumSrc
,
int
lumFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrSrc
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrUSrc
,
const
int16_t
**
chrVSrc
,
int
chrFilterSize
,
const
int16_t
**
alpSrc
,
uint8_t
*
dest
,
long
dstW
,
long
dstY
)
{
x86_reg
dummy
=
0
;
x86_reg
dstW_reg
=
dstW
;
x86_reg
uv_off
=
c
->
uv_off
<<
1
;
YSCALEYUV2PACKEDX
YSCALEYUV2RGBX
...
...
@@ -810,7 +831,7 @@ static inline void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
WRITEBGR24
(
%%
REGc
,
%
5
,
%%
REGa
)
::
"r"
(
&
c
->
redDither
),
"m"
(
dummy
),
"m"
(
dummy
),
"m"
(
dummy
),
"r"
(
dest
),
"m"
(
dstW_reg
)
"r"
(
dest
),
"m"
(
dstW_reg
)
,
"m"
(
uv_off
)
:
"%"
REG_a
,
"%"
REG_c
,
"%"
REG_d
,
"%"
REG_S
);
}
...
...
@@ -834,12 +855,14 @@ static inline void RENAME(yuv2bgr24_X)(SwsContext *c, const int16_t *lumFilter,
static
inline
void
RENAME
(
yuv2yuyv422_X_ar
)(
SwsContext
*
c
,
const
int16_t
*
lumFilter
,
const
int16_t
**
lumSrc
,
int
lumFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrSrc
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrUSrc
,
const
int16_t
**
chrVSrc
,
int
chrFilterSize
,
const
int16_t
**
alpSrc
,
uint8_t
*
dest
,
long
dstW
,
long
dstY
)
{
x86_reg
dummy
=
0
;
x86_reg
dstW_reg
=
dstW
;
x86_reg
uv_off
=
c
->
uv_off
<<
1
;
YSCALEYUV2PACKEDX_ACCURATE
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
...
...
@@ -853,12 +876,14 @@ static inline void RENAME(yuv2yuyv422_X_ar)(SwsContext *c, const int16_t *lumFil
static
inline
void
RENAME
(
yuv2yuyv422_X
)(
SwsContext
*
c
,
const
int16_t
*
lumFilter
,
const
int16_t
**
lumSrc
,
int
lumFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrSrc
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrUSrc
,
const
int16_t
**
chrVSrc
,
int
chrFilterSize
,
const
int16_t
**
alpSrc
,
uint8_t
*
dest
,
long
dstW
,
long
dstY
)
{
x86_reg
dummy
=
0
;
x86_reg
dstW_reg
=
dstW
;
x86_reg
uv_off
=
c
->
uv_off
<<
1
;
YSCALEYUV2PACKEDX
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
...
...
@@ -870,14 +895,16 @@ static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter
YSCALEYUV2PACKEDX_END
}
#define REAL_YSCALEYUV2RGB_UV(index, c) \
#define REAL_YSCALEYUV2RGB_UV(index, c
, uv_off
) \
"xor "#index", "#index" \n\t"\
".p2align 4 \n\t"\
"1: \n\t"\
"movq (%2, "#index"), %%mm2 \n\t"
/* uvbuf0[eax]*/
\
"movq (%3, "#index"), %%mm3 \n\t"
/* uvbuf1[eax]*/
\
"movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5 \n\t"
/* uvbuf0[eax+2048]*/
\
"movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4 \n\t"
/* uvbuf1[eax+2048]*/
\
"add "#uv_off", "#index" \n\t" \
"movq (%2, "#index"), %%mm5 \n\t"
/* uvbuf0[eax+2048]*/
\
"movq (%3, "#index"), %%mm4 \n\t"
/* uvbuf1[eax+2048]*/
\
"sub "#uv_off", "#index" \n\t" \
"psubw %%mm3, %%mm2 \n\t"
/* uvbuf0[eax] - uvbuf1[eax]*/
\
"psubw %%mm4, %%mm5 \n\t"
/* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/
\
"movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
...
...
@@ -940,8 +967,8 @@ static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter
#define YSCALEYUV2RGB_YA(index, c, b1, b2) REAL_YSCALEYUV2RGB_YA(index, c, b1, b2)
#define YSCALEYUV2RGB(index, c) \
REAL_YSCALEYUV2RGB_UV(index, c) \
#define YSCALEYUV2RGB(index, c
, uv_off
) \
REAL_YSCALEYUV2RGB_UV(index, c
, uv_off
) \
REAL_YSCALEYUV2RGB_YA(index, c, %0, %1) \
REAL_YSCALEYUV2RGB_COEFF(c)
...
...
@@ -949,23 +976,26 @@ static inline void RENAME(yuv2yuyv422_X)(SwsContext *c, const int16_t *lumFilter
* vertical bilinear scale YV12 to RGB
*/
static
inline
void
RENAME
(
yuv2rgb32_2
)(
SwsContext
*
c
,
const
uint16_t
*
buf0
,
const
uint16_t
*
buf1
,
const
uint16_t
*
uvbuf0
,
const
uint16_t
*
uvbuf1
,
const
uint16_t
*
abuf0
,
const
uint16_t
*
buf1
,
const
uint16_t
*
ubuf0
,
const
uint16_t
*
ubuf1
,
const
uint16_t
*
vbuf0
,
const
uint16_t
*
vbuf1
,
const
uint16_t
*
abuf0
,
const
uint16_t
*
abuf1
,
uint8_t
*
dest
,
int
dstW
,
int
yalpha
,
int
uvalpha
,
int
y
)
{
x86_reg
uv_off
=
c
->
uv_off
<<
1
;
if
(
CONFIG_SWSCALE_ALPHA
&&
c
->
alpPixBuf
)
{
#if ARCH_X86_64
__asm__
volatile
(
YSCALEYUV2RGB
(
%%
r8
,
%
5
)
YSCALEYUV2RGB
(
%%
r8
,
%
5
,
%
8
)
YSCALEYUV2RGB_YA
(
%%
r8
,
%
5
,
%
6
,
%
7
)
"psraw $3, %%mm1
\n\t
"
/* abuf0[eax] - abuf1[eax] >>7*/
"psraw $3, %%mm7
\n\t
"
/* abuf0[eax] - abuf1[eax] >>7*/
"packuswb %%mm7, %%mm1
\n\t
"
WRITEBGR32
(
%
4
,
8280
(
%
5
),
%%
r8
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm1
,
%%
mm0
,
%%
mm7
,
%%
mm3
,
%%
mm6
)
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
u
vbuf0
),
"D"
(
uv
buf1
),
"r"
(
dest
),
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
u
buf0
),
"D"
(
u
buf1
),
"r"
(
dest
),
"a"
(
&
c
->
redDither
),
"r"
(
abuf0
),
"r"
(
abuf1
)
"r"
(
abuf0
),
"r"
(
abuf1
)
,
"m"
(
uv_off
)
:
"%r8"
);
#else
...
...
@@ -975,7 +1005,7 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB
(
%%
REGBP
,
%
5
)
YSCALEYUV2RGB
(
%%
REGBP
,
%
5
,
%
6
)
"push %0
\n\t
"
"push %1
\n\t
"
"mov "
U_TEMP
"(%5), %0
\n\t
"
...
...
@@ -990,7 +1020,7 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
uvbuf0
),
"D"
(
uvbuf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
"a"
(
&
c
->
redDither
)
,
"m"
(
uv_off
)
);
#endif
}
else
{
...
...
@@ -998,50 +1028,56 @@ static inline void RENAME(yuv2rgb32_2)(SwsContext *c, const uint16_t *buf0,
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB
(
%%
REGBP
,
%
5
)
YSCALEYUV2RGB
(
%%
REGBP
,
%
5
,
%
6
)
"pcmpeqd %%mm7, %%mm7
\n\t
"
WRITEBGR32
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm7
,
%%
mm0
,
%%
mm1
,
%%
mm3
,
%%
mm6
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
u
vbuf0
),
"D"
(
uv
buf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
u
buf0
),
"D"
(
u
buf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
,
"m"
(
uv_off
)
);
}
}
static
inline
void
RENAME
(
yuv2bgr24_2
)(
SwsContext
*
c
,
const
uint16_t
*
buf0
,
const
uint16_t
*
buf1
,
const
uint16_t
*
uvbuf0
,
const
uint16_t
*
uvbuf1
,
const
uint16_t
*
abuf0
,
const
uint16_t
*
buf1
,
const
uint16_t
*
ubuf0
,
const
uint16_t
*
ubuf1
,
const
uint16_t
*
vbuf0
,
const
uint16_t
*
vbuf1
,
const
uint16_t
*
abuf0
,
const
uint16_t
*
abuf1
,
uint8_t
*
dest
,
int
dstW
,
int
yalpha
,
int
uvalpha
,
int
y
)
{
x86_reg
uv_off
=
c
->
uv_off
<<
1
;
//Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB
(
%%
REGBP
,
%
5
)
YSCALEYUV2RGB
(
%%
REGBP
,
%
5
,
%
6
)
"pxor %%mm7, %%mm7
\n\t
"
WRITEBGR24
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
u
vbuf0
),
"D"
(
uv
buf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
u
buf0
),
"D"
(
u
buf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
,
"m"
(
uv_off
)
);
}
static
inline
void
RENAME
(
yuv2rgb555_2
)(
SwsContext
*
c
,
const
uint16_t
*
buf0
,
const
uint16_t
*
buf1
,
const
uint16_t
*
uvbuf0
,
const
uint16_t
*
uvbuf1
,
const
uint16_t
*
abuf0
,
const
uint16_t
*
buf1
,
const
uint16_t
*
ubuf0
,
const
uint16_t
*
ubuf1
,
const
uint16_t
*
vbuf0
,
const
uint16_t
*
vbuf1
,
const
uint16_t
*
abuf0
,
const
uint16_t
*
abuf1
,
uint8_t
*
dest
,
int
dstW
,
int
yalpha
,
int
uvalpha
,
int
y
)
{
x86_reg
uv_off
=
c
->
uv_off
<<
1
;
//Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB
(
%%
REGBP
,
%
5
)
YSCALEYUV2RGB
(
%%
REGBP
,
%
5
,
%
6
)
"pxor %%mm7, %%mm7
\n\t
"
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
...
...
@@ -1052,23 +1088,26 @@ static inline void RENAME(yuv2rgb555_2)(SwsContext *c, const uint16_t *buf0,
WRITERGB15
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
u
vbuf0
),
"D"
(
uv
buf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
u
buf0
),
"D"
(
u
buf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
,
"m"
(
uv_off
)
);
}
static
inline
void
RENAME
(
yuv2rgb565_2
)(
SwsContext
*
c
,
const
uint16_t
*
buf0
,
const
uint16_t
*
buf1
,
const
uint16_t
*
uvbuf0
,
const
uint16_t
*
uvbuf1
,
const
uint16_t
*
abuf0
,
const
uint16_t
*
buf1
,
const
uint16_t
*
ubuf0
,
const
uint16_t
*
ubuf1
,
const
uint16_t
*
vbuf0
,
const
uint16_t
*
vbuf1
,
const
uint16_t
*
abuf0
,
const
uint16_t
*
abuf1
,
uint8_t
*
dest
,
int
dstW
,
int
yalpha
,
int
uvalpha
,
int
y
)
{
x86_reg
uv_off
=
c
->
uv_off
<<
1
;
//Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB
(
%%
REGBP
,
%
5
)
YSCALEYUV2RGB
(
%%
REGBP
,
%
5
,
%
6
)
"pxor %%mm7, %%mm7
\n\t
"
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
...
...
@@ -1079,12 +1118,12 @@ static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
WRITERGB16
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
u
vbuf0
),
"D"
(
uv
buf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
u
buf0
),
"D"
(
u
buf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
,
"m"
(
uv_off
)
);
}
#define REAL_YSCALEYUV2PACKED(index, c) \
#define REAL_YSCALEYUV2PACKED(index, c
, uv_off
) \
"movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
"movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1 \n\t"\
"psraw $3, %%mm0 \n\t"\
...
...
@@ -1096,8 +1135,10 @@ static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
"1: \n\t"\
"movq (%2, "#index"), %%mm2 \n\t"
/* uvbuf0[eax]*/
\
"movq (%3, "#index"), %%mm3 \n\t"
/* uvbuf1[eax]*/
\
"movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5 \n\t"
/* uvbuf0[eax+2048]*/
\
"movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4 \n\t"
/* uvbuf1[eax+2048]*/
\
"add "#uv_off", "#index" \n\t" \
"movq (%2, "#index"), %%mm5 \n\t"
/* uvbuf0[eax+2048]*/
\
"movq (%3, "#index"), %%mm4 \n\t"
/* uvbuf1[eax+2048]*/
\
"sub "#uv_off", "#index" \n\t" \
"psubw %%mm3, %%mm2 \n\t"
/* uvbuf0[eax] - uvbuf1[eax]*/
\
"psubw %%mm4, %%mm5 \n\t"
/* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/
\
"movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0 \n\t"\
...
...
@@ -1120,34 +1161,39 @@ static inline void RENAME(yuv2rgb565_2)(SwsContext *c, const uint16_t *buf0,
"paddw %%mm0, %%mm1 \n\t"
/* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/
\
"paddw %%mm6, %%mm7 \n\t"
/* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/
\
#define YSCALEYUV2PACKED(index, c
) REAL_YSCALEYUV2PACKED(index, c
)
#define YSCALEYUV2PACKED(index, c
, uv_off) REAL_YSCALEYUV2PACKED(index, c, uv_off
)
static
inline
void
RENAME
(
yuv2yuyv422_2
)(
SwsContext
*
c
,
const
uint16_t
*
buf0
,
const
uint16_t
*
buf1
,
const
uint16_t
*
uvbuf0
,
const
uint16_t
*
uvbuf1
,
const
uint16_t
*
abuf0
,
const
uint16_t
*
buf1
,
const
uint16_t
*
ubuf0
,
const
uint16_t
*
ubuf1
,
const
uint16_t
*
vbuf0
,
const
uint16_t
*
vbuf1
,
const
uint16_t
*
abuf0
,
const
uint16_t
*
abuf1
,
uint8_t
*
dest
,
int
dstW
,
int
yalpha
,
int
uvalpha
,
int
y
)
{
x86_reg
uv_off
=
c
->
uv_off
<<
1
;
//Note 8280 == DSTW_OFFSET but the preprocessor can't handle that there :(
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2PACKED
(
%%
REGBP
,
%
5
)
YSCALEYUV2PACKED
(
%%
REGBP
,
%
5
,
%
6
)
WRITEYUY2
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
u
vbuf0
),
"D"
(
uv
buf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
u
buf0
),
"D"
(
u
buf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
,
"m"
(
uv_off
)
);
}
#define REAL_YSCALEYUV2RGB1(index, c) \
#define REAL_YSCALEYUV2RGB1(index, c
, uv_off
) \
"xor "#index", "#index" \n\t"\
".p2align 4 \n\t"\
"1: \n\t"\
"movq (%2, "#index"), %%mm3 \n\t"
/* uvbuf0[eax]*/
\
"movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4 \n\t"
/* uvbuf0[eax+2048]*/
\
"add "#uv_off", "#index" \n\t" \
"movq (%2, "#index"), %%mm4 \n\t"
/* uvbuf0[eax+2048]*/
\
"sub "#uv_off", "#index" \n\t" \
"psraw $4, %%mm3 \n\t"
/* uvbuf0[eax] - uvbuf1[eax] >>4*/
\
"psraw $4, %%mm4 \n\t"
/* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/
\
"psubw "U_OFFSET"("#c"), %%mm3 \n\t"
/* (U-128)8*/
\
...
...
@@ -1189,17 +1235,19 @@ static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
"packuswb %%mm6, %%mm5 \n\t"\
"packuswb %%mm3, %%mm4 \n\t"\
#define YSCALEYUV2RGB1(index, c
) REAL_YSCALEYUV2RGB1(index, c
)
#define YSCALEYUV2RGB1(index, c
, uv_off) REAL_YSCALEYUV2RGB1(index, c, uv_off
)
// do vertical chrominance interpolation
#define REAL_YSCALEYUV2RGB1b(index, c) \
#define REAL_YSCALEYUV2RGB1b(index, c
, uv_off
) \
"xor "#index", "#index" \n\t"\
".p2align 4 \n\t"\
"1: \n\t"\
"movq (%2, "#index"), %%mm2 \n\t"
/* uvbuf0[eax]*/
\
"movq (%3, "#index"), %%mm3 \n\t"
/* uvbuf1[eax]*/
\
"movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5 \n\t"
/* uvbuf0[eax+2048]*/
\
"movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4 \n\t"
/* uvbuf1[eax+2048]*/
\
"add "#uv_off", "#index" \n\t" \
"movq (%2, "#index"), %%mm5 \n\t"
/* uvbuf0[eax+2048]*/
\
"movq (%3, "#index"), %%mm4 \n\t"
/* uvbuf1[eax+2048]*/
\
"sub "#uv_off", "#index" \n\t" \
"paddw %%mm2, %%mm3 \n\t"
/* uvbuf0[eax] + uvbuf1[eax]*/
\
"paddw %%mm5, %%mm4 \n\t"
/* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/
\
"psrlw $5, %%mm3 \n\t"
/*FIXME might overflow*/
\
...
...
@@ -1243,7 +1291,7 @@ static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
"packuswb %%mm6, %%mm5 \n\t"\
"packuswb %%mm3, %%mm4 \n\t"\
#define YSCALEYUV2RGB1b(index, c
) REAL_YSCALEYUV2RGB1b(index, c
)
#define YSCALEYUV2RGB1b(index, c
, uv_off) REAL_YSCALEYUV2RGB1b(index, c, uv_off
)
#define REAL_YSCALEYUV2RGB1_ALPHA(index) \
"movq (%1, "#index", 2), %%mm7 \n\t"
/* abuf0[index ] */
\
...
...
@@ -1257,11 +1305,13 @@ static inline void RENAME(yuv2yuyv422_2)(SwsContext *c, const uint16_t *buf0,
* YV12 to RGB without scaling or interpolating
*/
static
inline
void
RENAME
(
yuv2rgb32_1
)(
SwsContext
*
c
,
const
uint16_t
*
buf0
,
const
uint16_t
*
uvbuf0
,
const
uint16_t
*
uvbuf1
,
const
uint16_t
*
ubuf0
,
const
uint16_t
*
ubuf1
,
const
uint16_t
*
vbuf0
,
const
uint16_t
*
vbuf1
,
const
uint16_t
*
abuf0
,
uint8_t
*
dest
,
int
dstW
,
int
uvalpha
,
enum
PixelFormat
dstFormat
,
int
flags
,
int
y
)
{
x86_reg
uv_off
=
c
->
uv_off
<<
1
;
const
uint16_t
*
buf1
=
buf0
;
//FIXME needed for RGB1/BGR1
if
(
uvalpha
<
2048
)
{
// note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
...
...
@@ -1270,26 +1320,26 @@ static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB1
(
%%
REGBP
,
%
5
)
YSCALEYUV2RGB1
(
%%
REGBP
,
%
5
,
%
6
)
YSCALEYUV2RGB1_ALPHA
(
%%
REGBP
)
WRITEBGR32
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm7
,
%%
mm0
,
%%
mm1
,
%%
mm3
,
%%
mm6
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
abuf0
),
"S"
(
u
vbuf0
),
"D"
(
uv
buf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
::
"c"
(
buf0
),
"d"
(
abuf0
),
"S"
(
u
buf0
),
"D"
(
u
buf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
,
"m"
(
uv_off
)
);
}
else
{
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB1
(
%%
REGBP
,
%
5
)
YSCALEYUV2RGB1
(
%%
REGBP
,
%
5
,
%
6
)
"pcmpeqd %%mm7, %%mm7
\n\t
"
WRITEBGR32
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm7
,
%%
mm0
,
%%
mm1
,
%%
mm3
,
%%
mm6
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
u
vbuf0
),
"D"
(
uv
buf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
u
buf0
),
"D"
(
u
buf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
,
"m"
(
uv_off
)
);
}
}
else
{
...
...
@@ -1298,37 +1348,39 @@ static inline void RENAME(yuv2rgb32_1)(SwsContext *c, const uint16_t *buf0,
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB1b
(
%%
REGBP
,
%
5
)
YSCALEYUV2RGB1b
(
%%
REGBP
,
%
5
,
%
6
)
YSCALEYUV2RGB1_ALPHA
(
%%
REGBP
)
WRITEBGR32
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm7
,
%%
mm0
,
%%
mm1
,
%%
mm3
,
%%
mm6
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
abuf0
),
"S"
(
u
vbuf0
),
"D"
(
uv
buf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
::
"c"
(
buf0
),
"d"
(
abuf0
),
"S"
(
u
buf0
),
"D"
(
u
buf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
,
"m"
(
uv_off
)
);
}
else
{
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB1b
(
%%
REGBP
,
%
5
)
YSCALEYUV2RGB1b
(
%%
REGBP
,
%
5
,
%
6
)
"pcmpeqd %%mm7, %%mm7
\n\t
"
WRITEBGR32
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
,
%%
mm2
,
%%
mm4
,
%%
mm5
,
%%
mm7
,
%%
mm0
,
%%
mm1
,
%%
mm3
,
%%
mm6
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
u
vbuf0
),
"D"
(
uv
buf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
u
buf0
),
"D"
(
u
buf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
,
"m"
(
uv_off
)
);
}
}
}
static
inline
void
RENAME
(
yuv2bgr24_1
)(
SwsContext
*
c
,
const
uint16_t
*
buf0
,
const
uint16_t
*
uvbuf0
,
const
uint16_t
*
uvbuf1
,
const
uint16_t
*
ubuf0
,
const
uint16_t
*
ubuf1
,
const
uint16_t
*
vbuf0
,
const
uint16_t
*
vbuf1
,
const
uint16_t
*
abuf0
,
uint8_t
*
dest
,
int
dstW
,
int
uvalpha
,
enum
PixelFormat
dstFormat
,
int
flags
,
int
y
)
{
x86_reg
uv_off
=
c
->
uv_off
<<
1
;
const
uint16_t
*
buf1
=
buf0
;
//FIXME needed for RGB1/BGR1
if
(
uvalpha
<
2048
)
{
// note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
...
...
@@ -1336,36 +1388,38 @@ static inline void RENAME(yuv2bgr24_1)(SwsContext *c, const uint16_t *buf0,
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB1
(
%%
REGBP
,
%
5
)
YSCALEYUV2RGB1
(
%%
REGBP
,
%
5
,
%
6
)
"pxor %%mm7, %%mm7
\n\t
"
WRITEBGR24
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
u
vbuf0
),
"D"
(
uv
buf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
u
buf0
),
"D"
(
u
buf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
,
"m"
(
uv_off
)
);
}
else
{
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB1b
(
%%
REGBP
,
%
5
)
YSCALEYUV2RGB1b
(
%%
REGBP
,
%
5
,
%
6
)
"pxor %%mm7, %%mm7
\n\t
"
WRITEBGR24
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
u
vbuf0
),
"D"
(
uv
buf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
u
buf0
),
"D"
(
u
buf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
,
"m"
(
uv_off
)
);
}
}
static
inline
void
RENAME
(
yuv2rgb555_1
)(
SwsContext
*
c
,
const
uint16_t
*
buf0
,
const
uint16_t
*
uvbuf0
,
const
uint16_t
*
uvbuf1
,
const
uint16_t
*
ubuf0
,
const
uint16_t
*
ubuf1
,
const
uint16_t
*
vbuf0
,
const
uint16_t
*
vbuf1
,
const
uint16_t
*
abuf0
,
uint8_t
*
dest
,
int
dstW
,
int
uvalpha
,
enum
PixelFormat
dstFormat
,
int
flags
,
int
y
)
{
x86_reg
uv_off
=
c
->
uv_off
<<
1
;
const
uint16_t
*
buf1
=
buf0
;
//FIXME needed for RGB1/BGR1
if
(
uvalpha
<
2048
)
{
// note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
...
...
@@ -1373,7 +1427,7 @@ static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB1
(
%%
REGBP
,
%
5
)
YSCALEYUV2RGB1
(
%%
REGBP
,
%
5
,
%
6
)
"pxor %%mm7, %%mm7
\n\t
"
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
...
...
@@ -1384,15 +1438,15 @@ static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
WRITERGB15
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
u
vbuf0
),
"D"
(
uv
buf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
u
buf0
),
"D"
(
u
buf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
,
"m"
(
uv_off
)
);
}
else
{
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB1b
(
%%
REGBP
,
%
5
)
YSCALEYUV2RGB1b
(
%%
REGBP
,
%
5
,
%
6
)
"pxor %%mm7, %%mm7
\n\t
"
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
...
...
@@ -1403,18 +1457,20 @@ static inline void RENAME(yuv2rgb555_1)(SwsContext *c, const uint16_t *buf0,
WRITERGB15
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
u
vbuf0
),
"D"
(
uv
buf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
u
buf0
),
"D"
(
u
buf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
,
"m"
(
uv_off
)
);
}
}
static
inline
void
RENAME
(
yuv2rgb565_1
)(
SwsContext
*
c
,
const
uint16_t
*
buf0
,
const
uint16_t
*
uvbuf0
,
const
uint16_t
*
uvbuf1
,
const
uint16_t
*
ubuf0
,
const
uint16_t
*
ubuf1
,
const
uint16_t
*
vbuf0
,
const
uint16_t
*
vbuf1
,
const
uint16_t
*
abuf0
,
uint8_t
*
dest
,
int
dstW
,
int
uvalpha
,
enum
PixelFormat
dstFormat
,
int
flags
,
int
y
)
{
x86_reg
uv_off
=
c
->
uv_off
<<
1
;
const
uint16_t
*
buf1
=
buf0
;
//FIXME needed for RGB1/BGR1
if
(
uvalpha
<
2048
)
{
// note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
...
...
@@ -1422,7 +1478,7 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB1
(
%%
REGBP
,
%
5
)
YSCALEYUV2RGB1
(
%%
REGBP
,
%
5
,
%
6
)
"pxor %%mm7, %%mm7
\n\t
"
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
...
...
@@ -1433,15 +1489,15 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
WRITERGB16
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
u
vbuf0
),
"D"
(
uv
buf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
u
buf0
),
"D"
(
u
buf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
,
"m"
(
uv_off
)
);
}
else
{
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2RGB1b
(
%%
REGBP
,
%
5
)
YSCALEYUV2RGB1b
(
%%
REGBP
,
%
5
,
%
6
)
"pxor %%mm7, %%mm7
\n\t
"
/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
...
...
@@ -1452,18 +1508,20 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
WRITERGB16
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
u
vbuf0
),
"D"
(
uv
buf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
u
buf0
),
"D"
(
u
buf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
,
"m"
(
uv_off
)
);
}
}
#define REAL_YSCALEYUV2PACKED1(index, c) \
#define REAL_YSCALEYUV2PACKED1(index, c
, uv_off
) \
"xor "#index", "#index" \n\t"\
".p2align 4 \n\t"\
"1: \n\t"\
"movq (%2, "#index"), %%mm3 \n\t"
/* uvbuf0[eax]*/
\
"movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm4 \n\t"
/* uvbuf0[eax+2048]*/
\
"add "#uv_off", "#index" \n\t" \
"movq (%2, "#index"), %%mm4 \n\t"
/* uvbuf0[eax+2048]*/
\
"sub "#uv_off", "#index" \n\t" \
"psraw $7, %%mm3 \n\t" \
"psraw $7, %%mm4 \n\t" \
"movq (%0, "#index", 2), %%mm1 \n\t"
/*buf0[eax]*/
\
...
...
@@ -1471,16 +1529,18 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
"psraw $7, %%mm1 \n\t" \
"psraw $7, %%mm7 \n\t" \
#define YSCALEYUV2PACKED1(index, c
) REAL_YSCALEYUV2PACKED1(index, c
)
#define YSCALEYUV2PACKED1(index, c
, uv_off) REAL_YSCALEYUV2PACKED1(index, c, uv_off
)
#define REAL_YSCALEYUV2PACKED1b(index, c) \
#define REAL_YSCALEYUV2PACKED1b(index, c
, uv_off
) \
"xor "#index", "#index" \n\t"\
".p2align 4 \n\t"\
"1: \n\t"\
"movq (%2, "#index"), %%mm2 \n\t"
/* uvbuf0[eax]*/
\
"movq (%3, "#index"), %%mm3 \n\t"
/* uvbuf1[eax]*/
\
"movq "AV_STRINGIFY(VOF)"(%2, "#index"), %%mm5 \n\t"
/* uvbuf0[eax+2048]*/
\
"movq "AV_STRINGIFY(VOF)"(%3, "#index"), %%mm4 \n\t"
/* uvbuf1[eax+2048]*/
\
"add "#uv_off", "#index" \n\t" \
"movq (%2, "#index"), %%mm5 \n\t"
/* uvbuf0[eax+2048]*/
\
"movq (%3, "#index"), %%mm4 \n\t"
/* uvbuf1[eax+2048]*/
\
"sub "#uv_off", "#index" \n\t" \
"paddw %%mm2, %%mm3 \n\t"
/* uvbuf0[eax] + uvbuf1[eax]*/
\
"paddw %%mm5, %%mm4 \n\t"
/* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/
\
"psrlw $8, %%mm3 \n\t" \
...
...
@@ -1489,14 +1549,16 @@ static inline void RENAME(yuv2rgb565_1)(SwsContext *c, const uint16_t *buf0,
"movq 8(%0, "#index", 2), %%mm7 \n\t"
/*buf0[eax]*/
\
"psraw $7, %%mm1 \n\t" \
"psraw $7, %%mm7 \n\t"
#define YSCALEYUV2PACKED1b(index, c
) REAL_YSCALEYUV2PACKED1b(index, c
)
#define YSCALEYUV2PACKED1b(index, c
, uv_off) REAL_YSCALEYUV2PACKED1b(index, c, uv_off
)
static
inline
void
RENAME
(
yuv2yuyv422_1
)(
SwsContext
*
c
,
const
uint16_t
*
buf0
,
const
uint16_t
*
uvbuf0
,
const
uint16_t
*
uvbuf1
,
const
uint16_t
*
ubuf0
,
const
uint16_t
*
ubuf1
,
const
uint16_t
*
vbuf0
,
const
uint16_t
*
vbuf1
,
const
uint16_t
*
abuf0
,
uint8_t
*
dest
,
int
dstW
,
int
uvalpha
,
enum
PixelFormat
dstFormat
,
int
flags
,
int
y
)
{
x86_reg
uv_off
=
c
->
uv_off
<<
1
;
const
uint16_t
*
buf1
=
buf0
;
//FIXME needed for RGB1/BGR1
if
(
uvalpha
<
2048
)
{
// note this is not correct (shifts chrominance by 0.5 pixels) but it is a bit faster
...
...
@@ -1504,24 +1566,24 @@ static inline void RENAME(yuv2yuyv422_1)(SwsContext *c, const uint16_t *buf0,
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2PACKED1
(
%%
REGBP
,
%
5
)
YSCALEYUV2PACKED1
(
%%
REGBP
,
%
5
,
%
6
)
WRITEYUY2
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
u
vbuf0
),
"D"
(
uv
buf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
u
buf0
),
"D"
(
u
buf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
,
"m"
(
uv_off
)
);
}
else
{
__asm__
volatile
(
"mov %%"
REG_b
", "
ESP_OFFSET
"(%5)
\n\t
"
"mov %4, %%"
REG_b
"
\n\t
"
"push %%"
REG_BP
"
\n\t
"
YSCALEYUV2PACKED1b
(
%%
REGBP
,
%
5
)
YSCALEYUV2PACKED1b
(
%%
REGBP
,
%
5
,
%
6
)
WRITEYUY2
(
%%
REGb
,
8280
(
%
5
),
%%
REGBP
)
"pop %%"
REG_BP
"
\n\t
"
"mov "
ESP_OFFSET
"(%5), %%"
REG_b
"
\n\t
"
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
u
vbuf0
),
"D"
(
uv
buf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
::
"c"
(
buf0
),
"d"
(
buf1
),
"S"
(
u
buf0
),
"D"
(
u
buf1
),
"m"
(
dest
),
"a"
(
&
c
->
redDither
)
,
"m"
(
uv_off
)
);
}
}
...
...
@@ -2074,7 +2136,7 @@ static inline void RENAME(hyscale_fast)(SwsContext *c, int16_t *dst,
dst
[
i
]
=
src
[
srcW
-
1
]
*
128
;
}
static
inline
void
RENAME
(
hcscale_fast
)(
SwsContext
*
c
,
int16_t
*
dst
,
static
inline
void
RENAME
(
hcscale_fast
)(
SwsContext
*
c
,
int16_t
*
dst
1
,
int16_t
*
dst2
,
long
dstWidth
,
const
uint8_t
*
src1
,
const
uint8_t
*
src2
,
int
srcW
,
int
xInc
)
{
...
...
@@ -2089,7 +2151,7 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
__asm__
volatile
(
#if defined(PIC)
"mov %%"
REG_b
", %
6
\n\t
"
"mov %%"
REG_b
", %
7
\n\t
"
#endif
"pxor %%mm7, %%mm7
\n\t
"
"mov %0, %%"
REG_c
"
\n\t
"
...
...
@@ -2107,8 +2169,7 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
CALL_MMX2_FILTER_CODE
"xor %%"
REG_a
", %%"
REG_a
"
\n\t
"
// i
"mov %5, %%"
REG_c
"
\n\t
"
// src
"mov %1, %%"
REG_D
"
\n\t
"
// buf1
"add $"
AV_STRINGIFY
(
VOF
)
", %%"
REG_D
"
\n\t
"
"mov %6, %%"
REG_D
"
\n\t
"
// buf2
PREFETCH
" (%%"
REG_c
")
\n\t
"
PREFETCH
" 32(%%"
REG_c
")
\n\t
"
PREFETCH
" 64(%%"
REG_c
")
\n\t
"
...
...
@@ -2119,10 +2180,10 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
CALL_MMX2_FILTER_CODE
#if defined(PIC)
"mov %
6
, %%"
REG_b
"
\n\t
"
"mov %
7
, %%"
REG_b
"
\n\t
"
#endif
::
"m"
(
src1
),
"m"
(
dst
),
"m"
(
filter
),
"m"
(
filterPos
),
"m"
(
mmx2FilterCode
),
"m"
(
src2
)
::
"m"
(
src1
),
"m"
(
dst
1
),
"m"
(
filter
),
"m"
(
filterPos
),
"m"
(
mmx2FilterCode
),
"m"
(
src2
)
,
"m"
(
dst2
)
#if defined(PIC)
,
"m"
(
ebxsave
)
#endif
...
...
@@ -2133,8 +2194,8 @@ static inline void RENAME(hcscale_fast)(SwsContext *c, int16_t *dst,
);
for
(
i
=
dstWidth
-
1
;
(
i
*
xInc
)
>>
16
>=
srcW
-
1
;
i
--
)
{
dst
[
i
]
=
src1
[
srcW
-
1
]
*
128
;
dst
[
i
+
VOFW
]
=
src2
[
srcW
-
1
]
*
128
;
dst
1
[
i
]
=
src1
[
srcW
-
1
]
*
128
;
dst
2
[
i
]
=
src2
[
srcW
-
1
]
*
128
;
}
}
#endif
/* COMPILE_TEMPLATE_MMX2 */
...
...
@@ -2146,7 +2207,8 @@ static void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int
const
int
dstH
=
c
->
dstH
;
const
int
flags
=
c
->
flags
;
int16_t
**
lumPixBuf
=
c
->
lumPixBuf
;
int16_t
**
chrPixBuf
=
c
->
chrPixBuf
;
int16_t
**
chrUPixBuf
=
c
->
chrUPixBuf
;
int16_t
**
chrVPixBuf
=
c
->
chrVPixBuf
;
int16_t
**
alpPixBuf
=
c
->
alpPixBuf
;
const
int
vLumBufSize
=
c
->
vLumBufSize
;
const
int
vChrBufSize
=
c
->
vChrBufSize
;
...
...
@@ -2171,7 +2233,8 @@ static void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int
c
->
redDither
=
ff_dither8
[(
dstY
+
1
)
&
1
];
if
(
dstY
<
dstH
-
2
)
{
const
int16_t
**
lumSrcPtr
=
(
const
int16_t
**
)
lumPixBuf
+
lumBufIndex
+
firstLumSrcY
-
lastInLumBuf
+
vLumBufSize
;
const
int16_t
**
chrSrcPtr
=
(
const
int16_t
**
)
chrPixBuf
+
chrBufIndex
+
firstChrSrcY
-
lastInChrBuf
+
vChrBufSize
;
const
int16_t
**
chrUSrcPtr
=
(
const
int16_t
**
)
chrUPixBuf
+
chrBufIndex
+
firstChrSrcY
-
lastInChrBuf
+
vChrBufSize
;
const
int16_t
**
chrVSrcPtr
=
(
const
int16_t
**
)
chrVPixBuf
+
chrBufIndex
+
firstChrSrcY
-
lastInChrBuf
+
vChrBufSize
;
const
int16_t
**
alpSrcPtr
=
(
CONFIG_SWSCALE_ALPHA
&&
alpPixBuf
)
?
(
const
int16_t
**
)
alpPixBuf
+
lumBufIndex
+
firstLumSrcY
-
lastInLumBuf
+
vLumBufSize
:
NULL
;
int
i
;
if
(
flags
&
SWS_ACCURATE_RND
)
{
...
...
@@ -2190,29 +2253,26 @@ static void updateMMXDitherTables(SwsContext *c, int dstY, int lumBufIndex, int
}
}
for
(
i
=
0
;
i
<
vChrFilterSize
;
i
+=
2
)
{
*
(
const
void
**
)
&
chrMmxFilter
[
s
*
i
]
=
chrSrcPtr
[
i
];
*
(
const
void
**
)
&
chrMmxFilter
[
s
*
i
+
APCK_PTR2
/
4
]
=
chrSrcPtr
[
i
+
(
vChrFilterSize
>
1
)];
*
(
const
void
**
)
&
chrMmxFilter
[
s
*
i
]
=
chr
U
SrcPtr
[
i
];
*
(
const
void
**
)
&
chrMmxFilter
[
s
*
i
+
APCK_PTR2
/
4
]
=
chr
U
SrcPtr
[
i
+
(
vChrFilterSize
>
1
)];
chrMmxFilter
[
s
*
i
+
APCK_COEF
/
4
]
=
chrMmxFilter
[
s
*
i
+
APCK_COEF
/
4
+
1
]
=
vChrFilter
[
chrDstY
*
vChrFilterSize
+
i
]
+
(
vChrFilterSize
>
1
?
vChrFilter
[
chrDstY
*
vChrFilterSize
+
i
+
1
]
<<
16
:
0
);
}
}
else
{
for
(
i
=
0
;
i
<
vLumFilterSize
;
i
++
)
{
lumMmxFilter
[
4
*
i
+
0
]
=
(
int32_t
)
lumSrcPtr
[
i
];
lumMmxFilter
[
4
*
i
+
1
]
=
(
uint64_t
)
lumSrcPtr
[
i
]
>>
32
;
*
(
const
void
**
)
&
lumMmxFilter
[
4
*
i
+
0
]
=
lumSrcPtr
[
i
];
lumMmxFilter
[
4
*
i
+
2
]
=
lumMmxFilter
[
4
*
i
+
3
]
=
((
uint16_t
)
vLumFilter
[
dstY
*
vLumFilterSize
+
i
])
*
0x10001
;
if
(
CONFIG_SWSCALE_ALPHA
&&
alpPixBuf
)
{
alpMmxFilter
[
4
*
i
+
0
]
=
(
int32_t
)
alpSrcPtr
[
i
];
alpMmxFilter
[
4
*
i
+
1
]
=
(
uint64_t
)
alpSrcPtr
[
i
]
>>
32
;
*
(
const
void
**
)
&
alpMmxFilter
[
4
*
i
+
0
]
=
alpSrcPtr
[
i
];
alpMmxFilter
[
4
*
i
+
2
]
=
alpMmxFilter
[
4
*
i
+
3
]
=
lumMmxFilter
[
4
*
i
+
2
];
}
}
for
(
i
=
0
;
i
<
vChrFilterSize
;
i
++
)
{
chrMmxFilter
[
4
*
i
+
0
]
=
(
int32_t
)
chrSrcPtr
[
i
];
chrMmxFilter
[
4
*
i
+
1
]
=
(
uint64_t
)
chrSrcPtr
[
i
]
>>
32
;
*
(
const
void
**
)
&
chrMmxFilter
[
4
*
i
+
0
]
=
chrUSrcPtr
[
i
];
chrMmxFilter
[
4
*
i
+
2
]
=
chrMmxFilter
[
4
*
i
+
3
]
=
((
uint16_t
)
vChrFilter
[
chrDstY
*
vChrFilterSize
+
i
])
*
0x10001
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment