Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
6713989c
Commit
6713989c
authored
Jun 02, 2011
by
Michael Niedermayer
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
swscale: dither for planar yuv outputs
Signed-off-by:
Michael Niedermayer
<
michaelni@gmx.at
>
parent
877f76ad
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
77 additions
and
54 deletions
+77
-54
swscale.c
libswscale/swscale.c
+15
-11
swscale_internal.h
libswscale/swscale_internal.h
+7
-3
swscale_template.c
libswscale/swscale_template.c
+17
-14
swscale_template.c
libswscale/x86/swscale_template.c
+38
-26
No files found.
libswscale/swscale.c
View file @
6713989c
...
...
@@ -282,6 +282,8 @@ DECLARE_ALIGNED(8, const uint8_t, dithers)[8][8][8]={
{
112
,
16
,
104
,
8
,
118
,
22
,
110
,
14
,},
}};
static
const
uint8_t
flat64
[
8
]
=
{
64
,
64
,
64
,
64
,
64
,
64
,
64
,
64
};
uint16_t
dither_scale
[
15
][
16
]
=
{
{
2
,
3
,
3
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,
5
,},
{
2
,
3
,
7
,
7
,
13
,
13
,
25
,
25
,
25
,
25
,
25
,
25
,
25
,
25
,
25
,
25
,},
...
...
@@ -417,12 +419,13 @@ static inline void yuv2yuvX16inC(const int16_t *lumFilter, const int16_t **lumSr
static
inline
void
yuv2yuvXinC
(
const
int16_t
*
lumFilter
,
const
int16_t
**
lumSrc
,
int
lumFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrUSrc
,
const
int16_t
**
chrVSrc
,
int
chrFilterSize
,
const
int16_t
**
alpSrc
,
uint8_t
*
dest
,
uint8_t
*
uDest
,
uint8_t
*
vDest
,
uint8_t
*
aDest
,
int
dstW
,
int
chrDstW
)
const
int16_t
**
alpSrc
,
uint8_t
*
dest
,
uint8_t
*
uDest
,
uint8_t
*
vDest
,
uint8_t
*
aDest
,
int
dstW
,
int
chrDstW
,
const
uint8_t
*
lumDither
,
const
uint8_t
*
chrDither
)
{
//FIXME Optimize (just quickly written not optimized..)
int
i
;
for
(
i
=
0
;
i
<
dstW
;
i
++
)
{
int
val
=
1
<<
18
;
int
val
=
lumDither
[
i
&
7
]
<<
12
;
int
j
;
for
(
j
=
0
;
j
<
lumFilterSize
;
j
++
)
val
+=
lumSrc
[
j
][
i
]
*
lumFilter
[
j
];
...
...
@@ -432,8 +435,8 @@ static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc,
if
(
uDest
)
for
(
i
=
0
;
i
<
chrDstW
;
i
++
)
{
int
u
=
1
<<
18
;
int
v
=
1
<<
18
;
int
u
=
chrDither
[
i
&
7
]
<<
12
;
int
v
=
chrDither
[(
i
+
3
)
&
7
]
<<
12
;
int
j
;
for
(
j
=
0
;
j
<
chrFilterSize
;
j
++
)
{
u
+=
chrUSrc
[
j
][
i
]
*
chrFilter
[
j
];
...
...
@@ -446,7 +449,7 @@ static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc,
if
(
CONFIG_SWSCALE_ALPHA
&&
aDest
)
for
(
i
=
0
;
i
<
dstW
;
i
++
)
{
int
val
=
1
<<
18
;
int
val
=
lumDither
[
i
&
7
]
<<
12
;
int
j
;
for
(
j
=
0
;
j
<
lumFilterSize
;
j
++
)
val
+=
alpSrc
[
j
][
i
]
*
lumFilter
[
j
];
...
...
@@ -459,12 +462,13 @@ static inline void yuv2yuvXinC(const int16_t *lumFilter, const int16_t **lumSrc,
static
inline
void
yuv2nv12XinC
(
const
int16_t
*
lumFilter
,
const
int16_t
**
lumSrc
,
int
lumFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrUSrc
,
const
int16_t
**
chrVSrc
,
int
chrFilterSize
,
uint8_t
*
dest
,
uint8_t
*
uDest
,
int
dstW
,
int
chrDstW
,
int
dstFormat
)
uint8_t
*
dest
,
uint8_t
*
uDest
,
int
dstW
,
int
chrDstW
,
int
dstFormat
,
const
uint8_t
*
lumDither
,
const
uint8_t
*
chrDither
)
{
//FIXME Optimize (just quickly written not optimized..)
int
i
;
for
(
i
=
0
;
i
<
dstW
;
i
++
)
{
int
val
=
1
<<
18
;
int
val
=
lumDither
[
i
&
7
]
<<
12
;
int
j
;
for
(
j
=
0
;
j
<
lumFilterSize
;
j
++
)
val
+=
lumSrc
[
j
][
i
]
*
lumFilter
[
j
];
...
...
@@ -477,8 +481,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
if
(
dstFormat
==
PIX_FMT_NV12
)
for
(
i
=
0
;
i
<
chrDstW
;
i
++
)
{
int
u
=
1
<<
18
;
int
v
=
1
<<
18
;
int
u
=
chrDither
[
i
&
7
]
<<
12
;
int
v
=
chrDither
[(
i
+
3
)
&
7
]
<<
12
;
int
j
;
for
(
j
=
0
;
j
<
chrFilterSize
;
j
++
)
{
u
+=
chrUSrc
[
j
][
i
]
*
chrFilter
[
j
];
...
...
@@ -490,8 +494,8 @@ static inline void yuv2nv12XinC(const int16_t *lumFilter, const int16_t **lumSrc
}
else
for
(
i
=
0
;
i
<
chrDstW
;
i
++
)
{
int
u
=
1
<<
18
;
int
v
=
1
<<
18
;
int
u
=
chrDither
[
i
&
7
]
<<
12
;
int
v
=
chrDither
[(
i
+
3
)
&
7
]
<<
12
;
int
j
;
for
(
j
=
0
;
j
<
chrFilterSize
;
j
++
)
{
u
+=
chrUSrc
[
j
][
i
]
*
chrFilter
[
j
];
...
...
libswscale/swscale_internal.h
View file @
6713989c
...
...
@@ -195,6 +195,8 @@ typedef struct SwsContext {
#define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48"
#define UV_OFF "11*8+4*4*256*3+48"
#define UV_OFFx2 "11*8+4*4*256*3+56"
#define DITHER16 "11*8+4*4*256*3+64"
#define DITHER32 "11*8+4*4*256*3+64+16"
DECLARE_ALIGNED
(
8
,
uint64_t
,
redDither
);
DECLARE_ALIGNED
(
8
,
uint64_t
,
greenDither
);
...
...
@@ -219,6 +221,8 @@ typedef struct SwsContext {
int32_t
alpMmxFilter
[
4
*
MAX_FILTER_SIZE
];
DECLARE_ALIGNED
(
8
,
ptrdiff_t
,
uv_off
);
///< offset (in pixels) between u and v planes
DECLARE_ALIGNED
(
8
,
ptrdiff_t
,
uv_offx2
);
///< offset (in bytes) between u and v planes
uint16_t
dither16
[
8
];
uint32_t
dither32
[
8
];
#if HAVE_ALTIVEC
vector
signed
short
CY
;
...
...
@@ -255,13 +259,13 @@ typedef struct SwsContext {
const
int16_t
*
chrFilter
,
const
int16_t
**
chrUSrc
,
const
int16_t
**
chrVSrc
,
int
chrFilterSize
,
uint8_t
*
dest
,
uint8_t
*
uDest
,
int
dstW
,
int
chrDstW
,
int
dstFormat
);
int
dstW
,
int
chrDstW
,
int
dstFormat
,
const
uint8_t
*
lumDither
,
const
uint8_t
*
chrDither
);
void
(
*
yuv2yuv1
)(
struct
SwsContext
*
c
,
const
int16_t
*
lumSrc
,
const
int16_t
*
chrUSrc
,
const
int16_t
*
chrVSrc
,
const
int16_t
*
alpSrc
,
uint8_t
*
dest
,
uint8_t
*
uDest
,
uint8_t
*
vDest
,
uint8_t
*
aDest
,
int
dstW
,
int
chrDstW
);
int
dstW
,
int
chrDstW
,
const
uint8_t
*
lumDither
,
const
uint8_t
*
chrDither
);
void
(
*
yuv2yuvX
)(
struct
SwsContext
*
c
,
const
int16_t
*
lumFilter
,
const
int16_t
**
lumSrc
,
int
lumFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrUSrc
,
...
...
@@ -269,7 +273,7 @@ typedef struct SwsContext {
const
int16_t
**
alpSrc
,
uint8_t
*
dest
,
uint8_t
*
uDest
,
uint8_t
*
vDest
,
uint8_t
*
aDest
,
int
dstW
,
int
chrDstW
);
int
dstW
,
int
chrDstW
,
const
uint8_t
*
lumDither
,
const
uint8_t
*
chrDither
);
void
(
*
yuv2packed1
)(
struct
SwsContext
*
c
,
const
uint16_t
*
buf0
,
const
uint16_t
*
ubuf0
,
const
uint16_t
*
ubuf1
,
...
...
libswscale/swscale_template.c
View file @
6713989c
...
...
@@ -24,11 +24,11 @@ static inline void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
const
int16_t
**
chrVSrc
,
int
chrFilterSize
,
const
int16_t
**
alpSrc
,
uint8_t
*
dest
,
uint8_t
*
uDest
,
uint8_t
*
vDest
,
uint8_t
*
aDest
,
int
dstW
,
int
chrDstW
)
uint8_t
*
aDest
,
int
dstW
,
int
chrDstW
,
const
uint8_t
*
lumDither
,
const
uint8_t
*
chrDither
)
{
yuv2yuvXinC
(
lumFilter
,
lumSrc
,
lumFilterSize
,
chrFilter
,
chrUSrc
,
chrVSrc
,
chrFilterSize
,
alpSrc
,
dest
,
uDest
,
vDest
,
aDest
,
dstW
,
chrDstW
);
alpSrc
,
dest
,
uDest
,
vDest
,
aDest
,
dstW
,
chrDstW
,
lumDither
,
chrDither
);
}
static
inline
void
yuv2nv12X_c
(
SwsContext
*
c
,
const
int16_t
*
lumFilter
,
...
...
@@ -36,36 +36,37 @@ static inline void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrUSrc
,
const
int16_t
**
chrVSrc
,
int
chrFilterSize
,
uint8_t
*
dest
,
uint8_t
*
uDest
,
int
dstW
,
int
chrDstW
,
enum
PixelFormat
dstFormat
)
int
dstW
,
int
chrDstW
,
enum
PixelFormat
dstFormat
,
const
uint8_t
*
dither
,
const
uint8_t
*
chrDither
)
{
yuv2nv12XinC
(
lumFilter
,
lumSrc
,
lumFilterSize
,
chrFilter
,
chrUSrc
,
chrVSrc
,
chrFilterSize
,
dest
,
uDest
,
dstW
,
chrDstW
,
dstFormat
);
dest
,
uDest
,
dstW
,
chrDstW
,
dstFormat
,
dither
,
chrDither
);
}
static
inline
void
yuv2yuv1_c
(
SwsContext
*
c
,
const
int16_t
*
lumSrc
,
const
int16_t
*
chrUSrc
,
const
int16_t
*
chrVSrc
,
const
int16_t
*
alpSrc
,
uint8_t
*
dest
,
uint8_t
*
uDest
,
uint8_t
*
vDest
,
uint8_t
*
aDest
,
int
dstW
,
int
chrDstW
)
uint8_t
*
aDest
,
int
dstW
,
int
chrDstW
,
const
uint8_t
*
lumDither
,
const
uint8_t
*
chrDither
)
{
int
i
;
for
(
i
=
0
;
i
<
dstW
;
i
++
)
{
int
val
=
(
lumSrc
[
i
]
+
64
)
>>
7
;
int
val
=
(
lumSrc
[
i
]
+
lumDither
[
i
&
7
]
)
>>
7
;
dest
[
i
]
=
av_clip_uint8
(
val
);
}
if
(
uDest
)
for
(
i
=
0
;
i
<
chrDstW
;
i
++
)
{
int
u
=
(
chrUSrc
[
i
]
+
64
)
>>
7
;
int
v
=
(
chrVSrc
[
i
]
+
64
)
>>
7
;
int
u
=
(
chrUSrc
[
i
]
+
chrDither
[
i
&
7
]
)
>>
7
;
int
v
=
(
chrVSrc
[
i
]
+
chrDither
[(
i
+
3
)
&
7
]
)
>>
7
;
uDest
[
i
]
=
av_clip_uint8
(
u
);
vDest
[
i
]
=
av_clip_uint8
(
v
);
}
if
(
CONFIG_SWSCALE_ALPHA
&&
aDest
)
for
(
i
=
0
;
i
<
dstW
;
i
++
)
{
int
val
=
(
alpSrc
[
i
]
+
64
)
>>
7
;
int
val
=
(
alpSrc
[
i
]
+
lumDither
[
i
&
7
]
)
>>
7
;
aDest
[
i
]
=
av_clip_uint8
(
val
);
}
}
...
...
@@ -609,6 +610,8 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
unsigned
char
*
uDest
=
dst
[
1
]
+
dstStride
[
1
]
*
chrDstY
;
unsigned
char
*
vDest
=
dst
[
2
]
+
dstStride
[
2
]
*
chrDstY
;
unsigned
char
*
aDest
=
(
CONFIG_SWSCALE_ALPHA
&&
alpPixBuf
)
?
dst
[
3
]
+
dstStride
[
3
]
*
dstY
:
NULL
;
const
uint8_t
*
lumDither
=
isNBPS
(
c
->
srcFormat
)
||
is16BPS
(
c
->
srcFormat
)
?
dithers
[
7
][
dstY
&
7
]
:
flat64
;
const
uint8_t
*
chrDither
=
isNBPS
(
c
->
srcFormat
)
||
is16BPS
(
c
->
srcFormat
)
?
dithers
[
7
][
chrDstY
&
7
]
:
flat64
;
const
int
firstLumSrcY
=
vLumFilterPos
[
dstY
];
//First line needed as input
const
int
firstLumSrcY2
=
vLumFilterPos
[
FFMIN
(
dstY
|
((
1
<<
c
->
chrDstVSubSample
)
-
1
),
dstH
-
1
)];
...
...
@@ -699,7 +702,7 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
c
->
yuv2nv12X
(
c
,
vLumFilter
+
dstY
*
vLumFilterSize
,
lumSrcPtr
,
vLumFilterSize
,
vChrFilter
+
chrDstY
*
vChrFilterSize
,
chrUSrcPtr
,
chrVSrcPtr
,
vChrFilterSize
,
dest
,
uDest
,
dstW
,
chrDstW
,
dstFormat
);
dest
,
uDest
,
dstW
,
chrDstW
,
dstFormat
,
lumDither
,
chrDither
);
}
else
if
(
isPlanarYUV
(
dstFormat
)
||
dstFormat
==
PIX_FMT_GRAY8
)
{
//YV12 like
const
int
chrSkipMask
=
(
1
<<
c
->
chrDstVSubSample
)
-
1
;
if
((
dstY
&
chrSkipMask
)
||
isGray
(
dstFormat
))
uDest
=
vDest
=
NULL
;
//FIXME split functions in lumi / chromi
...
...
@@ -716,13 +719,13 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
const
int16_t
*
chrVBuf
=
chrVSrcPtr
[
0
];
const
int16_t
*
alpBuf
=
(
CONFIG_SWSCALE_ALPHA
&&
alpPixBuf
)
?
alpSrcPtr
[
0
]
:
NULL
;
c
->
yuv2yuv1
(
c
,
lumBuf
,
chrUBuf
,
chrVBuf
,
alpBuf
,
dest
,
uDest
,
vDest
,
aDest
,
dstW
,
chrDstW
);
uDest
,
vDest
,
aDest
,
dstW
,
chrDstW
,
lumDither
,
chrDither
);
}
else
{
//General YV12
c
->
yuv2yuvX
(
c
,
vLumFilter
+
dstY
*
vLumFilterSize
,
lumSrcPtr
,
vLumFilterSize
,
vChrFilter
+
chrDstY
*
vChrFilterSize
,
chrUSrcPtr
,
chrVSrcPtr
,
vChrFilterSize
,
alpSrcPtr
,
dest
,
uDest
,
vDest
,
aDest
,
dstW
,
chrDstW
);
alpSrcPtr
,
dest
,
uDest
,
vDest
,
aDest
,
dstW
,
chrDstW
,
lumDither
,
chrDither
);
}
}
else
{
assert
(
lumSrcPtr
+
vLumFilterSize
-
1
<
lumPixBuf
+
vLumBufSize
*
2
);
...
...
@@ -784,7 +787,7 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
yuv2nv12XinC
(
vLumFilter
+
dstY
*
vLumFilterSize
,
lumSrcPtr
,
vLumFilterSize
,
vChrFilter
+
chrDstY
*
vChrFilterSize
,
chrUSrcPtr
,
chrVSrcPtr
,
vChrFilterSize
,
dest
,
uDest
,
dstW
,
chrDstW
,
dstFormat
);
dest
,
uDest
,
dstW
,
chrDstW
,
dstFormat
,
lumDither
,
chrDither
);
}
else
if
(
isPlanarYUV
(
dstFormat
)
||
dstFormat
==
PIX_FMT_GRAY8
)
{
//YV12
const
int
chrSkipMask
=
(
1
<<
c
->
chrDstVSubSample
)
-
1
;
if
((
dstY
&
chrSkipMask
)
||
isGray
(
dstFormat
))
uDest
=
vDest
=
NULL
;
//FIXME split functions in lumi / chromi
...
...
@@ -798,7 +801,7 @@ static int swScale_c(SwsContext *c, const uint8_t* src[], int srcStride[],
yuv2yuvXinC
(
vLumFilter
+
dstY
*
vLumFilterSize
,
lumSrcPtr
,
vLumFilterSize
,
vChrFilter
+
chrDstY
*
vChrFilterSize
,
chrUSrcPtr
,
chrVSrcPtr
,
vChrFilterSize
,
alpSrcPtr
,
dest
,
uDest
,
vDest
,
aDest
,
dstW
,
chrDstW
);
alpSrcPtr
,
dest
,
uDest
,
vDest
,
aDest
,
dstW
,
chrDstW
,
lumDither
,
chrDither
);
}
}
else
{
assert
(
lumSrcPtr
+
vLumFilterSize
-
1
<
lumPixBuf
+
vLumBufSize
*
2
);
...
...
libswscale/x86/swscale_template.c
View file @
6713989c
...
...
@@ -39,8 +39,8 @@
#define YSCALEYUV2YV12X(offset, dest, end, pos) \
__asm__ volatile(\
"movq
"VROUNDER_OFFSET"
(%0), %%mm3 \n\t"\
"movq
%%mm3
, %%mm4 \n\t"\
"movq
"DITHER16"+0
(%0), %%mm3 \n\t"\
"movq
"DITHER16"+8(%0)
, %%mm4 \n\t"\
"lea " offset "(%0), %%"REG_d" \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
".p2align 4 \n\t"
/* FIXME Unroll? */
\
...
...
@@ -62,8 +62,8 @@
MOVNTQ(%%mm3, (%1, %3))\
"add $8, %3 \n\t"\
"cmp %2, %3 \n\t"\
"movq
"VROUNDER_OFFSET"
(%0), %%mm3 \n\t"\
"movq
%%mm3
, %%mm4 \n\t"\
"movq
"DITHER16"+0
(%0), %%mm3 \n\t"\
"movq
"DITHER16"+8(%0)
, %%mm4 \n\t"\
"lea " offset "(%0), %%"REG_d" \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
"jb 1b \n\t"\
...
...
@@ -78,13 +78,18 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
const
int16_t
**
chrVSrc
,
int
chrFilterSize
,
const
int16_t
**
alpSrc
,
uint8_t
*
dest
,
uint8_t
*
uDest
,
uint8_t
*
vDest
,
uint8_t
*
aDest
,
int
dstW
,
int
chrDstW
)
uint8_t
*
aDest
,
int
dstW
,
int
chrDstW
,
const
uint8_t
*
lumDither
,
const
uint8_t
*
chrDither
)
{
int
i
;
if
(
uDest
)
{
x86_reg
uv_off
=
c
->
uv_off
;
for
(
i
=
0
;
i
<
8
;
i
++
)
c
->
dither16
[
i
]
=
chrDither
[
i
]
>>
4
;
YSCALEYUV2YV12X
(
CHR_MMX_FILTER_OFFSET
,
uDest
,
chrDstW
,
0
)
for
(
i
=
0
;
i
<
8
;
i
++
)
c
->
dither16
[
i
]
=
chrDither
[(
i
+
3
)
&
7
]
>>
4
;
YSCALEYUV2YV12X
(
CHR_MMX_FILTER_OFFSET
,
vDest
-
uv_off
,
chrDstW
+
uv_off
,
uv_off
)
}
for
(
i
=
0
;
i
<
8
;
i
++
)
c
->
dither16
[
i
]
=
lumDither
[
i
]
>>
4
;
if
(
CONFIG_SWSCALE_ALPHA
&&
aDest
)
{
YSCALEYUV2YV12X
(
ALP_MMX_FILTER_OFFSET
,
aDest
,
dstW
,
0
)
}
...
...
@@ -95,6 +100,10 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
#define YSCALEYUV2YV12X_ACCURATE(offset, dest, end, pos) \
__asm__ volatile(\
"lea " offset "(%0), %%"REG_d" \n\t"\
"movq "DITHER32"+0(%0), %%mm4 \n\t"\
"movq "DITHER32"+8(%0), %%mm5 \n\t"\
"movq "DITHER32"+16(%0), %%mm6 \n\t"\
"movq "DITHER32"+24(%0), %%mm7 \n\t"\
"pxor %%mm4, %%mm4 \n\t"\
"pxor %%mm5, %%mm5 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
...
...
@@ -126,26 +135,21 @@ static inline void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
"paddd %%mm2, %%mm6 \n\t"\
"paddd %%mm0, %%mm7 \n\t"\
" jnz 1b \n\t"\
"psrad $16, %%mm4 \n\t"\
"psrad $16, %%mm5 \n\t"\
"psrad $16, %%mm6 \n\t"\
"psrad $16, %%mm7 \n\t"\
"movq "VROUNDER_OFFSET"(%0), %%mm0 \n\t"\
"psrad $19, %%mm4 \n\t"\
"psrad $19, %%mm5 \n\t"\
"psrad $19, %%mm6 \n\t"\
"psrad $19, %%mm7 \n\t"\
"packssdw %%mm5, %%mm4 \n\t"\
"packssdw %%mm7, %%mm6 \n\t"\
"paddw %%mm0, %%mm4 \n\t"\
"paddw %%mm0, %%mm6 \n\t"\
"psraw $3, %%mm4 \n\t"\
"psraw $3, %%mm6 \n\t"\
"packuswb %%mm6, %%mm4 \n\t"\
MOVNTQ(%%mm4, (%1, %3))\
"add $8, %3 \n\t"\
"cmp %2, %3 \n\t"\
"lea " offset "(%0), %%"REG_d" \n\t"\
"
pxor %%mm4
, %%mm4 \n\t"\
"
pxor %%mm5
, %%mm5 \n\t"\
"
pxor %%mm6
, %%mm6 \n\t"\
"
pxor %%mm7
, %%mm7 \n\t"\
"
movq "DITHER32"+0(%0)
, %%mm4 \n\t"\
"
movq "DITHER32"+8(%0)
, %%mm5 \n\t"\
"
movq "DITHER32"+16(%0)
, %%mm6 \n\t"\
"
movq "DITHER32"+24(%0)
, %%mm7 \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
"jb 1b \n\t"\
:: "r" (&c->redDither),\
...
...
@@ -159,13 +163,18 @@ static inline void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
const
int16_t
**
chrVSrc
,
int
chrFilterSize
,
const
int16_t
**
alpSrc
,
uint8_t
*
dest
,
uint8_t
*
uDest
,
uint8_t
*
vDest
,
uint8_t
*
aDest
,
int
dstW
,
int
chrDstW
)
uint8_t
*
aDest
,
int
dstW
,
int
chrDstW
,
const
uint8_t
*
lumDither
,
const
uint8_t
*
chrDither
)
{
int
i
;
if
(
uDest
)
{
x86_reg
uv_off
=
c
->
uv_off
;
for
(
i
=
0
;
i
<
8
;
i
++
)
c
->
dither32
[
i
]
=
chrDither
[
i
]
<<
12
;
YSCALEYUV2YV12X_ACCURATE
(
CHR_MMX_FILTER_OFFSET
,
uDest
,
chrDstW
,
0
)
for
(
i
=
0
;
i
<
8
;
i
++
)
c
->
dither32
[
i
]
=
chrDither
[(
i
+
3
)
&
7
]
<<
12
;
YSCALEYUV2YV12X_ACCURATE
(
CHR_MMX_FILTER_OFFSET
,
vDest
-
uv_off
,
chrDstW
+
uv_off
,
uv_off
)
}
for
(
i
=
0
;
i
<
8
;
i
++
)
c
->
dither32
[
i
]
=
lumDither
[
i
]
<<
12
;
if
(
CONFIG_SWSCALE_ALPHA
&&
aDest
)
{
YSCALEYUV2YV12X_ACCURATE
(
ALP_MMX_FILTER_OFFSET
,
aDest
,
dstW
,
0
)
}
...
...
@@ -190,7 +199,8 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
const
int16_t
*
chrUSrc
,
const
int16_t
*
chrVSrc
,
const
int16_t
*
alpSrc
,
uint8_t
*
dest
,
uint8_t
*
uDest
,
uint8_t
*
vDest
,
uint8_t
*
aDest
,
int
dstW
,
int
chrDstW
)
uint8_t
*
aDest
,
int
dstW
,
int
chrDstW
,
const
uint8_t
*
lumDither
,
const
uint8_t
*
chrDither
)
{
int
p
=
4
;
const
int16_t
*
src
[
4
]
=
{
alpSrc
+
dstW
,
lumSrc
+
dstW
,
chrUSrc
+
chrDstW
,
chrVSrc
+
chrDstW
};
...
...
@@ -211,14 +221,13 @@ static inline void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
#define YSCALEYUV2YV121_ACCURATE \
"mov %2, %%"REG_a" \n\t"\
"pcmpeqw %%mm7, %%mm7 \n\t"\
"psrlw $15, %%mm7 \n\t"\
"psllw $6, %%mm7 \n\t"\
"movq 0(%3), %%mm6 \n\t"\
"movq 8(%3), %%mm7 \n\t"\
".p2align 4 \n\t"
/* FIXME Unroll? */
\
"1: \n\t"\
"movq (%0, %%"REG_a", 2), %%mm0 \n\t"\
"movq 8(%0, %%"REG_a", 2), %%mm1 \n\t"\
"paddsw %%mm
7
, %%mm0 \n\t"\
"paddsw %%mm
6
, %%mm0 \n\t"\
"paddsw %%mm7, %%mm1 \n\t"\
"psraw $7, %%mm0 \n\t"\
"psraw $7, %%mm1 \n\t"\
...
...
@@ -231,7 +240,8 @@ static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
const
int16_t
*
chrUSrc
,
const
int16_t
*
chrVSrc
,
const
int16_t
*
alpSrc
,
uint8_t
*
dest
,
uint8_t
*
uDest
,
uint8_t
*
vDest
,
uint8_t
*
aDest
,
int
dstW
,
int
chrDstW
)
uint8_t
*
aDest
,
int
dstW
,
int
chrDstW
,
const
uint8_t
*
lumDither
,
const
uint8_t
*
chrDither
)
{
int
p
=
4
;
const
int16_t
*
src
[
4
]
=
{
alpSrc
+
dstW
,
lumSrc
+
dstW
,
chrUSrc
+
chrDstW
,
chrVSrc
+
chrDstW
};
...
...
@@ -240,10 +250,12 @@ static inline void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
while
(
p
--
)
{
if
(
dst
[
p
])
{
int
i
;
for
(
i
=
0
;
i
<
8
;
i
++
)
c
->
dither16
[
i
]
=
i
<
2
?
lumDither
[
i
]
:
chrDither
[
i
];
__asm__
volatile
(
YSCALEYUV2YV121_ACCURATE
::
"r"
(
src
[
p
]),
"r"
(
dst
[
p
]
+
counter
[
p
]),
"g"
(
-
counter
[
p
])
"g"
(
-
counter
[
p
])
,
"r"
(
c
->
dither16
)
:
"%"
REG_a
);
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment