Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
c59f9a68
Commit
c59f9a68
authored
Jul 05, 2011
by
Ronald S. Bultje
Committed by
Michael Niedermayer
Jul 11, 2011
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
swscale: error dithering for 16/9/10-bit to 8-bit.
Based on a somewhat similar idea in FFmpeg's swscale copy.
parent
93a10dd5
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
160 additions
and
49 deletions
+160
-49
swscale.c
libswscale/swscale.c
+41
-23
swscale_internal.h
libswscale/swscale_internal.h
+5
-4
swscale_template.c
libswscale/x86/swscale_template.c
+114
-22
No files found.
libswscale/swscale.c
View file @
c59f9a68
...
...
@@ -193,6 +193,18 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{
77
,
23
,
60
,
15
,
72
,
21
,
56
,
14
,
},
};
#endif
/**
 * 8x8 dither matrix with 7-bit amplitude.
 *
 * The 64 entries are the even numbers 0, 2, ..., 126, each appearing
 * exactly once. swScale() selects one 8-byte row per output line
 * (dither_8x8_128[dstY & 7] for luma, dither_8x8_128[chrDstY & 7] for
 * chroma) and the output writers index that row with (x & 7).
 */
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
    {  36,  68,  60,  92,  34,  66,  58,  90 },
    { 100,   4, 124,  28,  98,   2, 122,  26 },
    {  52,  84,  44,  76,  50,  82,  42,  74 },
    { 116,  20, 108,  12, 114,  18, 106,  10 },
    {  32,  64,  56,  88,  38,  70,  62,  94 },
    {  96,   0, 120,  24, 102,   6, 126,  30 },
    {  48,  80,  40,  72,  54,  86,  46,  78 },
    { 112,  16, 104,   8, 118,  22, 110,  14 },
};
/**
 * Flat dither row: eight bytes, all equal to 64.
 *
 * swScale() points both c->lumDither8 and c->chrDither8 at this row when
 * should_dither is false, so every pixel gets the same bias.
 */
DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] = {
    64, 64, 64, 64,
    64, 64, 64, 64
};
DECLARE_ALIGNED
(
8
,
const
uint8_t
,
dithers
)[
8
][
8
][
8
]
=
{
{
...
...
@@ -387,16 +399,16 @@ static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrUSrc
,
const
int16_t
**
chrVSrc
,
int
chrFilterSize
,
const
int16_t
**
alpSrc
,
uint8_t
*
dest
[
4
],
int
dstW
,
int
chrDstW
,
const
uint8_t
*
lumDither
,
const
uint8_t
*
chrDither
)
uint8_t
*
dest
[
4
],
int
dstW
,
int
chrDstW
)
{
uint8_t
*
yDest
=
dest
[
0
],
*
uDest
=
dest
[
1
],
*
vDest
=
dest
[
2
],
*
aDest
=
CONFIG_SWSCALE_ALPHA
?
dest
[
3
]
:
NULL
;
int
i
;
const
uint8_t
*
lumDither
=
c
->
lumDither8
,
*
chrDither
=
c
->
chrDither8
;
//FIXME Optimize (just quickly written not optimized..)
for
(
i
=
0
;
i
<
dstW
;
i
++
)
{
int
val
=
lumDither
[
i
&
7
]
<<
12
;
int
val
=
lumDither
[
i
&
7
]
<<
12
;
int
j
;
for
(
j
=
0
;
j
<
lumFilterSize
;
j
++
)
val
+=
lumSrc
[
j
][
i
]
*
lumFilter
[
j
];
...
...
@@ -406,8 +418,8 @@ static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
if
(
uDest
)
for
(
i
=
0
;
i
<
chrDstW
;
i
++
)
{
int
u
=
chrDither
[
i
&
7
]
<<
12
;
int
v
=
chrDither
[(
i
+
3
)
&
7
]
<<
12
;
int
u
=
chrDither
[
i
&
7
]
<<
12
;
int
v
=
chrDither
[(
i
+
3
)
&
7
]
<<
12
;
int
j
;
for
(
j
=
0
;
j
<
chrFilterSize
;
j
++
)
{
u
+=
chrUSrc
[
j
][
i
]
*
chrFilter
[
j
];
...
...
@@ -420,7 +432,7 @@ static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
if
(
CONFIG_SWSCALE_ALPHA
&&
aDest
)
for
(
i
=
0
;
i
<
dstW
;
i
++
)
{
int
val
=
lumDither
[
i
&
7
]
<<
12
;
int
val
=
lumDither
[
i
&
7
]
<<
12
;
int
j
;
for
(
j
=
0
;
j
<
lumFilterSize
;
j
++
)
val
+=
alpSrc
[
j
][
i
]
*
lumFilter
[
j
];
...
...
@@ -432,29 +444,29 @@ static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
static
void
yuv2yuv1_c
(
SwsContext
*
c
,
const
int16_t
*
lumSrc
,
const
int16_t
*
chrUSrc
,
const
int16_t
*
chrVSrc
,
const
int16_t
*
alpSrc
,
uint8_t
*
dest
[
4
],
int
dstW
,
int
chrDstW
,
const
uint8_t
*
lumDither
,
const
uint8_t
*
chrDither
)
uint8_t
*
dest
[
4
],
int
dstW
,
int
chrDstW
)
{
uint8_t
*
yDest
=
dest
[
0
],
*
uDest
=
dest
[
1
],
*
vDest
=
dest
[
2
],
*
aDest
=
CONFIG_SWSCALE_ALPHA
?
dest
[
3
]
:
NULL
;
int
i
;
const
uint8_t
*
lumDither
=
c
->
lumDither8
,
*
chrDither
=
c
->
chrDither8
;
for
(
i
=
0
;
i
<
dstW
;
i
++
)
{
int
val
=
(
lumSrc
[
i
]
+
lumDither
[
i
&
7
])
>>
7
;
int
val
=
(
lumSrc
[
i
]
+
lumDither
[
i
&
7
])
>>
7
;
yDest
[
i
]
=
av_clip_uint8
(
val
);
}
if
(
uDest
)
for
(
i
=
0
;
i
<
chrDstW
;
i
++
)
{
int
u
=
(
chrUSrc
[
i
]
+
chrDither
[
i
&
7
])
>>
7
;
int
v
=
(
chrVSrc
[
i
]
+
chrDither
[(
i
+
3
)
&
7
])
>>
7
;
int
u
=
(
chrUSrc
[
i
]
+
chrDither
[
i
&
7
])
>>
7
;
int
v
=
(
chrVSrc
[
i
]
+
chrDither
[(
i
+
3
)
&
7
])
>>
7
;
uDest
[
i
]
=
av_clip_uint8
(
u
);
vDest
[
i
]
=
av_clip_uint8
(
v
);
}
if
(
CONFIG_SWSCALE_ALPHA
&&
aDest
)
for
(
i
=
0
;
i
<
dstW
;
i
++
)
{
int
val
=
(
alpSrc
[
i
]
+
lumDither
[
i
&
7
])
>>
7
;
int
val
=
(
alpSrc
[
i
]
+
lumDither
[
i
&
7
])
>>
7
;
aDest
[
i
]
=
av_clip_uint8
(
val
);
}
}
...
...
@@ -464,16 +476,16 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrUSrc
,
const
int16_t
**
chrVSrc
,
int
chrFilterSize
,
const
int16_t
**
alpSrc
,
uint8_t
*
dest
[
4
],
int
dstW
,
int
chrDstW
,
const
uint8_t
*
lumDither
,
const
uint8_t
*
chrDither
)
int
dstW
,
int
chrDstW
)
{
uint8_t
*
yDest
=
dest
[
0
],
*
uDest
=
dest
[
1
];
enum
PixelFormat
dstFormat
=
c
->
dstFormat
;
const
uint8_t
*
lumDither
=
c
->
lumDither8
,
*
chrDither
=
c
->
chrDither8
;
//FIXME Optimize (just quickly written not optimized..)
int
i
;
for
(
i
=
0
;
i
<
dstW
;
i
++
)
{
int
val
=
lumDither
[
i
&
7
]
<<
12
;
int
val
=
lumDither
[
i
&
7
]
<<
12
;
int
j
;
for
(
j
=
0
;
j
<
lumFilterSize
;
j
++
)
val
+=
lumSrc
[
j
][
i
]
*
lumFilter
[
j
];
...
...
@@ -486,8 +498,8 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
if
(
dstFormat
==
PIX_FMT_NV12
)
for
(
i
=
0
;
i
<
chrDstW
;
i
++
)
{
int
u
=
chrDither
[
i
&
7
]
<<
12
;
int
v
=
chrDither
[(
i
+
3
)
&
7
]
<<
12
;
int
u
=
chrDither
[
i
&
7
]
<<
12
;
int
v
=
chrDither
[(
i
+
3
)
&
7
]
<<
12
;
int
j
;
for
(
j
=
0
;
j
<
chrFilterSize
;
j
++
)
{
u
+=
chrUSrc
[
j
][
i
]
*
chrFilter
[
j
];
...
...
@@ -499,8 +511,8 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
}
else
for
(
i
=
0
;
i
<
chrDstW
;
i
++
)
{
int
u
=
chrDither
[
i
&
7
]
<<
12
;
int
v
=
chrDither
[(
i
+
3
)
&
7
]
<<
12
;
int
u
=
chrDither
[
i
&
7
]
<<
12
;
int
v
=
chrDither
[(
i
+
3
)
&
7
]
<<
12
;
int
j
;
for
(
j
=
0
;
j
<
chrFilterSize
;
j
++
)
{
u
+=
chrUSrc
[
j
][
i
]
*
chrFilter
[
j
];
...
...
@@ -2523,6 +2535,7 @@ static int swScale(SwsContext *c, const uint8_t* src[],
const
int
chrSrcSliceH
=
-
((
-
srcSliceH
)
>>
c
->
chrSrcVSubSample
);
int
lastDstY
;
uint32_t
*
pal
=
c
->
pal_yuv
;
int
should_dither
=
isNBPS
(
c
->
srcFormat
)
||
is16BPS
(
c
->
srcFormat
);
yuv2planar1_fn
yuv2yuv1
=
c
->
yuv2yuv1
;
yuv2planarX_fn
yuv2yuvX
=
c
->
yuv2yuvX
;
...
...
@@ -2578,6 +2591,9 @@ static int swScale(SwsContext *c, const uint8_t* src[],
lastInChrBuf
=
-
1
;
}
if
(
!
should_dither
)
{
c
->
chrDither8
=
c
->
lumDither8
=
ff_sws_pb_64
;
}
lastDstY
=
dstY
;
for
(;
dstY
<
dstH
;
dstY
++
)
{
...
...
@@ -2588,8 +2604,6 @@ static int swScale(SwsContext *c, const uint8_t* src[],
dst
[
2
]
+
dstStride
[
2
]
*
chrDstY
,
(
CONFIG_SWSCALE_ALPHA
&&
alpPixBuf
)
?
dst
[
3
]
+
dstStride
[
3
]
*
dstY
:
NULL
,
};
const
uint8_t
*
lumDither
=
should_dither
?
dithers
[
7
][
dstY
&
7
]
:
flat64
;
const
uint8_t
*
chrDither
=
should_dither
?
dithers
[
7
][
chrDstY
&
7
]
:
flat64
;
const
int
firstLumSrcY
=
vLumFilterPos
[
dstY
];
//First line needed as input
const
int
firstLumSrcY2
=
vLumFilterPos
[
FFMIN
(
dstY
|
((
1
<<
c
->
chrDstVSubSample
)
-
1
),
dstH
-
1
)];
...
...
@@ -2669,6 +2683,10 @@ static int swScale(SwsContext *c, const uint8_t* src[],
#if HAVE_MMX
updateMMXDitherTables
(
c
,
dstY
,
lumBufIndex
,
chrBufIndex
,
lastInLumBuf
,
lastInChrBuf
);
#endif
if
(
should_dither
)
{
c
->
chrDither8
=
dither_8x8_128
[
chrDstY
&
7
];
c
->
lumDither8
=
dither_8x8_128
[
dstY
&
7
];
}
if
(
dstY
>=
dstH
-
2
)
{
// hmm looks like we can't use MMX here without overwriting this array's tail
find_c_packed_planar_out_funcs
(
c
,
&
yuv2yuv1
,
&
yuv2yuvX
,
...
...
@@ -2689,13 +2707,13 @@ static int swScale(SwsContext *c, const uint8_t* src[],
if
(
c
->
yuv2yuv1
&&
vLumFilterSize
==
1
&&
vChrFilterSize
==
1
)
{
// unscaled YV12
const
int16_t
*
alpBuf
=
(
CONFIG_SWSCALE_ALPHA
&&
alpPixBuf
)
?
alpSrcPtr
[
0
]
:
NULL
;
yuv2yuv1
(
c
,
lumSrcPtr
[
0
],
chrUSrcPtr
[
0
],
chrVSrcPtr
[
0
],
alpBuf
,
dest
,
dstW
,
chrDstW
,
lumDither
,
chrDither
);
dest
,
dstW
,
chrDstW
);
}
else
{
//General YV12
yuv2yuvX
(
c
,
vLumFilter
+
dstY
*
vLumFilterSize
,
lumSrcPtr
,
vLumFilterSize
,
vChrFilter
+
chrDstY
*
vChrFilterSize
,
chrUSrcPtr
,
chrVSrcPtr
,
vChrFilterSize
,
alpSrcPtr
,
dest
,
dstW
,
chrDstW
,
lumDither
,
chrDither
);
alpSrcPtr
,
dest
,
dstW
,
chrDstW
);
}
}
else
{
assert
(
lumSrcPtr
+
vLumFilterSize
-
1
<
lumPixBuf
+
vLumBufSize
*
2
);
...
...
libswscale/swscale_internal.h
View file @
c59f9a68
...
...
@@ -75,8 +75,7 @@ typedef int (*SwsFunc)(struct SwsContext *context, const uint8_t* src[],
typedef
void
(
*
yuv2planar1_fn
)
(
struct
SwsContext
*
c
,
const
int16_t
*
lumSrc
,
const
int16_t
*
chrUSrc
,
const
int16_t
*
chrVSrc
,
const
int16_t
*
alpSrc
,
uint8_t
*
dest
[
4
],
int
dstW
,
int
chrDstW
,
const
uint8_t
*
lumDither
,
const
uint8_t
*
chrDither
);
uint8_t
*
dest
[
4
],
int
dstW
,
int
chrDstW
);
/**
* Write one line of horizontally scaled Y/U/V/A to planar output
* with multi-point vertical scaling between input pixels.
...
...
@@ -99,7 +98,7 @@ typedef void (*yuv2planarX_fn) (struct SwsContext *c, const int16_t *lumFilter,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrUSrc
,
const
int16_t
**
chrVSrc
,
int
chrFilterSize
,
const
int16_t
**
alpSrc
,
uint8_t
*
dest
[
4
],
int
dstW
,
int
chrDstW
,
const
uint8_t
*
lumDither
,
const
uint8_t
*
chrDither
);
int
dstW
,
int
chrDstW
);
/**
* Write one line of horizontally scaled Y/U/V/A to packed-pixel YUV/RGB
* output without any additional vertical scaling (or point-scaling). Note
...
...
@@ -323,7 +322,7 @@ typedef struct SwsContext {
#define UV_OFF "11*8+4*4*256*3+48"
#define UV_OFFx2 "11*8+4*4*256*3+56"
#define DITHER16 "11*8+4*4*256*3+64"
#define DITHER32 "11*8+4*4*256*3+
64+16
"
#define DITHER32 "11*8+4*4*256*3+
80
"
DECLARE_ALIGNED
(
8
,
uint64_t
,
redDither
);
DECLARE_ALIGNED
(
8
,
uint64_t
,
greenDither
);
...
...
@@ -351,6 +350,8 @@ typedef struct SwsContext {
uint16_t
dither16
[
8
];
uint32_t
dither32
[
8
];
const
uint8_t
*
chrDither8
,
*
lumDither8
;
#if HAVE_ALTIVEC
vector
signed
short
CY
;
vector
signed
short
CRV
;
...
...
libswscale/x86/swscale_template.c
View file @
c59f9a68
...
...
@@ -70,26 +70,62 @@
: "%"REG_d, "%"REG_S\
);
#if !COMPILE_TEMPLATE_MMX2
/**
 * Expand an 8-byte dither row into eight 16-bit words inside the context.
 *
 * The eight bytes at srcDither are fetched with a single 64-bit load,
 * optionally rotated, zero-extended to 16 bits, shifted right by 4 and
 * stored as 16 bytes at offset DITHER16 from &c->redDither (presumably the
 * dither16 member of SwsContext -- the offset string is defined in
 * swscale_internal.h; confirm against the struct layout).
 *
 * @param c         scaler context whose DITHER16 area is overwritten
 * @param srcDither pointer to at least eight readable dither bytes
 * @param rot       when non-zero, output word i is derived from
 *                  srcDither[(i + 3) & 7] instead of srcDither[i]
 *
 * Clobbers mm0, mm3 and mm4.
 */
static av_always_inline void dither_8to16(SwsContext *c, const uint8_t *srcDither, int rot)
{
    if (rot) {
        __asm__ volatile(
            "pxor      %%mm0, %%mm0              \n\t"
            "movq       (%0), %%mm3              \n\t"
            "movq      %%mm3, %%mm4              \n\t"
            /* rotate the 8 bytes right by 3: (x >> 24) | (x << 40) */
            "psrlq       $24, %%mm3              \n\t"
            "psllq       $40, %%mm4              \n\t"
            "por       %%mm4, %%mm3              \n\t"
            "movq      %%mm3, %%mm4              \n\t"
            /* zero-extend bytes to words: low half in mm3, high half in mm4 */
            "punpcklbw %%mm0, %%mm3              \n\t"
            "punpckhbw %%mm0, %%mm4              \n\t"
            "psraw        $4, %%mm3              \n\t"
            "psraw        $4, %%mm4              \n\t"
            "movq      %%mm3, " DITHER16 "+0(%1) \n\t"
            "movq      %%mm4, " DITHER16 "+8(%1) \n\t"
            :: "r" (srcDither), "r" (&c->redDither)
            /* the two stores above are not expressed as output operands */
            : "memory"
        );
    } else {
        __asm__ volatile(
            "pxor      %%mm0, %%mm0              \n\t"
            "movq       (%0), %%mm3              \n\t"
            "movq      %%mm3, %%mm4              \n\t"
            /* zero-extend bytes to words: low half in mm3, high half in mm4 */
            "punpcklbw %%mm0, %%mm3              \n\t"
            "punpckhbw %%mm0, %%mm4              \n\t"
            "psraw        $4, %%mm3              \n\t"
            "psraw        $4, %%mm4              \n\t"
            "movq      %%mm3, " DITHER16 "+0(%1) \n\t"
            "movq      %%mm4, " DITHER16 "+8(%1) \n\t"
            :: "r" (srcDither), "r" (&c->redDither)
            /* the two stores above are not expressed as output operands */
            : "memory"
        );
    }
}
#endif
static
void
RENAME
(
yuv2yuvX
)(
SwsContext
*
c
,
const
int16_t
*
lumFilter
,
const
int16_t
**
lumSrc
,
int
lumFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrUSrc
,
const
int16_t
**
chrVSrc
,
int
chrFilterSize
,
const
int16_t
**
alpSrc
,
uint8_t
*
dest
[
4
],
int
dstW
,
int
chrDstW
,
const
uint8_t
*
lumDither
,
const
uint8_t
*
chrDither
)
uint8_t
*
dest
[
4
],
int
dstW
,
int
chrDstW
)
{
int
i
;
uint8_t
*
yDest
=
dest
[
0
],
*
uDest
=
dest
[
1
],
*
vDest
=
dest
[
2
],
*
aDest
=
CONFIG_SWSCALE_ALPHA
?
dest
[
3
]
:
NULL
;
const
uint8_t
*
lumDither
=
c
->
lumDither8
,
*
chrDither
=
c
->
chrDither8
;
if
(
uDest
)
{
x86_reg
uv_off
=
c
->
uv_offx2
>>
1
;
for
(
i
=
0
;
i
<
8
;
i
++
)
c
->
dither16
[
i
]
=
chrDither
[
i
]
>>
4
;
dither_8to16
(
c
,
chrDither
,
0
)
;
YSCALEYUV2YV12X
(
CHR_MMX_FILTER_OFFSET
,
uDest
,
chrDstW
,
0
)
for
(
i
=
0
;
i
<
8
;
i
++
)
c
->
dither16
[
i
]
=
chrDither
[(
i
+
3
)
&
7
]
>>
4
;
dither_8to16
(
c
,
chrDither
,
1
)
;
YSCALEYUV2YV12X
(
CHR_MMX_FILTER_OFFSET
,
vDest
-
uv_off
,
chrDstW
+
uv_off
,
uv_off
)
}
for
(
i
=
0
;
i
<
8
;
i
++
)
c
->
dither16
[
i
]
=
lumDither
[
i
]
>>
4
;
dither_8to16
(
c
,
lumDither
,
0
)
;
if
(
CONFIG_SWSCALE_ALPHA
&&
aDest
)
{
YSCALEYUV2YV12X
(
ALP_MMX_FILTER_OFFSET
,
aDest
,
dstW
,
0
)
}
...
...
@@ -104,10 +140,6 @@ static void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
"movq "DITHER32"+8(%0), %%mm5 \n\t"\
"movq "DITHER32"+16(%0), %%mm6 \n\t"\
"movq "DITHER32"+24(%0), %%mm7 \n\t"\
"pxor %%mm4, %%mm4 \n\t"\
"pxor %%mm5, %%mm5 \n\t"\
"pxor %%mm6, %%mm6 \n\t"\
"pxor %%mm7, %%mm7 \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
".p2align 4 \n\t"\
"1: \n\t"\
...
...
@@ -157,26 +189,87 @@ static void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
: "%"REG_a, "%"REG_d, "%"REG_S\
);
#if !COMPILE_TEMPLATE_MMX2
/**
 * Expand an 8-byte dither row into the eight 32-bit entries of c->dither32.
 *
 * Entry i becomes srcDither[k] << 12, where k is (i + 3) & 7 when rot is
 * non-zero and i otherwise.
 *
 * @param c         scaler context whose dither32[] array is overwritten
 * @param srcDither pointer to at least eight readable dither bytes
 * @param rot       non-zero selects the rotated-by-3 ordering
 *
 * Only the plain C path is kept. The MMX blocks that used to follow an
 * unconditional return were unreachable; they also shifted with psllw
 * (16-bit lanes) after widening to 32 bits, so a value << 12 would not have
 * fit and they could not simply be re-enabled.
 */
static av_always_inline void dither_8to32(SwsContext *c, const uint8_t *srcDither, int rot)
{
    const int shift = rot ? 3 : 0;
    int i;

    for (i = 0; i < 8; i++)
        c->dither32[i] = srcDither[(i + shift) & 7] << 12;
}
#endif
static
void
RENAME
(
yuv2yuvX_ar
)(
SwsContext
*
c
,
const
int16_t
*
lumFilter
,
const
int16_t
**
lumSrc
,
int
lumFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrUSrc
,
const
int16_t
**
chrVSrc
,
int
chrFilterSize
,
const
int16_t
**
alpSrc
,
uint8_t
*
dest
[
4
],
int
dstW
,
int
chrDstW
,
const
uint8_t
*
lumDither
,
const
uint8_t
*
chrDither
)
uint8_t
*
dest
[
4
],
int
dstW
,
int
chrDstW
)
{
int
i
;
uint8_t
*
yDest
=
dest
[
0
],
*
uDest
=
dest
[
1
],
*
vDest
=
dest
[
2
],
*
aDest
=
CONFIG_SWSCALE_ALPHA
?
dest
[
3
]
:
NULL
;
const
uint8_t
*
lumDither
=
c
->
lumDither8
,
*
chrDither
=
c
->
chrDither8
;
if
(
uDest
)
{
x86_reg
uv_off
=
c
->
uv_offx2
>>
1
;
for
(
i
=
0
;
i
<
8
;
i
++
)
c
->
dither32
[
i
]
=
chrDither
[
i
]
<<
12
;
dither_8to32
(
c
,
chrDither
,
0
)
;
YSCALEYUV2YV12X_ACCURATE
(
CHR_MMX_FILTER_OFFSET
,
uDest
,
chrDstW
,
0
)
for
(
i
=
0
;
i
<
8
;
i
++
)
c
->
dither32
[
i
]
=
chrDither
[(
i
+
3
)
&
7
]
<<
12
;
dither_8to32
(
c
,
chrDither
,
1
)
;
YSCALEYUV2YV12X_ACCURATE
(
CHR_MMX_FILTER_OFFSET
,
vDest
-
uv_off
,
chrDstW
+
uv_off
,
uv_off
)
}
for
(
i
=
0
;
i
<
8
;
i
++
)
c
->
dither32
[
i
]
=
lumDither
[
i
]
<<
12
;
dither_8to32
(
c
,
lumDither
,
0
)
;
if
(
CONFIG_SWSCALE_ALPHA
&&
aDest
)
{
YSCALEYUV2YV12X_ACCURATE
(
ALP_MMX_FILTER_OFFSET
,
aDest
,
dstW
,
0
)
}
...
...
@@ -187,8 +280,7 @@ static void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
static
void
RENAME
(
yuv2yuv1
)(
SwsContext
*
c
,
const
int16_t
*
lumSrc
,
const
int16_t
*
chrUSrc
,
const
int16_t
*
chrVSrc
,
const
int16_t
*
alpSrc
,
uint8_t
*
dst
[
4
],
int
dstW
,
int
chrDstW
,
const
uint8_t
*
lumDither
,
const
uint8_t
*
chrDither
)
uint8_t
*
dst
[
4
],
int
dstW
,
int
chrDstW
)
{
int
p
=
4
;
const
int16_t
*
src
[
4
]
=
{
...
...
@@ -222,8 +314,7 @@ static void RENAME(yuv2yuv1)(SwsContext *c, const int16_t *lumSrc,
static
void
RENAME
(
yuv2yuv1_ar
)(
SwsContext
*
c
,
const
int16_t
*
lumSrc
,
const
int16_t
*
chrUSrc
,
const
int16_t
*
chrVSrc
,
const
int16_t
*
alpSrc
,
uint8_t
*
dst
[
4
],
int
dstW
,
int
chrDstW
,
const
uint8_t
*
lumDither
,
const
uint8_t
*
chrDither
)
uint8_t
*
dst
[
4
],
int
dstW
,
int
chrDstW
)
{
int
p
=
4
;
const
int16_t
*
src
[
4
]
=
{
...
...
@@ -231,15 +322,16 @@ static void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
chrVSrc
+
chrDstW
,
alpSrc
+
dstW
};
x86_reg
counter
[
4
]
=
{
dstW
,
chrDstW
,
chrDstW
,
dstW
};
const
uint8_t
*
lumDither
=
c
->
lumDither8
,
*
chrDither
=
c
->
chrDither8
;
while
(
p
--
)
{
if
(
dst
[
p
])
{
int
i
;
for
(
i
=
0
;
i
<
8
;
i
++
)
c
->
dither16
[
i
]
=
i
<
2
?
lumDither
[
i
]
:
chrDither
[
i
];
for
(
i
=
0
;
i
<
8
;
i
++
)
c
->
dither16
[
i
]
=
(
p
==
2
||
p
==
3
)
?
lumDither
[
i
]
:
chrDither
[
i
];
__asm__
volatile
(
"mov %2, %%"
REG_a
"
\n\t
"
"movq
0(%3), %%mm6
\n\t
"
"movq
8(%3), %%mm7
\n\t
"
"movq
"
DITHER16
"+
0(%3), %%mm6
\n\t
"
"movq
"
DITHER16
"+
8(%3), %%mm7
\n\t
"
".p2align 4
\n\t
"
/* FIXME Unroll? */
"1:
\n\t
"
"movq (%0, %%"
REG_a
", 2), %%mm0
\n\t
"
...
...
@@ -253,7 +345,7 @@ static void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
"add $8, %%"
REG_a
"
\n\t
"
"jnc 1b
\n\t
"
::
"r"
(
src
[
p
]),
"r"
(
dst
[
p
]
+
counter
[
p
]),
"g"
(
-
counter
[
p
]),
"r"
(
c
->
dither16
)
"g"
(
-
counter
[
p
]),
"r"
(
&
c
->
redDither
)
:
"%"
REG_a
);
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment