Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
4e3e333a
Commit
4e3e333a
authored
Jul 05, 2011
by
Ronald S. Bultje
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
swscale: error dithering for 16/9/10-bit to 8-bit.
Based on a somewhat similar idea in FFmpeg's swscale copy.
parent
7d7bacf0
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
160 additions
and
30 deletions
+160
-30
swscale.c
libswscale/swscale.c
+36
-13
swscale_internal.h
libswscale/swscale_internal.h
+6
-0
swscale_template.c
libswscale/x86/swscale_template.c
+118
-17
No files found.
libswscale/swscale.c
View file @
4e3e333a
...
...
@@ -182,6 +182,18 @@ DECLARE_ALIGNED(8, const uint8_t, dither_8x8_220)[8][8]={
{
77
,
23
,
60
,
15
,
72
,
21
,
56
,
14
,
},
};
#endif
/*
 * 8x8 ordered-dither matrix. The 64 entries are a permutation of the even
 * values 0..126. swScale() picks one 8-byte row per output line
 * (dither_8x8_128[dstY & 7] for luma, dither_8x8_128[chrDstY & 7] for chroma)
 * and the vertical output functions index that row with the pixel column
 * (i & 7).
 */
DECLARE_ALIGNED(8, const uint8_t, dither_8x8_128)[8][8] = {
    {  36,  68,  60,  92,  34,  66,  58,  90, },
    { 100,   4, 124,  28,  98,   2, 122,  26, },
    {  52,  84,  44,  76,  50,  82,  42,  74, },
    { 116,  20, 108,  12, 114,  18, 106,  10, },
    {  32,  64,  56,  88,  38,  70,  62,  94, },
    {  96,   0, 120,  24, 102,   6, 126,  30, },
    {  48,  80,  40,  72,  54,  86,  46,  78, },
    { 112,  16, 104,   8, 118,  22, 110,  14, },
};
/*
 * Constant "dither" row: eight bytes of 64. swScale() installs it as both
 * lumDither8 and chrDither8 when dithering is not wanted, so the output
 * functions add a fixed bias of 64 (64 << 12 == 1 << 18 in the filtered
 * paths) instead of a per-pixel dither value.
 */
DECLARE_ALIGNED(8, const uint8_t, ff_sws_pb_64)[8] = {
    64, 64, 64, 64, 64, 64, 64, 64
};
static
av_always_inline
void
yuv2yuvX16_c_template
(
const
int16_t
*
lumFilter
,
const
int32_t
**
lumSrc
,
...
...
@@ -285,10 +297,11 @@ static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
uint8_t
*
yDest
=
dest
[
0
],
*
uDest
=
dest
[
1
],
*
vDest
=
dest
[
2
],
*
aDest
=
CONFIG_SWSCALE_ALPHA
?
dest
[
3
]
:
NULL
;
int
i
;
const
uint8_t
*
lumDither
=
c
->
lumDither8
,
*
chrDither
=
c
->
chrDither8
;
//FIXME Optimize (just quickly written not optimized..)
for
(
i
=
0
;
i
<
dstW
;
i
++
)
{
int
val
=
1
<<
18
;
int
val
=
lumDither
[
i
&
7
]
<<
12
;
int
j
;
for
(
j
=
0
;
j
<
lumFilterSize
;
j
++
)
val
+=
lumSrc
[
j
][
i
]
*
lumFilter
[
j
];
...
...
@@ -298,8 +311,8 @@ static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
if
(
uDest
)
for
(
i
=
0
;
i
<
chrDstW
;
i
++
)
{
int
u
=
1
<<
18
;
int
v
=
1
<<
18
;
int
u
=
chrDither
[
i
&
7
]
<<
12
;
int
v
=
chrDither
[(
i
+
3
)
&
7
]
<<
12
;
int
j
;
for
(
j
=
0
;
j
<
chrFilterSize
;
j
++
)
{
u
+=
chrUSrc
[
j
][
i
]
*
chrFilter
[
j
];
...
...
@@ -312,7 +325,7 @@ static void yuv2yuvX_c(SwsContext *c, const int16_t *lumFilter,
if
(
CONFIG_SWSCALE_ALPHA
&&
aDest
)
for
(
i
=
0
;
i
<
dstW
;
i
++
)
{
int
val
=
1
<<
18
;
int
val
=
lumDither
[
i
&
7
]
<<
12
;
int
j
;
for
(
j
=
0
;
j
<
lumFilterSize
;
j
++
)
val
+=
alpSrc
[
j
][
i
]
*
lumFilter
[
j
];
...
...
@@ -329,23 +342,24 @@ static void yuv2yuv1_c(SwsContext *c, const int16_t *lumSrc,
uint8_t
*
yDest
=
dest
[
0
],
*
uDest
=
dest
[
1
],
*
vDest
=
dest
[
2
],
*
aDest
=
CONFIG_SWSCALE_ALPHA
?
dest
[
3
]
:
NULL
;
int
i
;
const
uint8_t
*
lumDither
=
c
->
lumDither8
,
*
chrDither
=
c
->
chrDither8
;
for
(
i
=
0
;
i
<
dstW
;
i
++
)
{
int
val
=
(
lumSrc
[
i
]
+
64
)
>>
7
;
int
val
=
(
lumSrc
[
i
]
+
lumDither
[
i
&
7
])
>>
7
;
yDest
[
i
]
=
av_clip_uint8
(
val
);
}
if
(
uDest
)
for
(
i
=
0
;
i
<
chrDstW
;
i
++
)
{
int
u
=
(
chrUSrc
[
i
]
+
64
)
>>
7
;
int
v
=
(
chrVSrc
[
i
]
+
64
)
>>
7
;
int
u
=
(
chrUSrc
[
i
]
+
chrDither
[
i
&
7
])
>>
7
;
int
v
=
(
chrVSrc
[
i
]
+
chrDither
[(
i
+
3
)
&
7
])
>>
7
;
uDest
[
i
]
=
av_clip_uint8
(
u
);
vDest
[
i
]
=
av_clip_uint8
(
v
);
}
if
(
CONFIG_SWSCALE_ALPHA
&&
aDest
)
for
(
i
=
0
;
i
<
dstW
;
i
++
)
{
int
val
=
(
alpSrc
[
i
]
+
64
)
>>
7
;
int
val
=
(
alpSrc
[
i
]
+
lumDither
[
i
&
7
])
>>
7
;
aDest
[
i
]
=
av_clip_uint8
(
val
);
}
}
...
...
@@ -359,11 +373,12 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
{
uint8_t
*
yDest
=
dest
[
0
],
*
uDest
=
dest
[
1
];
enum
PixelFormat
dstFormat
=
c
->
dstFormat
;
const
uint8_t
*
lumDither
=
c
->
lumDither8
,
*
chrDither
=
c
->
chrDither8
;
//FIXME Optimize (just quickly written not optimized..)
int
i
;
for
(
i
=
0
;
i
<
dstW
;
i
++
)
{
int
val
=
1
<<
18
;
int
val
=
lumDither
[
i
&
7
]
<<
12
;
int
j
;
for
(
j
=
0
;
j
<
lumFilterSize
;
j
++
)
val
+=
lumSrc
[
j
][
i
]
*
lumFilter
[
j
];
...
...
@@ -376,8 +391,8 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
if
(
dstFormat
==
PIX_FMT_NV12
)
for
(
i
=
0
;
i
<
chrDstW
;
i
++
)
{
int
u
=
1
<<
18
;
int
v
=
1
<<
18
;
int
u
=
chrDither
[
i
&
7
]
<<
12
;
int
v
=
chrDither
[(
i
+
3
)
&
7
]
<<
12
;
int
j
;
for
(
j
=
0
;
j
<
chrFilterSize
;
j
++
)
{
u
+=
chrUSrc
[
j
][
i
]
*
chrFilter
[
j
];
...
...
@@ -389,8 +404,8 @@ static void yuv2nv12X_c(SwsContext *c, const int16_t *lumFilter,
}
else
for
(
i
=
0
;
i
<
chrDstW
;
i
++
)
{
int
u
=
1
<<
18
;
int
v
=
1
<<
18
;
int
u
=
chrDither
[
i
&
7
]
<<
12
;
int
v
=
chrDither
[(
i
+
3
)
&
7
]
<<
12
;
int
j
;
for
(
j
=
0
;
j
<
chrFilterSize
;
j
++
)
{
u
+=
chrUSrc
[
j
][
i
]
*
chrFilter
[
j
];
...
...
@@ -2352,6 +2367,7 @@ static int swScale(SwsContext *c, const uint8_t* src[],
yuv2packed1_fn
yuv2packed1
=
c
->
yuv2packed1
;
yuv2packed2_fn
yuv2packed2
=
c
->
yuv2packed2
;
yuv2packedX_fn
yuv2packedX
=
c
->
yuv2packedX
;
int
should_dither
=
is9_OR_10BPS
(
c
->
srcFormat
)
||
is16BPS
(
c
->
srcFormat
);
/* vars which will change and which we need to store back in the context */
int
dstY
=
c
->
dstY
;
...
...
@@ -2401,6 +2417,9 @@ static int swScale(SwsContext *c, const uint8_t* src[],
lastInChrBuf
=
-
1
;
}
if
(
!
should_dither
)
{
c
->
chrDither8
=
c
->
lumDither8
=
ff_sws_pb_64
;
}
lastDstY
=
dstY
;
for
(;
dstY
<
dstH
;
dstY
++
)
{
...
...
@@ -2490,6 +2509,10 @@ static int swScale(SwsContext *c, const uint8_t* src[],
#if HAVE_MMX
updateMMXDitherTables
(
c
,
dstY
,
lumBufIndex
,
chrBufIndex
,
lastInLumBuf
,
lastInChrBuf
);
#endif
if
(
should_dither
)
{
c
->
chrDither8
=
dither_8x8_128
[
chrDstY
&
7
];
c
->
lumDither8
=
dither_8x8_128
[
dstY
&
7
];
}
if
(
dstY
>=
dstH
-
2
)
{
// hmm looks like we can't use MMX here without overwriting this array's tail
find_c_packed_planar_out_funcs
(
c
,
&
yuv2yuv1
,
&
yuv2yuvX
,
...
...
libswscale/swscale_internal.h
View file @
4e3e333a
...
...
@@ -321,6 +321,8 @@ typedef struct SwsContext {
#define ALP_MMX_FILTER_OFFSET "11*8+4*4*256*2+48"
#define UV_OFF "11*8+4*4*256*3+48"
#define UV_OFFx2 "11*8+4*4*256*3+56"
#define DITHER16 "11*8+4*4*256*3+64"
#define DITHER32 "11*8+4*4*256*3+80"
DECLARE_ALIGNED
(
8
,
uint64_t
,
redDither
);
DECLARE_ALIGNED
(
8
,
uint64_t
,
greenDither
);
...
...
@@ -345,6 +347,10 @@ typedef struct SwsContext {
int32_t
alpMmxFilter
[
4
*
MAX_FILTER_SIZE
];
DECLARE_ALIGNED
(
8
,
ptrdiff_t
,
uv_off
);
///< offset (in pixels) between u and v planes
DECLARE_ALIGNED
(
8
,
ptrdiff_t
,
uv_offx2
);
///< offset (in bytes) between u and v planes
uint16_t
dither16
[
8
];
uint32_t
dither32
[
8
];
const
uint8_t
*
chrDither8
,
*
lumDither8
;
#if HAVE_ALTIVEC
vector
signed
short
CY
;
...
...
libswscale/x86/swscale_template.c
View file @
4e3e333a
...
...
@@ -37,8 +37,8 @@
#define YSCALEYUV2YV12X(offset, dest, end, pos) \
__asm__ volatile(\
"movq
"VROUNDER_OFFSET"
(%0), %%mm3 \n\t"\
"movq
%%mm3
, %%mm4 \n\t"\
"movq
"DITHER16"+0
(%0), %%mm3 \n\t"\
"movq
"DITHER16"+8(%0)
, %%mm4 \n\t"\
"lea " offset "(%0), %%"REG_d" \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
".p2align 4 \n\t"
/* FIXME Unroll? */
\
...
...
@@ -60,8 +60,8 @@
MOVNTQ(%%mm3, (%1, %3))\
"add $8, %3 \n\t"\
"cmp %2, %3 \n\t"\
"movq
"VROUNDER_OFFSET"
(%0), %%mm3 \n\t"\
"movq
%%mm3
, %%mm4 \n\t"\
"movq
"DITHER16"+0
(%0), %%mm3 \n\t"\
"movq
"DITHER16"+8(%0)
, %%mm4 \n\t"\
"lea " offset "(%0), %%"REG_d" \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
"jb 1b \n\t"\
...
...
@@ -70,6 +70,42 @@
: "%"REG_d, "%"REG_S\
);
#if !COMPILE_TEMPLATE_MMX2
/**
 * Expand one row of eight 8-bit dither values into eight 16-bit words, each
 * arithmetically shifted right by 4, and store the 16 resulting bytes at
 * byte offset DITHER16 from &c->redDither (presumably the dither16[] field
 * of the context -- confirm against the struct layout).
 *
 * @param c         scaler context that receives the expanded values
 * @param srcDither pointer to eight dither bytes; all eight are read
 * @param rot       when non-zero the input row is rotated first, so that
 *                  output word i is derived from srcDither[(i + 3) & 7];
 *                  when zero, output word i is derived from srcDither[i]
 *
 * The asm uses mm0, mm3 and mm4 as scratch registers.
 */
static av_always_inline void
dither_8to16(SwsContext *c, const uint8_t *srcDither, int rot)
{
    if (rot) {
        __asm__ volatile(
            "pxor      %%mm0, %%mm0             \n\t"
            "movq       (%0), %%mm3             \n\t"
            /* rotate the 8 bytes right by 3 byte positions */
            "movq      %%mm3, %%mm4             \n\t"
            "psrlq       $24, %%mm3             \n\t"
            "psllq       $40, %%mm4             \n\t"
            "por       %%mm4, %%mm3             \n\t"
            /* zero-extend bytes to words: mm3 = words 0..3, mm4 = words 4..7 */
            "movq      %%mm3, %%mm4             \n\t"
            "punpcklbw %%mm0, %%mm3             \n\t"
            "punpckhbw %%mm0, %%mm4             \n\t"
            "psraw        $4, %%mm3             \n\t"
            "psraw        $4, %%mm4             \n\t"
            "movq      %%mm3, " DITHER16 "+0(%1) \n\t"
            "movq      %%mm4, " DITHER16 "+8(%1) \n\t"
            :: "r" (srcDither), "r" (&c->redDither)
            /* the asm writes into *c behind the compiler's back */
            : "memory"
        );
    } else {
        __asm__ volatile(
            "pxor      %%mm0, %%mm0             \n\t"
            "movq       (%0), %%mm3             \n\t"
            /* zero-extend bytes to words: mm3 = words 0..3, mm4 = words 4..7 */
            "movq      %%mm3, %%mm4             \n\t"
            "punpcklbw %%mm0, %%mm3             \n\t"
            "punpckhbw %%mm0, %%mm4             \n\t"
            "psraw        $4, %%mm3             \n\t"
            "psraw        $4, %%mm4             \n\t"
            "movq      %%mm3, " DITHER16 "+0(%1) \n\t"
            "movq      %%mm4, " DITHER16 "+8(%1) \n\t"
            :: "r" (srcDither), "r" (&c->redDither)
            /* the asm writes into *c behind the compiler's back */
            : "memory"
        );
    }
}
#endif
static
void
RENAME
(
yuv2yuvX
)(
SwsContext
*
c
,
const
int16_t
*
lumFilter
,
const
int16_t
**
lumSrc
,
int
lumFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrUSrc
,
...
...
@@ -79,12 +115,16 @@ static void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
{
uint8_t
*
yDest
=
dest
[
0
],
*
uDest
=
dest
[
1
],
*
vDest
=
dest
[
2
],
*
aDest
=
CONFIG_SWSCALE_ALPHA
?
dest
[
3
]
:
NULL
;
const
uint8_t
*
lumDither
=
c
->
lumDither8
,
*
chrDither
=
c
->
chrDither8
;
if
(
uDest
)
{
x86_reg
uv_off
=
c
->
uv_offx2
>>
1
;
dither_8to16
(
c
,
chrDither
,
0
);
YSCALEYUV2YV12X
(
CHR_MMX_FILTER_OFFSET
,
uDest
,
chrDstW
,
0
)
dither_8to16
(
c
,
chrDither
,
1
);
YSCALEYUV2YV12X
(
CHR_MMX_FILTER_OFFSET
,
vDest
-
uv_off
,
chrDstW
+
uv_off
,
uv_off
)
}
dither_8to16
(
c
,
lumDither
,
0
);
if
(
CONFIG_SWSCALE_ALPHA
&&
aDest
)
{
YSCALEYUV2YV12X
(
ALP_MMX_FILTER_OFFSET
,
aDest
,
dstW
,
0
)
}
...
...
@@ -95,10 +135,10 @@ static void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
#define YSCALEYUV2YV12X_ACCURATE(offset, dest, end, pos) \
__asm__ volatile(\
"lea " offset "(%0), %%"REG_d" \n\t"\
"
pxor %%mm4
, %%mm4 \n\t"\
"
pxor %%mm5
, %%mm5 \n\t"\
"
pxor %%mm6
, %%mm6 \n\t"\
"
pxor %%mm7
, %%mm7 \n\t"\
"
movq "DITHER32"+0(%0)
, %%mm4 \n\t"\
"
movq "DITHER32"+8(%0)
, %%mm5 \n\t"\
"
movq "DITHER32"+16(%0)
, %%mm6 \n\t"\
"
movq "DITHER32"+24(%0)
, %%mm7 \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
".p2align 4 \n\t"\
"1: \n\t"\
...
...
@@ -142,10 +182,10 @@ static void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
"add $8, %3 \n\t"\
"cmp %2, %3 \n\t"\
"lea " offset "(%0), %%"REG_d" \n\t"\
"
pxor %%mm4
, %%mm4 \n\t"\
"
pxor %%mm5
, %%mm5 \n\t"\
"
pxor %%mm6
, %%mm6 \n\t"\
"
pxor %%mm7
, %%mm7 \n\t"\
"
movq "DITHER32"+0(%0)
, %%mm4 \n\t"\
"
movq "DITHER32"+8(%0)
, %%mm5 \n\t"\
"
movq "DITHER32"+16(%0)
, %%mm6 \n\t"\
"
movq "DITHER32"+24(%0)
, %%mm7 \n\t"\
"mov (%%"REG_d"), %%"REG_S" \n\t"\
"jb 1b \n\t"\
:: "r" (&c->redDither),\
...
...
@@ -153,6 +193,62 @@ static void RENAME(yuv2yuvX)(SwsContext *c, const int16_t *lumFilter,
: "%"REG_a, "%"REG_d, "%"REG_S\
);
#if !COMPILE_TEMPLATE_MMX2
/**
 * Expand one row of eight 8-bit dither values into eight 32-bit values, each
 * equal to (dither << 12), and store the 32 resulting bytes at byte offset
 * DITHER32 from &c->redDither (presumably the dither32[] field of the
 * context -- confirm against the struct layout). YSCALEYUV2YV12X_ACCURATE
 * loads these four quadwords into mm4..mm7 as its initial accumulators.
 *
 * @param c         scaler context that receives the expanded values
 * @param srcDither pointer to eight dither bytes; all eight are read
 * @param rot       when non-zero the input row is rotated first, so that
 *                  output dword i is derived from srcDither[(i + 3) & 7];
 *                  when zero, output dword i is derived from srcDither[i]
 *
 * Fixes relative to the previous version:
 *  - the four result registers mm4..mm7 are stored (previously mm3, which
 *    this asm never initialises, and mm4 three times were written, leaving
 *    three of the four quadwords wrong);
 *  - the scaling shift is done per dword (pslld) rather than per word
 *    (psllw), since dither << 12 does not fit in 16 bits;
 *  - a "memory" clobber tells the compiler that *c is written.
 *
 * The asm uses mm0 and mm4..mm7 as scratch registers.
 */
static av_always_inline void
dither_8to32(SwsContext *c, const uint8_t *srcDither, int rot)
{
    if (rot) {
        __asm__ volatile(
            "pxor      %%mm0, %%mm0              \n\t"
            "movq       (%0), %%mm4              \n\t"
            /* rotate the 8 bytes right by 3 byte positions */
            "movq      %%mm4, %%mm5              \n\t"
            "psrlq       $24, %%mm4              \n\t"
            "psllq       $40, %%mm5              \n\t"
            "por       %%mm5, %%mm4              \n\t"
            /* bytes -> words: mm4 = words 0..3, mm6 = words 4..7 */
            "movq      %%mm4, %%mm6              \n\t"
            "punpcklbw %%mm0, %%mm4              \n\t"
            "punpckhbw %%mm0, %%mm6              \n\t"
            /* words -> dwords: mm4 = 0,1  mm5 = 2,3  mm6 = 4,5  mm7 = 6,7 */
            "movq      %%mm4, %%mm5              \n\t"
            "movq      %%mm6, %%mm7              \n\t"
            "punpcklwd %%mm0, %%mm4              \n\t"
            "punpckhwd %%mm0, %%mm5              \n\t"
            "punpcklwd %%mm0, %%mm6              \n\t"
            "punpckhwd %%mm0, %%mm7              \n\t"
            "pslld       $12, %%mm4              \n\t"
            "pslld       $12, %%mm5              \n\t"
            "pslld       $12, %%mm6              \n\t"
            "pslld       $12, %%mm7              \n\t"
            "movq      %%mm4, " DITHER32 "+0(%1)  \n\t"
            "movq      %%mm5, " DITHER32 "+8(%1)  \n\t"
            "movq      %%mm6, " DITHER32 "+16(%1) \n\t"
            "movq      %%mm7, " DITHER32 "+24(%1) \n\t"
            :: "r" (srcDither), "r" (&c->redDither)
            : "memory"
        );
    } else {
        __asm__ volatile(
            "pxor      %%mm0, %%mm0              \n\t"
            "movq       (%0), %%mm4              \n\t"
            /* bytes -> words: mm4 = words 0..3, mm6 = words 4..7 */
            "movq      %%mm4, %%mm6              \n\t"
            "punpcklbw %%mm0, %%mm4              \n\t"
            "punpckhbw %%mm0, %%mm6              \n\t"
            /* words -> dwords: mm4 = 0,1  mm5 = 2,3  mm6 = 4,5  mm7 = 6,7 */
            "movq      %%mm4, %%mm5              \n\t"
            "movq      %%mm6, %%mm7              \n\t"
            "punpcklwd %%mm0, %%mm4              \n\t"
            "punpckhwd %%mm0, %%mm5              \n\t"
            "punpcklwd %%mm0, %%mm6              \n\t"
            "punpckhwd %%mm0, %%mm7              \n\t"
            "pslld       $12, %%mm4              \n\t"
            "pslld       $12, %%mm5              \n\t"
            "pslld       $12, %%mm6              \n\t"
            "pslld       $12, %%mm7              \n\t"
            "movq      %%mm4, " DITHER32 "+0(%1)  \n\t"
            "movq      %%mm5, " DITHER32 "+8(%1)  \n\t"
            "movq      %%mm6, " DITHER32 "+16(%1) \n\t"
            "movq      %%mm7, " DITHER32 "+24(%1) \n\t"
            :: "r" (srcDither), "r" (&c->redDither)
            : "memory"
        );
    }
}
#endif
static
void
RENAME
(
yuv2yuvX_ar
)(
SwsContext
*
c
,
const
int16_t
*
lumFilter
,
const
int16_t
**
lumSrc
,
int
lumFilterSize
,
const
int16_t
*
chrFilter
,
const
int16_t
**
chrUSrc
,
...
...
@@ -162,12 +258,16 @@ static void RENAME(yuv2yuvX_ar)(SwsContext *c, const int16_t *lumFilter,
{
uint8_t
*
yDest
=
dest
[
0
],
*
uDest
=
dest
[
1
],
*
vDest
=
dest
[
2
],
*
aDest
=
CONFIG_SWSCALE_ALPHA
?
dest
[
3
]
:
NULL
;
const
uint8_t
*
lumDither
=
c
->
lumDither8
,
*
chrDither
=
c
->
chrDither8
;
if
(
uDest
)
{
x86_reg
uv_off
=
c
->
uv_offx2
>>
1
;
dither_8to32
(
c
,
chrDither
,
0
);
YSCALEYUV2YV12X_ACCURATE
(
CHR_MMX_FILTER_OFFSET
,
uDest
,
chrDstW
,
0
)
dither_8to32
(
c
,
chrDither
,
1
);
YSCALEYUV2YV12X_ACCURATE
(
CHR_MMX_FILTER_OFFSET
,
vDest
-
uv_off
,
chrDstW
+
uv_off
,
uv_off
)
}
dither_8to32
(
c
,
lumDither
,
0
);
if
(
CONFIG_SWSCALE_ALPHA
&&
aDest
)
{
YSCALEYUV2YV12X_ACCURATE
(
ALP_MMX_FILTER_OFFSET
,
aDest
,
dstW
,
0
)
}
...
...
@@ -220,19 +320,20 @@ static void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
chrVSrc
+
chrDstW
,
alpSrc
+
dstW
};
x86_reg
counter
[
4
]
=
{
dstW
,
chrDstW
,
chrDstW
,
dstW
};
const
uint8_t
*
lumDither
=
c
->
lumDither8
,
*
chrDither
=
c
->
chrDither8
;
while
(
p
--
)
{
if
(
dst
[
p
])
{
dither_8to16
(
c
,
(
p
==
2
||
p
==
3
)
?
chrDither
:
lumDither
,
p
==
2
);
__asm__
volatile
(
"mov %2, %%"
REG_a
"
\n\t
"
"pcmpeqw %%mm7, %%mm7
\n\t
"
"psrlw $15, %%mm7
\n\t
"
"psllw $6, %%mm7
\n\t
"
"movq "
DITHER16
"+0(%3), %%mm6
\n\t
"
"movq "
DITHER16
"+8(%3), %%mm7
\n\t
"
".p2align 4
\n\t
"
/* FIXME Unroll? */
"1:
\n\t
"
"movq (%0, %%"
REG_a
", 2), %%mm0
\n\t
"
"movq 8(%0, %%"
REG_a
", 2), %%mm1
\n\t
"
"paddsw %%mm
7
, %%mm0
\n\t
"
"paddsw %%mm
6
, %%mm0
\n\t
"
"paddsw %%mm7, %%mm1
\n\t
"
"psraw $7, %%mm0
\n\t
"
"psraw $7, %%mm1
\n\t
"
...
...
@@ -241,7 +342,7 @@ static void RENAME(yuv2yuv1_ar)(SwsContext *c, const int16_t *lumSrc,
"add $8, %%"
REG_a
"
\n\t
"
"jnc 1b
\n\t
"
::
"r"
(
src
[
p
]),
"r"
(
dst
[
p
]
+
counter
[
p
]),
"g"
(
-
counter
[
p
])
"g"
(
-
counter
[
p
])
,
"r"
(
&
c
->
redDither
)
:
"%"
REG_a
);
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment