Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
47f21232
Commit
47f21232
authored
Jan 08, 2017
by
James Almer
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
huffyuvdsp: move functions only used by huffyuv from lossless_videodsp
Signed-off-by:
James Almer
<
jamrial@gmail.com
>
parent
cf9ef839
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
208 additions
and
204 deletions
+208
-204
huffyuvdec.c
libavcodec/huffyuvdec.c
+4
-4
huffyuvdsp.c
libavcodec/huffyuvdsp.c
+34
-2
huffyuvdsp.h
libavcodec/huffyuvdsp.h
+10
-3
lagarith.c
libavcodec/lagarith.c
+1
-1
lossless_videodsp.c
libavcodec/lossless_videodsp.c
+2
-34
lossless_videodsp.h
libavcodec/lossless_videodsp.h
+3
-6
magicyuv.c
libavcodec/magicyuv.c
+1
-1
lossless_videodsp_altivec.c
libavcodec/ppc/lossless_videodsp_altivec.c
+1
-1
utvideodec.c
libavcodec/utvideodec.c
+1
-1
vble.c
libavcodec/vble.c
+1
-1
huffyuvdsp.asm
libavcodec/x86/huffyuvdsp.asm
+137
-0
huffyuvdsp_init.c
libavcodec/x86/huffyuvdsp_init.c
+12
-1
lossless_videodsp.asm
libavcodec/x86/lossless_videodsp.asm
+0
-136
lossless_videodsp_init.c
libavcodec/x86/lossless_videodsp_init.c
+1
-13
No files found.
libavcodec/huffyuvdec.c
View file @
47f21232
...
...
@@ -297,8 +297,8 @@ static av_cold int decode_init(AVCodecContext *avctx)
if
(
ret
<
0
)
return
ret
;
ff_huffyuvdsp_init
(
&
s
->
hdsp
);
ff_llviddsp_init
(
&
s
->
llviddsp
,
avctx
);
ff_huffyuvdsp_init
(
&
s
->
hdsp
,
avctx
);
ff_llviddsp_init
(
&
s
->
llviddsp
);
memset
(
s
->
vlc
,
0
,
4
*
sizeof
(
VLC
));
s
->
interlaced
=
avctx
->
height
>
288
;
...
...
@@ -891,7 +891,7 @@ static void add_bytes(HYuvContext *s, uint8_t *dst, uint8_t *src, int w)
if
(
s
->
bps
<=
8
)
{
s
->
llviddsp
.
add_bytes
(
dst
,
src
,
w
);
}
else
{
s
->
llvid
dsp
.
add_int16
((
uint16_t
*
)
dst
,
(
const
uint16_t
*
)
src
,
s
->
n
-
1
,
w
);
s
->
h
dsp
.
add_int16
((
uint16_t
*
)
dst
,
(
const
uint16_t
*
)
src
,
s
->
n
-
1
,
w
);
}
}
...
...
@@ -900,7 +900,7 @@ static void add_median_prediction(HYuvContext *s, uint8_t *dst, const uint8_t *s
if
(
s
->
bps
<=
8
)
{
s
->
llviddsp
.
add_median_pred
(
dst
,
src
,
diff
,
w
,
left
,
left_top
);
}
else
{
s
->
llvid
dsp
.
add_hfyu_median_pred_int16
((
uint16_t
*
)
dst
,
(
const
uint16_t
*
)
src
,
(
const
uint16_t
*
)
diff
,
s
->
n
-
1
,
w
,
left
,
left_top
);
s
->
h
dsp
.
add_hfyu_median_pred_int16
((
uint16_t
*
)
dst
,
(
const
uint16_t
*
)
src
,
(
const
uint16_t
*
)
diff
,
s
->
n
-
1
,
w
,
left
,
left_top
);
}
}
static
int
decode_frame
(
AVCodecContext
*
avctx
,
void
*
data
,
int
*
got_frame
,
...
...
libavcodec/huffyuvdsp.c
View file @
47f21232
...
...
@@ -23,6 +23,36 @@
#include "mathops.h"
#include "huffyuvdsp.h"
static
void
add_int16_c
(
uint16_t
*
dst
,
const
uint16_t
*
src
,
unsigned
mask
,
int
w
){
long
i
;
unsigned
long
pw_lsb
=
(
mask
>>
1
)
*
0x0001000100010001ULL
;
unsigned
long
pw_msb
=
pw_lsb
+
0x0001000100010001ULL
;
for
(
i
=
0
;
i
<=
w
-
(
int
)
sizeof
(
long
)
/
2
;
i
+=
sizeof
(
long
)
/
2
)
{
long
a
=
*
(
long
*
)(
src
+
i
);
long
b
=
*
(
long
*
)(
dst
+
i
);
*
(
long
*
)(
dst
+
i
)
=
((
a
&
pw_lsb
)
+
(
b
&
pw_lsb
))
^
((
a
^
b
)
&
pw_msb
);
}
for
(;
i
<
w
;
i
++
)
dst
[
i
]
=
(
dst
[
i
]
+
src
[
i
])
&
mask
;
}
static
void
add_hfyu_median_pred_int16_c
(
uint16_t
*
dst
,
const
uint16_t
*
src
,
const
uint16_t
*
diff
,
unsigned
mask
,
int
w
,
int
*
left
,
int
*
left_top
){
int
i
;
uint16_t
l
,
lt
;
l
=
*
left
;
lt
=
*
left_top
;
for
(
i
=
0
;
i
<
w
;
i
++
){
l
=
(
mid_pred
(
l
,
src
[
i
],
(
l
+
src
[
i
]
-
lt
)
&
mask
)
+
diff
[
i
])
&
mask
;
lt
=
src
[
i
];
dst
[
i
]
=
l
;
}
*
left
=
l
;
*
left_top
=
lt
;
}
static
void
add_hfyu_left_pred_bgr32_c
(
uint8_t
*
dst
,
const
uint8_t
*
src
,
intptr_t
w
,
uint8_t
*
left
)
{
...
...
@@ -47,10 +77,12 @@ static void add_hfyu_left_pred_bgr32_c(uint8_t *dst, const uint8_t *src,
left
[
A
]
=
a
;
}
av_cold
void
ff_huffyuvdsp_init
(
HuffYUVDSPContext
*
c
)
av_cold
void
ff_huffyuvdsp_init
(
HuffYUVDSPContext
*
c
,
AVCodecContext
*
avctx
)
{
c
->
add_int16
=
add_int16_c
;
c
->
add_hfyu_median_pred_int16
=
add_hfyu_median_pred_int16_c
;
c
->
add_hfyu_left_pred_bgr32
=
add_hfyu_left_pred_bgr32_c
;
if
(
ARCH_X86
)
ff_huffyuvdsp_init_x86
(
c
);
ff_huffyuvdsp_init_x86
(
c
,
avctx
);
}
libavcodec/huffyuvdsp.h
View file @
47f21232
...
...
@@ -21,6 +21,7 @@
#include <stdint.h>
#include "config.h"
#include "avcodec.h"
#if HAVE_BIGENDIAN
#define B 3
...
...
@@ -35,12 +36,18 @@
#endif
typedef
struct
HuffYUVDSPContext
{
void
(
*
add_int16
)(
uint16_t
*
dst
/*align 16*/
,
const
uint16_t
*
src
/*align 16*/
,
unsigned
mask
,
int
w
);
void
(
*
add_hfyu_median_pred_int16
)(
uint16_t
*
dst
,
const
uint16_t
*
top
,
const
uint16_t
*
diff
,
unsigned
mask
,
int
w
,
int
*
left
,
int
*
left_top
);
void
(
*
add_hfyu_left_pred_bgr32
)(
uint8_t
*
dst
,
const
uint8_t
*
src
,
intptr_t
w
,
uint8_t
*
left
);
}
HuffYUVDSPContext
;
void
ff_huffyuvdsp_init
(
HuffYUVDSPContext
*
c
);
void
ff_huffyuvdsp_init_ppc
(
HuffYUVDSPContext
*
c
);
void
ff_huffyuvdsp_init_x86
(
HuffYUVDSPContext
*
c
);
void
ff_huffyuvdsp_init
(
HuffYUVDSPContext
*
c
,
AVCodecContext
*
avctx
);
void
ff_huffyuvdsp_init_ppc
(
HuffYUVDSPContext
*
c
,
AVCodecContext
*
avctx
);
void
ff_huffyuvdsp_init_x86
(
HuffYUVDSPContext
*
c
,
AVCodecContext
*
avctx
);
#endif
/* AVCODEC_HUFFYUVDSP_H */
libavcodec/lagarith.c
View file @
47f21232
...
...
@@ -725,7 +725,7 @@ static av_cold int lag_decode_init(AVCodecContext *avctx)
LagarithContext
*
l
=
avctx
->
priv_data
;
l
->
avctx
=
avctx
;
ff_llviddsp_init
(
&
l
->
llviddsp
,
avctx
);
ff_llviddsp_init
(
&
l
->
llviddsp
);
return
0
;
}
...
...
libavcodec/lossless_videodsp.c
View file @
47f21232
...
...
@@ -79,36 +79,6 @@ static int add_left_pred_c(uint8_t *dst, const uint8_t *src, intptr_t w,
return
acc
;
}
static
void
add_int16_c
(
uint16_t
*
dst
,
const
uint16_t
*
src
,
unsigned
mask
,
int
w
){
long
i
;
unsigned
long
pw_lsb
=
(
mask
>>
1
)
*
0x0001000100010001ULL
;
unsigned
long
pw_msb
=
pw_lsb
+
0x0001000100010001ULL
;
for
(
i
=
0
;
i
<=
w
-
(
int
)
sizeof
(
long
)
/
2
;
i
+=
sizeof
(
long
)
/
2
)
{
long
a
=
*
(
long
*
)(
src
+
i
);
long
b
=
*
(
long
*
)(
dst
+
i
);
*
(
long
*
)(
dst
+
i
)
=
((
a
&
pw_lsb
)
+
(
b
&
pw_lsb
))
^
((
a
^
b
)
&
pw_msb
);
}
for
(;
i
<
w
;
i
++
)
dst
[
i
]
=
(
dst
[
i
]
+
src
[
i
])
&
mask
;
}
static
void
add_hfyu_median_pred_int16_c
(
uint16_t
*
dst
,
const
uint16_t
*
src
,
const
uint16_t
*
diff
,
unsigned
mask
,
int
w
,
int
*
left
,
int
*
left_top
){
int
i
;
uint16_t
l
,
lt
;
l
=
*
left
;
lt
=
*
left_top
;
for
(
i
=
0
;
i
<
w
;
i
++
){
l
=
(
mid_pred
(
l
,
src
[
i
],
(
l
+
src
[
i
]
-
lt
)
&
mask
)
+
diff
[
i
])
&
mask
;
lt
=
src
[
i
];
dst
[
i
]
=
l
;
}
*
left
=
l
;
*
left_top
=
lt
;
}
static
int
add_hfyu_left_pred_int16_c
(
uint16_t
*
dst
,
const
uint16_t
*
src
,
unsigned
mask
,
int
w
,
unsigned
acc
){
int
i
;
...
...
@@ -129,16 +99,14 @@ static int add_hfyu_left_pred_int16_c(uint16_t *dst, const uint16_t *src, unsign
}
void
ff_llviddsp_init
(
LLVidDSPContext
*
c
,
AVCodecContext
*
avctx
)
void
ff_llviddsp_init
(
LLVidDSPContext
*
c
)
{
c
->
add_bytes
=
add_bytes_c
;
c
->
add_median_pred
=
add_median_pred_c
;
c
->
add_left_pred
=
add_left_pred_c
;
c
->
add_int16
=
add_int16_c
;
c
->
add_hfyu_left_pred_int16
=
add_hfyu_left_pred_int16_c
;
c
->
add_hfyu_median_pred_int16
=
add_hfyu_median_pred_int16_c
;
if
(
ARCH_X86
)
ff_llviddsp_init_x86
(
c
,
avctx
);
ff_llviddsp_init_x86
(
c
);
}
libavcodec/lossless_videodsp.h
View file @
47f21232
...
...
@@ -34,14 +34,11 @@ typedef struct LLVidDSPContext {
int
(
*
add_left_pred
)(
uint8_t
*
dst
,
const
uint8_t
*
src
,
intptr_t
w
,
int
left
);
void
(
*
add_int16
)(
uint16_t
*
dst
/*align 16*/
,
const
uint16_t
*
src
/*align 16*/
,
unsigned
mask
,
int
w
);
void
(
*
add_hfyu_median_pred_int16
)(
uint16_t
*
dst
,
const
uint16_t
*
top
,
const
uint16_t
*
diff
,
unsigned
mask
,
int
w
,
int
*
left
,
int
*
left_top
);
int
(
*
add_hfyu_left_pred_int16
)(
uint16_t
*
dst
,
const
uint16_t
*
src
,
unsigned
mask
,
int
w
,
unsigned
left
);
}
LLVidDSPContext
;
void
ff_llviddsp_init
(
LLVidDSPContext
*
llviddsp
,
AVCodecContext
*
avctx
);
void
ff_llviddsp_init_x86
(
LLVidDSPContext
*
llviddsp
,
AVCodecContext
*
avctx
);
void
ff_llviddsp_init_ppc
(
LLVidDSPContext
*
llviddsp
,
AVCodecContext
*
avctx
);
void
ff_llviddsp_init
(
LLVidDSPContext
*
llviddsp
);
void
ff_llviddsp_init_x86
(
LLVidDSPContext
*
llviddsp
);
void
ff_llviddsp_init_ppc
(
LLVidDSPContext
*
llviddsp
);
#endif //AVCODEC_LOSSLESS_VIDEODSP_H
libavcodec/magicyuv.c
View file @
47f21232
...
...
@@ -697,7 +697,7 @@ static int magy_init_thread_copy(AVCodecContext *avctx)
static
av_cold
int
magy_decode_init
(
AVCodecContext
*
avctx
)
{
MagicYUVContext
*
s
=
avctx
->
priv_data
;
ff_llviddsp_init
(
&
s
->
llviddsp
,
avctx
);
ff_llviddsp_init
(
&
s
->
llviddsp
);
return
0
;
}
...
...
libavcodec/ppc/lossless_videodsp_altivec.c
View file @
47f21232
...
...
@@ -51,7 +51,7 @@ static void add_bytes_altivec(uint8_t *dst, uint8_t *src, intptr_t w)
}
#endif
/* HAVE_ALTIVEC */
av_cold
void
ff_llviddsp_init_ppc
(
LLVidDSPContext
*
c
,
AVCodecContext
*
avctx
)
av_cold
void
ff_llviddsp_init_ppc
(
LLVidDSPContext
*
c
)
{
#if HAVE_ALTIVEC
if
(
!
PPC_ALTIVEC
(
av_get_cpu_flags
()))
...
...
libavcodec/utvideodec.c
View file @
47f21232
...
...
@@ -827,7 +827,7 @@ static av_cold int decode_init(AVCodecContext *avctx)
c
->
avctx
=
avctx
;
ff_bswapdsp_init
(
&
c
->
bdsp
);
ff_llviddsp_init
(
&
c
->
llviddsp
,
avctx
);
ff_llviddsp_init
(
&
c
->
llviddsp
);
if
(
avctx
->
extradata_size
>=
16
)
{
av_log
(
avctx
,
AV_LOG_DEBUG
,
"Encoder version %d.%d.%d.%d
\n
"
,
...
...
libavcodec/vble.c
View file @
47f21232
...
...
@@ -185,7 +185,7 @@ static av_cold int vble_decode_init(AVCodecContext *avctx)
/* Stash for later use */
ctx
->
avctx
=
avctx
;
ff_llviddsp_init
(
&
ctx
->
llviddsp
,
avctx
);
ff_llviddsp_init
(
&
ctx
->
llviddsp
);
avctx
->
pix_fmt
=
AV_PIX_FMT_YUV420P
;
avctx
->
bits_per_raw_sample
=
8
;
...
...
libavcodec/x86/huffyuvdsp.asm
View file @
47f21232
...
...
@@ -24,6 +24,78 @@
SECTION
.
text
%macro
INT16_LOOP
2
; %1 = a/u (aligned/unaligned), %2 = add/sub
movd
m4
,
maskd
SPLATW
m4
,
m4
add
wd
,
wd
test
wq
,
2
*
mmsize
-
1
jz
%%
.
tomainloop
push
tmpq
%%
.
wordloop
:
sub
wq
,
2
%ifidn
%2
,
add
mov
tmpw
,
[
srcq
+
wq
]
add
tmpw
,
[
dstq
+
wq
]
%else
mov
tmpw
,
[
src1q
+
wq
]
sub
tmpw
,
[
src2q
+
wq
]
%endif
and
tmpw
,
maskw
mov
[
dstq
+
wq
]
,
tmpw
test
wq
,
2
*
mmsize
-
1
jnz
%%
.
wordloop
pop
tmpq
%%
.
tomainloop
:
%ifidn
%2
,
add
add
srcq
,
wq
%else
add
src1q
,
wq
add
src2q
,
wq
%endif
add
dstq
,
wq
neg
wq
jz
%%
.
end
%%
.
loop
:
%ifidn
%2
,
add
mov%1
m0
,
[
srcq
+
wq
]
mov%1
m1
,
[
dstq
+
wq
]
mov%1
m2
,
[
srcq
+
wq
+
mmsize
]
mov%1
m3
,
[
dstq
+
wq
+
mmsize
]
%else
mov%1
m0
,
[
src1q
+
wq
]
mov%1
m1
,
[
src2q
+
wq
]
mov%1
m2
,
[
src1q
+
wq
+
mmsize
]
mov%1
m3
,
[
src2q
+
wq
+
mmsize
]
%endif
p%2
w
m0
,
m1
p%2
w
m2
,
m3
pand
m0
,
m4
pand
m2
,
m4
mov%1
[
dstq
+
wq
]
,
m0
mov%1
[
dstq
+
wq
+
mmsize
]
,
m2
add
wq
,
2
*
mmsize
jl
%%
.
loop
%%
.
end
:
RET
%endmacro
%if
ARCH_X86_32
INIT_MMX
mmx
cglobal
add_int16
,
4
,
4
,
5
,
dst
,
src
,
mask
,
w
,
tmp
INT16_LOOP
a
,
add
%endif
INIT_XMM
sse2
cglobal
add_int16
,
4
,
4
,
5
,
dst
,
src
,
mask
,
w
,
tmp
test
srcq
,
mmsize
-
1
jnz
.
unaligned
test
dstq
,
mmsize
-
1
jnz
.
unaligned
INT16_LOOP
a
,
add
.
unaligned
:
INT16_LOOP
u
,
add
; void add_hfyu_left_pred_bgr32(uint8_t *dst, const uint8_t *src,
; intptr_t w, uint8_t *left)
%macro
LEFT_BGR32
0
...
...
@@ -63,3 +135,68 @@ LEFT_BGR32
%endif
INIT_XMM
sse2
LEFT_BGR32
; void add_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int mask, int w, int *left, int *left_top)
INIT_MMX
mmxext
cglobal
add_hfyu_median_pred_int16
,
7
,
7
,
0
,
dst
,
top
,
diff
,
mask
,
w
,
left
,
left_top
add
wd
,
wd
movd
mm6
,
maskd
SPLATW
mm6
,
mm6
movq
mm0
,
[topq]
movq
mm2
,
mm0
movd
mm4
,
[
left_topq
]
psllq
mm2
,
16
movq
mm1
,
mm0
por
mm4
,
mm2
movd
mm3
,
[leftq]
psubw
mm0
,
mm4
; t-tl
add
dstq
,
wq
add
topq
,
wq
add
diffq
,
wq
neg
wq
jmp
.
skip
.
loop
:
movq
mm4
,
[
topq
+
wq
]
movq
mm0
,
mm4
psllq
mm4
,
16
por
mm4
,
mm1
movq
mm1
,
mm0
; t
psubw
mm0
,
mm4
; t-tl
.
skip
:
movq
mm2
,
[
diffq
+
wq
]
%assign
i
0
%rep
4
movq
mm4
,
mm0
paddw
mm4
,
mm3
; t-tl+l
pand
mm4
,
mm6
movq
mm5
,
mm3
pmaxsw
mm3
,
mm1
pminsw
mm5
,
mm1
pminsw
mm3
,
mm4
pmaxsw
mm3
,
mm5
; median
paddw
mm3
,
mm2
; +residual
pand
mm3
,
mm6
%if
i
==
0
movq
mm7
,
mm3
psllq
mm7
,
48
%else
movq
mm4
,
mm3
psrlq
mm7
,
16
psllq
mm4
,
48
por
mm7
,
mm4
%endif
%if
i
<
3
psrlq
mm0
,
16
psrlq
mm1
,
16
psrlq
mm2
,
16
%endif
%assign
i
i
+
1
%endrep
movq
[
dstq
+
wq
]
,
mm7
add
wq
,
8
jl
.
loop
movzx
r2d
,
word
[
dstq
-
2
]
mov
[leftq],
r2d
movzx
r2d
,
word
[
topq
-
2
]
mov
[
left_topq
]
,
r2d
RET
libavcodec/x86/huffyuvdsp_init.c
View file @
47f21232
...
...
@@ -21,24 +21,35 @@
#include "config.h"
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/pixdesc.h"
#include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/huffyuvdsp.h"
void
ff_add_int16_mmx
(
uint16_t
*
dst
,
const
uint16_t
*
src
,
unsigned
mask
,
int
w
);
void
ff_add_int16_sse2
(
uint16_t
*
dst
,
const
uint16_t
*
src
,
unsigned
mask
,
int
w
);
void
ff_add_hfyu_left_pred_bgr32_mmx
(
uint8_t
*
dst
,
const
uint8_t
*
src
,
intptr_t
w
,
uint8_t
*
left
);
void
ff_add_hfyu_left_pred_bgr32_sse2
(
uint8_t
*
dst
,
const
uint8_t
*
src
,
intptr_t
w
,
uint8_t
*
left
);
void
ff_add_hfyu_median_pred_int16_mmxext
(
uint16_t
*
dst
,
const
uint16_t
*
top
,
const
uint16_t
*
diff
,
unsigned
mask
,
int
w
,
int
*
left
,
int
*
left_top
);
av_cold
void
ff_huffyuvdsp_init_x86
(
HuffYUVDSPContext
*
c
)
av_cold
void
ff_huffyuvdsp_init_x86
(
HuffYUVDSPContext
*
c
,
AVCodecContext
*
avctx
)
{
int
cpu_flags
=
av_get_cpu_flags
();
const
AVPixFmtDescriptor
*
pix_desc
=
av_pix_fmt_desc_get
(
avctx
->
pix_fmt
);
if
(
ARCH_X86_32
&&
EXTERNAL_MMX
(
cpu_flags
))
{
c
->
add_hfyu_left_pred_bgr32
=
ff_add_hfyu_left_pred_bgr32_mmx
;
c
->
add_int16
=
ff_add_int16_mmx
;
}
if
(
EXTERNAL_MMXEXT
(
cpu_flags
)
&&
pix_desc
&&
pix_desc
->
comp
[
0
].
depth
<
16
)
{
c
->
add_hfyu_median_pred_int16
=
ff_add_hfyu_median_pred_int16_mmxext
;
}
if
(
EXTERNAL_SSE2
(
cpu_flags
))
{
c
->
add_int16
=
ff_add_int16_sse2
;
c
->
add_hfyu_left_pred_bgr32
=
ff_add_hfyu_left_pred_bgr32_sse2
;
}
}
libavcodec/x86/lossless_videodsp.asm
View file @
47f21232
...
...
@@ -217,77 +217,6 @@ ADD_BYTES
INIT_XMM
sse2
ADD_BYTES
%macro
INT16_LOOP
2
; %1 = a/u (aligned/unaligned), %2 = add/sub
movd
m4
,
maskd
SPLATW
m4
,
m4
add
wd
,
wd
test
wq
,
2
*
mmsize
-
1
jz
%%
.
tomainloop
push
tmpq
%%
.
wordloop
:
sub
wq
,
2
%ifidn
%2
,
add
mov
tmpw
,
[
srcq
+
wq
]
add
tmpw
,
[
dstq
+
wq
]
%else
mov
tmpw
,
[
src1q
+
wq
]
sub
tmpw
,
[
src2q
+
wq
]
%endif
and
tmpw
,
maskw
mov
[
dstq
+
wq
]
,
tmpw
test
wq
,
2
*
mmsize
-
1
jnz
%%
.
wordloop
pop
tmpq
%%
.
tomainloop
:
%ifidn
%2
,
add
add
srcq
,
wq
%else
add
src1q
,
wq
add
src2q
,
wq
%endif
add
dstq
,
wq
neg
wq
jz
%%
.
end
%%
.
loop
:
%ifidn
%2
,
add
mov%1
m0
,
[
srcq
+
wq
]
mov%1
m1
,
[
dstq
+
wq
]
mov%1
m2
,
[
srcq
+
wq
+
mmsize
]
mov%1
m3
,
[
dstq
+
wq
+
mmsize
]
%else
mov%1
m0
,
[
src1q
+
wq
]
mov%1
m1
,
[
src2q
+
wq
]
mov%1
m2
,
[
src1q
+
wq
+
mmsize
]
mov%1
m3
,
[
src2q
+
wq
+
mmsize
]
%endif
p%2
w
m0
,
m1
p%2
w
m2
,
m3
pand
m0
,
m4
pand
m2
,
m4
mov%1
[
dstq
+
wq
]
,
m0
mov%1
[
dstq
+
wq
+
mmsize
]
,
m2
add
wq
,
2
*
mmsize
jl
%%
.
loop
%%
.
end
:
RET
%endmacro
%if
ARCH_X86_32
INIT_MMX
mmx
cglobal
add_int16
,
4
,
4
,
5
,
dst
,
src
,
mask
,
w
,
tmp
INT16_LOOP
a
,
add
%endif
INIT_XMM
sse2
cglobal
add_int16
,
4
,
4
,
5
,
dst
,
src
,
mask
,
w
,
tmp
test
srcq
,
mmsize
-
1
jnz
.
unaligned
test
dstq
,
mmsize
-
1
jnz
.
unaligned
INT16_LOOP
a
,
add
.
unaligned
:
INT16_LOOP
u
,
add
%macro
ADD_HFYU_LEFT_LOOP_INT16
2
; %1 = dst alignment (a/u), %2 = src alignment (a/u)
add
wd
,
wd
add
srcq
,
wq
...
...
@@ -359,68 +288,3 @@ cglobal add_hfyu_left_pred_int16, 4,4,8, dst, src, mask, w, left
ADD_HFYU_LEFT_LOOP_INT16
u
,
a
.
src_unaligned
:
ADD_HFYU_LEFT_LOOP_INT16
u
,
u
; void add_hfyu_median_prediction_mmxext(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int mask, int w, int *left, int *left_top)
INIT_MMX
mmxext
cglobal
add_hfyu_median_pred_int16
,
7
,
7
,
0
,
dst
,
top
,
diff
,
mask
,
w
,
left
,
left_top
add
wd
,
wd
movd
mm6
,
maskd
SPLATW
mm6
,
mm6
movq
mm0
,
[topq]
movq
mm2
,
mm0
movd
mm4
,
[
left_topq
]
psllq
mm2
,
16
movq
mm1
,
mm0
por
mm4
,
mm2
movd
mm3
,
[leftq]
psubw
mm0
,
mm4
; t-tl
add
dstq
,
wq
add
topq
,
wq
add
diffq
,
wq
neg
wq
jmp
.
skip
.
loop
:
movq
mm4
,
[
topq
+
wq
]
movq
mm0
,
mm4
psllq
mm4
,
16
por
mm4
,
mm1
movq
mm1
,
mm0
; t
psubw
mm0
,
mm4
; t-tl
.
skip
:
movq
mm2
,
[
diffq
+
wq
]
%assign
i
0
%rep
4
movq
mm4
,
mm0
paddw
mm4
,
mm3
; t-tl+l
pand
mm4
,
mm6
movq
mm5
,
mm3
pmaxsw
mm3
,
mm1
pminsw
mm5
,
mm1
pminsw
mm3
,
mm4
pmaxsw
mm3
,
mm5
; median
paddw
mm3
,
mm2
; +residual
pand
mm3
,
mm6
%if
i
==
0
movq
mm7
,
mm3
psllq
mm7
,
48
%else
movq
mm4
,
mm3
psrlq
mm7
,
16
psllq
mm4
,
48
por
mm7
,
mm4
%endif
%if
i
<
3
psrlq
mm0
,
16
psrlq
mm1
,
16
psrlq
mm2
,
16
%endif
%assign
i
i
+
1
%endrep
movq
[
dstq
+
wq
]
,
mm7
add
wq
,
8
jl
.
loop
movzx
r2d
,
word
[
dstq
-
2
]
mov
[leftq],
r2d
movzx
r2d
,
word
[
topq
-
2
]
mov
[
left_topq
]
,
r2d
RET
libavcodec/x86/lossless_videodsp_init.c
View file @
47f21232
...
...
@@ -21,7 +21,6 @@
#include "config.h"
#include "libavutil/x86/asm.h"
#include "../lossless_videodsp.h"
#include "libavutil/pixdesc.h"
#include "libavutil/x86/cpu.h"
void
ff_add_bytes_mmx
(
uint8_t
*
dst
,
uint8_t
*
src
,
intptr_t
w
);
...
...
@@ -39,11 +38,8 @@ int ff_add_left_pred_ssse3(uint8_t *dst, const uint8_t *src,
int
ff_add_left_pred_sse4
(
uint8_t
*
dst
,
const
uint8_t
*
src
,
intptr_t
w
,
int
left
);
void
ff_add_int16_mmx
(
uint16_t
*
dst
,
const
uint16_t
*
src
,
unsigned
mask
,
int
w
);
void
ff_add_int16_sse2
(
uint16_t
*
dst
,
const
uint16_t
*
src
,
unsigned
mask
,
int
w
);
int
ff_add_hfyu_left_pred_int16_ssse3
(
uint16_t
*
dst
,
const
uint16_t
*
src
,
unsigned
mask
,
int
w
,
unsigned
acc
);
int
ff_add_hfyu_left_pred_int16_sse4
(
uint16_t
*
dst
,
const
uint16_t
*
src
,
unsigned
mask
,
int
w
,
unsigned
acc
);
void
ff_add_hfyu_median_pred_int16_mmxext
(
uint16_t
*
dst
,
const
uint16_t
*
top
,
const
uint16_t
*
diff
,
unsigned
mask
,
int
w
,
int
*
left
,
int
*
left_top
);
#if HAVE_INLINE_ASM && HAVE_7REGS && ARCH_X86_32
static
void
add_median_pred_cmov
(
uint8_t
*
dst
,
const
uint8_t
*
top
,
...
...
@@ -83,10 +79,9 @@ static void add_median_pred_cmov(uint8_t *dst, const uint8_t *top,
}
#endif
void
ff_llviddsp_init_x86
(
LLVidDSPContext
*
c
,
AVCodecContext
*
avctx
)
void
ff_llviddsp_init_x86
(
LLVidDSPContext
*
c
)
{
int
cpu_flags
=
av_get_cpu_flags
();
const
AVPixFmtDescriptor
*
pix_desc
=
av_pix_fmt_desc_get
(
avctx
->
pix_fmt
);
#if HAVE_INLINE_ASM && HAVE_7REGS && ARCH_X86_32
if
(
cpu_flags
&
AV_CPU_FLAG_CMOV
)
...
...
@@ -95,7 +90,6 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c, AVCodecContext *avctx)
if
(
ARCH_X86_32
&&
EXTERNAL_MMX
(
cpu_flags
))
{
c
->
add_bytes
=
ff_add_bytes_mmx
;
c
->
add_int16
=
ff_add_int16_mmx
;
}
if
(
ARCH_X86_32
&&
EXTERNAL_MMXEXT
(
cpu_flags
))
{
...
...
@@ -104,15 +98,9 @@ void ff_llviddsp_init_x86(LLVidDSPContext *c, AVCodecContext *avctx)
c
->
add_median_pred
=
ff_add_median_pred_mmxext
;
}
if
(
EXTERNAL_MMXEXT
(
cpu_flags
)
&&
pix_desc
&&
pix_desc
->
comp
[
0
].
depth
<
16
)
{
c
->
add_hfyu_median_pred_int16
=
ff_add_hfyu_median_pred_int16_mmxext
;
}
if
(
EXTERNAL_SSE2
(
cpu_flags
))
{
c
->
add_bytes
=
ff_add_bytes_sse2
;
c
->
add_median_pred
=
ff_add_median_pred_sse2
;
c
->
add_int16
=
ff_add_int16_sse2
;
}
if
(
EXTERNAL_SSSE3
(
cpu_flags
))
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment