Linshizhi / ffmpeg.wasm-core / Commits

Commit c23ce454, authored Jan 04, 2014 by Diego Biurrun
x86: dsputil: Coalesce all init files
This makes the init files match the structure of the dsputil split.
parent 324ff594

Showing 4 changed files with 447 additions and 526 deletions
libavcodec/x86/Makefile          +1    -2
libavcodec/x86/dsputil_init.c    +446  -5
libavcodec/x86/dsputil_x86.h     +0    -32
libavcodec/x86/motion_est.c      +0    -487
libavcodec/x86/Makefile (view file @ c23ce454)
...
@@ -6,8 +6,7 @@ OBJS-$(CONFIG_AUDIODSP) += x86/audiodsp_init.o
OBJS-$(CONFIG_BLOCKDSP)                += x86/blockdsp.o
OBJS-$(CONFIG_BSWAPDSP)                += x86/bswapdsp_init.o
OBJS-$(CONFIG_DCT)                     += x86/dct_init.o
-OBJS-$(CONFIG_DSPUTIL)                 += x86/dsputil_init.o            \
-                                          x86/motion_est.o
OBJS-$(CONFIG_DSPUTIL)                 += x86/dsputil_init.o
OBJS-$(CONFIG_FDCTDSP)                 += x86/fdctdsp_init.o
OBJS-$(CONFIG_FFT)                     += x86/fft_init.o
OBJS-$(CONFIG_H263DSP)                 += x86/h263dsp_init.o
...
libavcodec/x86/dsputil_init.c (view file @ c23ce454)
...
@@ -28,7 +28,6 @@
#include "libavutil/x86/cpu.h"
#include "libavcodec/dsputil.h"
#include "libavcodec/mpegvideo.h"
-#include "dsputil_x86.h"

#if HAVE_INLINE_ASM
...
@@ -797,6 +796,419 @@ DCT_SAD_FUNC(ssse3)
#undef HSUM
#undef DCT_SAD
DECLARE_ASM_CONST(8, uint64_t, round_tab)[3] = {
    0x0000000000000000ULL,
    0x0001000100010001ULL,
    0x0002000200020002ULL,
};

DECLARE_ASM_CONST(8, uint64_t, bone) = 0x0101010101010101LL;

static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2,
                              int stride, int h)
{
    x86_reg len = -(stride * h);
    __asm__ volatile (
        ".p2align 4                     \n\t"
        "1:                             \n\t"
        "movq (%1, %%"REG_a"), %%mm0    \n\t"
        "movq (%2, %%"REG_a"), %%mm2    \n\t"
        "movq (%2, %%"REG_a"), %%mm4    \n\t"
        "add %3, %%"REG_a"              \n\t"
        "psubusb %%mm0, %%mm2           \n\t"
        "psubusb %%mm4, %%mm0           \n\t"
        "movq (%1, %%"REG_a"), %%mm1    \n\t"
        "movq (%2, %%"REG_a"), %%mm3    \n\t"
        "movq (%2, %%"REG_a"), %%mm5    \n\t"
        "psubusb %%mm1, %%mm3           \n\t"
        "psubusb %%mm5, %%mm1           \n\t"
        "por %%mm2, %%mm0               \n\t"
        "por %%mm1, %%mm3               \n\t"
        "movq %%mm0, %%mm1              \n\t"
        "movq %%mm3, %%mm2              \n\t"
        "punpcklbw %%mm7, %%mm0         \n\t"
        "punpckhbw %%mm7, %%mm1         \n\t"
        "punpcklbw %%mm7, %%mm3         \n\t"
        "punpckhbw %%mm7, %%mm2         \n\t"
        "paddw %%mm1, %%mm0             \n\t"
        "paddw %%mm3, %%mm2             \n\t"
        "paddw %%mm2, %%mm0             \n\t"
        "paddw %%mm0, %%mm6             \n\t"
        "add %3, %%"REG_a"              \n\t"
        " js 1b                         \n\t"
        : "+a" (len)
        : "r" (blk1 - len), "r" (blk2 - len), "r" ((x86_reg) stride));
}
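
Since the asm is dense, a plain-C reference of what sad8_1_* computes may help. A minimal sketch, assuming the usual SAD definition; the name sad8_1_c and the test values are hypothetical, not part of this commit:

#include <stdint.h>
#include <stdio.h>

/* Reference (non-SIMD) sketch of what sad8_1_* computes: the sum of
 * absolute differences over an 8-byte-wide block of height h.
 * Hypothetical helper, for illustration only. */
static int sad8_1_c(const uint8_t *blk1, const uint8_t *blk2,
                    int stride, int h)
{
    int sum = 0;
    for (int y = 0; y < h; y++) {
        for (int x = 0; x < 8; x++) {
            int d = blk1[x] - blk2[x];
            sum += d < 0 ? -d : d;      /* |blk1[x] - blk2[x]| */
        }
        blk1 += stride;                 /* advance both blocks one row */
        blk2 += stride;
    }
    return sum;
}

int main(void)
{
    uint8_t a[64], b[64];
    for (int i = 0; i < 64; i++) {
        a[i] = (uint8_t) i;
        b[i] = (uint8_t)(i + 3);
    }
    printf("SAD = %d\n", sad8_1_c(a, b, 8, 8));  /* prints SAD = 192 */
    return 0;
}

The MMX version gets |a - b| by ORing two unsigned-saturated subtractions (psubusb both ways, then por), unpacks bytes to words against a zeroed %%mm7, and keeps running word sums in %%mm6 for sum_mmx() to fold at the end.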
static inline void sad8_1_mmxext(uint8_t *blk1, uint8_t *blk2,
                                 int stride, int h)
{
    __asm__ volatile (
        ".p2align 4                     \n\t"
        "1:                             \n\t"
        "movq (%1), %%mm0               \n\t"
        "movq (%1, %3), %%mm1           \n\t"
        "psadbw (%2), %%mm0             \n\t"
        "psadbw (%2, %3), %%mm1         \n\t"
        "paddw %%mm0, %%mm6             \n\t"
        "paddw %%mm1, %%mm6             \n\t"
        "lea (%1,%3,2), %1              \n\t"
        "lea (%2,%3,2), %2              \n\t"
        "sub $2, %0                     \n\t"
        " jg 1b                         \n\t"
        : "+r" (h), "+r" (blk1), "+r" (blk2)
        : "r" ((x86_reg) stride));
}

static int sad16_sse2(MpegEncContext *v, uint8_t *blk2, uint8_t *blk1,
                      int stride, int h)
{
    int ret;
    __asm__ volatile (
        "pxor %%xmm2, %%xmm2            \n\t"
        ".p2align 4                     \n\t"
        "1:                             \n\t"
        "movdqu (%1), %%xmm0            \n\t"
        "movdqu (%1, %4), %%xmm1        \n\t"
        "psadbw (%2), %%xmm0            \n\t"
        "psadbw (%2, %4), %%xmm1        \n\t"
        "paddw %%xmm0, %%xmm2           \n\t"
        "paddw %%xmm1, %%xmm2           \n\t"
        "lea (%1,%4,2), %1              \n\t"
        "lea (%2,%4,2), %2              \n\t"
        "sub $2, %0                     \n\t"
        " jg 1b                         \n\t"
        "movhlps %%xmm2, %%xmm0         \n\t"
        "paddw %%xmm0, %%xmm2           \n\t"
        "movd %%xmm2, %3                \n\t"
        : "+r" (h), "+r" (blk1), "+r" (blk2), "=r" (ret)
        : "r" ((x86_reg) stride));
    return ret;
}
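
The mmxext and sse2 variants lean on psadbw instead, which computes the sum of absolute differences of eight packed bytes in one instruction, so each loop iteration can score two full rows. A C model of a single 8-byte psadbw step (illustrative only):

#include <stdint.h>

/* C model of one 8-byte psadbw: the result is at most 8 * 255 = 2040,
 * so it fits easily in the 16-bit lane the instruction writes. */
static unsigned psadbw_model(const uint8_t a[8], const uint8_t b[8])
{
    unsigned s = 0;
    for (int i = 0; i < 8; i++)
        s += a[i] > b[i] ? a[i] - b[i] : b[i] - a[i];
    return s;
}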
static inline void sad8_x2a_mmxext(uint8_t *blk1, uint8_t *blk2,
                                   int stride, int h)
{
    __asm__ volatile (
        ".p2align 4                     \n\t"
        "1:                             \n\t"
        "movq (%1), %%mm0               \n\t"
        "movq (%1, %3), %%mm1           \n\t"
        "pavgb 1(%1), %%mm0             \n\t"
        "pavgb 1(%1, %3), %%mm1         \n\t"
        "psadbw (%2), %%mm0             \n\t"
        "psadbw (%2, %3), %%mm1         \n\t"
        "paddw %%mm0, %%mm6             \n\t"
        "paddw %%mm1, %%mm6             \n\t"
        "lea (%1,%3,2), %1              \n\t"
        "lea (%2,%3,2), %2              \n\t"
        "sub $2, %0                     \n\t"
        " jg 1b                         \n\t"
        : "+r" (h), "+r" (blk1), "+r" (blk2)
        : "r" ((x86_reg) stride));
}

static inline void sad8_y2a_mmxext(uint8_t *blk1, uint8_t *blk2,
                                   int stride, int h)
{
    __asm__ volatile (
        "movq (%1), %%mm0               \n\t"
        "add %3, %1                     \n\t"
        ".p2align 4                     \n\t"
        "1:                             \n\t"
        "movq (%1), %%mm1               \n\t"
        "movq (%1, %3), %%mm2           \n\t"
        "pavgb %%mm1, %%mm0             \n\t"
        "pavgb %%mm2, %%mm1             \n\t"
        "psadbw (%2), %%mm0             \n\t"
        "psadbw (%2, %3), %%mm1         \n\t"
        "paddw %%mm0, %%mm6             \n\t"
        "paddw %%mm1, %%mm6             \n\t"
        "movq %%mm2, %%mm0              \n\t"
        "lea (%1,%3,2), %1              \n\t"
        "lea (%2,%3,2), %2              \n\t"
        "sub $2, %0                     \n\t"
        " jg 1b                         \n\t"
        : "+r" (h), "+r" (blk1), "+r" (blk2)
        : "r" ((x86_reg) stride));
}

static inline void sad8_4_mmxext(uint8_t *blk1, uint8_t *blk2,
                                 int stride, int h)
{
    __asm__ volatile (
        "movq "MANGLE(bone)", %%mm5     \n\t"
        "movq (%1), %%mm0               \n\t"
        "pavgb 1(%1), %%mm0             \n\t"
        "add %3, %1                     \n\t"
        ".p2align 4                     \n\t"
        "1:                             \n\t"
        "movq (%1), %%mm1               \n\t"
        "movq (%1,%3), %%mm2            \n\t"
        "pavgb 1(%1), %%mm1             \n\t"
        "pavgb 1(%1,%3), %%mm2          \n\t"
        "psubusb %%mm5, %%mm1           \n\t"
        "pavgb %%mm1, %%mm0             \n\t"
        "pavgb %%mm2, %%mm1             \n\t"
        "psadbw (%2), %%mm0             \n\t"
        "psadbw (%2,%3), %%mm1          \n\t"
        "paddw %%mm0, %%mm6             \n\t"
        "paddw %%mm1, %%mm6             \n\t"
        "movq %%mm2, %%mm0              \n\t"
        "lea (%1,%3,2), %1              \n\t"
        "lea (%2,%3,2), %2              \n\t"
        "sub $2, %0                     \n\t"
        " jg 1b                         \n\t"
        : "+r" (h), "+r" (blk1), "+r" (blk2)
        : "r" ((x86_reg) stride));
}

static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2,
                              int stride, int h)
{
    x86_reg len = -(stride * h);
    __asm__ volatile (
        ".p2align 4                     \n\t"
        "1:                             \n\t"
        "movq (%1, %%"REG_a"), %%mm0    \n\t"
        "movq (%2, %%"REG_a"), %%mm1    \n\t"
        "movq (%1, %%"REG_a"), %%mm2    \n\t"
        "movq (%2, %%"REG_a"), %%mm3    \n\t"
        "punpcklbw %%mm7, %%mm0         \n\t"
        "punpcklbw %%mm7, %%mm1         \n\t"
        "punpckhbw %%mm7, %%mm2         \n\t"
        "punpckhbw %%mm7, %%mm3         \n\t"
        "paddw %%mm0, %%mm1             \n\t"
        "paddw %%mm2, %%mm3             \n\t"
        "movq (%3, %%"REG_a"), %%mm4    \n\t"
        "movq (%3, %%"REG_a"), %%mm2    \n\t"
        "paddw %%mm5, %%mm1             \n\t"
        "paddw %%mm5, %%mm3             \n\t"
        "psrlw $1, %%mm1                \n\t"
        "psrlw $1, %%mm3                \n\t"
        "packuswb %%mm3, %%mm1          \n\t"
        "psubusb %%mm1, %%mm4           \n\t"
        "psubusb %%mm2, %%mm1           \n\t"
        "por %%mm4, %%mm1               \n\t"
        "movq %%mm1, %%mm0              \n\t"
        "punpcklbw %%mm7, %%mm0         \n\t"
        "punpckhbw %%mm7, %%mm1         \n\t"
        "paddw %%mm1, %%mm0             \n\t"
        "paddw %%mm0, %%mm6             \n\t"
        "add %4, %%"REG_a"              \n\t"
        " js 1b                         \n\t"
        : "+a" (len)
        : "r" (blk1a - len), "r" (blk1b - len), "r" (blk2 - len),
          "r" ((x86_reg) stride));
}

static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2,
                              int stride, int h)
{
    x86_reg len = -(stride * h);
    __asm__ volatile (
        "movq  (%1, %%"REG_a"), %%mm0   \n\t"
        "movq 1(%1, %%"REG_a"), %%mm2   \n\t"
        "movq %%mm0, %%mm1              \n\t"
        "movq %%mm2, %%mm3              \n\t"
        "punpcklbw %%mm7, %%mm0         \n\t"
        "punpckhbw %%mm7, %%mm1         \n\t"
        "punpcklbw %%mm7, %%mm2         \n\t"
        "punpckhbw %%mm7, %%mm3         \n\t"
        "paddw %%mm2, %%mm0             \n\t"
        "paddw %%mm3, %%mm1             \n\t"
        ".p2align 4                     \n\t"
        "1:                             \n\t"
        "movq  (%2, %%"REG_a"), %%mm2   \n\t"
        "movq 1(%2, %%"REG_a"), %%mm4   \n\t"
        "movq %%mm2, %%mm3              \n\t"
        "movq %%mm4, %%mm5              \n\t"
        "punpcklbw %%mm7, %%mm2         \n\t"
        "punpckhbw %%mm7, %%mm3         \n\t"
        "punpcklbw %%mm7, %%mm4         \n\t"
        "punpckhbw %%mm7, %%mm5         \n\t"
        "paddw %%mm4, %%mm2             \n\t"
        "paddw %%mm5, %%mm3             \n\t"
        "movq 16+"MANGLE(round_tab)", %%mm5 \n\t"
        "paddw %%mm2, %%mm0             \n\t"
        "paddw %%mm3, %%mm1             \n\t"
        "paddw %%mm5, %%mm0             \n\t"
        "paddw %%mm5, %%mm1             \n\t"
        "movq (%3, %%"REG_a"), %%mm4    \n\t"
        "movq (%3, %%"REG_a"), %%mm5    \n\t"
        "psrlw $2, %%mm0                \n\t"
        "psrlw $2, %%mm1                \n\t"
        "packuswb %%mm1, %%mm0          \n\t"
        "psubusb %%mm0, %%mm4           \n\t"
        "psubusb %%mm5, %%mm0           \n\t"
        "por %%mm4, %%mm0               \n\t"
        "movq %%mm0, %%mm4              \n\t"
        "punpcklbw %%mm7, %%mm0         \n\t"
        "punpckhbw %%mm7, %%mm4         \n\t"
        "paddw %%mm0, %%mm6             \n\t"
        "paddw %%mm4, %%mm6             \n\t"
        "movq %%mm2, %%mm0              \n\t"
        "movq %%mm3, %%mm1              \n\t"
        "add %4, %%"REG_a"              \n\t"
        " js 1b                         \n\t"
        : "+a" (len)
        : "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len),
          "r" ((x86_reg) stride));
}

static inline int sum_mmx(void)
{
    int ret;
    __asm__ volatile (
        "movq %%mm6, %%mm0              \n\t"
        "psrlq $32, %%mm6               \n\t"
        "paddw %%mm0, %%mm6             \n\t"
        "movq %%mm6, %%mm0              \n\t"
        "psrlq $16, %%mm6               \n\t"
        "paddw %%mm0, %%mm6             \n\t"
        "movd %%mm6, %0                 \n\t"
        : "=r" (ret));
    return ret & 0xFFFF;
}

static inline int sum_mmxext(void)
{
    int ret;
    __asm__ volatile (
        "movd %%mm6, %0                 \n\t"
        : "=r" (ret));
    return ret;
}

static inline void sad8_x2a_mmx(uint8_t *blk1, uint8_t *blk2,
                                int stride, int h)
{
    sad8_2_mmx(blk1, blk1 + 1, blk2, stride, h);
}

static inline void sad8_y2a_mmx(uint8_t *blk1, uint8_t *blk2,
                                int stride, int h)
{
    sad8_2_mmx(blk1, blk1 + stride, blk2, stride, h);
}
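
The x2/y2/xy2 variants score against half-pel positions: blk1 is averaged with its right neighbour (x2), the row below (y2), or both (xy2) before the comparison. A plain-C sketch of the x2 case, assuming pavgb's round-up behaviour, (a + b + 1) >> 1; the name sad8_x2_c is hypothetical:

#include <stdint.h>

/* Illustrative reference for sad8_x2a_*: SAD against the horizontal
 * half-pel interpolation of blk1. */
static int sad8_x2_c(const uint8_t *blk1, const uint8_t *blk2,
                     int stride, int h)
{
    int sum = 0;
    for (int y = 0; y < h; y++) {
        for (int x = 0; x < 8; x++) {
            int avg = (blk1[x] + blk1[x + 1] + 1) >> 1;  /* pavgb rounding */
            int d   = avg - blk2[x];
            sum += d < 0 ? -d : d;
        }
        blk1 += stride;
        blk2 += stride;
    }
    return sum;
}

The plain-MMX builds reach the same result through sad8_2_mmx, which averages the two source blocks with explicit unpack/add/shift arithmetic; the rounding word is preloaded into %%mm5 (round_tab[1]) by the wrappers below.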
#define PIX_SAD(suf) \
static int sad8_ ## suf(MpegEncContext *v, uint8_t *blk2, \
uint8_t *blk1, int stride, int h) \
{ \
assert(h == 8); \
__asm__ volatile ( \
"pxor %%mm7, %%mm7 \n\t" \
"pxor %%mm6, %%mm6 \n\t" \
:); \
\
sad8_1_ ## suf(blk1, blk2, stride, 8); \
\
return sum_ ## suf(); \
} \
\
static int sad8_x2_ ## suf(MpegEncContext *v, uint8_t *blk2, \
uint8_t *blk1, int stride, int h) \
{ \
assert(h == 8); \
__asm__ volatile ( \
"pxor %%mm7, %%mm7 \n\t" \
"pxor %%mm6, %%mm6 \n\t" \
"movq %0, %%mm5 \n\t" \
:: "m" (round_tab[1])); \
\
sad8_x2a_ ## suf(blk1, blk2, stride, 8); \
\
return sum_ ## suf(); \
} \
\
static int sad8_y2_ ## suf(MpegEncContext *v, uint8_t *blk2, \
uint8_t *blk1, int stride, int h) \
{ \
assert(h == 8); \
__asm__ volatile ( \
"pxor %%mm7, %%mm7 \n\t" \
"pxor %%mm6, %%mm6 \n\t" \
"movq %0, %%mm5 \n\t" \
:: "m" (round_tab[1])); \
\
sad8_y2a_ ## suf(blk1, blk2, stride, 8); \
\
return sum_ ## suf(); \
} \
\
static int sad8_xy2_ ## suf(MpegEncContext *v, uint8_t *blk2, \
uint8_t *blk1, int stride, int h) \
{ \
assert(h == 8); \
__asm__ volatile ( \
"pxor %%mm7, %%mm7 \n\t" \
"pxor %%mm6, %%mm6 \n\t" \
::); \
\
sad8_4_ ## suf(blk1, blk2, stride, 8); \
\
return sum_ ## suf(); \
} \
\
static int sad16_ ## suf(MpegEncContext *v, uint8_t *blk2, \
uint8_t *blk1, int stride, int h) \
{ \
__asm__ volatile ( \
"pxor %%mm7, %%mm7 \n\t" \
"pxor %%mm6, %%mm6 \n\t" \
:); \
\
sad8_1_ ## suf(blk1, blk2, stride, h); \
sad8_1_ ## suf(blk1 + 8, blk2 + 8, stride, h); \
\
return sum_ ## suf(); \
} \
\
static int sad16_x2_ ## suf(MpegEncContext *v, uint8_t *blk2, \
uint8_t *blk1, int stride, int h) \
{ \
__asm__ volatile ( \
"pxor %%mm7, %%mm7 \n\t" \
"pxor %%mm6, %%mm6 \n\t" \
"movq %0, %%mm5 \n\t" \
:: "m" (round_tab[1])); \
\
sad8_x2a_ ## suf(blk1, blk2, stride, h); \
sad8_x2a_ ## suf(blk1 + 8, blk2 + 8, stride, h); \
\
return sum_ ## suf(); \
} \
\
static int sad16_y2_ ## suf(MpegEncContext *v, uint8_t *blk2, \
uint8_t *blk1, int stride, int h) \
{ \
__asm__ volatile ( \
"pxor %%mm7, %%mm7 \n\t" \
"pxor %%mm6, %%mm6 \n\t" \
"movq %0, %%mm5 \n\t" \
:: "m" (round_tab[1])); \
\
sad8_y2a_ ## suf(blk1, blk2, stride, h); \
sad8_y2a_ ## suf(blk1 + 8, blk2 + 8, stride, h); \
\
return sum_ ## suf(); \
} \
\
static int sad16_xy2_ ## suf(MpegEncContext *v, uint8_t *blk2, \
uint8_t *blk1, int stride, int h) \
{ \
__asm__ volatile ( \
"pxor %%mm7, %%mm7 \n\t" \
"pxor %%mm6, %%mm6 \n\t" \
::); \
\
sad8_4_ ## suf(blk1, blk2, stride, h); \
sad8_4_ ## suf(blk1 + 8, blk2 + 8, stride, h); \
\
return sum_ ## suf(); \
} \
PIX_SAD(mmx)
PIX_SAD(mmxext)
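
For orientation, substituting suf = mmx into the first template above expands to the following (a mechanical macro expansion, shown only to make the control flow concrete):

static int sad8_mmx(MpegEncContext *v, uint8_t *blk2,
                    uint8_t *blk1, int stride, int h)
{
    assert(h == 8);
    __asm__ volatile (
        "pxor %%mm7, %%mm7 \n\t"   /* zero for the byte unpacks */
        "pxor %%mm6, %%mm6 \n\t"   /* clear the SAD accumulator */
        :);

    sad8_1_mmx(blk1, blk2, stride, 8);

    return sum_mmx();
}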
#endif /* HAVE_INLINE_ASM */
int ff_sse16_sse2(MpegEncContext *v, uint8_t *pix1, uint8_t *pix2, int
...

@@ -821,22 +1233,49 @@ av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx)
    if (INLINE_MMX(cpu_flags)) {
        c->sum_abs_dctelem = sum_abs_dctelem_mmx;

        c->pix_abs[0][0] = sad16_mmx;
        c->pix_abs[0][1] = sad16_x2_mmx;
        c->pix_abs[0][2] = sad16_y2_mmx;
        c->pix_abs[0][3] = sad16_xy2_mmx;
        c->pix_abs[1][0] = sad8_mmx;
        c->pix_abs[1][1] = sad8_x2_mmx;
        c->pix_abs[1][2] = sad8_y2_mmx;
        c->pix_abs[1][3] = sad8_xy2_mmx;

        c->sad[0] = sad16_mmx;
        c->sad[1] = sad8_mmx;

        c->sse[0]  = sse16_mmx;
        c->sse[1]  = sse8_mmx;
        c->vsad[4] = vsad_intra16_mmx;

        c->nsse[0] = nsse16_mmx;
        c->nsse[1] = nsse8_mmx;

        if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
            c->vsad[0] = vsad16_mmx;
        }
    }

    if (INLINE_MMXEXT(cpu_flags)) {
        c->sum_abs_dctelem = sum_abs_dctelem_mmxext;
        c->vsad[4]         = vsad_intra16_mmxext;

        c->pix_abs[0][0] = sad16_mmxext;
        c->pix_abs[1][0] = sad8_mmxext;

        c->sad[0] = sad16_mmxext;
        c->sad[1] = sad8_mmxext;

        if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
            c->pix_abs[0][1] = sad16_x2_mmxext;
            c->pix_abs[0][2] = sad16_y2_mmxext;
            c->pix_abs[0][3] = sad16_xy2_mmxext;
            c->pix_abs[1][1] = sad8_x2_mmxext;
            c->pix_abs[1][2] = sad8_y2_mmxext;
            c->pix_abs[1][3] = sad8_xy2_mmxext;

            c->vsad[0] = vsad16_mmxext;
        }
    }
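
A note on the tables being filled, for readers new to dsputil; the index convention below is inferred from the function names assigned above:

/* c->pix_abs[size][hpel]
 *   size: 0 -> 16x16 comparisons, 1 -> 8x8 comparisons
 *   hpel: 0 -> integer position, 1 -> horizontal half-pel (x2),
 *         2 -> vertical half-pel (y2), 3 -> diagonal half-pel (xy2)
 */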
...

@@ -845,6 +1284,10 @@ av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx)
        c->sum_abs_dctelem = sum_abs_dctelem_sse2;
    }

    if (INLINE_SSE2(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_3DNOW)) {
        c->sad[0] = sad16_sse2;
    }

#if HAVE_SSSE3_INLINE
    if (INLINE_SSSE3(cpu_flags)) {
        c->sum_abs_dctelem = sum_abs_dctelem_ssse3;
...

@@ -875,6 +1318,4 @@ av_cold void ff_dsputil_init_x86(DSPContext *c, AVCodecContext *avctx)
        c->hadamard8_diff[0] = ff_hadamard8_diff16_ssse3;
        c->hadamard8_diff[1] = ff_hadamard8_diff_ssse3;
    }

-    ff_dsputil_init_pix_mmx(c, avctx);
}
libavcodec/x86/dsputil_x86.h (deleted, 100644 → 0, view file @ 324ff594)
/*
* MMX optimized DSP utils
* Copyright (c) 2007 Aurelien Jacobs <aurel@gnuage.org>
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_X86_DSPUTIL_X86_H
#define AVCODEC_X86_DSPUTIL_X86_H
#include <stdint.h>
#include "libavcodec/avcodec.h"
#include "libavcodec/dsputil.h"
void ff_dsputil_init_pix_mmx(DSPContext *c, AVCodecContext *avctx);

#endif /* AVCODEC_X86_DSPUTIL_X86_H */
libavcodec/x86/motion_est.c (deleted, 100644 → 0, view file @ 324ff594)
/*
* MMX optimized motion estimation
* Copyright (c) 2001 Fabrice Bellard
* Copyright (c) 2002-2004 Michael Niedermayer
*
* mostly by Michael Niedermayer <michaelni@gmx.at>
*
* This file is part of Libav.
*
* Libav is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* Libav is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with Libav; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
#include "libavutil/internal.h"
#include "libavutil/mem.h"
#include "libavutil/x86/asm.h"
#include "libavutil/x86/cpu.h"
#include "libavcodec/mpegvideo.h"
#include "dsputil_x86.h"
#if HAVE_INLINE_ASM
... (the body of this file, from the round_tab and bone constants through
PIX_SAD(mmxext), is identical to the SAD code shown above as added to
libavcodec/x86/dsputil_init.c) ...

#endif /* HAVE_INLINE_ASM */

av_cold void ff_dsputil_init_pix_mmx(DSPContext *c, AVCodecContext *avctx)
{
#if HAVE_INLINE_ASM
    int cpu_flags = av_get_cpu_flags();

    if (INLINE_MMX(cpu_flags)) {
        c->pix_abs[0][0] = sad16_mmx;
        c->pix_abs[0][1] = sad16_x2_mmx;
        c->pix_abs[0][2] = sad16_y2_mmx;
        c->pix_abs[0][3] = sad16_xy2_mmx;
        c->pix_abs[1][0] = sad8_mmx;
        c->pix_abs[1][1] = sad8_x2_mmx;
        c->pix_abs[1][2] = sad8_y2_mmx;
        c->pix_abs[1][3] = sad8_xy2_mmx;

        c->sad[0] = sad16_mmx;
        c->sad[1] = sad8_mmx;
    }

    if (INLINE_MMXEXT(cpu_flags)) {
        c->pix_abs[0][0] = sad16_mmxext;
        c->pix_abs[1][0] = sad8_mmxext;

        c->sad[0] = sad16_mmxext;
        c->sad[1] = sad8_mmxext;

        if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
            c->pix_abs[0][1] = sad16_x2_mmxext;
            c->pix_abs[0][2] = sad16_y2_mmxext;
            c->pix_abs[0][3] = sad16_xy2_mmxext;
            c->pix_abs[1][1] = sad8_x2_mmxext;
            c->pix_abs[1][2] = sad8_y2_mmxext;
            c->pix_abs[1][3] = sad8_xy2_mmxext;
        }
    }

    if (INLINE_SSE2(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_3DNOW)) {
        c->sad[0] = sad16_sse2;
    }
#endif /* HAVE_INLINE_ASM */
}