Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Contribute to GitLab
Sign in / Register
Toggle navigation
F
ffmpeg.wasm-core
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Linshizhi
ffmpeg.wasm-core
Commits
26301caa
Commit
26301caa
authored
Jul 09, 2012
by
Diego Biurrun
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
x86: mmx2 ---> mmxext in asm constructs
parent
da39cac8
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
31 changed files
with
317 additions
and
312 deletions
+317
-312
ac3dsp.asm
libavcodec/x86/ac3dsp.asm
+2
-2
ac3dsp_init.c
libavcodec/x86/ac3dsp_init.c
+2
-2
dsputil.asm
libavcodec/x86/dsputil.asm
+4
-4
dsputil_mmx.c
libavcodec/x86/dsputil_mmx.c
+28
-28
dsputilenc.asm
libavcodec/x86/dsputilenc.asm
+1
-1
dsputilenc_mmx.c
libavcodec/x86/dsputilenc_mmx.c
+3
-3
h264_chromamc.asm
libavcodec/x86/h264_chromamc.asm
+7
-7
h264_chromamc_10bit.asm
libavcodec/x86/h264_chromamc_10bit.asm
+2
-2
h264_deblock.asm
libavcodec/x86/h264_deblock.asm
+10
-10
h264_deblock_10bit.asm
libavcodec/x86/h264_deblock_10bit.asm
+2
-2
h264_idct.asm
libavcodec/x86/h264_idct.asm
+24
-22
h264_idct_10bit.asm
libavcodec/x86/h264_idct_10bit.asm
+1
-1
h264_intrapred.asm
libavcodec/x86/h264_intrapred.asm
+13
-13
h264_intrapred_10bit.asm
libavcodec/x86/h264_intrapred_10bit.asm
+10
-10
h264_intrapred_init.c
libavcodec/x86/h264_intrapred_init.c
+40
-40
h264_weight.asm
libavcodec/x86/h264_weight.asm
+6
-6
h264dsp_init.c
libavcodec/x86/h264dsp_init.c
+54
-53
pngdsp.asm
libavcodec/x86/pngdsp.asm
+1
-1
pngdsp_init.c
libavcodec/x86/pngdsp_init.c
+3
-3
rv34dsp.asm
libavcodec/x86/rv34dsp.asm
+3
-3
rv34dsp_init.c
libavcodec/x86/rv34dsp_init.c
+5
-5
rv40dsp.asm
libavcodec/x86/rv40dsp.asm
+2
-2
rv40dsp_init.c
libavcodec/x86/rv40dsp_init.c
+15
-15
vc1dsp_init.c
libavcodec/x86/vc1dsp_init.c
+3
-3
vp3dsp.asm
libavcodec/x86/vp3dsp.asm
+2
-2
vp3dsp_init.c
libavcodec/x86/vp3dsp_init.c
+9
-7
vp8dsp.asm
libavcodec/x86/vp8dsp.asm
+15
-15
vp8dsp_init.c
libavcodec/x86/vp8dsp_init.c
+43
-43
x86util.asm
libavutil/x86/x86util.asm
+1
-1
output.asm
libswscale/x86/output.asm
+2
-2
swscale.c
libswscale/x86/swscale.c
+4
-4
No files found.
libavcodec/x86/ac3dsp.asm
View file @
26301caa
...
...
@@ -97,7 +97,7 @@ AC3_EXPONENT_MIN
por
%1
,
%2
pshuflw
%2
,
%1
,
q0001
por
%1
,
%2
%elif
cpuflag
(
mmx
2
)
%elif
cpuflag
(
mmx
ext
)
pshufw
%2
,
%1
,
q0032
por
%1
,
%2
pshufw
%2
,
%1
,
q0001
...
...
@@ -153,7 +153,7 @@ cglobal ac3_max_msb_abs_int16, 2,2,5, src, len
INIT_MMX
mmx
%define
ABS2
ABS2_MMX
AC3_MAX_MSB_ABS_INT16
or_abs
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
%define
ABS2
ABS2_MMXEXT
AC3_MAX_MSB_ABS_INT16
min_max
INIT_XMM
sse2
...
...
libavcodec/x86/ac3dsp_init.c
View file @
26301caa
...
...
@@ -31,7 +31,7 @@ extern void ff_ac3_exponent_min_mmxext(uint8_t *exp, int num_reuse_blocks, int n
extern
void
ff_ac3_exponent_min_sse2
(
uint8_t
*
exp
,
int
num_reuse_blocks
,
int
nb_coefs
);
extern
int
ff_ac3_max_msb_abs_int16_mmx
(
const
int16_t
*
src
,
int
len
);
extern
int
ff_ac3_max_msb_abs_int16_mmx
2
(
const
int16_t
*
src
,
int
len
);
extern
int
ff_ac3_max_msb_abs_int16_mmx
ext
(
const
int16_t
*
src
,
int
len
);
extern
int
ff_ac3_max_msb_abs_int16_sse2
(
const
int16_t
*
src
,
int
len
);
extern
int
ff_ac3_max_msb_abs_int16_ssse3
(
const
int16_t
*
src
,
int
len
);
...
...
@@ -182,7 +182,7 @@ av_cold void ff_ac3dsp_init_x86(AC3DSPContext *c, int bit_exact)
}
if
(
EXTERNAL_MMXEXT
(
mm_flags
))
{
c
->
ac3_exponent_min
=
ff_ac3_exponent_min_mmxext
;
c
->
ac3_max_msb_abs_int16
=
ff_ac3_max_msb_abs_int16_mmx
2
;
c
->
ac3_max_msb_abs_int16
=
ff_ac3_max_msb_abs_int16_mmx
ext
;
}
if
(
EXTERNAL_SSE
(
mm_flags
))
{
c
->
float_to_fixed24
=
ff_float_to_fixed24_sse
;
...
...
libavcodec/x86/dsputil.asm
View file @
26301caa
...
...
@@ -108,7 +108,7 @@ cglobal scalarproduct_and_madd_int16_%1, 4,4,8, v1, v2, v3, order, mul
%endmacro
INIT_MMX
SCALARPRODUCT
mmx
2
SCALARPRODUCT
mmx
ext
INIT_XMM
SCALARPRODUCT
sse2
...
...
@@ -327,8 +327,8 @@ APPLY_WINDOW_INT16 ssse3_atom, 0, 1
APPLY_WINDOW_INT16
ssse3
,
0
,
1
; void add_hfyu_median_prediction_mmx
2
(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top)
cglobal
add_hfyu_median_prediction_mmx
2
,
6
,
6
,
0
,
dst
,
top
,
diff
,
w
,
left
,
left_top
; void add_hfyu_median_prediction_mmx
ext
(uint8_t *dst, const uint8_t *top, const uint8_t *diff, int w, int *left, int *left_top)
cglobal
add_hfyu_median_prediction_mmx
ext
,
6
,
6
,
0
,
dst
,
top
,
diff
,
w
,
left
,
left_top
movq
mm0
,
[topq]
movq
mm2
,
mm0
movd
mm4
,
[
left_topq
]
...
...
@@ -804,7 +804,7 @@ ALIGN 128
mov
valh
,
vall
%if
%1
>=
8
movd
mm0
,
vald
%if
cpuflag
(
mmx
2
)
%if
cpuflag
(
mmx
ext
)
pshufw
mm0
,
mm0
,
0
%else
; mmx
punpcklwd
mm0
,
mm0
...
...
libavcodec/x86/dsputil_mmx.c
View file @
26301caa
...
...
@@ -2045,21 +2045,21 @@ PREFETCH(prefetch_3dnow, prefetch)
void
ff_put_h264_chroma_mc8_rnd_mmx
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_avg_h264_chroma_mc8_rnd_mmx
2
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_avg_h264_chroma_mc8_rnd_mmx
ext
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_avg_h264_chroma_mc8_rnd_3dnow
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_put_h264_chroma_mc4_mmx
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_avg_h264_chroma_mc4_mmx
2
(
uint8_t
*
dst
,
uint8_t
*
src
,
void
ff_avg_h264_chroma_mc4_mmx
ext
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_avg_h264_chroma_mc4_3dnow
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_put_h264_chroma_mc2_mmx
2
(
uint8_t
*
dst
,
uint8_t
*
src
,
void
ff_put_h264_chroma_mc2_mmx
ext
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_avg_h264_chroma_mc2_mmx
2
(
uint8_t
*
dst
,
uint8_t
*
src
,
void
ff_avg_h264_chroma_mc2_mmx
ext
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_put_h264_chroma_mc8_rnd_ssse3
(
uint8_t
*
dst
,
uint8_t
*
src
,
...
...
@@ -2077,10 +2077,10 @@ void ff_ ## OP ## _h264_chroma_mc ## NUM ## _ ## DEPTH ## _ ## OPT \
(uint8_t *dst, uint8_t *src, \
int stride, int h, int x, int y);
CHROMA_MC
(
put
,
2
,
10
,
mmx
2
)
CHROMA_MC
(
avg
,
2
,
10
,
mmx
2
)
CHROMA_MC
(
put
,
4
,
10
,
mmx
2
)
CHROMA_MC
(
avg
,
4
,
10
,
mmx
2
)
CHROMA_MC
(
put
,
2
,
10
,
mmx
ext
)
CHROMA_MC
(
avg
,
2
,
10
,
mmx
ext
)
CHROMA_MC
(
put
,
4
,
10
,
mmx
ext
)
CHROMA_MC
(
avg
,
4
,
10
,
mmx
ext
)
CHROMA_MC
(
put
,
8
,
10
,
sse2
)
CHROMA_MC
(
avg
,
8
,
10
,
sse2
)
CHROMA_MC
(
put
,
8
,
10
,
avx
)
...
...
@@ -2283,13 +2283,13 @@ static void vector_clipf_sse(float *dst, const float *src,
#endif
/* HAVE_INLINE_ASM */
int32_t
ff_scalarproduct_int16_mmx
2
(
const
int16_t
*
v1
,
const
int16_t
*
v2
,
int
order
);
int32_t
ff_scalarproduct_int16_mmx
ext
(
const
int16_t
*
v1
,
const
int16_t
*
v2
,
int
order
);
int32_t
ff_scalarproduct_int16_sse2
(
const
int16_t
*
v1
,
const
int16_t
*
v2
,
int
order
);
int32_t
ff_scalarproduct_and_madd_int16_mmx
2
(
int16_t
*
v1
,
const
int16_t
*
v2
,
const
int16_t
*
v3
,
int
order
,
int
mul
);
int32_t
ff_scalarproduct_and_madd_int16_mmx
ext
(
int16_t
*
v1
,
const
int16_t
*
v2
,
const
int16_t
*
v3
,
int
order
,
int
mul
);
int32_t
ff_scalarproduct_and_madd_int16_sse2
(
int16_t
*
v1
,
const
int16_t
*
v2
,
const
int16_t
*
v3
,
int
order
,
int
mul
);
...
...
@@ -2313,9 +2313,9 @@ void ff_apply_window_int16_ssse3_atom(int16_t *output, const int16_t *input,
void
ff_bswap32_buf_ssse3
(
uint32_t
*
dst
,
const
uint32_t
*
src
,
int
w
);
void
ff_bswap32_buf_sse2
(
uint32_t
*
dst
,
const
uint32_t
*
src
,
int
w
);
void
ff_add_hfyu_median_prediction_mmx
2
(
uint8_t
*
dst
,
const
uint8_t
*
top
,
const
uint8_t
*
diff
,
int
w
,
int
*
left
,
int
*
left_top
);
void
ff_add_hfyu_median_prediction_mmx
ext
(
uint8_t
*
dst
,
const
uint8_t
*
top
,
const
uint8_t
*
diff
,
int
w
,
int
*
left
,
int
*
left_top
);
int
ff_add_hfyu_left_prediction_ssse3
(
uint8_t
*
dst
,
const
uint8_t
*
src
,
int
w
,
int
left
);
int
ff_add_hfyu_left_prediction_sse4
(
uint8_t
*
dst
,
const
uint8_t
*
src
,
...
...
@@ -2548,24 +2548,24 @@ static void dsputil_init_mmxext(DSPContext *c, AVCodecContext *avctx,
#if HAVE_YASM
if
(
!
high_bit_depth
&&
CONFIG_H264CHROMA
)
{
c
->
avg_h264_chroma_pixels_tab
[
0
]
=
ff_avg_h264_chroma_mc8_rnd_mmx
2
;
c
->
avg_h264_chroma_pixels_tab
[
1
]
=
ff_avg_h264_chroma_mc4_mmx
2
;
c
->
avg_h264_chroma_pixels_tab
[
2
]
=
ff_avg_h264_chroma_mc2_mmx
2
;
c
->
put_h264_chroma_pixels_tab
[
2
]
=
ff_put_h264_chroma_mc2_mmx
2
;
c
->
avg_h264_chroma_pixels_tab
[
0
]
=
ff_avg_h264_chroma_mc8_rnd_mmx
ext
;
c
->
avg_h264_chroma_pixels_tab
[
1
]
=
ff_avg_h264_chroma_mc4_mmx
ext
;
c
->
avg_h264_chroma_pixels_tab
[
2
]
=
ff_avg_h264_chroma_mc2_mmx
ext
;
c
->
put_h264_chroma_pixels_tab
[
2
]
=
ff_put_h264_chroma_mc2_mmx
ext
;
}
if
(
bit_depth
==
10
&&
CONFIG_H264CHROMA
)
{
c
->
put_h264_chroma_pixels_tab
[
2
]
=
ff_put_h264_chroma_mc2_10_mmx
2
;
c
->
avg_h264_chroma_pixels_tab
[
2
]
=
ff_avg_h264_chroma_mc2_10_mmx
2
;
c
->
put_h264_chroma_pixels_tab
[
1
]
=
ff_put_h264_chroma_mc4_10_mmx
2
;
c
->
avg_h264_chroma_pixels_tab
[
1
]
=
ff_avg_h264_chroma_mc4_10_mmx
2
;
c
->
put_h264_chroma_pixels_tab
[
2
]
=
ff_put_h264_chroma_mc2_10_mmx
ext
;
c
->
avg_h264_chroma_pixels_tab
[
2
]
=
ff_avg_h264_chroma_mc2_10_mmx
ext
;
c
->
put_h264_chroma_pixels_tab
[
1
]
=
ff_put_h264_chroma_mc4_10_mmx
ext
;
c
->
avg_h264_chroma_pixels_tab
[
1
]
=
ff_avg_h264_chroma_mc4_10_mmx
ext
;
}
/* slower than cmov version on AMD */
if
(
!
(
mm_flags
&
AV_CPU_FLAG_3DNOW
))
c
->
add_hfyu_median_prediction
=
ff_add_hfyu_median_prediction_mmx
2
;
c
->
add_hfyu_median_prediction
=
ff_add_hfyu_median_prediction_mmx
ext
;
c
->
scalarproduct_int16
=
ff_scalarproduct_int16_mmx
2
;
c
->
scalarproduct_and_madd_int16
=
ff_scalarproduct_and_madd_int16_mmx
2
;
c
->
scalarproduct_int16
=
ff_scalarproduct_int16_mmx
ext
;
c
->
scalarproduct_and_madd_int16
=
ff_scalarproduct_and_madd_int16_mmx
ext
;
if
(
avctx
->
flags
&
CODEC_FLAG_BITEXACT
)
{
c
->
apply_window_int16
=
ff_apply_window_int16_mmxext_ba
;
...
...
libavcodec/x86/dsputilenc.asm
View file @
26301caa
...
...
@@ -265,7 +265,7 @@ HADAMARD8_DIFF_MMX mmx
%define
ABS1
ABS1_MMXEXT
%define
HSUM
HSUM_MMXEXT
HADAMARD8_DIFF_MMX
mmx
2
HADAMARD8_DIFF_MMX
mmx
ext
INIT_XMM
%define
ABS2
ABS2_MMXEXT
...
...
libavcodec/x86/dsputilenc_mmx.c
View file @
26301caa
...
...
@@ -1104,7 +1104,7 @@ int ff_hadamard8_diff16_##cpu(void *s, uint8_t *src1, uint8_t *src2, \
int stride, int h);
hadamard_func
(
mmx
)
hadamard_func
(
mmx
2
)
hadamard_func
(
mmx
ext
)
hadamard_func
(
sse2
)
hadamard_func
(
ssse3
)
...
...
@@ -1195,8 +1195,8 @@ void ff_dsputilenc_init_mmx(DSPContext* c, AVCodecContext *avctx)
c
->
hadamard8_diff
[
1
]
=
ff_hadamard8_diff_mmx
;
if
(
EXTERNAL_MMXEXT
(
mm_flags
))
{
c
->
hadamard8_diff
[
0
]
=
ff_hadamard8_diff16_mmx
2
;
c
->
hadamard8_diff
[
1
]
=
ff_hadamard8_diff_mmx
2
;
c
->
hadamard8_diff
[
0
]
=
ff_hadamard8_diff16_mmx
ext
;
c
->
hadamard8_diff
[
1
]
=
ff_hadamard8_diff_mmx
ext
;
}
if
(
EXTERNAL_SSE2
(
mm_flags
))
{
...
...
libavcodec/x86/h264_chromamc.asm
View file @
26301caa
...
...
@@ -442,17 +442,17 @@ chroma_mc8_mmx_func put, vc1, nornd_mmx
chroma_mc8_mmx_func
put
,
rv40
,
mmx
chroma_mc4_mmx_func
put
,
h264
,
mmx
chroma_mc4_mmx_func
put
,
rv40
,
mmx
chroma_mc2_mmx_func
put
,
h264
,
mmx
2
chroma_mc2_mmx_func
put
,
h264
,
mmx
ext
%define
CHROMAMC_AVG
DIRECT_AVG
%define
CHROMAMC_AVG4
COPY_AVG
%define
PAVG
pavgb
chroma_mc8_mmx_func
avg
,
h264
,
rnd_mmx
2
chroma_mc8_mmx_func
avg
,
vc1
,
nornd_mmx
2
chroma_mc8_mmx_func
avg
,
rv40
,
mmx
2
chroma_mc4_mmx_func
avg
,
h264
,
mmx
2
chroma_mc4_mmx_func
avg
,
rv40
,
mmx
2
chroma_mc2_mmx_func
avg
,
h264
,
mmx
2
chroma_mc8_mmx_func
avg
,
h264
,
rnd_mmx
ext
chroma_mc8_mmx_func
avg
,
vc1
,
nornd_mmx
ext
chroma_mc8_mmx_func
avg
,
rv40
,
mmx
ext
chroma_mc4_mmx_func
avg
,
h264
,
mmx
ext
chroma_mc4_mmx_func
avg
,
rv40
,
mmx
ext
chroma_mc2_mmx_func
avg
,
h264
,
mmx
ext
%define
PAVG
pavgusb
chroma_mc8_mmx_func
avg
,
h264
,
rnd_3dnow
...
...
libavcodec/x86/h264_chromamc_10bit.asm
View file @
26301caa
...
...
@@ -253,7 +253,7 @@ INIT_XMM sse2
CHROMA_MC8
put
INIT_XMM
avx
CHROMA_MC8
put
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
CHROMA_MC4
put
CHROMA_MC2
put
...
...
@@ -262,6 +262,6 @@ INIT_XMM sse2
CHROMA_MC8
avg
INIT_XMM
avx
CHROMA_MC8
avg
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
CHROMA_MC4
avg
CHROMA_MC2
avg
libavcodec/x86/h264_deblock.asm
View file @
26301caa
...
...
@@ -504,7 +504,7 @@ cglobal deblock_h_luma_8, 0,5
RET
%endmacro
; DEBLOCK_LUMA
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
DEBLOCK_LUMA
v8
,
8
INIT_XMM
sse2
DEBLOCK_LUMA
v
,
16
...
...
@@ -783,11 +783,11 @@ DEBLOCK_LUMA_INTRA v
INIT_XMM
avx
DEBLOCK_LUMA_INTRA
v
%if
ARCH_X86_64
==
0
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
DEBLOCK_LUMA_INTRA
v8
%endif
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
%macro
CHROMA_V_START
0
dec
r2d
; alpha-1
...
...
@@ -818,7 +818,7 @@ cglobal deblock_v_chroma_8, 5,6
movq
m1
,
[
t5
+
r1
]
movq
m2
,
[r0]
movq
m3
,
[
r0
+
r1
]
call
ff_chroma_inter_body_mmx
2
call
ff_chroma_inter_body_mmx
ext
movq
[
t5
+
r1
]
,
m1
movq
[r0],
m2
RET
...
...
@@ -842,7 +842,7 @@ cglobal deblock_h_chroma_8, 5,7
TRANSPOSE4x8_LOAD
bw
,
wd
,
dq
,
PASS8ROWS
(
t5
,
r0
,
r1
,
t6
)
movq
buf0
,
m0
movq
buf1
,
m3
call
ff_chroma_inter_body_mmx
2
call
ff_chroma_inter_body_mmx
ext
movq
m0
,
buf0
movq
m3
,
buf1
TRANSPOSE8x4B_STORE
PASS8ROWS
(
t5
,
r0
,
r1
,
t6
)
...
...
@@ -852,7 +852,7 @@ cglobal deblock_h_chroma_8, 5,7
RET
ALIGN
16
ff_chroma_inter_body_mmx
2
:
ff_chroma_inter_body_mmx
ext
:
LOAD_MASK
r2d
,
r3d
movd
m6
,
[r4]
; tc0
punpcklbw
m6
,
m6
...
...
@@ -885,7 +885,7 @@ cglobal deblock_v_chroma_intra_8, 4,5
movq
m1
,
[
t5
+
r1
]
movq
m2
,
[r0]
movq
m3
,
[
r0
+
r1
]
call
ff_chroma_intra_body_mmx
2
call
ff_chroma_intra_body_mmx
ext
movq
[
t5
+
r1
]
,
m1
movq
[r0],
m2
RET
...
...
@@ -896,12 +896,12 @@ cglobal deblock_v_chroma_intra_8, 4,5
cglobal
deblock_h_chroma_intra_8
,
4
,
6
CHROMA_H_START
TRANSPOSE4x8_LOAD
bw
,
wd
,
dq
,
PASS8ROWS
(
t5
,
r0
,
r1
,
t6
)
call
ff_chroma_intra_body_mmx
2
call
ff_chroma_intra_body_mmx
ext
TRANSPOSE8x4B_STORE
PASS8ROWS
(
t5
,
r0
,
r1
,
t6
)
RET
ALIGN
16
ff_chroma_intra_body_mmx
2
:
ff_chroma_intra_body_mmx
ext
:
LOAD_MASK
r2d
,
r3d
movq
m5
,
m1
movq
m6
,
m2
...
...
@@ -1025,7 +1025,7 @@ ff_chroma_intra_body_mmx2:
jl
%%
.
b_idx_loop
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
cglobal
h264_loop_filter_strength
,
9
,
9
,
0
,
bs
,
nnz
,
ref
,
mv
,
bidir
,
edges
,
\
step
,
mask_mv0
,
mask_mv1
,
field
%define
b_idxq
bidirq
...
...
libavcodec/x86/h264_deblock_10bit.asm
View file @
26301caa
...
...
@@ -791,7 +791,7 @@ cglobal deblock_h_luma_intra_10, 4,7,8*(mmsize/16)
%endmacro
%if
ARCH_X86_64
==
0
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
DEBLOCK_LUMA
DEBLOCK_LUMA_INTRA
INIT_XMM
sse2
...
...
@@ -906,7 +906,7 @@ cglobal deblock_v_chroma_intra_10, 4,6-(mmsize/16),8*(mmsize/16)
%endmacro
%if
ARCH_X86_64
==
0
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
DEBLOCK_CHROMA
%endif
INIT_XMM
sse2
...
...
libavcodec/x86/h264_idct.asm
View file @
26301caa
...
...
@@ -286,14 +286,14 @@ cglobal h264_idct8_add_8_sse2, 3, 4, 10
%endmacro
INIT_MMX
; ff_h264_idct_dc_add_mmx
2
(uint8_t *dst, int16_t *block, int stride)
cglobal
h264_idct_dc_add_8_mmx
2
,
3
,
3
,
0
; ff_h264_idct_dc_add_mmx
ext
(uint8_t *dst, int16_t *block, int stride)
cglobal
h264_idct_dc_add_8_mmx
ext
,
3
,
3
,
0
DC_ADD_MMXEXT_INIT
r1
,
r2
DC_ADD_MMXEXT_OP
movh
,
r0
,
r2
,
r1
RET
; ff_h264_idct8_dc_add_mmx
2
(uint8_t *dst, int16_t *block, int stride)
cglobal
h264_idct8_dc_add_8_mmx
2
,
3
,
3
,
0
; ff_h264_idct8_dc_add_mmx
ext
(uint8_t *dst, int16_t *block, int stride)
cglobal
h264_idct8_dc_add_8_mmx
ext
,
3
,
3
,
0
DC_ADD_MMXEXT_INIT
r1
,
r2
DC_ADD_MMXEXT_OP
mova
,
r0
,
r2
,
r1
lea
r0
,
[
r0
+
r2
*
4
]
...
...
@@ -354,9 +354,9 @@ cglobal h264_idct8_add4_8_mmx, 5, 7 + npicregs, 0, dst, block_offset, block, str
ADD
rsp
,
pad
RET
; ff_h264_idct_add16_mmx
2
(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal
h264_idct_add16_8_mmx
2
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
; ff_h264_idct_add16_mmx
ext
(uint8_t *dst, const int *block_offset,
;
DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal
h264_idct_add16_8_mmx
ext
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
xor
r5
,
r5
%ifdef
PIC
lea
picregq
,
[
scan8_mem
]
...
...
@@ -421,9 +421,10 @@ cglobal h264_idct_add16intra_8_mmx, 5, 7 + npicregs, 0, dst, block_offset, block
jl
.
nextblock
REP_RET
; ff_h264_idct_add16intra_mmx2(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal
h264_idct_add16intra_8_mmx2
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
; ff_h264_idct_add16intra_mmxext(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride,
; const uint8_t nnzc[6*8])
cglobal
h264_idct_add16intra_8_mmxext
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
xor
r5
,
r5
%ifdef
PIC
lea
picregq
,
[
scan8_mem
]
...
...
@@ -463,9 +464,10 @@ cglobal h264_idct_add16intra_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, blo
jl
.
nextblock
REP_RET
; ff_h264_idct8_add4_mmx2(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal
h264_idct8_add4_8_mmx2
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
; ff_h264_idct8_add4_mmxext(uint8_t *dst, const int *block_offset,
; DCTELEM *block, int stride,
; const uint8_t nnzc[6*8])
cglobal
h264_idct8_add4_8_mmxext
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
%
assign
pad
128
+
4
-
(
stack_offset
&
7
)
SUB
rsp
,
pad
...
...
@@ -620,7 +622,7 @@ cglobal h264_idct_add8_8_mmx, 5, 8 + npicregs, 0, dst1, block_offset, block, str
call
h264_idct_add8_mmx_plane
RET
h264_idct_add8_mmx
2
_plane
:
h264_idct_add8_mmx
ext
_plane
:
.
nextblock
:
movzx
r6
,
byte
[
scan8
+
r5
]
movzx
r6
,
byte
[
r4
+
r6
]
...
...
@@ -661,9 +663,9 @@ h264_idct_add8_mmx2_plane:
jnz
.
nextblock
rep
ret
; ff_h264_idct_add8_mmx
2
(uint8_t **dest, const int *block_offset,
; DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal
h264_idct_add8_8_mmx
2
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
; ff_h264_idct_add8_mmx
ext
(uint8_t **dest, const int *block_offset,
;
DCTELEM *block, int stride, const uint8_t nnzc[6*8])
cglobal
h264_idct_add8_8_mmx
ext
,
5
,
8
+
npicregs
,
0
,
dst1
,
block_offset
,
block
,
stride
,
nnzc
,
cntr
,
coeff
,
dst2
,
picreg
mov
r5
,
16
add
r2
,
512
%if
ARCH_X86_64
...
...
@@ -672,7 +674,7 @@ cglobal h264_idct_add8_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, block, st
%ifdef
PIC
lea
picregq
,
[
scan8_mem
]
%endif
call
h264_idct_add8_mmx
2
_plane
call
h264_idct_add8_mmx
ext
_plane
mov
r5
,
32
add
r2
,
384
%if
ARCH_X86_64
...
...
@@ -680,12 +682,12 @@ cglobal h264_idct_add8_8_mmx2, 5, 8 + npicregs, 0, dst1, block_offset, block, st
%else
add
r0mp
,
gprsize
%endif
call
h264_idct_add8_mmx
2
_plane
call
h264_idct_add8_mmx
ext
_plane
RET
INIT_MMX
; r0 = uint8_t *dst, r2 = int16_t *block, r3 = int stride, r6=clobbered
h264_idct_dc_add8_mmx
2
:
h264_idct_dc_add8_mmx
ext
:
movd
m0
,
[
r2
]
; 0 0 X D
punpcklwd
m0
,
[
r2
+
32
]
; x X d D
paddsw
m0
,
[
pw_32
]
...
...
@@ -779,7 +781,7 @@ cglobal h264_idct_add16_8_sse2, 5, 5 + ARCH_X86_64, 8
%else
add
r0
,
r0m
%endif
call
h264_idct_dc_add8_mmx
2
call
h264_idct_dc_add8_mmx
ext
.
cycle%1
end
:
%if
%1
<
7
add
r2
,
64
...
...
@@ -828,7 +830,7 @@ cglobal h264_idct_add16intra_8_sse2, 5, 7 + ARCH_X86_64, 8
mov
r0
,
[r0]
add
r0
,
dword
[
r1
+
(
%1
&
1
)
*
8
+
64
*
(
1
+
(
%1
>>
1
))
]
%endif
call
h264_idct_dc_add8_mmx
2
call
h264_idct_dc_add8_mmx
ext
.
cycle%1
end
:
%if
%1
==
1
add
r2
,
384
+
64
...
...
libavcodec/x86/h264_idct_10bit.asm
View file @
26301caa
...
...
@@ -178,7 +178,7 @@ IDCT_ADD16_10
mova
[
%1
+
%3
]
,
m4
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
cglobal
h264_idct_dc_add_10
,
3
,
3
movd
m0
,
[r1]
paddd
m0
,
[
pd_32
]
...
...
libavcodec/x86/h264_intrapred.asm
View file @
26301caa
...
...
@@ -120,7 +120,7 @@ cglobal pred16x16_horizontal_8, 2,3
INIT_MMX
mmx
PRED16x16_H
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
PRED16x16_H
INIT_XMM
ssse3
PRED16x16_H
...
...
@@ -180,7 +180,7 @@ cglobal pred16x16_dc_8, 2,7
REP_RET
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
PRED16x16_DC
INIT_XMM
sse2
PRED16x16_DC
...
...
@@ -229,7 +229,7 @@ cglobal pred16x16_tm_vp8_8, 2,5
INIT_MMX
mmx
PRED16x16_TM
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
PRED16x16_TM
INIT_XMM
sse2
...
...
@@ -309,14 +309,14 @@ cglobal pred16x16_plane_%1_8, 2,9,7
movhlps
m1
,
m0
%endif
paddw
m0
,
m1
%if
cpuflag
(
mmx
2
)
%if
cpuflag
(
mmx
ext
)
PSHUFLW
m1
,
m0
,
0xE
%elif
cpuflag
(
mmx
)
mova
m1
,
m0
psrlq
m1
,
32
%endif
paddw
m0
,
m1
%if
cpuflag
(
mmx
2
)
%if
cpuflag
(
mmx
ext
)
PSHUFLW
m1
,
m0
,
0x1
%elif
cpuflag
(
mmx
)
mova
m1
,
m0
...
...
@@ -536,7 +536,7 @@ INIT_MMX mmx
H264_PRED16x16_PLANE
h264
H264_PRED16x16_PLANE
rv40
H264_PRED16x16_PLANE
svq3
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
H264_PRED16x16_PLANE
h264
H264_PRED16x16_PLANE
rv40
H264_PRED16x16_PLANE
svq3
...
...
@@ -582,7 +582,7 @@ cglobal pred8x8_plane_8, 2,9,7
paddw
m0
,
m1
%if
notcpuflag
(
ssse3
)
%if
cpuflag
(
mmx
2
)
%if
cpuflag
(
mmx
ext
)
PSHUFLW
m1
,
m0
,
0xE
%elif
cpuflag
(
mmx
)
mova
m1
,
m0
...
...
@@ -591,7 +591,7 @@ cglobal pred8x8_plane_8, 2,9,7
paddw
m0
,
m1
%endif
; !ssse3
%if
cpuflag
(
mmx
2
)
%if
cpuflag
(
mmx
ext
)
PSHUFLW
m1
,
m0
,
0x1
%elif
cpuflag
(
mmx
)
mova
m1
,
m0
...
...
@@ -716,7 +716,7 @@ ALIGN 16
INIT_MMX
mmx
H264_PRED8x8_PLANE
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
H264_PRED8x8_PLANE
INIT_XMM
sse2
H264_PRED8x8_PLANE
...
...
@@ -763,7 +763,7 @@ cglobal pred8x8_horizontal_8, 2,3
INIT_MMX
mmx
PRED8x8_H
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
PRED8x8_H
INIT_MMX
ssse3
PRED8x8_H
...
...
@@ -941,7 +941,7 @@ cglobal pred8x8_tm_vp8_8, 2,6
INIT_MMX
mmx
PRED8x8_TM
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
PRED8x8_TM
INIT_XMM
sse2
...
...
@@ -2442,7 +2442,7 @@ cglobal pred4x4_tm_vp8_8, 3,6
sub
r3d
,
r4d
movd
mm2
,
r1d
movd
mm4
,
r3d
%if
cpuflag
(
mmx
2
)
%if
cpuflag
(
mmx
ext
)
pshufw
mm2
,
mm2
,
0
pshufw
mm4
,
mm4
,
0
%else
...
...
@@ -2465,7 +2465,7 @@ cglobal pred4x4_tm_vp8_8, 3,6
INIT_MMX
mmx
PRED4x4_TM
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
PRED4x4_TM
INIT_XMM
ssse3
...
...
libavcodec/x86/h264_intrapred_10bit.asm
View file @
26301caa
...
...
@@ -182,7 +182,7 @@ PRED4x4_HD
HADDD
%1
,
%2
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
cglobal
pred4x4_dc_10
,
3
,
3
sub
r0
,
r2
lea
r1
,
[
r0
+
r2
*
2
]
...
...
@@ -261,7 +261,7 @@ PRED4x4_VL
;-----------------------------------------------------------------------------
; void pred4x4_horizontal_up(pixel *src, const pixel *topright, int stride)
;-----------------------------------------------------------------------------
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
cglobal
pred4x4_horizontal_up_10
,
3
,
3
sub
r0
,
r2
lea
r1
,
[
r0
+
r2
*
2
]
...
...
@@ -410,7 +410,7 @@ cglobal pred8x8_dc_10, 2, 6
RET
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
PRED8x8_DC
pshufw
INIT_XMM
sse2
PRED8x8_DC
pshuflw
...
...
@@ -524,7 +524,7 @@ cglobal pred8x8l_128_dc_10, 4, 4
RET
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
PRED8x8L_128_DC
INIT_XMM
sse2
PRED8x8L_128_DC
...
...
@@ -1007,7 +1007,7 @@ cglobal pred16x16_vertical_10, 2, 3
REP_RET
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
PRED16x16_VERTICAL
INIT_XMM
sse2
PRED16x16_VERTICAL
...
...
@@ -1031,7 +1031,7 @@ cglobal pred16x16_horizontal_10, 2, 3
REP_RET
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
PRED16x16_HORIZONTAL
INIT_XMM
sse2
PRED16x16_HORIZONTAL
...
...
@@ -1077,7 +1077,7 @@ cglobal pred16x16_dc_10, 2, 6
REP_RET
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
PRED16x16_DC
INIT_XMM
sse2
PRED16x16_DC
...
...
@@ -1109,7 +1109,7 @@ cglobal pred16x16_top_dc_10, 2, 3
REP_RET
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
PRED16x16_TOP_DC
INIT_XMM
sse2
PRED16x16_TOP_DC
...
...
@@ -1146,7 +1146,7 @@ cglobal pred16x16_left_dc_10, 2, 6
REP_RET
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
PRED16x16_LEFT_DC
INIT_XMM
sse2
PRED16x16_LEFT_DC
...
...
@@ -1167,7 +1167,7 @@ cglobal pred16x16_128_dc_10, 2,3
REP_RET
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
PRED16x16_128_DC
INIT_XMM
sse2
PRED16x16_128_DC
libavcodec/x86/h264_intrapred_init.c
View file @
26301caa
...
...
@@ -27,7 +27,7 @@ void ff_pred4x4_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \
const uint8_t *topright, \
ptrdiff_t stride);
PRED4x4
(
dc
,
10
,
mmx
2
)
PRED4x4
(
dc
,
10
,
mmx
ext
)
PRED4x4
(
down_left
,
10
,
sse2
)
PRED4x4
(
down_left
,
10
,
avx
)
PRED4x4
(
down_right
,
10
,
sse2
)
...
...
@@ -38,7 +38,7 @@ PRED4x4(vertical_left, 10, avx)
PRED4x4
(
vertical_right
,
10
,
sse2
)
PRED4x4
(
vertical_right
,
10
,
ssse3
)
PRED4x4
(
vertical_right
,
10
,
avx
)
PRED4x4
(
horizontal_up
,
10
,
mmx
2
)
PRED4x4
(
horizontal_up
,
10
,
mmx
ext
)
PRED4x4
(
horizontal_down
,
10
,
sse2
)
PRED4x4
(
horizontal_down
,
10
,
ssse3
)
PRED4x4
(
horizontal_down
,
10
,
avx
)
...
...
@@ -47,7 +47,7 @@ PRED4x4(horizontal_down, 10, avx)
void ff_pred8x8_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \
ptrdiff_t stride);
PRED8x8
(
dc
,
10
,
mmx
2
)
PRED8x8
(
dc
,
10
,
mmx
ext
)
PRED8x8
(
dc
,
10
,
sse2
)
PRED8x8
(
top_dc
,
10
,
sse2
)
PRED8x8
(
plane
,
10
,
sse2
)
...
...
@@ -62,7 +62,7 @@ void ff_pred8x8l_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \
PRED8x8L
(
dc
,
10
,
sse2
)
PRED8x8L
(
dc
,
10
,
avx
)
PRED8x8L
(
128
_dc
,
10
,
mmx
2
)
PRED8x8L
(
128
_dc
,
10
,
mmx
ext
)
PRED8x8L
(
128
_dc
,
10
,
sse2
)
PRED8x8L
(
top_dc
,
10
,
sse2
)
PRED8x8L
(
top_dc
,
10
,
avx
)
...
...
@@ -88,42 +88,42 @@ PRED8x8L(horizontal_up, 10, avx)
void ff_pred16x16_ ## TYPE ## _ ## DEPTH ## _ ## OPT (uint8_t *src, \
ptrdiff_t stride);
PRED16x16
(
dc
,
10
,
mmx
2
)
PRED16x16
(
dc
,
10
,
mmx
ext
)
PRED16x16
(
dc
,
10
,
sse2
)
PRED16x16
(
top_dc
,
10
,
mmx
2
)
PRED16x16
(
top_dc
,
10
,
mmx
ext
)
PRED16x16
(
top_dc
,
10
,
sse2
)
PRED16x16
(
128
_dc
,
10
,
mmx
2
)
PRED16x16
(
128
_dc
,
10
,
mmx
ext
)
PRED16x16
(
128
_dc
,
10
,
sse2
)
PRED16x16
(
left_dc
,
10
,
mmx
2
)
PRED16x16
(
left_dc
,
10
,
mmx
ext
)
PRED16x16
(
left_dc
,
10
,
sse2
)
PRED16x16
(
vertical
,
10
,
mmx
2
)
PRED16x16
(
vertical
,
10
,
mmx
ext
)
PRED16x16
(
vertical
,
10
,
sse2
)
PRED16x16
(
horizontal
,
10
,
mmx
2
)
PRED16x16
(
horizontal
,
10
,
mmx
ext
)
PRED16x16
(
horizontal
,
10
,
sse2
)
/* 8-bit versions */
PRED16x16
(
vertical
,
8
,
mmx
)
PRED16x16
(
vertical
,
8
,
sse
)
PRED16x16
(
horizontal
,
8
,
mmx
)
PRED16x16
(
horizontal
,
8
,
mmx
2
)
PRED16x16
(
horizontal
,
8
,
mmx
ext
)
PRED16x16
(
horizontal
,
8
,
ssse3
)
PRED16x16
(
dc
,
8
,
mmx
2
)
PRED16x16
(
dc
,
8
,
mmx
ext
)
PRED16x16
(
dc
,
8
,
sse2
)
PRED16x16
(
dc
,
8
,
ssse3
)
PRED16x16
(
plane_h264
,
8
,
mmx
)
PRED16x16
(
plane_h264
,
8
,
mmx
2
)
PRED16x16
(
plane_h264
,
8
,
mmx
ext
)
PRED16x16
(
plane_h264
,
8
,
sse2
)
PRED16x16
(
plane_h264
,
8
,
ssse3
)
PRED16x16
(
plane_rv40
,
8
,
mmx
)
PRED16x16
(
plane_rv40
,
8
,
mmx
2
)
PRED16x16
(
plane_rv40
,
8
,
mmx
ext
)
PRED16x16
(
plane_rv40
,
8
,
sse2
)
PRED16x16
(
plane_rv40
,
8
,
ssse3
)
PRED16x16
(
plane_svq3
,
8
,
mmx
)
PRED16x16
(
plane_svq3
,
8
,
mmx
2
)
PRED16x16
(
plane_svq3
,
8
,
mmx
ext
)
PRED16x16
(
plane_svq3
,
8
,
sse2
)
PRED16x16
(
plane_svq3
,
8
,
ssse3
)
PRED16x16
(
tm_vp8
,
8
,
mmx
)
PRED16x16
(
tm_vp8
,
8
,
mmx
2
)
PRED16x16
(
tm_vp8
,
8
,
mmx
ext
)
PRED16x16
(
tm_vp8
,
8
,
sse2
)
PRED8x8
(
top_dc
,
8
,
mmxext
)
...
...
@@ -131,14 +131,14 @@ PRED8x8(dc_rv40, 8, mmxext)
PRED8x8
(
dc
,
8
,
mmxext
)
PRED8x8
(
vertical
,
8
,
mmx
)
PRED8x8
(
horizontal
,
8
,
mmx
)
PRED8x8
(
horizontal
,
8
,
mmx
2
)
PRED8x8
(
horizontal
,
8
,
mmx
ext
)
PRED8x8
(
horizontal
,
8
,
ssse3
)
PRED8x8
(
plane
,
8
,
mmx
)
PRED8x8
(
plane
,
8
,
mmx
2
)
PRED8x8
(
plane
,
8
,
mmx
ext
)
PRED8x8
(
plane
,
8
,
sse2
)
PRED8x8
(
plane
,
8
,
ssse3
)
PRED8x8
(
tm_vp8
,
8
,
mmx
)
PRED8x8
(
tm_vp8
,
8
,
mmx
2
)
PRED8x8
(
tm_vp8
,
8
,
mmx
ext
)
PRED8x8
(
tm_vp8
,
8
,
sse2
)
PRED8x8
(
tm_vp8
,
8
,
ssse3
)
...
...
@@ -175,7 +175,7 @@ PRED4x4(vertical_right, 8, mmxext)
PRED4x4
(
horizontal_up
,
8
,
mmxext
)
PRED4x4
(
horizontal_down
,
8
,
mmxext
)
PRED4x4
(
tm_vp8
,
8
,
mmx
)
PRED4x4
(
tm_vp8
,
8
,
mmx
2
)
PRED4x4
(
tm_vp8
,
8
,
mmx
ext
)
PRED4x4
(
tm_vp8
,
8
,
ssse3
)
PRED4x4
(
vertical_vp8
,
8
,
mmxext
)
...
...
@@ -210,10 +210,10 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
}
if
(
EXTERNAL_MMXEXT
(
mm_flags
))
{
h
->
pred16x16
[
HOR_PRED8x8
]
=
ff_pred16x16_horizontal_8_mmx
2
;
h
->
pred16x16
[
DC_PRED8x8
]
=
ff_pred16x16_dc_8_mmx
2
;
h
->
pred16x16
[
HOR_PRED8x8
]
=
ff_pred16x16_horizontal_8_mmx
ext
;
h
->
pred16x16
[
DC_PRED8x8
]
=
ff_pred16x16_dc_8_mmx
ext
;
if
(
chroma_format_idc
==
1
)
h
->
pred8x8
[
HOR_PRED8x8
]
=
ff_pred8x8_horizontal_8_mmx
2
;
h
->
pred8x8
[
HOR_PRED8x8
]
=
ff_pred8x8_horizontal_8_mmx
ext
;
h
->
pred8x8l
[
TOP_DC_PRED
]
=
ff_pred8x8l_top_dc_8_mmxext
;
h
->
pred8x8l
[
DC_PRED
]
=
ff_pred8x8l_dc_8_mmxext
;
h
->
pred8x8l
[
HOR_PRED
]
=
ff_pred8x8l_horizontal_8_mmxext
;
...
...
@@ -243,20 +243,20 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
}
}
if
(
codec_id
==
AV_CODEC_ID_VP8
)
{
h
->
pred16x16
[
PLANE_PRED8x8
]
=
ff_pred16x16_tm_vp8_8_mmx
2
;
h
->
pred16x16
[
PLANE_PRED8x8
]
=
ff_pred16x16_tm_vp8_8_mmx
ext
;
h
->
pred8x8
[
DC_PRED8x8
]
=
ff_pred8x8_dc_rv40_8_mmxext
;
h
->
pred8x8
[
PLANE_PRED8x8
]
=
ff_pred8x8_tm_vp8_8_mmx
2
;
h
->
pred4x4
[
TM_VP8_PRED
]
=
ff_pred4x4_tm_vp8_8_mmx
2
;
h
->
pred8x8
[
PLANE_PRED8x8
]
=
ff_pred8x8_tm_vp8_8_mmx
ext
;
h
->
pred4x4
[
TM_VP8_PRED
]
=
ff_pred4x4_tm_vp8_8_mmx
ext
;
h
->
pred4x4
[
VERT_PRED
]
=
ff_pred4x4_vertical_vp8_8_mmxext
;
}
else
{
if
(
chroma_format_idc
==
1
)
h
->
pred8x8
[
PLANE_PRED8x8
]
=
ff_pred8x8_plane_8_mmx
2
;
h
->
pred8x8
[
PLANE_PRED8x8
]
=
ff_pred8x8_plane_8_mmx
ext
;
if
(
codec_id
==
AV_CODEC_ID_SVQ3
)
{
h
->
pred16x16
[
PLANE_PRED8x8
]
=
ff_pred16x16_plane_svq3_8_mmx
2
;
h
->
pred16x16
[
PLANE_PRED8x8
]
=
ff_pred16x16_plane_svq3_8_mmx
ext
;
}
else
if
(
codec_id
==
AV_CODEC_ID_RV40
)
{
h
->
pred16x16
[
PLANE_PRED8x8
]
=
ff_pred16x16_plane_rv40_8_mmx
2
;
h
->
pred16x16
[
PLANE_PRED8x8
]
=
ff_pred16x16_plane_rv40_8_mmx
ext
;
}
else
{
h
->
pred16x16
[
PLANE_PRED8x8
]
=
ff_pred16x16_plane_h264_8_mmx
2
;
h
->
pred16x16
[
PLANE_PRED8x8
]
=
ff_pred16x16_plane_h264_8_mmx
ext
;
}
}
}
...
...
@@ -320,20 +320,20 @@ void ff_h264_pred_init_x86(H264PredContext *h, int codec_id, const int bit_depth
}
}
else
if
(
bit_depth
==
10
)
{
if
(
EXTERNAL_MMXEXT
(
mm_flags
))
{
h
->
pred4x4
[
DC_PRED
]
=
ff_pred4x4_dc_10_mmx
2
;
h
->
pred4x4
[
HOR_UP_PRED
]
=
ff_pred4x4_horizontal_up_10_mmx
2
;
h
->
pred4x4
[
DC_PRED
]
=
ff_pred4x4_dc_10_mmx
ext
;
h
->
pred4x4
[
HOR_UP_PRED
]
=
ff_pred4x4_horizontal_up_10_mmx
ext
;
if
(
chroma_format_idc
==
1
)
h
->
pred8x8
[
DC_PRED8x8
]
=
ff_pred8x8_dc_10_mmx
2
;
h
->
pred8x8
[
DC_PRED8x8
]
=
ff_pred8x8_dc_10_mmx
ext
;
h
->
pred8x8l
[
DC_128_PRED
]
=
ff_pred8x8l_128_dc_10_mmx
2
;
h
->
pred8x8l
[
DC_128_PRED
]
=
ff_pred8x8l_128_dc_10_mmx
ext
;
h
->
pred16x16
[
DC_PRED8x8
]
=
ff_pred16x16_dc_10_mmx
2
;
h
->
pred16x16
[
TOP_DC_PRED8x8
]
=
ff_pred16x16_top_dc_10_mmx
2
;
h
->
pred16x16
[
DC_128_PRED8x8
]
=
ff_pred16x16_128_dc_10_mmx
2
;
h
->
pred16x16
[
LEFT_DC_PRED8x8
]
=
ff_pred16x16_left_dc_10_mmx
2
;
h
->
pred16x16
[
VERT_PRED8x8
]
=
ff_pred16x16_vertical_10_mmx
2
;
h
->
pred16x16
[
HOR_PRED8x8
]
=
ff_pred16x16_horizontal_10_mmx
2
;
h
->
pred16x16
[
DC_PRED8x8
]
=
ff_pred16x16_dc_10_mmx
ext
;
h
->
pred16x16
[
TOP_DC_PRED8x8
]
=
ff_pred16x16_top_dc_10_mmx
ext
;
h
->
pred16x16
[
DC_128_PRED8x8
]
=
ff_pred16x16_128_dc_10_mmx
ext
;
h
->
pred16x16
[
LEFT_DC_PRED8x8
]
=
ff_pred16x16_left_dc_10_mmx
ext
;
h
->
pred16x16
[
VERT_PRED8x8
]
=
ff_pred16x16_vertical_10_mmx
ext
;
h
->
pred16x16
[
HOR_PRED8x8
]
=
ff_pred16x16_horizontal_10_mmx
ext
;
}
if
(
EXTERNAL_SSE2
(
mm_flags
))
{
h
->
pred4x4
[
DIAG_DOWN_LEFT_PRED
]
=
ff_pred4x4_down_left_10_sse2
;
...
...
libavcodec/x86/h264_weight.asm
View file @
26301caa
...
...
@@ -71,7 +71,7 @@ SECTION .text
%endmacro
INIT_MMX
cglobal
h264_weight_16_mmx
2
,
6
,
6
,
0
cglobal
h264_weight_16_mmx
ext
,
6
,
6
,
0
WEIGHT_SETUP
.
nextrow
:
WEIGHT_OP
0
,
4
...
...
@@ -96,7 +96,7 @@ cglobal h264_weight_%1_%3, 6, 6, %2
%endmacro
INIT_MMX
WEIGHT_FUNC_MM
8
,
0
,
mmx
2
WEIGHT_FUNC_MM
8
,
0
,
mmx
ext
INIT_XMM
WEIGHT_FUNC_MM
16
,
8
,
sse2
...
...
@@ -121,7 +121,7 @@ cglobal h264_weight_%1_%3, 6, 6, %2
%endmacro
INIT_MMX
WEIGHT_FUNC_HALF_MM
4
,
0
,
mmx
2
WEIGHT_FUNC_HALF_MM
4
,
0
,
mmx
ext
INIT_XMM
WEIGHT_FUNC_HALF_MM
8
,
8
,
sse2
...
...
@@ -175,7 +175,7 @@ WEIGHT_FUNC_HALF_MM 8, 8, sse2
%endmacro
INIT_MMX
cglobal
h264_biweight_16_mmx
2
,
7
,
8
,
0
cglobal
h264_biweight_16_mmx
ext
,
7
,
8
,
0
BIWEIGHT_SETUP
movifnidn
r3d
,
r3m
.
nextrow
:
...
...
@@ -210,7 +210,7 @@ cglobal h264_biweight_%1_%3, 7, 8, %2
%endmacro
INIT_MMX
BIWEIGHT_FUNC_MM
8
,
0
,
mmx
2
BIWEIGHT_FUNC_MM
8
,
0
,
mmx
ext
INIT_XMM
BIWEIGHT_FUNC_MM
16
,
8
,
sse2
...
...
@@ -239,7 +239,7 @@ cglobal h264_biweight_%1_%3, 7, 8, %2
%endmacro
INIT_MMX
BIWEIGHT_FUNC_HALF_MM
4
,
0
,
mmx
2
BIWEIGHT_FUNC_HALF_MM
4
,
0
,
mmx
ext
INIT_XMM
BIWEIGHT_FUNC_HALF_MM
8
,
8
,
sse2
...
...
libavcodec/x86/h264dsp_init.c
View file @
26301caa
This diff is collapsed.
Click to expand it.
libavcodec/x86/pngdsp.asm
View file @
26301caa
...
...
@@ -166,7 +166,7 @@ cglobal add_png_paeth_prediction, 5, 7, %1, dst, src, top, w, bpp, end, cntr
RET
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
ADD_PAETH_PRED_FN
0
INIT_MMX
ssse3
...
...
libavcodec/x86/pngdsp_init.c
View file @
26301caa
...
...
@@ -23,8 +23,8 @@
#include "libavutil/x86/cpu.h"
#include "libavcodec/pngdsp.h"
void
ff_add_png_paeth_prediction_mmx
2
(
uint8_t
*
dst
,
uint8_t
*
src
,
uint8_t
*
top
,
int
w
,
int
bpp
);
void
ff_add_png_paeth_prediction_mmx
ext
(
uint8_t
*
dst
,
uint8_t
*
src
,
uint8_t
*
top
,
int
w
,
int
bpp
);
void
ff_add_png_paeth_prediction_ssse3
(
uint8_t
*
dst
,
uint8_t
*
src
,
uint8_t
*
top
,
int
w
,
int
bpp
);
void
ff_add_bytes_l2_mmx
(
uint8_t
*
dst
,
uint8_t
*
src1
,
...
...
@@ -41,7 +41,7 @@ void ff_pngdsp_init_x86(PNGDSPContext *dsp)
dsp
->
add_bytes_l2
=
ff_add_bytes_l2_mmx
;
#endif
if
(
EXTERNAL_MMXEXT
(
flags
))
dsp
->
add_paeth_prediction
=
ff_add_png_paeth_prediction_mmx
2
;
dsp
->
add_paeth_prediction
=
ff_add_png_paeth_prediction_mmx
ext
;
if
(
EXTERNAL_SSE2
(
flags
))
dsp
->
add_bytes_l2
=
ff_add_bytes_l2_sse2
;
if
(
EXTERNAL_SSSE3
(
flags
))
...
...
libavcodec/x86/rv34dsp.asm
View file @
26301caa
...
...
@@ -57,7 +57,7 @@ cglobal rv34_idct_%1, 1, 2, 0
REP_RET
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
%define
IDCT_DC
IDCT_DC_ROUND
rv34_idct
dc
%define
IDCT_DC
IDCT_DC_NOROUND
...
...
@@ -133,7 +133,7 @@ cglobal rv34_idct_dc_add, 3, 3
mova
mm5
,
[
pd_512
]
; 0x200
%endmacro
; ff_rv34_idct_add_mmx
2
(uint8_t *dst, ptrdiff_t stride, DCTELEM *block);
; ff_rv34_idct_add_mmx
ext
(uint8_t *dst, ptrdiff_t stride, DCTELEM *block);
%macro
COL_TRANSFORM
4
pshufw
mm3
,
%2
,
0xDD
; col. 1,3,1,3
pshufw
%2
,
%2
,
0x88
; col. 0,2,0,2
...
...
@@ -154,7 +154,7 @@ cglobal rv34_idct_dc_add, 3, 3
packuswb
%2
,
%2
movd
%1
,
%2
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
cglobal
rv34_idct_add
,
3
,
3
,
0
,
d
,
s
,
b
ROW_TRANSFORM
bq
COL_TRANSFORM
[dq],
mm0
,
[
pw_col_coeffs
+
0
]
,
[
pw_col_coeffs
+
8
]
...
...
libavcodec/x86/rv34dsp_init.c
View file @
26301caa
...
...
@@ -25,11 +25,11 @@
#include "libavcodec/dsputil.h"
#include "libavcodec/rv34dsp.h"
void
ff_rv34_idct_dc_mmx
2
(
DCTELEM
*
block
);
void
ff_rv34_idct_dc_noround_mmx
2
(
DCTELEM
*
block
);
void
ff_rv34_idct_dc_mmx
ext
(
DCTELEM
*
block
);
void
ff_rv34_idct_dc_noround_mmx
ext
(
DCTELEM
*
block
);
void
ff_rv34_idct_dc_add_mmx
(
uint8_t
*
dst
,
ptrdiff_t
stride
,
int
dc
);
void
ff_rv34_idct_dc_add_sse4
(
uint8_t
*
dst
,
ptrdiff_t
stride
,
int
dc
);
void
ff_rv34_idct_add_mmx
2
(
uint8_t
*
dst
,
ptrdiff_t
stride
,
DCTELEM
*
block
);
void
ff_rv34_idct_add_mmx
ext
(
uint8_t
*
dst
,
ptrdiff_t
stride
,
DCTELEM
*
block
);
av_cold
void
ff_rv34dsp_init_x86
(
RV34DSPContext
*
c
,
DSPContext
*
dsp
)
{
...
...
@@ -38,8 +38,8 @@ av_cold void ff_rv34dsp_init_x86(RV34DSPContext* c, DSPContext *dsp)
if
(
EXTERNAL_MMX
(
mm_flags
))
c
->
rv34_idct_dc_add
=
ff_rv34_idct_dc_add_mmx
;
if
(
EXTERNAL_MMXEXT
(
mm_flags
))
{
c
->
rv34_inv_transform_dc
=
ff_rv34_idct_dc_noround_mmx
2
;
c
->
rv34_idct_add
=
ff_rv34_idct_add_mmx
2
;
c
->
rv34_inv_transform_dc
=
ff_rv34_idct_dc_noround_mmx
ext
;
c
->
rv34_idct_add
=
ff_rv34_idct_add_mmx
ext
;
}
if
(
EXTERNAL_SSE4
(
mm_flags
))
c
->
rv34_idct_dc_add
=
ff_rv34_idct_dc_add_sse4
;
...
...
libavcodec/x86/rv40dsp.asm
View file @
26301caa
...
...
@@ -240,7 +240,7 @@ INIT_MMX mmx
FILTER_V
put
FILTER_H
put
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
FILTER_V
avg
FILTER_H
avg
...
...
@@ -486,7 +486,7 @@ cglobal rv40_weight_func_%1_%2, 6, 7, 8
REP_RET
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
RV40_WEIGHT
rnd
,
8
,
3
RV40_WEIGHT
rnd
,
16
,
4
RV40_WEIGHT
nornd
,
8
,
3
...
...
libavcodec/x86/rv40dsp_init.c
View file @
26301caa
...
...
@@ -34,15 +34,15 @@
#if HAVE_YASM
void
ff_put_rv40_chroma_mc8_mmx
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_avg_rv40_chroma_mc8_mmx
2
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_avg_rv40_chroma_mc8_mmx
ext
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_avg_rv40_chroma_mc8_3dnow
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_put_rv40_chroma_mc4_mmx
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_avg_rv40_chroma_mc4_mmx
2
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_avg_rv40_chroma_mc4_mmx
ext
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_avg_rv40_chroma_mc4_3dnow
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
...
...
@@ -55,7 +55,7 @@ void ff_rv40_weight_func_nornd_16_##opt(uint8_t *dst, uint8_t *src1, uint8_t *sr
int w1, int w2, ptrdiff_t stride); \
void ff_rv40_weight_func_nornd_8_##opt (uint8_t *dst, uint8_t *src1, uint8_t *src2, \
int w1, int w2, ptrdiff_t stride);
DECLARE_WEIGHT
(
mmx
2
)
DECLARE_WEIGHT
(
mmx
ext
)
DECLARE_WEIGHT
(
sse2
)
DECLARE_WEIGHT
(
ssse3
)
...
...
@@ -150,9 +150,9 @@ QPEL_MC_DECL(avg_, _sse2)
QPEL_MC_DECL
(
put_
,
_mmx
)
#define ff_put_rv40_qpel_h_mmx
2
ff_put_rv40_qpel_h_mmx
#define ff_put_rv40_qpel_v_mmx
2
ff_put_rv40_qpel_v_mmx
QPEL_MC_DECL
(
avg_
,
_mmx
2
)
#define ff_put_rv40_qpel_h_mmx
ext
ff_put_rv40_qpel_h_mmx
#define ff_put_rv40_qpel_v_mmx
ext
ff_put_rv40_qpel_v_mmx
QPEL_MC_DECL
(
avg_
,
_mmx
ext
)
#define ff_put_rv40_qpel_h_3dnow ff_put_rv40_qpel_h_mmx
#define ff_put_rv40_qpel_v_3dnow ff_put_rv40_qpel_v_mmx
...
...
@@ -206,14 +206,14 @@ void ff_rv40dsp_init_x86(RV34DSPContext *c, DSPContext *dsp)
#endif
}
if
(
EXTERNAL_MMXEXT
(
mm_flags
))
{
c
->
avg_chroma_pixels_tab
[
0
]
=
ff_avg_rv40_chroma_mc8_mmx2
;
c
->
avg_chroma_pixels_tab
[
1
]
=
ff_avg_rv40_chroma_mc4_mmx2
;
c
->
rv40_weight_pixels_tab
[
0
][
0
]
=
ff_rv40_weight_func_rnd_16_mmx
2
;
c
->
rv40_weight_pixels_tab
[
0
][
1
]
=
ff_rv40_weight_func_rnd_8_mmx
2
;
c
->
rv40_weight_pixels_tab
[
1
][
0
]
=
ff_rv40_weight_func_nornd_16_mmx
2
;
c
->
rv40_weight_pixels_tab
[
1
][
1
]
=
ff_rv40_weight_func_nornd_8_mmx
2
;
c
->
avg_chroma_pixels_tab
[
0
]
=
ff_avg_rv40_chroma_mc8_mmxext
;
c
->
avg_chroma_pixels_tab
[
1
]
=
ff_avg_rv40_chroma_mc4_mmxext
;
c
->
rv40_weight_pixels_tab
[
0
][
0
]
=
ff_rv40_weight_func_rnd_16_mmx
ext
;
c
->
rv40_weight_pixels_tab
[
0
][
1
]
=
ff_rv40_weight_func_rnd_8_mmx
ext
;
c
->
rv40_weight_pixels_tab
[
1
][
0
]
=
ff_rv40_weight_func_nornd_16_mmx
ext
;
c
->
rv40_weight_pixels_tab
[
1
][
1
]
=
ff_rv40_weight_func_nornd_8_mmx
ext
;
#if ARCH_X86_32
QPEL_MC_SET
(
avg_
,
_mmx
2
)
QPEL_MC_SET
(
avg_
,
_mmx
ext
)
#endif
}
else
if
(
EXTERNAL_AMD3DNOW
(
mm_flags
))
{
c
->
avg_chroma_pixels_tab
[
0
]
=
ff_avg_rv40_chroma_mc8_3dnow
;
...
...
libavcodec/x86/vc1dsp_init.c
View file @
26301caa
...
...
@@ -64,8 +64,8 @@ static void vc1_h_loop_filter16_sse4(uint8_t *src, int stride, int pq)
void
ff_put_vc1_chroma_mc8_nornd_mmx
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_avg_vc1_chroma_mc8_nornd_mmx
2
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_avg_vc1_chroma_mc8_nornd_mmx
ext
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_avg_vc1_chroma_mc8_nornd_3dnow
(
uint8_t
*
dst
,
uint8_t
*
src
,
int
stride
,
int
h
,
int
x
,
int
y
);
void
ff_put_vc1_chroma_mc8_nornd_ssse3
(
uint8_t
*
dst
,
uint8_t
*
src
,
...
...
@@ -99,7 +99,7 @@ av_cold void ff_vc1dsp_init_x86(VC1DSPContext *dsp)
if
(
mm_flags
&
AV_CPU_FLAG_MMXEXT
)
{
ASSIGN_LF
(
mmxext
);
dsp
->
avg_no_rnd_vc1_chroma_pixels_tab
[
0
]
=
ff_avg_vc1_chroma_mc8_nornd_mmx
2
;
dsp
->
avg_no_rnd_vc1_chroma_pixels_tab
[
0
]
=
ff_avg_vc1_chroma_mc8_nornd_mmx
ext
;
}
else
if
(
mm_flags
&
AV_CPU_FLAG_3DNOW
)
{
dsp
->
avg_no_rnd_vc1_chroma_pixels_tab
[
0
]
=
ff_avg_vc1_chroma_mc8_nornd_3dnow
;
}
...
...
libavcodec/x86/vp3dsp.asm
View file @
26301caa
...
...
@@ -101,7 +101,7 @@ SECTION .text
mov
[
r0
+
r3
-
1
]
,
r2w
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
cglobal
vp3_v_loop_filter
,
3
,
4
%if
ARCH_X86_64
movsxd
r1
,
r1d
...
...
@@ -633,7 +633,7 @@ vp3_idct_funcs
movq
[
r0
+
r3
]
,
m5
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
cglobal
vp3_idct_dc_add
,
3
,
4
%if
ARCH_X86_64
movsxd
r1
,
r1d
...
...
libavcodec/x86/vp3dsp_init.c
View file @
26301caa
...
...
@@ -31,11 +31,13 @@ void ff_vp3_idct_add_mmx(uint8_t *dest, int line_size, DCTELEM *block);
void
ff_vp3_idct_put_sse2
(
uint8_t
*
dest
,
int
line_size
,
DCTELEM
*
block
);
void
ff_vp3_idct_add_sse2
(
uint8_t
*
dest
,
int
line_size
,
DCTELEM
*
block
);
void
ff_vp3_idct_dc_add_mmx
2
(
uint8_t
*
dest
,
int
line_size
,
const
DCTELEM
*
block
);
void
ff_vp3_idct_dc_add_mmx
ext
(
uint8_t
*
dest
,
int
line_size
,
const
DCTELEM
*
block
);
void
ff_vp3_v_loop_filter_mmx2
(
uint8_t
*
src
,
int
stride
,
int
*
bounding_values
);
void
ff_vp3_h_loop_filter_mmx2
(
uint8_t
*
src
,
int
stride
,
int
*
bounding_values
);
void
ff_vp3_v_loop_filter_mmxext
(
uint8_t
*
src
,
int
stride
,
int
*
bounding_values
);
void
ff_vp3_h_loop_filter_mmxext
(
uint8_t
*
src
,
int
stride
,
int
*
bounding_values
);
av_cold
void
ff_vp3dsp_init_x86
(
VP3DSPContext
*
c
,
int
flags
)
{
...
...
@@ -50,11 +52,11 @@ av_cold void ff_vp3dsp_init_x86(VP3DSPContext *c, int flags)
#endif
if
(
EXTERNAL_MMXEXT
(
cpuflags
))
{
c
->
idct_dc_add
=
ff_vp3_idct_dc_add_mmx
2
;
c
->
idct_dc_add
=
ff_vp3_idct_dc_add_mmx
ext
;
if
(
!
(
flags
&
CODEC_FLAG_BITEXACT
))
{
c
->
v_loop_filter
=
ff_vp3_v_loop_filter_mmx
2
;
c
->
h_loop_filter
=
ff_vp3_h_loop_filter_mmx
2
;
c
->
v_loop_filter
=
ff_vp3_v_loop_filter_mmx
ext
;
c
->
h_loop_filter
=
ff_vp3_h_loop_filter_mmx
ext
;
}
}
...
...
libavcodec/x86/vp8dsp.asm
View file @
26301caa
...
...
@@ -338,7 +338,7 @@ INIT_XMM ssse3
FILTER_SSSE3
8
; 4x4 block, H-only 4-tap filter
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
cglobal
put_vp8_epel4_h4
,
6
,
6
+
npicregs
,
0
,
dst
,
dststride
,
src
,
srcstride
,
height
,
mx
,
picreg
shl
mxd
,
4
%ifdef
PIC
...
...
@@ -386,7 +386,7 @@ cglobal put_vp8_epel4_h4, 6, 6 + npicregs, 0, dst, dststride, src, srcstride, he
REP_RET
; 4x4 block, H-only 6-tap filter
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
cglobal
put_vp8_epel4_h6
,
6
,
6
+
npicregs
,
0
,
dst
,
dststride
,
src
,
srcstride
,
height
,
mx
,
picreg
lea
mxd
,
[
mxq
*
3
]
%ifdef
PIC
...
...
@@ -673,7 +673,7 @@ cglobal put_vp8_epel%1_v6, 7, 7, 8, dst, dststride, src, srcstride, height, picr
REP_RET
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
FILTER_V
4
INIT_XMM
sse2
FILTER_V
8
...
...
@@ -769,7 +769,7 @@ cglobal put_vp8_bilinear%1_h, 6, 6 + npicregs, 7, dst, dststride, src, srcstride
REP_RET
%endmacro
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
FILTER_BILINEAR
4
INIT_XMM
sse2
FILTER_BILINEAR
8
...
...
@@ -1611,7 +1611,7 @@ cglobal vp8_%1_loop_filter_simple, 3, %2, 8, dst, stride, flim, cntr
INIT_MMX
mmx
SIMPLE_LOOPFILTER
v
,
4
SIMPLE_LOOPFILTER
h
,
5
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
SIMPLE_LOOPFILTER
v
,
4
SIMPLE_LOOPFILTER
h
,
5
%endif
...
...
@@ -1835,7 +1835,7 @@ cglobal vp8_%1_loop_filter16y_inner, 5, 5, 13, dst, stride, flimE, flimI, hevthr
psubusb
m6
,
m5
; q2-q1
por
m6
,
m4
; abs(q2-q1)
%if
notcpuflag
(
mmx
2
)
%if
notcpuflag
(
mmx
ext
)
mova
m4
,
m_flimI
pxor
m3
,
m3
psubusb
m0
,
m4
...
...
@@ -1875,7 +1875,7 @@ cglobal vp8_%1_loop_filter16y_inner, 5, 5, 13, dst, stride, flimE, flimI, hevthr
psubusb
m1
,
m3
; p1-p0
psubusb
m6
,
m2
; p0-p1
por
m1
,
m6
; abs(p1-p0)
%if
notcpuflag
(
mmx
2
)
%if
notcpuflag
(
mmx
ext
)
mova
m6
,
m1
psubusb
m1
,
m4
psubusb
m6
,
m_hevthr
...
...
@@ -1906,7 +1906,7 @@ cglobal vp8_%1_loop_filter16y_inner, 5, 5, 13, dst, stride, flimE, flimI, hevthr
psubusb
m1
,
m5
; q0-q1
psubusb
m7
,
m4
; q1-q0
por
m1
,
m7
; abs(q1-q0)
%if
notcpuflag
(
mmx
2
)
%if
notcpuflag
(
mmx
ext
)
mova
m7
,
m1
psubusb
m1
,
m6
psubusb
m7
,
m_hevthr
...
...
@@ -2014,14 +2014,14 @@ cglobal vp8_%1_loop_filter16y_inner, 5, 5, 13, dst, stride, flimE, flimI, hevthr
%else
mova
m6
,
m_maskres
%endif
%if
notcpuflag
(
mmx
2
)
%if
notcpuflag
(
mmx
ext
)
mova
m7
,
[
pb_1
]
%else
; mmxext/sse2
pxor
m7
,
m7
%endif
pand
m0
,
m6
pand
m1
,
m6
%if
notcpuflag
(
mmx
2
)
%if
notcpuflag
(
mmx
ext
)
paddusb
m0
,
m7
pand
m1
,
[
pb_FE
]
pandn
m7
,
m0
...
...
@@ -2097,7 +2097,7 @@ INNER_LOOPFILTER h, 16
INNER_LOOPFILTER
v
,
8
INNER_LOOPFILTER
h
,
8
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
INNER_LOOPFILTER
v
,
16
INNER_LOOPFILTER
h
,
16
INNER_LOOPFILTER
v
,
8
...
...
@@ -2343,7 +2343,7 @@ cglobal vp8_%1_loop_filter16y_mbedge, 5, 5, 15, dst1, stride, flimE, flimI, hevt
psubusb
m6
,
m5
; q2-q1
por
m6
,
m4
; abs(q2-q1)
%if
notcpuflag
(
mmx
2
)
%if
notcpuflag
(
mmx
ext
)
mova
m4
,
m_flimI
pxor
m3
,
m3
psubusb
m0
,
m4
...
...
@@ -2383,7 +2383,7 @@ cglobal vp8_%1_loop_filter16y_mbedge, 5, 5, 15, dst1, stride, flimE, flimI, hevt
psubusb
m1
,
m3
; p1-p0
psubusb
m6
,
m2
; p0-p1
por
m1
,
m6
; abs(p1-p0)
%if
notcpuflag
(
mmx
2
)
%if
notcpuflag
(
mmx
ext
)
mova
m6
,
m1
psubusb
m1
,
m4
psubusb
m6
,
m_hevthr
...
...
@@ -2414,7 +2414,7 @@ cglobal vp8_%1_loop_filter16y_mbedge, 5, 5, 15, dst1, stride, flimE, flimI, hevt
psubusb
m1
,
m5
; q0-q1
psubusb
m7
,
m4
; q1-q0
por
m1
,
m7
; abs(q1-q0)
%if
notcpuflag
(
mmx
2
)
%if
notcpuflag
(
mmx
ext
)
mova
m7
,
m1
psubusb
m1
,
m6
psubusb
m7
,
m_hevthr
...
...
@@ -2755,7 +2755,7 @@ MBEDGE_LOOPFILTER h, 16
MBEDGE_LOOPFILTER
v
,
8
MBEDGE_LOOPFILTER
h
,
8
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
MBEDGE_LOOPFILTER
v
,
16
MBEDGE_LOOPFILTER
h
,
16
MBEDGE_LOOPFILTER
v
,
8
...
...
libavcodec/x86/vp8dsp_init.c
View file @
26301caa
...
...
@@ -30,16 +30,16 @@
/*
* MC functions
*/
extern
void
ff_put_vp8_epel4_h4_mmx
2
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
extern
void
ff_put_vp8_epel4_h4_mmx
ext
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
uint8_t
*
src
,
ptrdiff_t
srcstride
,
int
height
,
int
mx
,
int
my
);
extern
void
ff_put_vp8_epel4_h6_mmx
2
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
extern
void
ff_put_vp8_epel4_h6_mmx
ext
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
uint8_t
*
src
,
ptrdiff_t
srcstride
,
int
height
,
int
mx
,
int
my
);
extern
void
ff_put_vp8_epel4_v4_mmx
2
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
extern
void
ff_put_vp8_epel4_v4_mmx
ext
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
uint8_t
*
src
,
ptrdiff_t
srcstride
,
int
height
,
int
mx
,
int
my
);
extern
void
ff_put_vp8_epel4_v6_mmx
2
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
extern
void
ff_put_vp8_epel4_v6_mmx
ext
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
uint8_t
*
src
,
ptrdiff_t
srcstride
,
int
height
,
int
mx
,
int
my
);
...
...
@@ -81,7 +81,7 @@ extern void ff_put_vp8_epel8_v6_ssse3 (uint8_t *dst, ptrdiff_t dststride,
uint8_t
*
src
,
ptrdiff_t
srcstride
,
int
height
,
int
mx
,
int
my
);
extern
void
ff_put_vp8_bilinear4_h_mmx
2
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
extern
void
ff_put_vp8_bilinear4_h_mmx
ext
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
uint8_t
*
src
,
ptrdiff_t
srcstride
,
int
height
,
int
mx
,
int
my
);
extern
void
ff_put_vp8_bilinear8_h_sse2
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
...
...
@@ -94,7 +94,7 @@ extern void ff_put_vp8_bilinear8_h_ssse3 (uint8_t *dst, ptrdiff_t dststride,
uint8_t
*
src
,
ptrdiff_t
srcstride
,
int
height
,
int
mx
,
int
my
);
extern
void
ff_put_vp8_bilinear4_v_mmx
2
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
extern
void
ff_put_vp8_bilinear4_v_mmx
ext
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
uint8_t
*
src
,
ptrdiff_t
srcstride
,
int
height
,
int
mx
,
int
my
);
extern
void
ff_put_vp8_bilinear8_v_sse2
(
uint8_t
*
dst
,
ptrdiff_t
dststride
,
...
...
@@ -140,16 +140,16 @@ static void ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \
}
#if ARCH_X86_32
TAP_W8
(
mmx
2
,
epel
,
h4
)
TAP_W8
(
mmx
2
,
epel
,
h6
)
TAP_W16
(
mmx
2
,
epel
,
h6
)
TAP_W8
(
mmx
2
,
epel
,
v4
)
TAP_W8
(
mmx
2
,
epel
,
v6
)
TAP_W16
(
mmx
2
,
epel
,
v6
)
TAP_W8
(
mmx
2
,
bilinear
,
h
)
TAP_W16
(
mmx
2
,
bilinear
,
h
)
TAP_W8
(
mmx
2
,
bilinear
,
v
)
TAP_W16
(
mmx
2
,
bilinear
,
v
)
TAP_W8
(
mmx
ext
,
epel
,
h4
)
TAP_W8
(
mmx
ext
,
epel
,
h6
)
TAP_W16
(
mmx
ext
,
epel
,
h6
)
TAP_W8
(
mmx
ext
,
epel
,
v4
)
TAP_W8
(
mmx
ext
,
epel
,
v6
)
TAP_W16
(
mmx
ext
,
epel
,
v6
)
TAP_W8
(
mmx
ext
,
bilinear
,
h
)
TAP_W16
(
mmx
ext
,
bilinear
,
h
)
TAP_W8
(
mmx
ext
,
bilinear
,
v
)
TAP_W16
(
mmx
ext
,
bilinear
,
v
)
#endif
TAP_W16
(
sse2
,
epel
,
h6
)
...
...
@@ -178,13 +178,13 @@ static void ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## v ## TAPNUMY ## _ ## OPT
#if ARCH_X86_32
#define HVTAPMMX(x, y) \
HVTAP(mmx
2
, 8, x, y, 4, 8) \
HVTAP(mmx
2
, 8, x, y, 8, 16)
HVTAP(mmx
ext
, 8, x, y, 4, 8) \
HVTAP(mmx
ext
, 8, x, y, 8, 16)
HVTAP
(
mmx
2
,
8
,
6
,
6
,
16
,
16
)
HVTAP
(
mmx
ext
,
8
,
6
,
6
,
16
,
16
)
#else
#define HVTAPMMX(x, y) \
HVTAP(mmx
2
, 8, x, y, 4, 8)
HVTAP(mmx
ext
, 8, x, y, 4, 8)
#endif
HVTAPMMX
(
4
,
4
)
...
...
@@ -219,10 +219,10 @@ static void ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT( \
dst, dststride, tmp, SIZE, height, mx, my); \
}
HVBILIN
(
mmx
2
,
8
,
4
,
8
)
HVBILIN
(
mmx
ext
,
8
,
4
,
8
)
#if ARCH_X86_32
HVBILIN
(
mmx
2
,
8
,
8
,
16
)
HVBILIN
(
mmx
2
,
8
,
16
,
16
)
HVBILIN
(
mmx
ext
,
8
,
8
,
16
)
HVBILIN
(
mmx
ext
,
8
,
16
,
16
)
#endif
HVBILIN
(
sse2
,
8
,
8
,
16
)
HVBILIN
(
sse2
,
8
,
16
,
16
)
...
...
@@ -284,7 +284,7 @@ extern void ff_vp8_h_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, \
int e, int i, int hvt);
DECLARE_LOOP_FILTER
(
mmx
)
DECLARE_LOOP_FILTER
(
mmx
2
)
DECLARE_LOOP_FILTER
(
mmx
ext
)
DECLARE_LOOP_FILTER
(
sse2
)
DECLARE_LOOP_FILTER
(
ssse3
)
DECLARE_LOOP_FILTER
(
sse4
)
...
...
@@ -352,26 +352,26 @@ av_cold void ff_vp8dsp_init_x86(VP8DSPContext* c)
/* note that 4-tap width=16 functions are missing because w=16
* is only used for luma, and luma is always a copy or sixtap. */
if
(
mm_flags
&
AV_CPU_FLAG_MMXEXT
)
{
VP8_MC_FUNC
(
2
,
4
,
mmx
2
);
VP8_BILINEAR_MC_FUNC
(
2
,
4
,
mmx
2
);
VP8_MC_FUNC
(
2
,
4
,
mmx
ext
);
VP8_BILINEAR_MC_FUNC
(
2
,
4
,
mmx
ext
);
#if ARCH_X86_32
VP8_LUMA_MC_FUNC
(
0
,
16
,
mmx
2
);
VP8_MC_FUNC
(
1
,
8
,
mmx
2
);
VP8_BILINEAR_MC_FUNC
(
0
,
16
,
mmx
2
);
VP8_BILINEAR_MC_FUNC
(
1
,
8
,
mmx2
);
c
->
vp8_v_loop_filter_simple
=
ff_vp8_v_loop_filter_simple_mmx2
;
c
->
vp8_h_loop_filter_simple
=
ff_vp8_h_loop_filter_simple_mmx2
;
c
->
vp8_v_loop_filter16y_inner
=
ff_vp8_v_loop_filter16y_inner_mmx
2
;
c
->
vp8_h_loop_filter16y_inner
=
ff_vp8_h_loop_filter16y_inner_mmx
2
;
c
->
vp8_v_loop_filter8uv_inner
=
ff_vp8_v_loop_filter8uv_inner_mmx
2
;
c
->
vp8_h_loop_filter8uv_inner
=
ff_vp8_h_loop_filter8uv_inner_mmx
2
;
c
->
vp8_v_loop_filter16y
=
ff_vp8_v_loop_filter16y_mbedge_mmx
2
;
c
->
vp8_h_loop_filter16y
=
ff_vp8_h_loop_filter16y_mbedge_mmx
2
;
c
->
vp8_v_loop_filter8uv
=
ff_vp8_v_loop_filter8uv_mbedge_mmx
2
;
c
->
vp8_h_loop_filter8uv
=
ff_vp8_h_loop_filter8uv_mbedge_mmx
2
;
VP8_LUMA_MC_FUNC
(
0
,
16
,
mmx
ext
);
VP8_MC_FUNC
(
1
,
8
,
mmx
ext
);
VP8_BILINEAR_MC_FUNC
(
0
,
16
,
mmx
ext
);
VP8_BILINEAR_MC_FUNC
(
1
,
8
,
mmxext
);
c
->
vp8_v_loop_filter_simple
=
ff_vp8_v_loop_filter_simple_mmxext
;
c
->
vp8_h_loop_filter_simple
=
ff_vp8_h_loop_filter_simple_mmxext
;
c
->
vp8_v_loop_filter16y_inner
=
ff_vp8_v_loop_filter16y_inner_mmx
ext
;
c
->
vp8_h_loop_filter16y_inner
=
ff_vp8_h_loop_filter16y_inner_mmx
ext
;
c
->
vp8_v_loop_filter8uv_inner
=
ff_vp8_v_loop_filter8uv_inner_mmx
ext
;
c
->
vp8_h_loop_filter8uv_inner
=
ff_vp8_h_loop_filter8uv_inner_mmx
ext
;
c
->
vp8_v_loop_filter16y
=
ff_vp8_v_loop_filter16y_mbedge_mmx
ext
;
c
->
vp8_h_loop_filter16y
=
ff_vp8_h_loop_filter16y_mbedge_mmx
ext
;
c
->
vp8_v_loop_filter8uv
=
ff_vp8_v_loop_filter8uv_mbedge_mmx
ext
;
c
->
vp8_h_loop_filter8uv
=
ff_vp8_h_loop_filter8uv_mbedge_mmx
ext
;
#endif
}
...
...
libavutil/x86/x86util.asm
View file @
26301caa
...
...
@@ -555,7 +555,7 @@
%if
mmsize
==
16
pshuflw
%1
,
%2
,
(
%3
)
*
0x55
punpcklqdq
%1
,
%1
%elif
cpuflag
(
mmx
2
)
%elif
cpuflag
(
mmx
ext
)
pshufw
%1
,
%2
,
(
%3
)
*
0x55
%else
%
ifnidn
%1
,
%2
...
...
libswscale/x86/output.asm
View file @
26301caa
...
...
@@ -247,7 +247,7 @@ cglobal yuv2planeX_%1, %3, 8, %2, filter, fltsize, src, dst, w, dither, offset
%endmacro
%if
ARCH_X86_32
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
yuv2planeX_fn
8
,
0
,
7
yuv2planeX_fn
9
,
0
,
5
yuv2planeX_fn
10
,
0
,
5
...
...
@@ -388,7 +388,7 @@ INIT_MMX mmx
yuv2plane1_fn
8
,
0
,
5
yuv2plane1_fn
16
,
0
,
3
INIT_MMX
mmx
2
INIT_MMX
mmx
ext
yuv2plane1_fn
9
,
0
,
3
yuv2plane1_fn
10
,
0
,
3
%endif
...
...
libswscale/x86/swscale.c
View file @
26301caa
...
...
@@ -250,7 +250,7 @@ extern void ff_yuv2planeX_ ## size ## _ ## opt(const int16_t *filter, int filter
VSCALEX_FUNC(10, opt)
#if ARCH_X86_32
VSCALEX_FUNCS
(
mmx
2
);
VSCALEX_FUNCS
(
mmx
ext
);
#endif
VSCALEX_FUNCS
(
sse2
);
VSCALEX_FUNCS
(
sse4
);
...
...
@@ -267,7 +267,7 @@ extern void ff_yuv2plane1_ ## size ## _ ## opt(const int16_t *src, uint8_t *dst,
VSCALE_FUNC(16, opt1)
#if ARCH_X86_32
VSCALE_FUNCS
(
mmx
,
mmx
2
);
VSCALE_FUNCS
(
mmx
,
mmx
ext
);
#endif
VSCALE_FUNCS
(
sse2
,
sse2
);
VSCALE_FUNC
(
16
,
sse4
);
...
...
@@ -360,7 +360,7 @@ switch(c->dstBpc){ \
if
(
EXTERNAL_MMX
(
cpu_flags
))
{
ASSIGN_MMX_SCALE_FUNC
(
c
->
hyScale
,
c
->
hLumFilterSize
,
mmx
,
mmx
);
ASSIGN_MMX_SCALE_FUNC
(
c
->
hcScale
,
c
->
hChrFilterSize
,
mmx
,
mmx
);
ASSIGN_VSCALE_FUNC
(
c
->
yuv2plane1
,
mmx
,
mmx
2
,
cpu_flags
&
AV_CPU_FLAG_MMXEXT
);
ASSIGN_VSCALE_FUNC
(
c
->
yuv2plane1
,
mmx
,
mmx
ext
,
cpu_flags
&
AV_CPU_FLAG_MMXEXT
);
switch
(
c
->
srcFormat
)
{
case
AV_PIX_FMT_Y400A
:
...
...
@@ -393,7 +393,7 @@ switch(c->dstBpc){ \
}
}
if
(
EXTERNAL_MMXEXT
(
cpu_flags
))
{
ASSIGN_VSCALEX_FUNC
(
c
->
yuv2planeX
,
mmx
2
,
,
1
);
ASSIGN_VSCALEX_FUNC
(
c
->
yuv2planeX
,
mmx
ext
,
,
1
);
}
#endif
/* ARCH_X86_32 */
#define ASSIGN_SSE_SCALE_FUNC(hscalefn, filtersize, opt1, opt2) \
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment