Commit 9dc45d1f, authored by Christophe Gisquet, committed by Michael Niedermayer

x86: lavc: share more constants

Reviewed-by: "Ronald S. Bultje" <rsbultje@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent bfb988b1
...@@ -32,7 +32,7 @@ pw_bap_mul1: dw 21846, 21846, 0, 32768, 21846, 21846, 0, 32768 ...@@ -32,7 +32,7 @@ pw_bap_mul1: dw 21846, 21846, 0, 32768, 21846, 21846, 0, 32768
pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7 pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7
; used in ff_ac3_extract_exponents() ; used in ff_ac3_extract_exponents()
pd_1: times 4 dd 1 cextern pd_1
pd_151: times 4 dd 151 pd_151: times 4 dd 151
; used in ff_apply_window_int16() ; used in ff_apply_window_int16()
......
...@@ -53,12 +53,19 @@ DECLARE_ALIGNED(16, const xmm_reg, ff_pw_8192) = { 0x2000200020002000ULL, 0x200 ...@@ -53,12 +53,19 @@ DECLARE_ALIGNED(16, const xmm_reg, ff_pw_8192) = { 0x2000200020002000ULL, 0x200
DECLARE_ALIGNED(32, const ymm_reg, ff_pw_m1) = { 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL, DECLARE_ALIGNED(32, const ymm_reg, ff_pw_m1) = { 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL }; 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pb_0) = { 0x0000000000000000ULL, 0x0000000000000000ULL }; DECLARE_ALIGNED(32, const ymm_reg, ff_pb_0) = { 0x0000000000000000ULL, 0x0000000000000000ULL,
0x0000000000000000ULL, 0x0000000000000000ULL };
DECLARE_ALIGNED(32, const ymm_reg, ff_pb_1) = { 0x0101010101010101ULL, 0x0101010101010101ULL, DECLARE_ALIGNED(32, const ymm_reg, ff_pb_1) = { 0x0101010101010101ULL, 0x0101010101010101ULL,
0x0101010101010101ULL, 0x0101010101010101ULL }; 0x0101010101010101ULL, 0x0101010101010101ULL };
DECLARE_ALIGNED(32, const ymm_reg, ff_pb_2) = { 0x0202020202020202ULL, 0x0202020202020202ULL,
0x0202020202020202ULL, 0x0202020202020202ULL };
DECLARE_ALIGNED(32, const ymm_reg, ff_pb_3) = { 0x0303030303030303ULL, 0x0303030303030303ULL, DECLARE_ALIGNED(32, const ymm_reg, ff_pb_3) = { 0x0303030303030303ULL, 0x0303030303030303ULL,
0x0303030303030303ULL, 0x0303030303030303ULL }; 0x0303030303030303ULL, 0x0303030303030303ULL };
DECLARE_ALIGNED(32, const xmm_reg, ff_pb_15) = { 0x0F0F0F0F0F0F0F0FULL, 0x0F0F0F0F0F0F0F0FULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pb_80) = { 0x8080808080808080ULL, 0x8080808080808080ULL }; DECLARE_ALIGNED(16, const xmm_reg, ff_pb_80) = { 0x8080808080808080ULL, 0x8080808080808080ULL };
DECLARE_ALIGNED(8, const uint64_t, ff_pb_FC) = 0xFCFCFCFCFCFCFCFCULL; DECLARE_ALIGNED(8, const uint64_t, ff_pb_FC) = 0xFCFCFCFCFCFCFCFCULL;
DECLARE_ALIGNED(16, const xmm_reg, ff_ps_neg) = { 0x8000000080000000ULL, 0x8000000080000000ULL }; DECLARE_ALIGNED(16, const xmm_reg, ff_ps_neg) = { 0x8000000080000000ULL, 0x8000000080000000ULL };
DECLARE_ALIGNED(32, const ymm_reg, ff_pd_1) = { 0x0000000100000001ULL, 0x0000000100000001ULL,
0x0000000100000001ULL, 0x0000000100000001ULL };
...@@ -49,7 +49,9 @@ extern const xmm_reg ff_pw_2048; ...@@ -49,7 +49,9 @@ extern const xmm_reg ff_pw_2048;
extern const xmm_reg ff_pw_8192; extern const xmm_reg ff_pw_8192;
extern const ymm_reg ff_pw_m1; extern const ymm_reg ff_pw_m1;
extern const ymm_reg ff_pb_0;
extern const ymm_reg ff_pb_1; extern const ymm_reg ff_pb_1;
extern const ymm_reg ff_pb_2;
extern const ymm_reg ff_pb_3; extern const ymm_reg ff_pb_3;
extern const xmm_reg ff_pb_80; extern const xmm_reg ff_pb_80;
extern const xmm_reg ff_pb_F8; extern const xmm_reg ff_pb_F8;
...@@ -57,4 +59,6 @@ extern const uint64_t ff_pb_FC; ...@@ -57,4 +59,6 @@ extern const uint64_t ff_pb_FC;
extern const xmm_reg ff_ps_neg; extern const xmm_reg ff_ps_neg;
extern const ymm_reg ff_pd_1;
#endif /* AVCODEC_X86_CONSTANTS_H */ #endif /* AVCODEC_X86_CONSTANTS_H */
...@@ -28,7 +28,7 @@ SECTION_RODATA 32 ...@@ -28,7 +28,7 @@ SECTION_RODATA 32
cextern pw_16 cextern pw_16
cextern pw_1 cextern pw_1
pb_0: times 32 db 0 ; we do not use cextern here as old llvm-gcc fails to align it correctly cextern pb_0
pw_pixel_max: times 8 dw ((1 << 10)-1) pw_pixel_max: times 8 dw ((1 << 10)-1)
......
...@@ -30,8 +30,8 @@ pw_bi_12: times 16 dw (1 << 12) ...@@ -30,8 +30,8 @@ pw_bi_12: times 16 dw (1 << 12)
max_pixels_8: times 16 dw ((1 << 8)-1) max_pixels_8: times 16 dw ((1 << 8)-1)
max_pixels_10: times 16 dw ((1 << 10)-1) max_pixels_10: times 16 dw ((1 << 10)-1)
max_pixels_12: times 16 dw ((1 << 12)-1) max_pixels_12: times 16 dw ((1 << 12)-1)
zero: times 8 dd 0 cextern pd_1
one_per_32: times 8 dd 1 cextern pb_0
SECTION_TEXT 32 SECTION_TEXT 32
%macro EPEL_TABLE 4 %macro EPEL_TABLE 4
...@@ -665,9 +665,9 @@ QPEL_TABLE 10, 8, w, avx2 ...@@ -665,9 +665,9 @@ QPEL_TABLE 10, 8, w, avx2
%if %2 == 8 %if %2 == 8
packuswb %3, %4 packuswb %3, %4
%else %else
CLIPW %3, [zero], [max_pixels_%2] CLIPW %3, [pb_0], [max_pixels_%2]
%if (%1 > 8 && notcpuflag(avx)) || %1 > 16 %if (%1 > 8 && notcpuflag(avx)) || %1 > 16
CLIPW %4, [zero], [max_pixels_%2] CLIPW %4, [pb_0], [max_pixels_%2]
%endif %endif
%endif %endif
%endmacro %endmacro
...@@ -1428,7 +1428,7 @@ cglobal hevc_put_hevc_uni_w%1_%2, 6, 6, 7, dst, dststride, src, srcstride, heigh ...@@ -1428,7 +1428,7 @@ cglobal hevc_put_hevc_uni_w%1_%2, 6, 6, 7, dst, dststride, src, srcstride, heigh
punpcklwd m2, m2 punpcklwd m2, m2
%endif %endif
dec SHIFT dec SHIFT
movdqu m5, [one_per_32] movdqu m5, [pd_1]
movd m6, SHIFT movd m6, SHIFT
pshufd m2, m2, 0 pshufd m2, m2, 0
mov SHIFT, oxm mov SHIFT, oxm
...@@ -1465,7 +1465,7 @@ cglobal hevc_put_hevc_uni_w%1_%2, 6, 6, 7, dst, dststride, src, srcstride, heigh ...@@ -1465,7 +1465,7 @@ cglobal hevc_put_hevc_uni_w%1_%2, 6, 6, 7, dst, dststride, src, srcstride, heigh
%if %2 == 8 %if %2 == 8
packuswb m0, m0 packuswb m0, m0
%else %else
CLIPW m0, [zero], [max_pixels_%2] CLIPW m0, [pb_0], [max_pixels_%2]
%endif %endif
PEL_%2STORE%1 dstq, m0, m1 PEL_%2STORE%1 dstq, m0, m1
add dstq, dststrideq ; dst += dststride add dstq, dststrideq ; dst += dststride
...@@ -1536,7 +1536,7 @@ cglobal hevc_put_hevc_bi_w%1_%2, 5, 7, 10, dst, dststride, src, srcstride, src2, ...@@ -1536,7 +1536,7 @@ cglobal hevc_put_hevc_bi_w%1_%2, 5, 7, 10, dst, dststride, src, srcstride, src2,
%if %2 == 8 %if %2 == 8
packuswb m0, m0 packuswb m0, m0
%else %else
CLIPW m0, [zero], [max_pixels_%2] CLIPW m0, [pb_0], [max_pixels_%2]
%endif %endif
PEL_%2STORE%1 dstq, m0, m1 PEL_%2STORE%1 dstq, m0, m1
add dstq, dststrideq ; dst += dststride add dstq, dststrideq ; dst += dststride
......
...@@ -27,7 +27,6 @@ SECTION_RODATA 32 ...@@ -27,7 +27,6 @@ SECTION_RODATA 32
pw_mask10: times 16 dw 0x03FF pw_mask10: times 16 dw 0x03FF
pw_mask12: times 16 dw 0x0FFF pw_mask12: times 16 dw 0x0FFF
pb_2: times 32 db 2
pw_m2: times 16 dw -2 pw_m2: times 16 dw -2
pb_edge_shuffle: times 2 db 1, 2, 0, 3, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1 pb_edge_shuffle: times 2 db 1, 2, 0, 3, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
pb_eo: db -1, 0, 1, 0, 0, -1, 0, 1, -1, -1, 1, 1, 1, -1, -1, 1 pb_eo: db -1, 0, 1, 0, 0, -1, 0, 1, -1, -1, 1, 1, 1, -1, -1, 1
...@@ -35,6 +34,7 @@ cextern pw_m1 ...@@ -35,6 +34,7 @@ cextern pw_m1
cextern pw_1 cextern pw_1
cextern pw_2 cextern pw_2
cextern pb_1 cextern pb_1
cextern pb_2
SECTION_TEXT SECTION_TEXT
......
...@@ -23,7 +23,7 @@ ...@@ -23,7 +23,7 @@
%include "libavutil/x86/x86util.asm" %include "libavutil/x86/x86util.asm"
SECTION_RODATA SECTION_RODATA
pb_f: times 16 db 15 cextern pb_15
pb_zzzzzzzz77777777: times 8 db -1 pb_zzzzzzzz77777777: times 8 db -1
pb_7: times 8 db 7 pb_7: times 8 db 7
pb_zzzz3333zzzzbbbb: db -1,-1,-1,-1,3,3,3,3,-1,-1,-1,-1,11,11,11,11 pb_zzzz3333zzzzbbbb: db -1,-1,-1,-1,3,3,3,3,-1,-1,-1,-1,11,11,11,11
...@@ -157,7 +157,7 @@ cglobal add_hfyu_left_pred, 3,3,7, dst, src, w, left ...@@ -157,7 +157,7 @@ cglobal add_hfyu_left_pred, 3,3,7, dst, src, w, left
INIT_XMM sse4 INIT_XMM sse4
cglobal add_hfyu_left_pred, 3,3,7, dst, src, w, left cglobal add_hfyu_left_pred, 3,3,7, dst, src, w, left
mova m5, [pb_f] mova m5, [pb_15]
mova m6, [pb_zzzzzzzz77777777] mova m6, [pb_zzzzzzzz77777777]
mova m4, [pb_zzzz3333zzzzbbbb] mova m4, [pb_zzzz3333zzzzbbbb]
mova m3, [pb_zz11zz55zz99zzdd] mova m3, [pb_zz11zz55zz99zzdd]
......
...@@ -64,8 +64,6 @@ pb_6xm1_BDF_0to6: times 6 db -1 ...@@ -64,8 +64,6 @@ pb_6xm1_BDF_0to6: times 6 db -1
db 11, 13, 15, 0, 1, 2, 3, 4, 5, 6 db 11, 13, 15, 0, 1, 2, 3, 4, 5, 6
pb_02468ACE_13579BDF: db 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15 pb_02468ACE_13579BDF: db 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15
pb_2: times 32 db 2
pb_15: times 16 db 15
pb_15x0_1xm1: times 15 db 0 pb_15x0_1xm1: times 15 db 0
db -1 db -1
pb_0to2_5x3: db 0, 1, 2 pb_0to2_5x3: db 0, 1, 2
...@@ -76,7 +74,9 @@ pb_6x0_2xm1: times 6 db 0 ...@@ -76,7 +74,9 @@ pb_6x0_2xm1: times 6 db 0
times 2 db -1 times 2 db -1
cextern pb_1 cextern pb_1
cextern pb_2
cextern pb_3 cextern pb_3
cextern pb_15
cextern pw_2 cextern pw_2
cextern pw_4 cextern pw_4
cextern pw_8 cextern pw_8
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment