Commit 9dc45d1f, authored by Christophe Gisquet, committed by Michael Niedermayer

x86: lavc: share more constants

Reviewed-by: "Ronald S. Bultje" <rsbultje@gmail.com>
Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
parent bfb988b1
......@@ -32,7 +32,7 @@ pw_bap_mul1: dw 21846, 21846, 0, 32768, 21846, 21846, 0, 32768
pw_bap_mul2: dw 5, 7, 0, 7, 5, 7, 0, 7
; used in ff_ac3_extract_exponents()
pd_1: times 4 dd 1
cextern pd_1
pd_151: times 4 dd 151
; used in ff_apply_window_int16()
......
......@@ -53,12 +53,19 @@ DECLARE_ALIGNED(16, const xmm_reg, ff_pw_8192) = { 0x2000200020002000ULL, 0x200
DECLARE_ALIGNED(32, const ymm_reg, ff_pw_m1) = { 0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL,
0xFFFFFFFFFFFFFFFFULL, 0xFFFFFFFFFFFFFFFFULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pb_0) = { 0x0000000000000000ULL, 0x0000000000000000ULL };
DECLARE_ALIGNED(32, const ymm_reg, ff_pb_0) = { 0x0000000000000000ULL, 0x0000000000000000ULL,
0x0000000000000000ULL, 0x0000000000000000ULL };
DECLARE_ALIGNED(32, const ymm_reg, ff_pb_1) = { 0x0101010101010101ULL, 0x0101010101010101ULL,
0x0101010101010101ULL, 0x0101010101010101ULL };
DECLARE_ALIGNED(32, const ymm_reg, ff_pb_2) = { 0x0202020202020202ULL, 0x0202020202020202ULL,
0x0202020202020202ULL, 0x0202020202020202ULL };
DECLARE_ALIGNED(32, const ymm_reg, ff_pb_3) = { 0x0303030303030303ULL, 0x0303030303030303ULL,
0x0303030303030303ULL, 0x0303030303030303ULL };
DECLARE_ALIGNED(32, const xmm_reg, ff_pb_15) = { 0x0F0F0F0F0F0F0F0FULL, 0x0F0F0F0F0F0F0F0FULL };
DECLARE_ALIGNED(16, const xmm_reg, ff_pb_80) = { 0x8080808080808080ULL, 0x8080808080808080ULL };
DECLARE_ALIGNED(8, const uint64_t, ff_pb_FC) = 0xFCFCFCFCFCFCFCFCULL;
DECLARE_ALIGNED(16, const xmm_reg, ff_ps_neg) = { 0x8000000080000000ULL, 0x8000000080000000ULL };
DECLARE_ALIGNED(32, const ymm_reg, ff_pd_1) = { 0x0000000100000001ULL, 0x0000000100000001ULL,
0x0000000100000001ULL, 0x0000000100000001ULL };
......@@ -49,7 +49,9 @@ extern const xmm_reg ff_pw_2048;
extern const xmm_reg ff_pw_8192;
extern const ymm_reg ff_pw_m1;
extern const ymm_reg ff_pb_0;
extern const ymm_reg ff_pb_1;
extern const ymm_reg ff_pb_2;
extern const ymm_reg ff_pb_3;
extern const xmm_reg ff_pb_80;
extern const xmm_reg ff_pb_F8;
......@@ -57,4 +59,6 @@ extern const uint64_t ff_pb_FC;
extern const xmm_reg ff_ps_neg;
extern const ymm_reg ff_pd_1;
#endif /* AVCODEC_X86_CONSTANTS_H */
......@@ -28,7 +28,7 @@ SECTION_RODATA 32
cextern pw_16
cextern pw_1
pb_0: times 32 db 0 ; we do not use cextern here as old llvm-gcc fails to align it correctly
cextern pb_0
pw_pixel_max: times 8 dw ((1 << 10)-1)
......
......@@ -30,8 +30,8 @@ pw_bi_12: times 16 dw (1 << 12)
max_pixels_8: times 16 dw ((1 << 8)-1)
max_pixels_10: times 16 dw ((1 << 10)-1)
max_pixels_12: times 16 dw ((1 << 12)-1)
zero: times 8 dd 0
one_per_32: times 8 dd 1
cextern pd_1
cextern pb_0
SECTION_TEXT 32
%macro EPEL_TABLE 4
......@@ -665,9 +665,9 @@ QPEL_TABLE 10, 8, w, avx2
%if %2 == 8
packuswb %3, %4
%else
CLIPW %3, [zero], [max_pixels_%2]
CLIPW %3, [pb_0], [max_pixels_%2]
%if (%1 > 8 && notcpuflag(avx)) || %1 > 16
CLIPW %4, [zero], [max_pixels_%2]
CLIPW %4, [pb_0], [max_pixels_%2]
%endif
%endif
%endmacro
......@@ -1428,7 +1428,7 @@ cglobal hevc_put_hevc_uni_w%1_%2, 6, 6, 7, dst, dststride, src, srcstride, heigh
punpcklwd m2, m2
%endif
dec SHIFT
movdqu m5, [one_per_32]
movdqu m5, [pd_1]
movd m6, SHIFT
pshufd m2, m2, 0
mov SHIFT, oxm
......@@ -1465,7 +1465,7 @@ cglobal hevc_put_hevc_uni_w%1_%2, 6, 6, 7, dst, dststride, src, srcstride, heigh
%if %2 == 8
packuswb m0, m0
%else
CLIPW m0, [zero], [max_pixels_%2]
CLIPW m0, [pb_0], [max_pixels_%2]
%endif
PEL_%2STORE%1 dstq, m0, m1
add dstq, dststrideq ; dst += dststride
......@@ -1536,7 +1536,7 @@ cglobal hevc_put_hevc_bi_w%1_%2, 5, 7, 10, dst, dststride, src, srcstride, src2,
%if %2 == 8
packuswb m0, m0
%else
CLIPW m0, [zero], [max_pixels_%2]
CLIPW m0, [pb_0], [max_pixels_%2]
%endif
PEL_%2STORE%1 dstq, m0, m1
add dstq, dststrideq ; dst += dststride
......
......@@ -27,7 +27,6 @@ SECTION_RODATA 32
pw_mask10: times 16 dw 0x03FF
pw_mask12: times 16 dw 0x0FFF
pb_2: times 32 db 2
pw_m2: times 16 dw -2
pb_edge_shuffle: times 2 db 1, 2, 0, 3, 4, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
pb_eo: db -1, 0, 1, 0, 0, -1, 0, 1, -1, -1, 1, 1, 1, -1, -1, 1
......@@ -35,6 +34,7 @@ cextern pw_m1
cextern pw_1
cextern pw_2
cextern pb_1
cextern pb_2
SECTION_TEXT
......
......@@ -23,7 +23,7 @@
%include "libavutil/x86/x86util.asm"
SECTION_RODATA
pb_f: times 16 db 15
cextern pb_15
pb_zzzzzzzz77777777: times 8 db -1
pb_7: times 8 db 7
pb_zzzz3333zzzzbbbb: db -1,-1,-1,-1,3,3,3,3,-1,-1,-1,-1,11,11,11,11
......@@ -157,7 +157,7 @@ cglobal add_hfyu_left_pred, 3,3,7, dst, src, w, left
INIT_XMM sse4
cglobal add_hfyu_left_pred, 3,3,7, dst, src, w, left
mova m5, [pb_f]
mova m5, [pb_15]
mova m6, [pb_zzzzzzzz77777777]
mova m4, [pb_zzzz3333zzzzbbbb]
mova m3, [pb_zz11zz55zz99zzdd]
......
......@@ -64,8 +64,6 @@ pb_6xm1_BDF_0to6: times 6 db -1
db 11, 13, 15, 0, 1, 2, 3, 4, 5, 6
pb_02468ACE_13579BDF: db 0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15
pb_2: times 32 db 2
pb_15: times 16 db 15
pb_15x0_1xm1: times 15 db 0
db -1
pb_0to2_5x3: db 0, 1, 2
......@@ -76,7 +74,9 @@ pb_6x0_2xm1: times 6 db 0
times 2 db -1
cextern pb_1
cextern pb_2
cextern pb_3
cextern pb_15
cextern pw_2
cextern pw_4
cextern pw_8
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment