Commit dc34109f authored by jiepan, committed by V8 LUCI CQ

[x64] Implement 256-bit assembler for vmov[au]p*

Bug: v8:12228
Change-Id: I21b2ee1e640ba75227a03d765bb1552eff68e3fa
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3293415
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Commit-Queue: Jie Pan <jie.pan@intel.com>
Cr-Commit-Position: refs/heads/main@{#78033}
parent daf8df62
......@@ -3741,22 +3741,27 @@ void Assembler::vps(byte op, XMMRegister dst, XMMRegister src1,
emit(imm8);
}
// Emits a VEX-encoded instruction with register operands only.
// The sequence is: a VEX prefix built from (dst, src1, src2) with the kL128,
// k66, k0F and kWIG selectors, then the opcode byte |op|, then the operand
// encoding for |dst| and |src2|. |src1| is consumed only by the VEX prefix.
// Requires AVX to be enabled (checked in debug builds via DCHECK).
void Assembler::vpd(byte op, XMMRegister dst, XMMRegister src1,
XMMRegister src2) {
DCHECK(IsEnabled(AVX));
// NOTE(review): presumably reserves buffer space for the bytes emitted
// below -- confirm against EnsureSpace.
EnsureSpace ensure_space(this);
emit_vex_prefix(dst, src1, src2, kL128, k66, k0F, kWIG);
emit(op);
emit_sse_operand(dst, src2);
}
// Operand variant of vpd: same emission sequence as the all-register form
// (VEX prefix with kL128/k66/k0F/kWIG, opcode byte |op|, operand encoding),
// but |src2| is an Operand instead of a register. |src1| is consumed only by
// the VEX prefix. Requires AVX to be enabled (DCHECK in debug builds).
void Assembler::vpd(byte op, XMMRegister dst, XMMRegister src1, Operand src2) {
DCHECK(IsEnabled(AVX));
EnsureSpace ensure_space(this);
emit_vex_prefix(dst, src1, src2, kL128, k66, k0F, kWIG);
emit(op);
emit_sse_operand(dst, src2);
}
// VPD(SIMDRegister, length) defines both Assembler::vpd overloads for one
// register type: one taking a register |src2| and one taking an Operand
// |src2|. |length| is token-pasted onto "k" to pick the vector-length
// selector handed to emit_vex_prefix (L128 -> kL128, L256 -> kL256).
// Each generated body emits, in order: the VEX prefix (k66, k0F, kWIG), the
// opcode byte |op|, and the operand encoding for |dst| and |src2|. Both
// overloads DCHECK that AVX is enabled.
#define VPD(SIMDRegister, length) \
void Assembler::vpd(byte op, SIMDRegister dst, SIMDRegister src1, \
SIMDRegister src2) { \
DCHECK(IsEnabled(AVX)); \
EnsureSpace ensure_space(this); \
emit_vex_prefix(dst, src1, src2, k##length, k66, k0F, kWIG); \
emit(op); \
emit_sse_operand(dst, src2); \
} \
\
void Assembler::vpd(byte op, SIMDRegister dst, SIMDRegister src1, \
Operand src2) { \
DCHECK(IsEnabled(AVX)); \
EnsureSpace ensure_space(this); \
emit_vex_prefix(dst, src1, src2, k##length, k66, k0F, kWIG); \
emit(op); \
emit_sse_operand(dst, src2); \
}
// Instantiate the overload pair for the XMMRegister and YMMRegister types.
VPD(XMMRegister, L128)
VPD(YMMRegister, L256)
// The helper macro is only needed for the two instantiations above.
#undef VPD
void Assembler::vucomiss(XMMRegister dst, XMMRegister src) {
DCHECK(IsEnabled(AVX));
......
......@@ -1596,13 +1596,21 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
}
// vmovaps: forwards to vps with opcode 0x28, for XMM and YMM destinations and
// for register or Operand sources. xmm0/ymm0 is passed in the src1 slot.
// NOTE(review): presumably a placeholder because the move has no second
// source operand -- confirm against emit_vex_prefix.
void vmovaps(XMMRegister dst, XMMRegister src) { vps(0x28, dst, xmm0, src); }
void vmovaps(YMMRegister dst, YMMRegister src) { vps(0x28, dst, ymm0, src); }
void vmovaps(XMMRegister dst, Operand src) { vps(0x28, dst, xmm0, src); }
void vmovaps(YMMRegister dst, Operand src) { vps(0x28, dst, ymm0, src); }
// vmovups: forwards to vps. Forms with a register destination use opcode
// 0x10; forms with an Operand destination use opcode 0x11 and pass the
// source register in vps's dst position with the Operand last.
// xmm0/ymm0 is passed in the src1 slot in every form.
void vmovups(XMMRegister dst, XMMRegister src) { vps(0x10, dst, xmm0, src); }
void vmovups(YMMRegister dst, YMMRegister src) { vps(0x10, dst, ymm0, src); }
void vmovups(XMMRegister dst, Operand src) { vps(0x10, dst, xmm0, src); }
void vmovups(YMMRegister dst, Operand src) { vps(0x10, dst, ymm0, src); }
void vmovups(Operand dst, XMMRegister src) { vps(0x11, src, xmm0, dst); }
void vmovups(Operand dst, YMMRegister src) { vps(0x11, src, ymm0, dst); }
// vmovapd: register-to-register forms only; forwards to vpd with opcode 0x28
// and xmm0/ymm0 in the src1 slot.
void vmovapd(XMMRegister dst, XMMRegister src) { vpd(0x28, dst, xmm0, src); }
void vmovapd(YMMRegister dst, YMMRegister src) { vpd(0x28, dst, ymm0, src); }
// vmovupd: forwards to vpd. Forms with a register destination and Operand
// source use opcode 0x10; forms with an Operand destination use opcode 0x11
// and pass the source register in vpd's dst position with the Operand last.
// xmm0/ymm0 is passed in the src1 slot in every form.
void vmovupd(XMMRegister dst, Operand src) { vpd(0x10, dst, xmm0, src); }
void vmovupd(YMMRegister dst, Operand src) { vpd(0x10, dst, ymm0, src); }
void vmovupd(Operand dst, XMMRegister src) { vpd(0x11, src, xmm0, dst); }
void vmovupd(Operand dst, YMMRegister src) { vpd(0x11, src, ymm0, dst); }
void vmovmskps(Register dst, XMMRegister src) {
XMMRegister idst = XMMRegister::from_code(dst.code());
vps(0x50, idst, xmm0, src);
......@@ -1802,7 +1810,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
byte imm8);
// Emit helpers taking an opcode byte |op|, a destination, and two sources,
// where the second source is either a register or an Operand. Declared for
// both XMMRegister and YMMRegister operands.
void vpd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
void vpd(byte op, YMMRegister dst, YMMRegister src1, YMMRegister src2);
void vpd(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
void vpd(byte op, YMMRegister dst, YMMRegister src1, Operand src2);
// AVX2 instructions
#define AVX2_INSTRUCTION(instr, prefix, escape1, escape2, opcode) \
......
......@@ -2536,6 +2536,10 @@ TEST(AssemblerX64Regmove256bit) {
__ vmovdqu(ymm10, ymm11);
__ vmovdqu(ymm9, Operand(rbx, rcx, times_4, 10000));
__ vmovdqu(Operand(rbx, rcx, times_4, 10000), ymm0);
__ vmovaps(ymm3, ymm1);
__ vmovups(Operand(rcx, rdx, times_4, 10000), ymm2);
__ vmovapd(ymm0, ymm5);
__ vmovupd(ymm6, Operand(r8, r9, times_4, 10000));
__ vbroadcastss(ymm7, Operand(rbx, rcx, times_4, 10000));
__ vmovddup(ymm3, ymm2);
__ vmovddup(ymm4, Operand(rbx, rcx, times_4, 10000));
......@@ -2564,6 +2568,15 @@ TEST(AssemblerX64Regmove256bit) {
// vmovdqu YMMWORD PTR [rbx+rcx*4+0x2710],ymm0
0xC5, 0xFE, 0x7F, 0x84, 0x8B, 0x10, 0x27, 0x00, 0x00,
// vmovaps ymm3, ymm1
0xC5, 0xFC, 0x28, 0xD9,
// vmovups YMMWORD PTR [rcx+rdx*4+0x2710], ymm2
0xC5, 0xFC, 0x11, 0x94, 0x91, 0x10, 0x27, 0x00, 0x00,
// vmovapd ymm0, ymm5
0xC5, 0xFD, 0x28, 0xC5,
// vmovupd ymm6, YMMWORD PTR [r8+r9*4+0x2710]
0xC4, 0x81, 0x7D, 0x10, 0xB4, 0x88, 0x10, 0x27, 0x00, 0x00,
// vbroadcastss ymm7, DWORD PTR [rbx+rcx*4+0x2710]
0xc4, 0xe2, 0x7d, 0x18, 0xbc, 0x8b, 0x10, 0x27, 0x00, 0x00,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment