Commit a54f38e1 authored by jiepan, committed by V8 LUCI CQ

[x64] Implement 256-bit assembler for vshufps

Bug: v8:12228
Change-Id: I233efc9fc4636c25baba6a689f7038331fd1f32b
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3303806
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Jie Pan <jie.pan@intel.com>
Cr-Commit-Position: refs/heads/main@{#78598}
parent 4b8d0489
......@@ -3761,6 +3761,16 @@ void Assembler::vps(byte op, XMMRegister dst, XMMRegister src1,
emit(imm8);
}
// Emits a VEX-prefixed instruction with 256-bit vector length (kL256) that
// takes three YMM register operands followed by an 8-bit immediate.
// vshufps(YMMRegister, ...) is one caller (opcode 0xC6).
//   op   - opcode byte, emitted immediately after the VEX prefix.
//   dst  - destination register; passed to both emit_vex_prefix and
//          emit_sse_operand.
//   src1 - first source register; passed only to emit_vex_prefix.
//   src2 - second source register; passed to both emit_vex_prefix and
//          emit_sse_operand.
//   imm8 - immediate byte, emitted last.
void Assembler::vps(byte op, YMMRegister dst, YMMRegister src1,
YMMRegister src2, byte imm8) {
// Asserts that the AVX feature is enabled for this assembler.
DCHECK(IsEnabled(AVX));
// NOTE(review): presumably reserves buffer room for the bytes emitted below;
// confirm against the EnsureSpace definition.
EnsureSpace ensure_space(this);
// Prefix is requested with kL256, kNoPrefix, k0F and kWIG.
emit_vex_prefix(dst, src1, src2, kL256, kNoPrefix, k0F, kWIG);
emit(op);
// Only dst and src2 are encoded here; src1 was handed to the prefix call.
emit_sse_operand(dst, src2);
// The immediate is the final byte of the instruction.
emit(imm8);
}
#define VPD(SIMDRegister, length) \
void Assembler::vpd(byte op, SIMDRegister dst, SIMDRegister src1, \
SIMDRegister src2) { \
......
......@@ -1596,6 +1596,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
// vshufps on XMM registers: forwards to vps() with opcode 0xC6, passing
// dst, src1, src2 and imm8 through unchanged.
void vshufps(XMMRegister dst, XMMRegister src1, XMMRegister src2, byte imm8) {
vps(0xC6, dst, src1, src2, imm8);
}
// vshufps on YMM registers: forwards to the YMM overload of vps() with
// opcode 0xC6, passing dst, src1, src2 and imm8 through unchanged.
void vshufps(YMMRegister dst, YMMRegister src1, YMMRegister src2, byte imm8) {
vps(0xC6, dst, src1, src2, imm8);
}
// vmovaps on XMM registers: forwards to vps() with opcode 0x28.
// NOTE(review): xmm0 is passed in the src1 slot, presumably as a placeholder
// because this instruction has no second source operand; confirm.
void vmovaps(XMMRegister dst, XMMRegister src) { vps(0x28, dst, xmm0, src); }
// vmovaps on YMM registers: forwards to vps() with opcode 0x28.
// NOTE(review): ymm0 is passed in the src1 slot, presumably as a placeholder
// because this instruction has no second source operand; confirm.
void vmovaps(YMMRegister dst, YMMRegister src) { vps(0x28, dst, ymm0, src); }
......@@ -1811,6 +1814,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void vps(byte op, YMMRegister dst, YMMRegister src1, Operand src2);
// vps overloads that take a trailing immediate byte (imm8) in addition to the
// opcode and three register operands; one for XMM and one for YMM registers.
void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
byte imm8);
void vps(byte op, YMMRegister dst, YMMRegister src1, YMMRegister src2,
byte imm8);
void vpd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
void vpd(byte op, YMMRegister dst, YMMRegister src1, YMMRegister src2);
void vpd(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
......
......@@ -2681,6 +2681,7 @@ TEST(AssemblerX64FloatingPoint256bit) {
__ vhaddps(ymm0, ymm1, Operand(rbx, rcx, times_4, 10000));
__ vblendvps(ymm0, ymm3, ymm5, ymm9);
__ vblendvpd(ymm7, ymm4, ymm3, ymm1);
__ vshufps(ymm3, ymm1, ymm2, 0x75);
CodeDesc desc;
masm.GetCode(isolate, &desc);
......@@ -2712,7 +2713,9 @@ TEST(AssemblerX64FloatingPoint256bit) {
// vblendvps ymm0, ymm3, ymm5, ymm9
0xC4, 0xE3, 0x65, 0x4A, 0xC5, 0x90,
// vblendvpd ymm7, ymm4, ymm3, ymm1
0xC4, 0xE3, 0x5D, 0x4B, 0xFB, 0x10};
0xC4, 0xE3, 0x5D, 0x4B, 0xFB, 0x10,
// vshufps ymm3, ymm1, ymm2, 0x75
0xC5, 0xF4, 0xC6, 0xDA, 0x75};
CHECK_EQ(0, memcmp(expected, desc.buffer, sizeof(expected)));
}
......
......@@ -1415,6 +1415,9 @@ UNINITIALIZED_TEST(DisasmX64YMMRegister) {
COMPARE("c5ff12a48b10270000 vmovddup ymm4,[rbx+rcx*4+0x2710]",
vmovddup(ymm4, Operand(rbx, rcx, times_4, 10000)));
COMPARE("c5fe16ca vmovshdup ymm1,ymm2", vmovshdup(ymm1, ymm2));
COMPARE("c5f4c6da73 vshufps ymm3,ymm1,ymm2,0x73",
vshufps(ymm3, ymm1, ymm2, 115));
}
if (!CpuFeatures::IsSupported(AVX2)) return;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment