Commit ed7e3de9, authored by Yolanda Chen and committed by V8 LUCI CQ

[x64] Implement 256-bit assembly for vhaddps

Bug: v8:12228
Change-Id: Ie1f569c450f84a862c754b844e36349b1533872d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3194633
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Commit-Queue: Yolanda Chen <yolanda.chen@intel.com>
Cr-Commit-Position: refs/heads/main@{#77202}
parent a5692811
......@@ -1378,15 +1378,16 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
SSE_BINOP_INSTRUCTION_LIST(AVX_SSE_BINOP)
#undef AVX_SSE_BINOP
#define AVX_3(instr, opcode, impl) \
void instr(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
impl(opcode, dst, src1, src2); \
} \
void instr(XMMRegister dst, XMMRegister src1, Operand src2) { \
impl(opcode, dst, src1, src2); \
// AVX_3 declares a three-operand function named `instr` for the register type
// passed as `SIMDRegister`, in two overloads: one whose second source is a
// register of that type and one whose second source is a memory Operand.
// Both overloads forward (opcode, dst, src1, src2) to the emitter named by
// `impl`.
#define AVX_3(instr, opcode, impl, SIMDRegister) \
void instr(SIMDRegister dst, SIMDRegister src1, SIMDRegister src2) { \
impl(opcode, dst, src1, src2); \
} \
void instr(SIMDRegister dst, SIMDRegister src1, Operand src2) { \
impl(opcode, dst, src1, src2); \
}
AVX_3(vhaddps, 0x7c, vsd)
AVX_3(vhaddps, 0x7c, vsd, XMMRegister)
AVX_3(vhaddps, 0x7c, vsd, YMMRegister)
#define AVX_SCALAR(instr, prefix, escape, opcode) \
void v##instr(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
......@@ -1513,11 +1514,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
emit(static_cast<byte>(mode) | 0x8); // Mask precision exception.
}
void vsd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vinstr(op, dst, src1, src2, kF2, k0F, kWIG);
}
void vsd(byte op, XMMRegister dst, XMMRegister src1, Operand src2) {
vinstr(op, dst, src1, src2, kF2, k0F, kWIG);
// Emitter used as the `impl` argument of AVX_3. It accepts any register type
// for dst/src1 and any second-source type that vinstr can take, and forwards
// them unchanged together with the kF2 / k0F / kWIG encoding selectors and the
// AVX feature argument.
template <typename SIMDReg, typename Src2>
void vsd(byte op, SIMDReg dst, SIMDReg src1, Src2 src2) {
  vinstr(op, dst, src1, src2, kF2, k0F, kWIG, AVX);
}
void vmovss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
......
......@@ -2629,6 +2629,8 @@ TEST(AssemblerX64FloatingPoint256bit) {
__ vsubps(ymm10, ymm11, ymm12);
__ vroundps(ymm9, ymm2, kRoundUp);
__ vroundpd(ymm9, ymm2, kRoundToNearest);
__ vhaddps(ymm1, ymm2, ymm3);
__ vhaddps(ymm0, ymm1, Operand(rbx, rcx, times_4, 10000));
CodeDesc desc;
masm.GetCode(isolate, &desc);
......@@ -2648,7 +2650,11 @@ TEST(AssemblerX64FloatingPoint256bit) {
// vroundps ymm9, ymm2, 0xA
0xC4, 0x63, 0x7D, 0x08, 0xCA, 0x0A,
// vroundpd ymm9, ymm2, 0x8
0xC4, 0x63, 0x7D, 0x09, 0xCA, 0x08};
0xC4, 0x63, 0x7D, 0x09, 0xCA, 0x08,
// VHADDPS ymm1, ymm2, ymm3
0xC5, 0xEF, 0x7C, 0xCB,
// VHADDPS ymm0, ymm1, YMMWORD PTR [rbx+rcx*4+0x2710]
0xc5, 0xf7, 0x7c, 0x84, 0x8b, 0x10, 0x27, 0x00, 0x00};
CHECK_EQ(0, memcmp(expected, desc.buffer, sizeof(expected)));
}
......
......@@ -349,6 +349,8 @@ TEST(DisasmX64) {
__ vxorps(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vhaddps(xmm0, xmm1, xmm9);
__ vhaddps(xmm0, xmm1, Operand(rbx, rcx, times_4, 10000));
__ vhaddps(ymm0, ymm1, ymm2);
__ vhaddps(ymm0, ymm1, Operand(rbx, rcx, times_4, 10000));
__ vpcmpeqd(xmm0, xmm15, xmm5);
__ vpcmpeqd(xmm15, xmm0, Operand(rbx, rcx, times_4, 10000));
......@@ -699,6 +701,8 @@ struct DisassemblerTester {
int pc_offset() { return assm_.pc_offset(); }
Assembler* assm() { return &assm_; }
v8::internal::byte buffer_[kAssemblerBufferSize];
Assembler assm_;
disasm::NameConverter converter_;
......@@ -1223,27 +1227,15 @@ UNINITIALIZED_TEST(DisasmX64CheckOutputSSE2) {
// Checks the disassembler's text output for AVX instructions that operate on
// YMM registers. Returns early when the CPU does not report AVX support.
UNINITIALIZED_TEST(DisasmX64YMMRegister) {
if (!CpuFeatures::IsSupported(AVX)) return;
v8::internal::byte buffer[8192];
Assembler assm(AssemblerOptions{},
ExternalAssemblerBuffer(buffer, sizeof buffer));
CpuFeatureScope fscope(&assm, AVX);
__ vmovdqa(ymm0, ymm1);
base::Vector<char> actual = base::Vector<char>::New(37);
disasm::NameConverter converter;
disasm::Disassembler disassembler(converter);
disassembler.InstructionDecode(actual, buffer);
// Echo the decoded text only in builds that define OBJECT_PRINT.
#ifdef OBJECT_PRINT
fprintf(stdout, "Disassembled buffer: %s\n", actual.begin());
#endif
base::Vector<const char> expected =
base::StaticCharVector("c5fd6fc1 vmovdqa ymm0,ymm1\0");
CHECK_EQ(expected, actual);
DisassemblerTester t;
CpuFeatureScope fscope(t.assm(), AVX);
actual.Dispose();
// YMM-register forms. The first COMPARE argument is the expected text (hex
// encoding followed by the disassembly) and the second is the instruction to
// assemble; the COMPARE macro itself is defined outside this excerpt.
COMPARE("c5fd6fc1 vmovdqa ymm0,ymm1", vmovdqa(ymm0, ymm1));
COMPARE("c5f77cc2 vhaddps ymm0,ymm1,ymm2",
vhaddps(ymm0, ymm1, ymm2));
COMPARE("c5f77c848b10270000 vhaddps ymm0,ymm1,[rbx+rcx*4+0x2710]",
vhaddps(ymm0, ymm1, Operand(rbx, rcx, times_4, 10000)));
}
#undef __
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment