Commit c48ec6f7 authored by Yolanda Chen, committed by V8 LUCI CQ

[x64] Implement 256-bit assembly for SSE2_UNOP instructions

The SSE2_UNOP instructions have varying src and dst register types in
their 256-bit AVX forms. One of them, ucomisd, has no YMM form at all.
Two others, vcvtpd2ps and vcvttpd2dq, write an XMM dst while reading a
YMM (or m256) src. We extend the Operand type with an Operand256
subclass that represents m256, to distinguish these forms from the
128-bit AVX instructions.

Since this is a small suite, we explicitly specify the operand type for
each instruction.
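
To make the resulting API concrete, here is a minimal sketch of the
call sites this CL enables (register choices are illustrative; the
calls mirror the new tests below):

  __ vsqrtpd(ymm1, ymm2);                                    // ymm <- ymm
  __ vsqrtpd(ymm1, Operand(rbx, rcx, times_4, 10000));       // ymm <- m256
  __ vcvtpd2ps(xmm1, ymm2);                                  // xmm <- ymm (narrowing)
  __ vcvtpd2ps(xmm2, Operand256(rbx, rcx, times_4, 10000));  // xmm <- m256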

Bug: v8:12228
Change-Id: I07c8168bd49f75eb8e4df8d6adfcfb37c1d34fff
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3518423
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Yolanda Chen <yolanda.chen@intel.com>
Cr-Commit-Position: refs/heads/main@{#80020}
parent 765eb7fa
@@ -3807,9 +3807,9 @@ void Assembler::vps(byte op, YMMRegister dst, YMMRegister src1,
   emit(imm8);
 }
 
-#define VPD(SIMDRegister, length)                                   \
-  void Assembler::vpd(byte op, SIMDRegister dst, SIMDRegister src1, \
-                      SIMDRegister src2) {                          \
+#define VPD(DSTRegister, SRCRegister, length)                       \
+  void Assembler::vpd(byte op, DSTRegister dst, SRCRegister src1,   \
+                      SRCRegister src2) {                           \
   DCHECK(IsEnabled(AVX));                                           \
   EnsureSpace ensure_space(this);                                   \
   emit_vex_prefix(dst, src1, src2, k##length, k66, k0F, kWIG);      \
@@ -3817,7 +3817,7 @@ void Assembler::vps(byte op, YMMRegister dst, YMMRegister src1,
   emit_sse_operand(dst, src2);                                      \
 }                                                                   \
                                                                     \
-  void Assembler::vpd(byte op, SIMDRegister dst, SIMDRegister src1, \
+  void Assembler::vpd(byte op, DSTRegister dst, SRCRegister src1,   \
                       Operand src2) {                               \
   DCHECK(IsEnabled(AVX));                                           \
   EnsureSpace ensure_space(this);                                   \
@@ -3825,8 +3825,9 @@ void Assembler::vps(byte op, YMMRegister dst, YMMRegister src1,
   emit(op);                                                         \
   emit_sse_operand(dst, src2);                                      \
 }
-VPD(XMMRegister, L128)
-VPD(YMMRegister, L256)
+VPD(XMMRegister, XMMRegister, L128)
+VPD(XMMRegister, YMMRegister, L256)
+VPD(YMMRegister, YMMRegister, L256)
 #undef VPD
 
 void Assembler::vucomiss(XMMRegister dst, XMMRegister src) {
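For illustration, the new mixed-width instantiation
VPD(XMMRegister, YMMRegister, L256) above expands to roughly the
following (a sketch; continuation backslashes elided):

  void Assembler::vpd(byte op, XMMRegister dst, YMMRegister src1,
                      YMMRegister src2) {
    DCHECK(IsEnabled(AVX));
    EnsureSpace ensure_space(this);
    // k##length pastes to kL256, so the emitted VEX prefix has VEX.L = 1.
    emit_vex_prefix(dst, src1, src2, kL256, k66, k0F, kWIG);
    emit(op);
    emit_sse_operand(dst, src2);
  }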
@@ -287,6 +287,28 @@ class V8_EXPORT_PRIVATE Operand {
   Data data_;
 };
 
+class V8_EXPORT_PRIVATE Operand256 : public Operand {
+ public:
+  // [base + disp/r]
+  V8_INLINE Operand256(Register base, int32_t disp) : Operand(base, disp) {}
+
+  // [base + index*scale + disp/r]
+  V8_INLINE Operand256(Register base, Register index, ScaleFactor scale,
+                       int32_t disp)
+      : Operand(base, index, scale, disp) {}
+
+  // [index*scale + disp/r]
+  V8_INLINE Operand256(Register index, ScaleFactor scale, int32_t disp)
+      : Operand(index, scale, disp) {}
+
+  Operand256(const Operand256&) V8_NOEXCEPT = default;
+  Operand256& operator=(const Operand256&) V8_NOEXCEPT = default;
+
+ private:
+  friend class Operand;
+};
+
 ASSERT_TRIVIALLY_COPYABLE(Operand);
 static_assert(sizeof(Operand) <= 2 * kSystemPointerSize,
               "Operand must be small enough to pass it by value");
@@ -1046,6 +1068,24 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
   SSE2_UNOP_INSTRUCTION_LIST(DECLARE_SSE2_UNOP_AVX_INSTRUCTION)
 #undef DECLARE_SSE2_UNOP_AVX_INSTRUCTION
 
+#define DECLARE_SSE2_UNOP_AVX_YMM_INSTRUCTION(                 \
+    instruction, opcode, DSTRegister, SRCRegister, MemOperand) \
+  void v##instruction(DSTRegister dst, SRCRegister src) {      \
+    vpd(0x##opcode, dst, ymm0, src);                           \
+  }                                                            \
+  void v##instruction(DSTRegister dst, MemOperand src) {       \
+    vpd(0x##opcode, dst, ymm0, src);                           \
+  }
+  DECLARE_SSE2_UNOP_AVX_YMM_INSTRUCTION(sqrtpd, 51, YMMRegister, YMMRegister,
+                                        Operand)
+  DECLARE_SSE2_UNOP_AVX_YMM_INSTRUCTION(cvtpd2ps, 5A, XMMRegister, YMMRegister,
+                                        Operand256)
+  DECLARE_SSE2_UNOP_AVX_YMM_INSTRUCTION(cvtps2dq, 5B, YMMRegister, YMMRegister,
+                                        Operand)
+  DECLARE_SSE2_UNOP_AVX_YMM_INSTRUCTION(cvttpd2dq, E6, XMMRegister, YMMRegister,
+                                        Operand256)
+#undef DECLARE_SSE2_UNOP_AVX_YMM_INSTRUCTION
+
   // SSE3
   void lddqu(XMMRegister dst, Operand src);
   void movddup(XMMRegister dst, Operand src);
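The hard-coded ymm0 second argument is worth a note: these unops have
no second register source, and VEX requires an unused vvvv field to
encode as all ones. vvvv is stored inverted, so register 0 yields
exactly that pattern. A sketch of the second VEX byte for
vsqrtpd ymm1, ymm2 (0xFD in the assembler test below):

  // 2-byte VEX, byte 1 layout: R' | vvvv' | L | pp (primes are inverted bits)
  uint8_t r_inv = 1;             // ymm1 needs no ModRM.reg extension
  uint8_t vvvv_inv = ~0u & 0xF;  // ymm0, inverted -> 0b1111 (i.e. "unused")
  uint8_t byte1 = (r_inv << 7) | (vvvv_inv << 3) | (1 << 2) | 0b01;
  // (1 << 2): VEX.L = 1 (256-bit); 0b01: pp = 66 prefix; byte1 == 0xFD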
......@@ -1863,8 +1903,10 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
byte imm8);
void vpd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
void vpd(byte op, YMMRegister dst, YMMRegister src1, YMMRegister src2);
void vpd(byte op, XMMRegister dst, YMMRegister src1, YMMRegister src2);
void vpd(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
void vpd(byte op, YMMRegister dst, YMMRegister src1, Operand src2);
void vpd(byte op, XMMRegister dst, YMMRegister src1, Operand src2);
// AVX2 instructions
#define AVX2_INSTRUCTION(instr, prefix, escape1, escape2, opcode) \
@@ -1537,14 +1537,17 @@ int DisassemblerX64::AVXInstruction(byte* data) {
       SSE2_INSTRUCTION_LIST(DECLARE_SSE_AVX_DIS_CASE)
 #undef DECLARE_SSE_AVX_DIS_CASE
 
-#define DECLARE_SSE_UNOP_AVX_DIS_CASE(instruction, notUsed1, notUsed2, opcode) \
+#define DECLARE_SSE_UNOP_AVX_DIS_CASE(instruction, opcode, SIMDRegister)      \
   case 0x##opcode: {                                                          \
-    AppendToBuffer("v" #instruction " %s,", NameOfAVXRegister(regop));        \
+    AppendToBuffer("v" #instruction " %s,", NameOf##SIMDRegister(regop));     \
     current += PrintRightAVXOperand(current);                                 \
     break;                                                                    \
   }
-      SSE2_UNOP_INSTRUCTION_LIST(DECLARE_SSE_UNOP_AVX_DIS_CASE)
+      DECLARE_SSE_UNOP_AVX_DIS_CASE(ucomisd, 2E, AVXRegister)
+      DECLARE_SSE_UNOP_AVX_DIS_CASE(sqrtpd, 51, AVXRegister)
+      DECLARE_SSE_UNOP_AVX_DIS_CASE(cvtpd2ps, 5A, XMMRegister)
+      DECLARE_SSE_UNOP_AVX_DIS_CASE(cvtps2dq, 5B, AVXRegister)
+      DECLARE_SSE_UNOP_AVX_DIS_CASE(cvttpd2dq, E6, XMMRegister)
 #undef DECLARE_SSE_UNOP_AVX_DIS_CASE
 
       default:
         UnimplementedInstruction();
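After expansion, the cvtpd2ps case now prints the dst at XMM width
while the rm operand keeps full AVX width (a sketch of the macro
output):

  case 0x5A: {
    AppendToBuffer("vcvtpd2ps %s,", NameOfXMMRegister(regop));  // narrow dst
    current += PrintRightAVXOperand(current);                   // 256-bit src
    break;
  }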
@@ -2682,6 +2682,14 @@ TEST(AssemblerX64FloatingPoint256bit) {
   __ vblendvps(ymm0, ymm3, ymm5, ymm9);
   __ vblendvpd(ymm7, ymm4, ymm3, ymm1);
   __ vshufps(ymm3, ymm1, ymm2, 0x75);
+  __ vsqrtpd(ymm1, ymm2);
+  __ vsqrtpd(ymm1, Operand(rbx, rcx, times_4, 10000));
+  __ vcvtpd2ps(xmm1, ymm2);
+  __ vcvtpd2ps(xmm2, Operand256(rbx, rcx, times_4, 10000));
+  __ vcvtps2dq(ymm3, ymm4);
+  __ vcvtps2dq(ymm5, Operand(rbx, rcx, times_4, 10000));
+  __ vcvttpd2dq(xmm6, ymm8);
+  __ vcvttpd2dq(xmm10, Operand256(rbx, rcx, times_4, 10000));
 
   CodeDesc desc;
   masm.GetCode(isolate, &desc);
@@ -2715,7 +2723,23 @@ TEST(AssemblerX64FloatingPoint256bit) {
       // vblendvpd ymm7, ymm4, ymm3, ymm1
       0xC4, 0xE3, 0x5D, 0x4B, 0xFB, 0x10,
       // vshufps ymm3, ymm1, ymm2, 0x75
-      0xC5, 0xF4, 0xC6, 0xDA, 0x75};
+      0xC5, 0xF4, 0xC6, 0xDA, 0x75,
+      // vsqrtpd ymm1, ymm2
+      0xC5, 0xFD, 0x51, 0xCA,
+      // vsqrtpd ymm1, YMMWORD PTR [rbx+rcx*4+0x2710]
+      0xC5, 0xFD, 0x51, 0x8C, 0x8B, 0x10, 0x27, 0x00, 0x00,
+      // vcvtpd2ps xmm1, ymm2
+      0xC5, 0xFD, 0x5A, 0xCA,
+      // vcvtpd2ps xmm2, YMMWORD PTR [rbx+rcx*4+0x2710]
+      0xC5, 0xFD, 0x5A, 0x94, 0x8B, 0x10, 0x27, 0x00, 0x00,
+      // vcvtps2dq ymm3, ymm4
+      0xC5, 0xFD, 0x5B, 0xDC,
+      // vcvtps2dq ymm5, YMMWORD PTR [rbx+rcx*4+0x2710]
+      0xC5, 0xFD, 0x5B, 0xAC, 0x8B, 0x10, 0x27, 0x00, 0x00,
+      // vcvttpd2dq xmm6, ymm8
+      0xC4, 0xC1, 0x7D, 0xE6, 0xF0,
+      // vcvttpd2dq xmm10, YMMWORD PTR [rbx+rcx*4+0x2710]
+      0xC5, 0x7D, 0xE6, 0x94, 0x8B, 0x10, 0x27, 0x00, 0x00};
   CHECK_EQ(0, memcmp(expected, desc.buffer, sizeof(expected)));
 }
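As a cross-check of the expected bytes, the vcvttpd2dq xmm6, ymm8
encoding decodes as follows (a manual walkthrough, not part of the
test):

  // 0xC4  three-byte VEX escape (needed so VEX.B can extend ModRM.rm)
  // 0xC1  R'=1, X'=1, B'=0 -> rm register gets +8; mmmmm=00001 (0F map)
  // 0x7D  W=0, vvvv'=1111 (unused), L=1 (256-bit), pp=01 (66 prefix)
  // 0xE6  opcode
  // 0xF0  ModRM: mod=11, reg=110 -> xmm6 (dst), rm=000 +8 -> ymm8 (src)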
@@ -1445,6 +1445,21 @@ UNINITIALIZED_TEST(DisasmX64YMMRegister) {
             vcmpnlepd(ymm5, ymm4, Operand(rbx, rcx, times_4, 10000)));
     COMPARE("c5dcc2e90d vcmpps ymm5,ymm4,ymm1, (ge)",
             vcmpgeps(ymm5, ymm4, ymm1));
+
+    // SSE2_UNOP
+    COMPARE("c5fd51ca vsqrtpd ymm1,ymm2", vsqrtpd(ymm1, ymm2));
+    COMPARE("c5fd518c8b10270000 vsqrtpd ymm1,[rbx+rcx*4+0x2710]",
+            vsqrtpd(ymm1, Operand(rbx, rcx, times_4, 10000)));
+    COMPARE("c5fd5adc vcvtpd2ps xmm3,ymm4", vcvtpd2ps(xmm3, ymm4));
+    COMPARE("c5fd5aa48b10270000 vcvtpd2ps xmm4,[rbx+rcx*4+0x2710]",
+            vcvtpd2ps(xmm4, Operand256(rbx, rcx, times_4, 10000)));
+    COMPARE("c5fd5bdc vcvtps2dq ymm3,ymm4", vcvtps2dq(ymm3, ymm4));
+    COMPARE("c5fd5bac8b10270000 vcvtps2dq ymm5,[rbx+rcx*4+0x2710]",
+            vcvtps2dq(ymm5, Operand(rbx, rcx, times_4, 10000)));
+    COMPARE("c4c17de6f8 vcvttpd2dq xmm7,ymm8", vcvttpd2dq(xmm7, ymm8));
+    COMPARE("c57de68c8b10270000 vcvttpd2dq xmm9,[rbx+rcx*4+0x2710]",
+            vcvttpd2dq(xmm9, Operand256(rbx, rcx, times_4, 10000)));
   }
 
   if (!CpuFeatures::IsSupported(AVX2)) return;