Commit 9ba6aff2, authored by jiepan, committed by V8 LUCI CQ

[x64] Implement 256-bit assembler for cmp ops

Bug: v8:12228
Change-Id: Iab09881d9c8bcd851fd89bf5d6bbd3f2cfb0f3d0
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3303808
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Jie Pan <jie.pan@intel.com>
Cr-Commit-Position: refs/heads/main@{#79838}
parent 9afe4c04
......@@ -1629,41 +1629,64 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
vps(0xC2, dst, src1, src2);
emit(cmp);
}
// vcmpps, YMM register form: all three operands are YMM registers.
// The instruction is written through vps() with opcode byte 0xC2, and the
// comparison predicate |cmp| is appended afterwards as a single byte.
void vcmpps(YMMRegister dst, YMMRegister src1, YMMRegister src2, int8_t cmp) {
  constexpr int kCmpOpcode = 0xC2;
  vps(kCmpOpcode, dst, src1, src2);
  // The predicate byte goes last, after everything vps() wrote.
  emit(cmp);
}
// vcmpps, XMM form with an Operand as the second source.
// Delegates to vps() with opcode byte 0xC2, then emits the comparison
// predicate |cmp| as one trailing byte.
void vcmpps(XMMRegister dst, XMMRegister src1, Operand src2, int8_t cmp) {
  constexpr int kCmpOpcode = 0xC2;
  vps(kCmpOpcode, dst, src1, src2);
  // Predicate byte follows the bytes produced by vps().
  emit(cmp);
}
// vcmpps, YMM form with an Operand as the second source.
// Writes the instruction via vps() using opcode byte 0xC2 and finishes it
// with the comparison predicate |cmp| as a trailing byte.
void vcmpps(YMMRegister dst, YMMRegister src1, Operand src2, int8_t cmp) {
  constexpr int kCmpOpcode = 0xC2;
  vps(kCmpOpcode, dst, src1, src2);
  // Predicate byte is emitted after the operand bytes.
  emit(cmp);
}
// vcmppd, XMM register form: all three operands are XMM registers.
// The instruction is written through vpd() with opcode byte 0xC2, and the
// comparison predicate |cmp| is appended afterwards as a single byte.
void vcmppd(XMMRegister dst, XMMRegister src1, XMMRegister src2, int8_t cmp) {
  constexpr int kCmpOpcode = 0xC2;
  vpd(kCmpOpcode, dst, src1, src2);
  // The predicate byte goes last, after everything vpd() wrote.
  emit(cmp);
}
// vcmppd, YMM register form: all three operands are YMM registers.
// Delegates to vpd() with opcode byte 0xC2, then emits the comparison
// predicate |cmp| as one trailing byte.
void vcmppd(YMMRegister dst, YMMRegister src1, YMMRegister src2, int8_t cmp) {
  constexpr int kCmpOpcode = 0xC2;
  vpd(kCmpOpcode, dst, src1, src2);
  // Predicate byte follows the bytes produced by vpd().
  emit(cmp);
}
// vcmppd, XMM form with an Operand as the second source.
// Writes the instruction via vpd() using opcode byte 0xC2 and finishes it
// with the comparison predicate |cmp| as a trailing byte.
void vcmppd(XMMRegister dst, XMMRegister src1, Operand src2, int8_t cmp) {
  constexpr int kCmpOpcode = 0xC2;
  vpd(kCmpOpcode, dst, src1, src2);
  // Predicate byte is emitted after the operand bytes.
  emit(cmp);
}
#define AVX_CMP_P(instr, imm8) \
void instr##ps(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
vcmpps(dst, src1, src2, imm8); \
} \
void instr##ps(XMMRegister dst, XMMRegister src1, Operand src2) { \
vcmpps(dst, src1, src2, imm8); \
} \
void instr##pd(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
vcmppd(dst, src1, src2, imm8); \
} \
void instr##pd(XMMRegister dst, XMMRegister src1, Operand src2) { \
vcmppd(dst, src1, src2, imm8); \
}
AVX_CMP_P(vcmpeq, 0x0)
AVX_CMP_P(vcmplt, 0x1)
AVX_CMP_P(vcmple, 0x2)
AVX_CMP_P(vcmpunord, 0x3)
AVX_CMP_P(vcmpneq, 0x4)
AVX_CMP_P(vcmpnlt, 0x5)
AVX_CMP_P(vcmpnle, 0x6)
AVX_CMP_P(vcmpge, 0xd)
// vcmppd, YMM form with an Operand as the second source.
// The instruction is written through vpd() with opcode byte 0xC2, and the
// comparison predicate |cmp| is appended afterwards as a single byte.
void vcmppd(YMMRegister dst, YMMRegister src1, Operand src2, int8_t cmp) {
  constexpr int kCmpOpcode = 0xC2;
  vpd(kCmpOpcode, dst, src1, src2);
  // The predicate byte goes last, after everything vpd() wrote.
  emit(cmp);
}
#define AVX_CMP_P(instr, imm8, SIMDRegister) \
void instr##ps(SIMDRegister dst, SIMDRegister src1, SIMDRegister src2) { \
vcmpps(dst, src1, src2, imm8); \
} \
void instr##ps(SIMDRegister dst, SIMDRegister src1, Operand src2) { \
vcmpps(dst, src1, src2, imm8); \
} \
void instr##pd(SIMDRegister dst, SIMDRegister src1, SIMDRegister src2) { \
vcmppd(dst, src1, src2, imm8); \
} \
void instr##pd(SIMDRegister dst, SIMDRegister src1, Operand src2) { \
vcmppd(dst, src1, src2, imm8); \
}
AVX_CMP_P(vcmpeq, 0x0, XMMRegister)
AVX_CMP_P(vcmpeq, 0x0, YMMRegister)
AVX_CMP_P(vcmplt, 0x1, XMMRegister)
AVX_CMP_P(vcmplt, 0x1, YMMRegister)
AVX_CMP_P(vcmple, 0x2, XMMRegister)
AVX_CMP_P(vcmple, 0x2, YMMRegister)
AVX_CMP_P(vcmpunord, 0x3, XMMRegister)
AVX_CMP_P(vcmpunord, 0x3, YMMRegister)
AVX_CMP_P(vcmpneq, 0x4, XMMRegister)
AVX_CMP_P(vcmpneq, 0x4, YMMRegister)
AVX_CMP_P(vcmpnlt, 0x5, XMMRegister)
AVX_CMP_P(vcmpnlt, 0x5, YMMRegister)
AVX_CMP_P(vcmpnle, 0x6, XMMRegister)
AVX_CMP_P(vcmpnle, 0x6, YMMRegister)
AVX_CMP_P(vcmpge, 0xd, XMMRegister)
AVX_CMP_P(vcmpge, 0xd, YMMRegister)
#undef AVX_CMP_P
......
......@@ -2827,6 +2827,53 @@ TEST(AssemblerX64Integer256bit) {
CHECK_EQ(0, memcmp(expected, desc.buffer, sizeof(expected)));
}
// Assembles one YMM instruction per fixed-predicate compare wrapper (mixing
// register and memory-operand second sources) and checks the generated bytes
// against a hand-written encoding table.
TEST(AssemblerX64CmpOperations256bit) {
  // The instructions below are assembled under an AVX feature scope; bail out
  // when the feature is not reported as supported.
  if (!CpuFeatures::IsSupported(AVX)) return;

  CcTest::InitializeVM();
  v8::HandleScope handle_scope(CcTest::isolate());
  auto assembler_buffer = AllocateAssemblerBuffer();
  Isolate* isolate = CcTest::i_isolate();
  Assembler masm(AssemblerOptions{}, assembler_buffer->CreateView());
  CpuFeatureScope avx_scope(&masm, AVX);

  // Emission order must match the order of the entries in |golden| below.
  __ vcmpeqps(ymm1, ymm2, ymm4);
  __ vcmpltpd(ymm4, ymm7, Operand(rcx, rdx, times_4, 10000));
  __ vcmpleps(ymm9, ymm8, Operand(r8, r11, times_8, 10000));
  __ vcmpunordpd(ymm3, ymm7, ymm8);
  __ vcmpneqps(ymm3, ymm5, ymm9);
  __ vcmpnltpd(ymm10, ymm12, Operand(r12, r11, times_4, 10000));
  __ vcmpnleps(ymm9, ymm11, Operand(r10, r9, times_8, 10000));
  __ vcmpgepd(ymm13, ymm3, ymm12);

  CodeDesc code_desc;
  masm.GetCode(isolate, &code_desc);
#ifdef OBJECT_PRINT
  // Debug aid: dump the assembled code when object printing is compiled in.
  Handle<Code> code =
      Factory::CodeBuilder(isolate, code_desc, CodeKind::FOR_TESTING).Build();
  StdoutStream os;
  code->Print(os);
#endif

  // Each entry ends with the comparison-predicate byte of that instruction.
  byte golden[] = {
      // vcmpeqps ymm1, ymm2, ymm4
      0xC5, 0xEC, 0xC2, 0xCC, 0x00,
      // vcmpltpd ymm4, ymm7, YMMWORD PTR [rcx+rdx*4+0x2710]
      0xC5, 0xC5, 0xC2, 0xA4, 0x91, 0x10, 0x27, 0x00, 0x00, 0x01,
      // vcmpleps ymm9, ymm8, YMMWORD PTR [r8+r11*8+0x2710]
      0xC4, 0x01, 0x3C, 0xC2, 0x8C, 0xD8, 0x10, 0x27, 0x00, 0x00, 0x02,
      // vcmpunordpd ymm3, ymm7, ymm8
      0xC4, 0xC1, 0x45, 0xC2, 0xD8, 0x03,
      // vcmpneqps ymm3, ymm5, ymm9
      0xC4, 0xC1, 0x54, 0xC2, 0xD9, 0x04,
      // vcmpnltpd ymm10, ymm12, YMMWORD PTR [r12+r11*4+0x2710]
      0xC4, 0x01, 0x1D, 0xC2, 0x94, 0x9C, 0x10, 0x27, 0x00, 0x00, 0x05,
      // vcmpnleps ymm9, ymm11, YMMWORD PTR [r10+r9*8+0x2710]
      0xC4, 0x01, 0x24, 0xC2, 0x8C, 0xCA, 0x10, 0x27, 0x00, 0x00, 0x06,
      // vcmpgepd ymm13, ymm3, ymm12
      0xC4, 0x41, 0x65, 0xC2, 0xEC, 0x0D};

  // Only the first sizeof(golden) bytes of the generated buffer are compared.
  CHECK_EQ(0, memcmp(golden, code_desc.buffer, sizeof(golden)));
}
TEST(CpuFeatures_ProbeImpl) {
// Support for a newer extension implies support for the older extensions.
CHECK_IMPLIES(CpuFeatures::IsSupported(FMA3), CpuFeatures::IsSupported(AVX));
......
......@@ -1415,9 +1415,26 @@ UNINITIALIZED_TEST(DisasmX64YMMRegister) {
COMPARE("c5ff12a48b10270000 vmovddup ymm4,[rbx+rcx*4+0x2710]",
vmovddup(ymm4, Operand(rbx, rcx, times_4, 10000)));
COMPARE("c5fe16ca vmovshdup ymm1,ymm2", vmovshdup(ymm1, ymm2));
COMPARE("c5f4c6da73 vshufps ymm3,ymm1,ymm2,0x73",
vshufps(ymm3, ymm1, ymm2, 115));
// vcmp
COMPARE("c5dcc2e900 vcmpps ymm5,ymm4,ymm1, (eq)",
vcmpeqps(ymm5, ymm4, ymm1));
COMPARE("c5ddc2ac8b1027000001 vcmppd ymm5,ymm4,[rbx+rcx*4+0x2710], (lt)",
vcmpltpd(ymm5, ymm4, Operand(rbx, rcx, times_4, 10000)));
COMPARE("c5ddc2e902 vcmppd ymm5,ymm4,ymm1, (le)",
vcmplepd(ymm5, ymm4, ymm1));
COMPARE("c5dcc2ac8b1027000003 vcmpps ymm5,ymm4,[rbx+rcx*4+0x2710], (unord)",
vcmpunordps(ymm5, ymm4, Operand(rbx, rcx, times_4, 10000)));
COMPARE("c5dcc2e904 vcmpps ymm5,ymm4,ymm1, (neq)",
vcmpneqps(ymm5, ymm4, ymm1));
COMPARE("c5ddc2ac8b1027000005 vcmppd ymm5,ymm4,[rbx+rcx*4+0x2710], (nlt)",
vcmpnltpd(ymm5, ymm4, Operand(rbx, rcx, times_4, 10000)));
COMPARE("c5ddc2ac8b1027000006 vcmppd ymm5,ymm4,[rbx+rcx*4+0x2710], (nle)",
vcmpnlepd(ymm5, ymm4, Operand(rbx, rcx, times_4, 10000)));
COMPARE("c5dcc2e90d vcmpps ymm5,ymm4,ymm1, (ge)",
vcmpgeps(ymm5, ymm4, ymm1));
}
if (!CpuFeatures::IsSupported(AVX2)) return;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment