Commit 1c381f9a, authored by Andrew Brown, committed by V8 LUCI CQ

[x64] Implement some common 256-bit assembler instructions

This change implements longer-width SIMD instructions in the x64
assembler by adding 256-bit versions to one of the conversion macros.
This emits mostly floating-point arithmetic and some boolean operations;
see `SSE_UNOP_INSTRUCTION_LIST` and `SSE_BINOP_INSTRUCTION_LIST`.

Design doc: https://docs.google.com/document/d/1VWZbkO5c_DdxlJObmSLN_9zQUZELVgXyudbpzv5WQM0

Change-Id: I36d56ee09d6b71f66734342cb37bfc9d4801d654
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3123648
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Commit-Queue: Shiyu Zhang <shiyu.zhang@intel.com>
Cr-Commit-Position: refs/heads/main@{#76593}
parent b76d25dd
......@@ -3704,6 +3704,15 @@ void Assembler::vps(byte op, XMMRegister dst, XMMRegister src1,
emit_sse_operand(dst, src2);
}
// YMM register-register overload of vps(). Emits, in order: a VEX prefix
// built from dst/src1/src2, the opcode byte |op|, and the operand encoding
// for dst and src2. src1 is only passed to the prefix emitter, not to
// emit_sse_operand().
void Assembler::vps(byte op, YMMRegister dst, YMMRegister src1,
YMMRegister src2) {
// Asserts (DCHECK) that AVX has been enabled on this assembler.
DCHECK(IsEnabled(AVX));
// NOTE(review): presumably guarantees buffer room for the bytes emitted
// below; confirm against EnsureSpace.
EnsureSpace ensure_space(this);
// kL256 requests the 256-bit vector length. NOTE(review): kNone/k0F/kWIG
// presumably mean "no SIMD prefix", "0F opcode map" and "W ignored";
// confirm against emit_vex_prefix().
emit_vex_prefix(dst, src1, src2, kL256, kNone, k0F, kWIG);
emit(op);
emit_sse_operand(dst, src2);
}
void Assembler::vps(byte op, XMMRegister dst, XMMRegister src1, Operand src2) {
DCHECK(IsEnabled(AVX));
EnsureSpace ensure_space(this);
......@@ -3712,6 +3721,14 @@ void Assembler::vps(byte op, XMMRegister dst, XMMRegister src1, Operand src2) {
emit_sse_operand(dst, src2);
}
// YMM overload of vps() whose second source is an Operand rather than a
// register. Emits, in order: a VEX prefix built from dst/src1/src2, the
// opcode byte |op|, and the operand encoding for dst and src2.
void Assembler::vps(byte op, YMMRegister dst, YMMRegister src1, Operand src2) {
// Asserts (DCHECK) that AVX has been enabled on this assembler.
DCHECK(IsEnabled(AVX));
// NOTE(review): presumably guarantees buffer room for the bytes emitted
// below; confirm against EnsureSpace.
EnsureSpace ensure_space(this);
// kL256 requests the 256-bit vector length. NOTE(review): kNone/k0F/kWIG
// presumably mean "no SIMD prefix", "0F opcode map" and "W ignored";
// confirm against emit_vex_prefix().
emit_vex_prefix(dst, src1, src2, kL256, kNone, k0F, kWIG);
emit(op);
emit_sse_operand(dst, src2);
}
void Assembler::vps(byte op, XMMRegister dst, XMMRegister src1,
XMMRegister src2, byte imm8) {
DCHECK(IsEnabled(AVX));
......
......@@ -1371,6 +1371,12 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
} \
void v##instr(XMMRegister dst, Operand src2) { \
vps(0x##opcode, dst, xmm0, src2); \
} \
void v##instr(YMMRegister dst, YMMRegister src2) { \
vps(0x##opcode, dst, ymm0, src2); \
} \
void v##instr(YMMRegister dst, Operand src2) { \
vps(0x##opcode, dst, ymm0, src2); \
}
SSE_UNOP_INSTRUCTION_LIST(AVX_SSE_UNOP)
#undef AVX_SSE_UNOP
......@@ -1381,6 +1387,12 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
} \
void v##instr(XMMRegister dst, XMMRegister src1, Operand src2) { \
vps(0x##opcode, dst, src1, src2); \
} \
void v##instr(YMMRegister dst, YMMRegister src1, YMMRegister src2) { \
vps(0x##opcode, dst, src1, src2); \
} \
void v##instr(YMMRegister dst, YMMRegister src1, Operand src2) { \
vps(0x##opcode, dst, src1, src2); \
}
SSE_BINOP_INSTRUCTION_LIST(AVX_SSE_BINOP)
#undef AVX_SSE_BINOP
......@@ -1697,7 +1709,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
}
// vps() overloads. Each takes the opcode byte |op|, a destination register,
// a first source register and a second source that is either a register or
// an Operand. XMMRegister and YMMRegister variants are declared for both
// second-source kinds; the last overload additionally takes an 8-bit
// immediate.
void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
void vps(byte op, YMMRegister dst, YMMRegister src1, YMMRegister src2);
void vps(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
void vps(byte op, YMMRegister dst, YMMRegister src1, Operand src2);
void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
byte imm8);
void vpd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
......
......@@ -2550,6 +2550,37 @@ TEST(AssemblerX64Regmove256bit) {
CHECK_EQ(0, memcmp(expected, desc.buffer, sizeof(expected)));
}
// Assembles the 256-bit (YMM) forms of one unary instruction (vsqrtps) and
// two binary instructions (vunpcklps, vsubps), then compares the generated
// machine code byte-for-byte against the expected VEX encodings.
TEST(AssemblerX64FloatingPoint256bit) {
  // Skipped entirely when the host CPU does not report AVX support.
  if (!CpuFeatures::IsSupported(AVX)) return;

  CcTest::InitializeVM();
  v8::HandleScope handle_scope(CcTest::isolate());
  Isolate* isolate = CcTest::i_isolate();

  auto code_buffer = AllocateAssemblerBuffer();
  Assembler masm(AssemblerOptions{}, code_buffer->CreateView());
  CpuFeatureScope avx_scope(&masm, AVX);

  // Instructions under test; the emission order must match the byte table
  // below.
  __ vsqrtps(ymm0, ymm1);
  __ vunpcklps(ymm2, ymm3, ymm14);
  __ vsubps(ymm10, ymm11, ymm12);

  CodeDesc code_desc;
  masm.GetCode(isolate, &code_desc);

#ifdef OBJECT_PRINT
  // Dump the generated code to stdout to ease debugging of mismatches.
  Handle<Code> printable_code =
      Factory::CodeBuilder(isolate, code_desc, CodeKind::FOR_TESTING).Build();
  StdoutStream stdout_stream;
  printable_code->Print(stdout_stream);
#endif

  const byte kExpectedBytes[] = {
      // vsqrtps ymm0, ymm1: 2-byte VEX prefix (C5), opcode 51.
      0xC5, 0xFC, 0x51, 0xC1,
      // vunpcklps ymm2, ymm3, ymm14: 3-byte VEX prefix (C4), since ymm14
      // needs an extended register bit; opcode 14.
      0xC4, 0xC1, 0x64, 0x14, 0xD6,
      // vsubps ymm10, ymm11, ymm12: 3-byte VEX prefix (C4), opcode 5C.
      0xC4, 0x41, 0x24, 0x5C, 0xD4};

  // Only the first sizeof(kExpectedBytes) bytes of the buffer are compared.
  CHECK_EQ(0, memcmp(kExpectedBytes, code_desc.buffer, sizeof(kExpectedBytes)));
}
TEST(CpuFeatures_ProbeImpl) {
// Support for a newer extension implies support for the older extensions.
CHECK_IMPLIES(CpuFeatures::IsSupported(FMA3), CpuFeatures::IsSupported(AVX));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment