Commit c73400e1 authored by Andrew Brown, committed by V8 LUCI CQ

[x64] Provide initial infrastructure for 256-bit assembly

As a first step toward generating longer-width SIMD (see design doc),
this change adds the ability to emit 256-bit instructions in the x64
assembler. The `YMMRegister` class indicates that a 256-bit instruction
should be emitted (versus a 128-bit instruction for `XMMRegister`). This
also includes sample implementations of `vmovdqa` and `vmovdqu`; the
encoded bits are checked against known-good output from NASM.

Design doc: https://docs.google.com/document/d/1VWZbkO5c_DdxlJObmSLN_9zQUZELVgXyudbpzv5WQM0

Change-Id: I18a88565d731786c3a1cedc2293a3a2e78ae838a
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3111269
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/main@{#76443}
parent 09413a88
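
The new overloads are selected purely by register type, so callers emit 256-bit moves the same way they emit the existing 128-bit ones. A minimal usage sketch, mirroring the test added below (the assembler/buffer setup shown here is assumed context, not part of this change):

  Assembler masm(AssemblerOptions{}, buffer->CreateView());  // assumed setup
  CpuFeatureScope avx_scope(&masm, AVX);  // the 256-bit forms still gate on AVX only
  masm.vmovdqa(ymm0, ymm1);    // YMMRegister operands -> VEX.256.66.0F.WIG 6F /r
  masm.vmovdqu(ymm10, ymm11);  // YMMRegister operands -> VEX.256.F3.0F.WIG 7F /r
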
@@ -3601,6 +3601,14 @@ void Assembler::vmovdqa(XMMRegister dst, XMMRegister src) {
  emit_sse_operand(dst, src);
}

void Assembler::vmovdqa(YMMRegister dst, YMMRegister src) {
  DCHECK(IsEnabled(AVX));
  EnsureSpace ensure_space(this);
  emit_vex_prefix(dst, xmm0, src, kL256, k66, k0F, kWIG);
  emit(0x6F);
  emit_sse_operand(dst, src);
}

void Assembler::vmovdqu(XMMRegister dst, Operand src) {
  DCHECK(IsEnabled(AVX));
  EnsureSpace ensure_space(this);
@@ -3625,6 +3633,14 @@ void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
  emit_sse_operand(src, dst);
}

void Assembler::vmovdqu(YMMRegister dst, YMMRegister src) {
  DCHECK(IsEnabled(AVX));
  EnsureSpace ensure_space(this);
  emit_vex_prefix(src, xmm0, dst, kL256, kF3, k0F, kWIG);
  emit(0x7F);
  emit_sse_operand(src, dst);
}

void Assembler::vmovlps(XMMRegister dst, XMMRegister src1, Operand src2) {
  DCHECK(IsEnabled(AVX));
  EnsureSpace ensure_space(this);
......
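
The only difference from the existing 128-bit paths above is the kL256 length bit passed to emit_vex_prefix. As a standalone illustration (not V8 code; field layout per the AVX encoding rules), this sketch reproduces why `vmovdqa ymm0, ymm1` comes out as the C5 FD 6F C1 bytes checked in the new test:

#include <cstdint>
#include <cstdio>

// Two-byte VEX prefix payload: R (inverted reg-extension bit), vvvv (inverted,
// unused second source -> 1111), L (1 = 256-bit), pp (01 = 0x66 prefix).
uint8_t TwoByteVexPayload(int reg_code, bool l256, uint8_t pp) {
  uint8_t inv_r = (reg_code & 8) ? 0 : 1;  // high bit of the ModRM.reg register, inverted
  uint8_t inv_vvvv = 0xF;                  // no vvvv operand for vmovdqa
  return static_cast<uint8_t>((inv_r << 7) | (inv_vvvv << 3) | ((l256 ? 1 : 0) << 2) | pp);
}

int main() {
  // vmovdqa ymm0, ymm1: C5 <payload> 6F <ModRM>, ModRM = 0xC1 (mod=11, reg=0, rm=1).
  std::printf("C5 %02X 6F C1\n",
              static_cast<unsigned>(TwoByteVexPayload(/*reg_code=*/0, /*l256=*/true, /*pp=*/1)));
  return 0;  // prints "C5 FD 6F C1"
}

The two-byte form suffices for ymm0/ymm1; registers 8 and above need the three-byte form (see the second test case below).
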
@@ -1352,9 +1352,11 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
  void vmovsd(Operand dst, XMMRegister src) { vsd(0x11, src, xmm0, dst); }
  void vmovdqa(XMMRegister dst, Operand src);
  void vmovdqa(XMMRegister dst, XMMRegister src);
  void vmovdqa(YMMRegister dst, YMMRegister src);
  void vmovdqu(XMMRegister dst, Operand src);
  void vmovdqu(Operand dst, XMMRegister src);
  void vmovdqu(XMMRegister dst, XMMRegister src);
  void vmovdqu(YMMRegister dst, YMMRegister src);
  void vmovlps(XMMRegister dst, XMMRegister src1, Operand src2);
  void vmovlps(Operand dst, XMMRegister src);
......
@@ -155,6 +155,24 @@ constexpr Register arg_reg_4 = rcx;
  V(xmm13) \
  V(xmm14)

#define YMM_REGISTERS(V) \
  V(ymm0)  \
  V(ymm1)  \
  V(ymm2)  \
  V(ymm3)  \
  V(ymm4)  \
  V(ymm5)  \
  V(ymm6)  \
  V(ymm7)  \
  V(ymm8)  \
  V(ymm9)  \
  V(ymm10) \
  V(ymm11) \
  V(ymm12) \
  V(ymm13) \
  V(ymm14) \
  V(ymm15)

// Returns the number of padding slots needed for stack pointer alignment.
constexpr int ArgumentPaddingSlots(int argument_count) {
  // No argument padding required.
@@ -171,6 +189,17 @@ enum DoubleRegisterCode {
  kDoubleAfterLast
};

enum YMMRegisterCode {
#define REGISTER_CODE(R) kYMMCode_##R,
  YMM_REGISTERS(REGISTER_CODE)
#undef REGISTER_CODE
  kYMMAfterLast
};
static_assert(static_cast<int>(kDoubleAfterLast) ==
                  static_cast<int>(kYMMAfterLast),
              "The number of XMM register codes must match the number of YMM "
              "register codes");

class XMMRegister : public RegisterBase<XMMRegister, kDoubleAfterLast> {
 public:
  // Return the high bit of the register code as a 0 or 1. Used often
@@ -180,7 +209,7 @@ class XMMRegister : public RegisterBase<XMMRegister, kDoubleAfterLast> {
  // in modR/M, SIB, and opcode bytes.
  int low_bits() const { return code() & 0x7; }

- private:
+ protected:
  friend class RegisterBase<XMMRegister, kDoubleAfterLast>;
  explicit constexpr XMMRegister(int code) : RegisterBase(code) {}
};
@@ -189,6 +218,22 @@ ASSERT_TRIVIALLY_COPYABLE(XMMRegister);
static_assert(sizeof(XMMRegister) == sizeof(int),
              "XMMRegister can efficiently be passed by value");

class YMMRegister : public XMMRegister {
 public:
  static constexpr YMMRegister from_code(int code) {
    DCHECK(base::IsInRange(code, 0, XMMRegister::kNumRegisters - 1));
    return YMMRegister(code);
  }

 private:
  friend class XMMRegister;
  explicit constexpr YMMRegister(int code) : XMMRegister(code) {}
};
ASSERT_TRIVIALLY_COPYABLE(YMMRegister);
static_assert(sizeof(YMMRegister) == sizeof(int),
              "YMMRegister can efficiently be passed by value");

using FloatRegister = XMMRegister;
using DoubleRegister = XMMRegister;
@@ -201,9 +246,15 @@ DOUBLE_REGISTERS(DECLARE_REGISTER)
#undef DECLARE_REGISTER
constexpr DoubleRegister no_dreg = DoubleRegister::no_reg();

#define DECLARE_REGISTER(R) \
  constexpr YMMRegister R = YMMRegister::from_code(kYMMCode_##R);
YMM_REGISTERS(DECLARE_REGISTER)
#undef DECLARE_REGISTER

// Define {RegisterName} methods for the register types.
DEFINE_REGISTER_NAMES(Register, GENERAL_REGISTERS)
DEFINE_REGISTER_NAMES(XMMRegister, DOUBLE_REGISTERS)
DEFINE_REGISTER_NAMES(YMMRegister, YMM_REGISTERS)

// Give alias names to registers for calling conventions.
constexpr Register kReturnRegister0 = rax;
......
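
Because YMMRegister derives from XMMRegister and the static_assert above ties the two code enumerations together, a ymm constant carries the same code(), high_bit() and low_bits() as its xmm counterpart; only the intended operand width differs. A short sketch of what that means for callers (assumed usage, not part of this change, relying only on the definitions above):

  YMMRegister y = YMMRegister::from_code(11);    // same code space as xmm11
  CHECK_EQ(y.code(), xmm11.code());              // identical register number
  CHECK_EQ(ymm11.low_bits(), xmm11.low_bits());  // ModRM/SIB bits are inherited
  CHECK_EQ(ymm11.high_bit(), xmm11.high_bit());  // REX/VEX extension bit as well
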
@@ -26,6 +26,7 @@
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <cstdlib>
#include <cstring>
#include <iostream>

#include "src/base/numbers/double.h"
@@ -2520,6 +2521,34 @@ TEST(AssemblerX64vmovups) {
  CHECK_EQ(-1.5, f.Call(1.5, -1.5));
}

TEST(AssemblerX64Regmove256bit) {
  if (!CpuFeatures::IsSupported(AVX)) return;

  CcTest::InitializeVM();
  v8::HandleScope scope(CcTest::isolate());
  auto buffer = AllocateAssemblerBuffer();
  Isolate* isolate = CcTest::i_isolate();
  Assembler masm(AssemblerOptions{}, buffer->CreateView());
  CpuFeatureScope fscope(&masm, AVX);

  __ vmovdqa(ymm0, ymm1);
  __ vmovdqu(ymm10, ymm11);

  CodeDesc desc;
  masm.GetCode(isolate, &desc);
#ifdef OBJECT_PRINT
  Handle<Code> code =
      Factory::CodeBuilder(isolate, desc, CodeKind::FOR_TESTING).Build();
  StdoutStream os;
  code->Print(os);
#endif

  byte expected[] = {// VMOVDQA
                     0xC5, 0xFD, 0x6F, 0xC1,
                     // VMOVDQU
                     0xC4, 0x41, 0x7E, 0x7F, 0xDA};
  CHECK_EQ(0, memcmp(expected, desc.buffer, sizeof(expected)));
}
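
The second expected sequence needs the three-byte VEX form because ymm10 and ymm11 live in the upper register bank and require the R/B extension bits. A standalone sketch (not V8 code) that decodes the checked bytes C4 41 7E 7F DA back into their fields:

#include <cstdint>
#include <cstdio>

int main() {
  // vmovdqu ymm10, ymm11 is emitted via the store form: opcode 7F, source in ModRM.reg.
  const uint8_t bytes[] = {0xC4, 0x41, 0x7E, 0x7F, 0xDA};
  uint8_t b1 = bytes[1], b2 = bytes[2], modrm = bytes[4];
  int R = !((b1 >> 7) & 1);                 // inverted; extends ModRM.reg (the source)
  int B = !((b1 >> 5) & 1);                 // inverted; extends ModRM.rm (the destination)
  int map = b1 & 0x1F;                      // 00001 -> 0F opcode map (k0F)
  int L = (b2 >> 2) & 1;                    // 1 -> 256-bit vector length (kL256)
  int pp = b2 & 3;                          // 10 -> F3 mandatory prefix (kF3)
  int reg = ((modrm >> 3) & 7) | (R << 3);  // 3 | 8 = 11 -> ymm11
  int rm = (modrm & 7) | (B << 3);          // 2 | 8 = 10 -> ymm10
  std::printf("map=%d L=%d pp=%d reg=ymm%d rm=ymm%d\n", map, L, pp, reg, rm);
  return 0;
}
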
TEST(CpuFeatures_ProbeImpl) {
  // Support for a newer extension implies support for the older extensions.
  CHECK_IMPLIES(CpuFeatures::IsSupported(FMA3), CpuFeatures::IsSupported(AVX));
......