Commit fd39fdc5 authored by bradnelson's avatar bradnelson Committed by Commit bot

Adding x64 SIMD assembler support.

Based on assembler changes from this patch:
https://codereview.chromium.org/90643003/

BUG=https://bugs.chromium.org/p/v8/issues/detail?id=4124
R=titzer@chromium.org,bbudge@chromium.org
LOG=N

Review-Url: https://codereview.chromium.org/1994083003
Cr-Commit-Position: refs/heads/master@{#36383}
parent de7d47e2
......@@ -179,12 +179,11 @@ void Assembler::emit_optional_rex_32(Register rm_reg) {
if (rm_reg.high_bit()) emit(0x41);
}
void Assembler::emit_optional_rex_32(XMMRegister rm_reg) {
if (rm_reg.high_bit()) emit(0x41);
void Assembler::emit_optional_rex_32(XMMRegister reg) {
  // Emits a REX prefix only when |reg| is a high register (bit 3 of its code
  // is set). The single-register SSE form that uses this helper (psrldq, via
  // emit_sse_operand(XMMRegister)) places the register in the ModR/M r/m
  // field, so the extension bit has to be REX.B (0x41), exactly like the
  // Register overload. Shifting bit 3 down by one would produce REX.R (0x44),
  // which does not extend r/m and would silently encode xmm8-xmm15 as
  // xmm0-xmm7.
  byte rex_bits = (reg.code() & 0x8) >> 3;
  if (rex_bits != 0) emit(0x40 | rex_bits);
}
void Assembler::emit_optional_rex_32(const Operand& op) {
  // Nothing to emit when the operand carries no REX bits.
  if (op.rex_ == 0) return;
  // Otherwise merge the operand's REX bits into the 0x40 base prefix.
  emit(0x40 | op.rex_);
}
......
......@@ -2890,6 +2890,18 @@ void Assembler::pinsrd(XMMRegister dst, const Operand& src, int8_t imm8) {
emit(imm8);
}
void Assembler::insertps(XMMRegister dst, XMMRegister src, byte imm8) {
  // Byte sequence: 66, optional REX, 0F 3A 21, operand byte, imm8.
  DCHECK(CpuFeatures::IsSupported(SSE4_1));
  DCHECK(is_uint8(imm8));
  EnsureSpace space(this);
  emit(0x66);
  emit_optional_rex_32(dst, src);
  // Three-byte opcode.
  emit(0x0F);
  emit(0x3A);
  emit(0x21);
  emit_sse_operand(dst, src);
  // Trailing immediate.
  emit(imm8);
}
void Assembler::movsd(const Operand& dst, XMMRegister src) {
DCHECK(!IsEnabled(AVX));
......@@ -3211,6 +3223,38 @@ void Assembler::psrld(XMMRegister reg, byte imm8) {
emit(imm8);
}
void Assembler::cmpps(XMMRegister dst, XMMRegister src, int8_t cmp) {
  // Byte sequence: optional REX, 0F C2, operand byte, then |cmp| as the
  // trailing immediate that selects the comparison.
  EnsureSpace space(this);
  emit_optional_rex_32(dst, src);
  emit(0x0F);
  emit(0xC2);
  emit_sse_operand(dst, src);
  emit(cmp);
}
void Assembler::cmpeqps(XMMRegister dst, XMMRegister src) {
  // cmpps with immediate 0x0.
  const int8_t predicate = 0x0;
  cmpps(dst, src, predicate);
}
void Assembler::cmpltps(XMMRegister dst, XMMRegister src) {
  // cmpps with immediate 0x1.
  const int8_t predicate = 0x1;
  cmpps(dst, src, predicate);
}
void Assembler::cmpleps(XMMRegister dst, XMMRegister src) {
  // cmpps with immediate 0x2.
  const int8_t predicate = 0x2;
  cmpps(dst, src, predicate);
}
void Assembler::cmpneqps(XMMRegister dst, XMMRegister src) {
  // cmpps with immediate 0x4.
  const int8_t predicate = 0x4;
  cmpps(dst, src, predicate);
}
void Assembler::cmpnltps(XMMRegister dst, XMMRegister src) {
  // cmpps with immediate 0x5.
  const int8_t predicate = 0x5;
  cmpps(dst, src, predicate);
}
void Assembler::cmpnleps(XMMRegister dst, XMMRegister src) {
  // cmpps with immediate 0x6.
  const int8_t predicate = 0x6;
  cmpps(dst, src, predicate);
}
void Assembler::cvttss2si(Register dst, const Operand& src) {
DCHECK(!IsEnabled(AVX));
......@@ -4192,6 +4236,263 @@ void Assembler::rorxl(Register dst, const Operand& src, byte imm8) {
emit(imm8);
}
void Assembler::minps(XMMRegister dst, XMMRegister src) {
  // Register-register form: optional REX, 0F 5D, operand byte.
  EnsureSpace space(this);
  emit_optional_rex_32(dst, src);
  emit(0x0F);
  emit(0x5D);
  emit_sse_operand(dst, src);
}
void Assembler::minps(XMMRegister dst, const Operand& src) {
  // Register-operand form: optional REX, 0F 5D, operand bytes.
  EnsureSpace space(this);
  emit_optional_rex_32(dst, src);
  emit(0x0F);
  emit(0x5D);
  emit_sse_operand(dst, src);
}
void Assembler::maxps(XMMRegister dst, XMMRegister src) {
  // Register-register form: optional REX, 0F 5F, operand byte.
  EnsureSpace space(this);
  emit_optional_rex_32(dst, src);
  emit(0x0F);
  emit(0x5F);
  emit_sse_operand(dst, src);
}
void Assembler::maxps(XMMRegister dst, const Operand& src) {
  // Register-operand form: optional REX, 0F 5F, operand bytes.
  EnsureSpace space(this);
  emit_optional_rex_32(dst, src);
  emit(0x0F);
  emit(0x5F);
  emit_sse_operand(dst, src);
}
void Assembler::rcpps(XMMRegister dst, XMMRegister src) {
  // Register-register form: optional REX, 0F 53, operand byte.
  EnsureSpace space(this);
  emit_optional_rex_32(dst, src);
  emit(0x0F);
  emit(0x53);
  emit_sse_operand(dst, src);
}
void Assembler::rcpps(XMMRegister dst, const Operand& src) {
  // Register-operand form: optional REX, 0F 53, operand bytes.
  EnsureSpace space(this);
  emit_optional_rex_32(dst, src);
  emit(0x0F);
  emit(0x53);
  emit_sse_operand(dst, src);
}
void Assembler::rsqrtps(XMMRegister dst, XMMRegister src) {
  // Register-register form: optional REX, 0F 52, operand byte.
  EnsureSpace space(this);
  emit_optional_rex_32(dst, src);
  emit(0x0F);
  emit(0x52);
  emit_sse_operand(dst, src);
}
void Assembler::rsqrtps(XMMRegister dst, const Operand& src) {
  // Register-operand form: optional REX, 0F 52, operand bytes.
  EnsureSpace space(this);
  emit_optional_rex_32(dst, src);
  emit(0x0F);
  emit(0x52);
  emit_sse_operand(dst, src);
}
void Assembler::sqrtps(XMMRegister dst, XMMRegister src) {
  // Register-register form: optional REX, 0F 51, operand byte.
  EnsureSpace space(this);
  emit_optional_rex_32(dst, src);
  emit(0x0F);
  emit(0x51);
  emit_sse_operand(dst, src);
}
void Assembler::sqrtps(XMMRegister dst, const Operand& src) {
  // Register-operand form: optional REX, 0F 51, operand bytes.
  EnsureSpace space(this);
  emit_optional_rex_32(dst, src);
  emit(0x0F);
  emit(0x51);
  emit_sse_operand(dst, src);
}
void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
  // Register-register form: optional REX, 0F 5B, operand byte. Unlike
  // cvtps2dq, no 0x66 prefix is emitted.
  EnsureSpace space(this);
  emit_optional_rex_32(dst, src);
  emit(0x0F);
  emit(0x5B);
  emit_sse_operand(dst, src);
}
void Assembler::cvtdq2ps(XMMRegister dst, const Operand& src) {
  // Register-operand form: optional REX, 0F 5B, operand bytes. Unlike
  // cvtps2dq, no 0x66 prefix is emitted.
  EnsureSpace space(this);
  emit_optional_rex_32(dst, src);
  emit(0x0F);
  emit(0x5B);
  emit_sse_operand(dst, src);
}
void Assembler::movups(XMMRegister dst, XMMRegister src) {
  // Register-to-register move. Two encodings are used depending on the low
  // bits of |src|: the 0F 10 form with (dst, src), or the 0F 11 form with the
  // operands swapped.
  EnsureSpace space(this);
  if (src.low_bits() != 4) {
    // Common case: 0F 10 with dst first.
    emit_optional_rex_32(dst, src);
    emit(0x0F);
    emit(0x10);
    emit_sse_operand(dst, src);
    return;
  }
  // src.low_bits() == 4: switch to the 0F 11 form with swapped operands, which
  // the original code does to try to avoid an unnecessary SIB byte.
  // NOTE(review): both operands are registers here - confirm that the SIB
  // concern actually applies to this overload.
  emit_optional_rex_32(src, dst);
  emit(0x0F);
  emit(0x11);
  emit_sse_operand(src, dst);
}
void Assembler::movups(XMMRegister dst, const Operand& src) {
  // Operand-to-register form: optional REX, 0F 10, operand bytes.
  EnsureSpace space(this);
  emit_optional_rex_32(dst, src);
  emit(0x0F);
  emit(0x10);
  emit_sse_operand(dst, src);
}
void Assembler::movups(const Operand& dst, XMMRegister src) {
  // Register-to-operand form: optional REX, 0F 11, operand bytes. The
  // register is passed first to the REX/operand helpers.
  EnsureSpace space(this);
  emit_optional_rex_32(src, dst);
  emit(0x0F);
  emit(0x11);
  emit_sse_operand(src, dst);
}
void Assembler::paddd(XMMRegister dst, XMMRegister src) {
  // Register-register form: 66, optional REX, 0F FE, operand byte.
  EnsureSpace space(this);
  emit(0x66);
  emit_optional_rex_32(dst, src);
  emit(0x0F);
  emit(0xFE);
  emit_sse_operand(dst, src);
}
void Assembler::paddd(XMMRegister dst, const Operand& src) {
  // Register-operand form: 66, optional REX, 0F FE, operand bytes.
  EnsureSpace space(this);
  emit(0x66);
  emit_optional_rex_32(dst, src);
  emit(0x0F);
  emit(0xFE);
  emit_sse_operand(dst, src);
}
void Assembler::psubd(XMMRegister dst, XMMRegister src) {
  // Register-register form: 66, optional REX, 0F FA, operand byte.
  EnsureSpace space(this);
  emit(0x66);
  emit_optional_rex_32(dst, src);
  emit(0x0F);
  emit(0xFA);
  emit_sse_operand(dst, src);
}
void Assembler::psubd(XMMRegister dst, const Operand& src) {
  // Register-operand form: 66, optional REX, 0F FA, operand bytes.
  EnsureSpace space(this);
  emit(0x66);
  emit_optional_rex_32(dst, src);
  emit(0x0F);
  emit(0xFA);
  emit_sse_operand(dst, src);
}
void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
  // Register-register form: 66, optional REX, 0F 38 40, operand byte.
  // Asserts that SSE4_1 has been enabled on this assembler.
  DCHECK(IsEnabled(SSE4_1));
  EnsureSpace space(this);
  emit(0x66);
  emit_optional_rex_32(dst, src);
  // Three-byte opcode.
  emit(0x0F);
  emit(0x38);
  emit(0x40);
  emit_sse_operand(dst, src);
}
void Assembler::pmulld(XMMRegister dst, const Operand& src) {
  // Register-operand form: 66, optional REX, 0F 38 40, operand bytes.
  // This must use the same three-byte opcode as the register-register
  // overload; 66 0F F4 is the pmuludq encoding and would assemble a different
  // instruction. The SSE4_1 assertion likewise mirrors the register form.
  DCHECK(IsEnabled(SSE4_1));
  EnsureSpace ensure_space(this);
  emit(0x66);
  emit_optional_rex_32(dst, src);
  emit(0x0F);
  emit(0x38);
  emit(0x40);
  emit_sse_operand(dst, src);
}
void Assembler::pmuludq(XMMRegister dst, XMMRegister src) {
  // Register-register form: 66, optional REX, 0F F4, operand byte.
  EnsureSpace space(this);
  emit(0x66);
  emit_optional_rex_32(dst, src);
  emit(0x0F);
  emit(0xF4);
  emit_sse_operand(dst, src);
}
void Assembler::pmuludq(XMMRegister dst, const Operand& src) {
  // Register-operand form: 66, optional REX, 0F F4, operand bytes.
  EnsureSpace space(this);
  emit(0x66);
  emit_optional_rex_32(dst, src);
  emit(0x0F);
  emit(0xF4);
  emit_sse_operand(dst, src);
}
void Assembler::punpackldq(XMMRegister dst, XMMRegister src) {
  // Register-register form: 66, optional REX, 0F 62, operand byte.
  EnsureSpace space(this);
  emit(0x66);
  emit_optional_rex_32(dst, src);
  emit(0x0F);
  emit(0x62);
  emit_sse_operand(dst, src);
}
void Assembler::punpackldq(XMMRegister dst, const Operand& src) {
  // Register-operand form: 66, optional REX, 0F 62, operand bytes.
  EnsureSpace space(this);
  emit(0x66);
  emit_optional_rex_32(dst, src);
  emit(0x0F);
  emit(0x62);
  emit_sse_operand(dst, src);
}
void Assembler::psrldq(XMMRegister dst, uint8_t shift) {
  // Byte sequence: 66, optional REX, 0F 73, single-register operand byte,
  // then |shift| as the trailing immediate. |dst| is the only register
  // operand, so the one-register REX and operand helpers are used.
  EnsureSpace space(this);
  emit(0x66);
  emit_optional_rex_32(dst);
  emit(0x0F);
  emit(0x73);
  emit_sse_operand(dst);
  emit(shift);
}
void Assembler::cvtps2dq(XMMRegister dst, XMMRegister src) {
  // Register-register form: 66, optional REX, 0F 5B, operand byte.
  EnsureSpace space(this);
  emit(0x66);
  emit_optional_rex_32(dst, src);
  emit(0x0F);
  emit(0x5B);
  emit_sse_operand(dst, src);
}
void Assembler::cvtps2dq(XMMRegister dst, const Operand& src) {
  // Register-operand form: 66, optional REX, 0F 5B, operand bytes.
  EnsureSpace space(this);
  emit(0x66);
  emit_optional_rex_32(dst, src);
  emit(0x0F);
  emit(0x5B);
  emit_sse_operand(dst, src);
}
void Assembler::pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
  // Byte sequence: 66, optional REX, 0F 70, operand byte, then |shuffle| as
  // the trailing immediate.
  EnsureSpace space(this);
  emit(0x66);
  emit_optional_rex_32(dst, src);
  emit(0x0F);
  emit(0x70);
  emit_sse_operand(dst, src);
  emit(shuffle);
}
void Assembler::emit_sse_operand(XMMRegister reg, const Operand& adr) {
Register ireg = { reg.code() };
......@@ -4219,6 +4520,10 @@ void Assembler::emit_sse_operand(Register dst, XMMRegister src) {
emit(0xC0 | (dst.low_bits() << 3) | src.low_bits());
}
void Assembler::emit_sse_operand(XMMRegister dst) {
  // Single-register operand byte: 0xD8 is binary 11 011 000, so the top two
  // bits are 11, the middle field is fixed at 3, and the low three bits of
  // |dst| fill the bottom field.
  emit(dst.low_bits() | 0xD8);
}
void Assembler::db(uint8_t data) {
EnsureSpace ensure_space(this);
......
......@@ -1157,16 +1157,53 @@ class Assembler : public AssemblerBase {
void punpckhdq(XMMRegister dst, XMMRegister src);
// SSE 4.1 instruction
void insertps(XMMRegister dst, XMMRegister src, byte imm8);
void extractps(Register dst, XMMRegister src, byte imm8);
void pextrd(Register dst, XMMRegister src, int8_t imm8);
void pinsrd(XMMRegister dst, Register src, int8_t imm8);
void pinsrd(XMMRegister dst, const Operand& src, int8_t imm8);
void roundss(XMMRegister dst, XMMRegister src, RoundingMode mode);
void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);
// NOTE(review): apart from pmulld, none of the definitions for the
// declarations below assert SSE4_1, so the "SSE 4.1" heading above presumably
// does not apply to them - confirm and regroup if so.
// Packed single-precision compares. cmpps takes the comparison selector as an
// immediate; the named variants forward to it with 0x0, 0x1, 0x2, 0x4, 0x5
// and 0x6 respectively.
void cmpps(XMMRegister dst, XMMRegister src, int8_t cmp);
void cmpeqps(XMMRegister dst, XMMRegister src);
void cmpltps(XMMRegister dst, XMMRegister src);
void cmpleps(XMMRegister dst, XMMRegister src);
void cmpneqps(XMMRegister dst, XMMRegister src);
void cmpnltps(XMMRegister dst, XMMRegister src);
void cmpnleps(XMMRegister dst, XMMRegister src);
// Two-byte-opcode (0F xx) instructions emitted without a 0x66 prefix.
void minps(XMMRegister dst, XMMRegister src);
void minps(XMMRegister dst, const Operand& src);
void maxps(XMMRegister dst, XMMRegister src);
void maxps(XMMRegister dst, const Operand& src);
void rcpps(XMMRegister dst, XMMRegister src);
void rcpps(XMMRegister dst, const Operand& src);
void rsqrtps(XMMRegister dst, XMMRegister src);
void rsqrtps(XMMRegister dst, const Operand& src);
void sqrtps(XMMRegister dst, XMMRegister src);
void sqrtps(XMMRegister dst, const Operand& src);
// movups has register, load (operand source) and store (operand destination)
// overloads.
void movups(XMMRegister dst, XMMRegister src);
void movups(XMMRegister dst, const Operand& src);
void movups(const Operand& dst, XMMRegister src);
// Instructions emitted with a leading 0x66 prefix.
void paddd(XMMRegister dst, XMMRegister src);
void paddd(XMMRegister dst, const Operand& src);
void psubd(XMMRegister dst, XMMRegister src);
void psubd(XMMRegister dst, const Operand& src);
// pmulld's register form asserts IsEnabled(SSE4_1) and uses the three-byte
// opcode 0F 38 40.
void pmulld(XMMRegister dst, XMMRegister src);
void pmulld(XMMRegister dst, const Operand& src);
void pmuludq(XMMRegister dst, XMMRegister src);
void pmuludq(XMMRegister dst, const Operand& src);
// NOTE(review): emits 66 0F 62; the neighbouring declaration is spelled
// punpckhdq, so confirm whether the "punpack" spelling here is intentional.
void punpackldq(XMMRegister dst, XMMRegister src);
void punpackldq(XMMRegister dst, const Operand& src);
// |shift| and |shuffle| are emitted as the trailing immediate byte.
void psrldq(XMMRegister dst, uint8_t shift);
void pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle);
// cvtps2dq is emitted with a 0x66 prefix, cvtdq2ps without; both use 0F 5B.
void cvtps2dq(XMMRegister dst, XMMRegister src);
void cvtps2dq(XMMRegister dst, const Operand& src);
void cvtdq2ps(XMMRegister dst, XMMRegister src);
void cvtdq2ps(XMMRegister dst, const Operand& src);
// AVX instruction
void vfmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmasd(0x99, dst, src1, src2);
......@@ -1943,6 +1980,7 @@ class Assembler : public AssemblerBase {
void emit_sse_operand(Register reg, const Operand& adr);
void emit_sse_operand(XMMRegister dst, Register src);
void emit_sse_operand(Register dst, XMMRegister src);
// Emits a single-register operand byte: 0xD8 combined with the low bits of
// |dst| (used by psrldq, whose only register operand is |dst|).
void emit_sse_operand(XMMRegister dst);
// Emit machine code for one of the operations ADD, ADC, SUB, SBC,
// AND, OR, XOR, or CMP. The encodings of these operations are all
......
......@@ -484,11 +484,48 @@ TEST(DisasmX64) {
{
if (CpuFeatures::IsSupported(SSE4_1)) {
CpuFeatureScope scope(&assm, SSE4_1);
__ insertps(xmm5, xmm1, 123);
__ extractps(rax, xmm1, 0);
__ pextrd(rbx, xmm15, 0);
__ pextrd(r12, xmm0, 1);
__ pinsrd(xmm9, r9, 0);
__ pinsrd(xmm5, rax, 1);
__ pinsrd(xmm5, Operand(rax, 4), 1);
__ cmpps(xmm5, xmm1, 1);
__ cmpeqps(xmm5, xmm1);
__ cmpltps(xmm5, xmm1);
__ cmpleps(xmm5, xmm1);
__ cmpneqps(xmm5, xmm1);
__ cmpnltps(xmm5, xmm1);
__ cmpnleps(xmm5, xmm1);
__ minps(xmm5, xmm1);
__ minps(xmm5, Operand(rdx, 4));
__ maxps(xmm5, xmm1);
__ maxps(xmm5, Operand(rdx, 4));
__ rcpps(xmm5, xmm1);
__ rcpps(xmm5, Operand(rdx, 4));
__ sqrtps(xmm5, xmm1);
__ sqrtps(xmm5, Operand(rdx, 4));
__ movups(xmm5, xmm1);
__ movups(xmm5, Operand(rdx, 4));
__ movups(Operand(rdx, 4), xmm5);
__ paddd(xmm5, xmm1);
__ paddd(xmm5, Operand(rdx, 4));
__ psubd(xmm5, xmm1);
__ psubd(xmm5, Operand(rdx, 4));
__ pmulld(xmm5, xmm1);
__ pmulld(xmm5, Operand(rdx, 4));
__ pmuludq(xmm5, xmm1);
__ pmuludq(xmm5, Operand(rdx, 4));
__ punpackldq(xmm5, xmm1);
__ punpackldq(xmm5, Operand(rdx, 4));
__ psrldq(xmm5, 123);
__ pshufd(xmm5, xmm1, 3);
__ cvtps2dq(xmm5, xmm1);
__ cvtps2dq(xmm5, Operand(rdx, 4));
__ cvtdq2ps(xmm5, xmm1);
__ cvtdq2ps(xmm5, Operand(rdx, 4));
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment