Commit fbf58a5a, authored by bradnelson and committed by Commit bot

Add ia32 SIMD assembler support.

Based on assembler changes from this patch:
https://codereview.chromium.org/90643003/

BUG=https://bugs.chromium.org/p/v8/issues/detail?id=4124
R=titzer@chromium.org
LOG=N

Review-Url: https://codereview.chromium.org/1991713002
Cr-Commit-Position: refs/heads/master@{#36349}
parent 3596cac8
......@@ -282,6 +282,50 @@ Operand::Operand(Register index,
set_dispr(disp, rmode);
}
// Builds a memory operand that addresses |offset| bytes past |operand|.
//
// The ModR/M byte (and SIB byte, if any) of |operand| are kept, the existing
// displacement is decoded, |offset| is added to it, and the shortest
// displacement form that can hold the sum is re-encoded.
//
// |operand| must be a memory operand (ModR/M mode 0, 1 or 2). The caller must
// ensure that displacement + offset does not overflow a signed 32-bit value.
//
// NOTE(review): only buf_ and len_ are written here. If Operand also carries
// relocation information for its displacement, it is not propagated from
// |operand| by this constructor -- confirm against the class definition.
Operand::Operand(const Operand& operand, int32_t offset) {
  DCHECK(operand.len_ >= 1);
  // Operand encodes ModR/M [SIB] [Disp].
  byte modrm = operand.buf_[0];
  DCHECK(modrm < 0xC0);  // Disallow mode 3 (register target).
  bool has_sib = ((modrm & 0x07) == 0x04);
  byte mode = modrm & 0xC0;
  int disp_offset = has_sib ? 2 : 1;
  int base_reg = (has_sib ? operand.buf_[1] : modrm) & 0x07;
  // Mode 0 with base code 5 (in the ModR/M r/m field or the SIB base field)
  // means "no base register" and is always followed by a 32-bit displacement.
  bool is_baseless = (mode == 0) && (base_reg == 0x05);
  int32_t disp_value = 0;
  if (mode == 0x80 || is_baseless) {
    // Mode 2, or mode 0 without a base register: 32-bit displacement.
    disp_value = *bit_cast<const int32_t*>(&operand.buf_[disp_offset]);
  } else if (mode == 0x40) {
    // Mode 1: sign-extended 8-bit displacement.
    disp_value = static_cast<signed char>(operand.buf_[disp_offset]);
  }
  // Overflow check. The bound is computed on the side that cannot overflow,
  // so the check itself never performs the (undefined) overflowing signed
  // addition it is guarding against.
  DCHECK(offset >= 0 ? disp_value <= 0x7FFFFFFF - offset
                     : disp_value >= (-0x7FFFFFFF - 1) - offset);
  disp_value += offset;
  // Write new operand with same registers, but with modified displacement.
  if (!is_int8(disp_value) || is_baseless) {
    // Need 32 bits of displacement: mode 2, or mode 0 when there is no base
    // register (that form has no shorter displacement encoding).
    buf_[0] = (modrm & 0x3f) | (is_baseless ? 0x00 : 0x80);
    len_ = disp_offset + 4;
    Memory::int32_at(&buf_[disp_offset]) = disp_value;
  } else if (disp_value != 0 || (base_reg == 0x05)) {
    // Need 8 bits of displacement. Base code 5 cannot use mode 0 even for a
    // zero displacement, because mode 0 with code 5 means "no base register".
    buf_[0] = (modrm & 0x3f) | 0x40;  // Mode 1.
    len_ = disp_offset + 1;
    buf_[disp_offset] = static_cast<byte>(disp_value);
  } else {
    // Need no displacement.
    buf_[0] = (modrm & 0x3f);  // Mode 0.
    len_ = disp_offset;
  }
  if (has_sib) {
    // The SIB byte (scale/index/base) is carried over unchanged.
    buf_[1] = operand.buf_[1];
  }
}
bool Operand::is_reg(Register reg) const {
return ((buf_[0] & 0xF8) == 0xC0) // addressing mode is register only.
......@@ -2864,6 +2908,155 @@ void Assembler::rorx(Register dst, const Operand& src, byte imm8) {
EMIT(imm8);
}
// movups, load form: emits opcode bytes 0F 10 followed by the operand
// encoding, with |dst| as the XMM register and |src| as the other operand.
void Assembler::movups(XMMRegister dst, const Operand& src) {
  EnsureSpace space_guard(this);
  static const byte kOpcode[] = {0x0F, 0x10};
  for (byte opcode_byte : kOpcode) {
    EMIT(opcode_byte);
  }
  emit_sse_operand(dst, src);
}
void Assembler::movups(const Operand& dst, XMMRegister src) {
EnsureSpace ensure_space(this);
EMIT(0x0F);
EMIT(0x11);
emit_sse_operand(src, dst);
}
// minps: emits opcode bytes 0F 5D followed by the operand encoding for
// |dst| (XMM register) and |src|.
void Assembler::minps(XMMRegister dst, const Operand& src) {
  EnsureSpace space_guard(this);
  static const byte kOpcode[] = {0x0F, 0x5D};
  for (byte opcode_byte : kOpcode) {
    EMIT(opcode_byte);
  }
  emit_sse_operand(dst, src);
}
// maxps: emits opcode bytes 0F 5F followed by the operand encoding for
// |dst| (XMM register) and |src|.
void Assembler::maxps(XMMRegister dst, const Operand& src) {
  EnsureSpace space_guard(this);
  static const byte kOpcode[] = {0x0F, 0x5F};
  for (byte opcode_byte : kOpcode) {
    EMIT(opcode_byte);
  }
  emit_sse_operand(dst, src);
}
// rcpps: emits opcode bytes 0F 53 followed by the operand encoding for
// |dst| (XMM register) and |src|.
void Assembler::rcpps(XMMRegister dst, const Operand& src) {
  EnsureSpace space_guard(this);
  static const byte kOpcode[] = {0x0F, 0x53};
  for (byte opcode_byte : kOpcode) {
    EMIT(opcode_byte);
  }
  emit_sse_operand(dst, src);
}
// rsqrtps: emits opcode bytes 0F 52 followed by the operand encoding for
// |dst| (XMM register) and |src|.
void Assembler::rsqrtps(XMMRegister dst, const Operand& src) {
  EnsureSpace space_guard(this);
  static const byte kOpcode[] = {0x0F, 0x52};
  for (byte opcode_byte : kOpcode) {
    EMIT(opcode_byte);
  }
  emit_sse_operand(dst, src);
}
// sqrtps: emits opcode bytes 0F 51 followed by the operand encoding for
// |dst| (XMM register) and |src|.
void Assembler::sqrtps(XMMRegister dst, const Operand& src) {
  EnsureSpace space_guard(this);
  static const byte kOpcode[] = {0x0F, 0x51};
  for (byte opcode_byte : kOpcode) {
    EMIT(opcode_byte);
  }
  emit_sse_operand(dst, src);
}
// cvtdq2ps: emits opcode bytes 0F 5B followed by the operand encoding for
// |dst| (XMM register) and |src|.
void Assembler::cvtdq2ps(XMMRegister dst, const Operand& src) {
  EnsureSpace space_guard(this);
  static const byte kOpcode[] = {0x0F, 0x5B};
  for (byte opcode_byte : kOpcode) {
    EMIT(opcode_byte);
  }
  emit_sse_operand(dst, src);
}
// paddd: emits the byte sequence 66 0F FE followed by the operand encoding
// for |dst| (XMM register) and |src|.
void Assembler::paddd(XMMRegister dst, const Operand& src) {
  EnsureSpace space_guard(this);
  static const byte kOpcode[] = {0x66, 0x0F, 0xFE};
  for (byte opcode_byte : kOpcode) {
    EMIT(opcode_byte);
  }
  emit_sse_operand(dst, src);
}
// psubd: emits the byte sequence 66 0F FA followed by the operand encoding
// for |dst| (XMM register) and |src|.
void Assembler::psubd(XMMRegister dst, const Operand& src) {
  EnsureSpace space_guard(this);
  static const byte kOpcode[] = {0x66, 0x0F, 0xFA};
  for (byte opcode_byte : kOpcode) {
    EMIT(opcode_byte);
  }
  emit_sse_operand(dst, src);
}
// pmulld: emits the byte sequence 66 0F 38 40 followed by the operand
// encoding for |dst| (XMM register) and |src|. Asserts (debug builds) that
// SSE4_1 is enabled on this assembler.
void Assembler::pmulld(XMMRegister dst, const Operand& src) {
  DCHECK(IsEnabled(SSE4_1));
  EnsureSpace space_guard(this);
  static const byte kOpcode[] = {0x66, 0x0F, 0x38, 0x40};
  for (byte opcode_byte : kOpcode) {
    EMIT(opcode_byte);
  }
  emit_sse_operand(dst, src);
}
// pmuludq: emits the byte sequence 66 0F F4 followed by the operand encoding
// for |dst| (XMM register) and |src|.
void Assembler::pmuludq(XMMRegister dst, const Operand& src) {
  EnsureSpace space_guard(this);
  static const byte kOpcode[] = {0x66, 0x0F, 0xF4};
  for (byte opcode_byte : kOpcode) {
    EMIT(opcode_byte);
  }
  emit_sse_operand(dst, src);
}
// punpackldq: emits the byte sequence 66 0F 62 followed by the operand
// encoding for |dst| (XMM register) and |src|.
void Assembler::punpackldq(XMMRegister dst, const Operand& src) {
  EnsureSpace space_guard(this);
  static const byte kOpcode[] = {0x66, 0x0F, 0x62};
  for (byte opcode_byte : kOpcode) {
    EMIT(opcode_byte);
  }
  emit_sse_operand(dst, src);
}
// cvtps2dq: emits the byte sequence 66 0F 5B followed by the operand
// encoding for |dst| (XMM register) and |src|.
void Assembler::cvtps2dq(XMMRegister dst, const Operand& src) {
  EnsureSpace space_guard(this);
  static const byte kOpcode[] = {0x66, 0x0F, 0x5B};
  for (byte opcode_byte : kOpcode) {
    EMIT(opcode_byte);
  }
  emit_sse_operand(dst, src);
}
// cmpps: emits opcode bytes 0F C2, the register-register operand encoding
// for |dst| and |src|, and finally |cmp| as a trailing immediate byte that
// selects the comparison. |cmp| is emitted as given; it is not range-checked.
void Assembler::cmpps(XMMRegister dst, XMMRegister src, int8_t cmp) {
  EnsureSpace space_guard(this);
  static const byte kOpcode[] = {0x0F, 0xC2};
  for (byte opcode_byte : kOpcode) {
    EMIT(opcode_byte);
  }
  emit_sse_operand(dst, src);
  EMIT(cmp);
}
// cmpeqps: cmpps with comparison immediate 0.
void Assembler::cmpeqps(XMMRegister dst, XMMRegister src) {
  const int8_t kPredicate = 0x0;
  cmpps(dst, src, kPredicate);
}
// cmpltps: cmpps with comparison immediate 1.
void Assembler::cmpltps(XMMRegister dst, XMMRegister src) {
  const int8_t kPredicate = 0x1;
  cmpps(dst, src, kPredicate);
}
// cmpleps: cmpps with comparison immediate 2.
void Assembler::cmpleps(XMMRegister dst, XMMRegister src) {
  const int8_t kPredicate = 0x2;
  cmpps(dst, src, kPredicate);
}
// cmpneqps: cmpps with comparison immediate 4.
void Assembler::cmpneqps(XMMRegister dst, XMMRegister src) {
  const int8_t kPredicate = 0x4;
  cmpps(dst, src, kPredicate);
}
// cmpnltps: cmpps with comparison immediate 5.
void Assembler::cmpnltps(XMMRegister dst, XMMRegister src) {
  const int8_t kPredicate = 0x5;
  cmpps(dst, src, kPredicate);
}
// cmpnleps: cmpps with comparison immediate 6.
void Assembler::cmpnleps(XMMRegister dst, XMMRegister src) {
  const int8_t kPredicate = 0x6;
  cmpps(dst, src, kPredicate);
}
// insertps: emits the byte sequence 66 0F 3A 21, the register-register
// operand encoding for |dst| and |src|, and |imm8| as a trailing immediate.
//
// The feature guard uses IsEnabled(SSE4_1), matching the other SSE4.1
// emitter in this file (pmulld), rather than CpuFeatures::IsSupported.
// No range check on |imm8| is needed: it is already a byte.
void Assembler::insertps(XMMRegister dst, XMMRegister src, byte imm8) {
  DCHECK(IsEnabled(SSE4_1));
  EnsureSpace ensure_space(this);
  EMIT(0x66);
  EMIT(0x0F);
  EMIT(0x3A);
  EMIT(0x21);
  emit_sse_operand(dst, src);
  EMIT(imm8);
}
void Assembler::emit_sse_operand(XMMRegister reg, const Operand& adr) {
Register ireg = { reg.code() };
......
......@@ -222,7 +222,6 @@ enum Condition {
not_sign = positive
};
// Returns the equivalent of !cc.
// Negation of the default no_condition (-1) results in a non-default
// no_condition value (-2). As long as tests for no_condition check
......@@ -358,6 +357,11 @@ class Operand BASE_EMBEDDED {
RelocInfo::INTERNAL_REFERENCE);
}
// Constructs a memory operand that addresses |offset| bytes past |base|.
// |base| must be a memory operand, not a register operand (this is asserted
// in debug builds). The offset is added to the existing displacement as a
// 32-bit signed value, and the displacement width of the encoding is chosen
// again to fit the sum.
// The caller must ensure overflow does not occur.
Operand(const Operand& base, int32_t offset);
static Operand StaticVariable(const ExternalReference& ext) {
return Operand(reinterpret_cast<int32_t>(ext.address()),
RelocInfo::EXTERNAL_REFERENCE);
......@@ -963,6 +967,8 @@ class Assembler : public AssemblerBase {
void ucomiss(XMMRegister dst, const Operand& src);
void movaps(XMMRegister dst, XMMRegister src);
void shufps(XMMRegister dst, XMMRegister src, byte imm8);
// movups, load form: |dst| is the XMM register, |src| the other operand.
void movups(XMMRegister dst, const Operand& src);
// movups, store form: |dst| is the operand written, |src| the XMM register.
void movups(const Operand& dst, XMMRegister src);
// Register-register convenience form; forwards to the Operand overload.
void maxss(XMMRegister dst, XMMRegister src) { maxss(dst, Operand(src)); }
void maxss(XMMRegister dst, const Operand& src);
......@@ -984,6 +990,24 @@ class Assembler : public AssemblerBase {
// For each instruction below, the (XMMRegister, XMMRegister) overload is a
// convenience form that wraps |src| in an Operand and forwards to the
// (XMMRegister, const Operand&) overload.
void mulps(XMMRegister dst, XMMRegister src) { mulps(dst, Operand(src)); }
void divps(XMMRegister dst, const Operand& src);
void divps(XMMRegister dst, XMMRegister src) { divps(dst, Operand(src)); }
void minps(XMMRegister dst, XMMRegister src) { minps(dst, Operand(src)); }
void minps(XMMRegister dst, const Operand& src);
void maxps(XMMRegister dst, XMMRegister src) { maxps(dst, Operand(src)); }
void maxps(XMMRegister dst, const Operand& src);
void rcpps(XMMRegister dst, XMMRegister src) { rcpps(dst, Operand(src)); }
void rcpps(XMMRegister dst, const Operand& src);
void rsqrtps(XMMRegister dst, XMMRegister src) { rsqrtps(dst, Operand(src)); }
void rsqrtps(XMMRegister dst, const Operand& src);
void sqrtps(XMMRegister dst, XMMRegister src) { sqrtps(dst, Operand(src)); }
void sqrtps(XMMRegister dst, const Operand& src);
// cmpps emits |cmp| as the trailing immediate byte that selects the
// comparison. Only a register-register form is declared here.
void cmpps(XMMRegister dst, XMMRegister src, int8_t cmp);
// Shorthands for cmpps with a fixed comparison immediate:
// eq = 0, lt = 1, le = 2, neq = 4, nlt = 5, nle = 6.
void cmpeqps(XMMRegister dst, XMMRegister src);
void cmpltps(XMMRegister dst, XMMRegister src);
void cmpleps(XMMRegister dst, XMMRegister src);
void cmpneqps(XMMRegister dst, XMMRegister src);
void cmpnltps(XMMRegister dst, XMMRegister src);
void cmpnleps(XMMRegister dst, XMMRegister src);
// SSE2 instructions
void cvttss2si(Register dst, const Operand& src);
......@@ -1090,6 +1114,30 @@ class Assembler : public AssemblerBase {
}
void pinsrd(XMMRegister dst, const Operand& src, int8_t offset);
// For each instruction below, the (XMMRegister, XMMRegister) overload is a
// convenience form that wraps |src| in an Operand and forwards to the
// (XMMRegister, const Operand&) overload.
void paddd(XMMRegister dst, XMMRegister src) { paddd(dst, Operand(src)); }
void paddd(XMMRegister dst, const Operand& src);
void psubd(XMMRegister dst, XMMRegister src) { psubd(dst, Operand(src)); }
void psubd(XMMRegister dst, const Operand& src);
void pmuludq(XMMRegister dst, XMMRegister src) { pmuludq(dst, Operand(src)); }
void pmuludq(XMMRegister dst, const Operand& src);
// NOTE(review): the usual x86 mnemonic spelling is "punpckldq", and callers
// elsewhere already use a method with that spelling. Confirm whether this
// differently-spelled method is an intentional addition or a duplicate.
void punpackldq(XMMRegister dst, XMMRegister src) {
punpackldq(dst, Operand(src));
}
void punpackldq(XMMRegister dst, const Operand& src);
void cvtps2dq(XMMRegister dst, XMMRegister src) {
cvtps2dq(dst, Operand(src));
}
void cvtps2dq(XMMRegister dst, const Operand& src);
void cvtdq2ps(XMMRegister dst, XMMRegister src) {
cvtdq2ps(dst, Operand(src));
}
void cvtdq2ps(XMMRegister dst, const Operand& src);
// SSE4.1 instructions
// insertps is declared in register-register form only; |imm8| is emitted as
// the trailing immediate byte.
void insertps(XMMRegister dst, XMMRegister src, byte imm8);
void pmulld(XMMRegister dst, XMMRegister src) { pmulld(dst, Operand(src)); }
void pmulld(XMMRegister dst, const Operand& src);
// AVX instructions
void vfmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmadd132sd(dst, src1, Operand(src2));
......
......@@ -1385,7 +1385,20 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
case 0x0F:
{ byte f0byte = data[1];
const char* f0mnem = F0Mnem(f0byte);
if (f0byte == 0x18) {
if (f0byte == 0x10) {
data += 2;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("movups %s,", NameOfXMMRegister(regop));
data += PrintRightXMMOperand(data);
} else if (f0byte == 0x11) {
data += 2;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("movups ");
data += PrintRightXMMOperand(data);
AppendToBuffer(",%s", NameOfXMMRegister(regop));
} else if (f0byte == 0x18) {
data += 2;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
......@@ -1428,28 +1441,16 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("ucomiss %s,", NameOfXMMRegister(regop));
data += PrintRightXMMOperand(data);
} else if (f0byte >= 0x53 && f0byte <= 0x5F) {
} else if (f0byte >= 0x51 && f0byte <= 0x5F) {
const char* const pseudo_op[] = {
"rcpps",
"andps",
"andnps",
"orps",
"xorps",
"addps",
"mulps",
"cvtps2pd",
"cvtdq2ps",
"subps",
"minps",
"divps",
"maxps",
};
"sqrtps", "rsqrtps", "rcpps", "andps", "andnps",
"orps", "xorps", "addps", "mulps", "cvtps2pd",
"cvtdq2ps", "subps", "minps", "divps", "maxps"};
data += 2;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("%s %s,",
pseudo_op[f0byte - 0x53],
AppendToBuffer("%s %s,", pseudo_op[f0byte - 0x51],
NameOfXMMRegister(regop));
data += PrintRightXMMOperand(data);
} else if (f0byte == 0x50) {
......@@ -1460,6 +1461,17 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
NameOfCPURegister(regop),
NameOfXMMRegister(rm));
data++;
} else if (f0byte == 0xC2) {
// Intel manual 2A, Table 3-11.
data += 2;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
const char* const pseudo_op[] = {
"cmpeqps", "cmpltps", "cmpleps", "cmpunordps",
"cmpneqps", "cmpnltps", "cmpnleps", "cmpordps"};
AppendToBuffer("%s %s,%s", pseudo_op[data[1]],
NameOfXMMRegister(regop), NameOfXMMRegister(rm));
data += 2;
} else if (f0byte== 0xC6) {
// shufps xmm, xmm/m128, imm8
data += 2;
......@@ -1471,6 +1483,12 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
NameOfXMMRegister(regop),
static_cast<int>(imm8));
data += 2;
} else if (f0byte == 0x5B) {
data += 2;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("cvtdq2ps %s,", NameOfXMMRegister(rm));
data += PrintRightXMMOperand(data);
} else if ((f0byte & 0xF0) == 0x80) {
data += JumpConditional(data, branch_hint);
} else if (f0byte == 0xBE || f0byte == 0xBF || f0byte == 0xB6 ||
......@@ -1666,6 +1684,13 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data++;
} else if (*data == 0x40) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("pmulld %s,%s", NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data += PrintRightXMMOperand(data);
} else if (*data == 0x2A) {
// movntdqa
UnimplementedInstruction();
......@@ -1702,6 +1727,14 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
NameOfXMMRegister(rm),
static_cast<int>(imm8));
data += 2;
} else if (*data == 0x21) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
int8_t imm8 = static_cast<int8_t>(data[1]);
AppendToBuffer("insertps %s,%s,%d", NameOfXMMRegister(regop),
NameOfXMMRegister(rm), static_cast<int>(imm8));
data += 2;
} else if (*data == 0x17) {
data++;
int mod, regop, rm;
......@@ -1771,6 +1804,37 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data++;
} else if (*data == 0x5B) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("cvtps2dq %s,%s", NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data += PrintRightXMMOperand(data);
} else if (*data == 0x62) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("punpackldq %s,", NameOfXMMRegister(regop));
data += PrintRightXMMOperand(data);
} else if (*data == 0xF4) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("pmuludq %s,", NameOfXMMRegister(regop));
data += PrintRightXMMOperand(data);
} else if (*data == 0xFA) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("psubd %s,", NameOfXMMRegister(regop));
data += PrintRightXMMOperand(data);
} else if (*data == 0xFE) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("paddd %s,", NameOfXMMRegister(regop));
data += PrintRightXMMOperand(data);
} else if (*data == 0x6E) {
data++;
int mod, regop, rm;
......
......@@ -391,6 +391,8 @@ TEST(DisasmIa320) {
// Move operation
__ movaps(xmm0, xmm1);
__ shufps(xmm0, xmm0, 0x0);
__ movups(xmm1, Operand(ebx, ecx, times_4, 10000));
__ movups(Operand(ebx, ecx, times_4, 10000), xmm1);
__ cvtsd2ss(xmm0, xmm1);
__ cvtsd2ss(xmm0, Operand(ebx, ecx, times_4, 10000));
......@@ -468,6 +470,55 @@ TEST(DisasmIa320) {
__ punpckldq(xmm1, xmm6);
__ punpckhdq(xmm7, xmm5);
__ paddd(xmm1, Operand(ebx, ecx, times_4, 10000));
__ paddd(xmm1, xmm0);
__ psubd(xmm1, Operand(ebx, ecx, times_4, 10000));
__ psubd(xmm1, xmm0);
__ pmuludq(xmm1, Operand(ebx, ecx, times_4, 10000));
__ pmuludq(xmm1, xmm0);
__ punpackldq(xmm1, Operand(ebx, ecx, times_4, 10000));
__ punpackldq(xmm1, xmm0);
__ cvtdq2ps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ cvtdq2ps(xmm1, xmm0);
__ cvtps2dq(xmm1, Operand(ebx, ecx, times_4, 10000));
__ cvtps2dq(xmm1, xmm0);
}
{
__ andps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ andps(xmm1, xmm0);
__ xorps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ xorps(xmm1, xmm0);
__ orps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ orps(xmm1, xmm0);
__ addps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ addps(xmm1, xmm0);
__ subps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ subps(xmm1, xmm0);
__ mulps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ mulps(xmm1, xmm0);
__ divps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ divps(xmm1, xmm0);
__ minps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ minps(xmm1, xmm0);
__ maxps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ maxps(xmm1, xmm0);
__ rcpps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ rcpps(xmm1, xmm0);
__ rsqrtps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ rsqrtps(xmm1, xmm0);
__ sqrtps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ sqrtps(xmm1, xmm0);
__ cmpps(xmm1, xmm0, 123);
__ cmpeqps(xmm1, xmm0);
__ cmpltps(xmm1, xmm0);
__ cmpleps(xmm1, xmm0);
__ cmpneqps(xmm1, xmm0);
__ cmpnltps(xmm1, xmm0);
__ cmpnleps(xmm1, xmm0);
}
// cmov.
......@@ -496,6 +547,9 @@ TEST(DisasmIa320) {
__ pextrd(eax, xmm0, 1);
__ pinsrd(xmm1, eax, 0);
__ extractps(eax, xmm1, 0);
__ insertps(xmm1, xmm0, 0);
__ pmulld(xmm1, Operand(ebx, ecx, times_4, 10000));
__ pmulld(xmm1, xmm0);
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment