Commit 4f3d27e6, authored by weiliang.lin, committed by Commit bot

[ia32] Introduce FMA3 instructions on scalar data elements.

Port of commit 83a635e0.

BUG=

Review URL: https://codereview.chromium.org/773783002

Cr-Commit-Position: refs/heads/master@{#25619}
parent 4fcc2dca
......@@ -2443,6 +2443,71 @@ void Assembler::pinsrd(XMMRegister dst, const Operand& src, int8_t offset) {
}
// Emits ADDSS dst, src as the byte sequence F3 0F 58 followed by the operand
// encoding produced by emit_sse_operand().
void Assembler::addss(XMMRegister dst, const Operand& src) {
  EnsureSpace space_guard(this);
  EMIT(0xF3);  // prefix byte
  EMIT(0x0F);  // two-byte opcode escape
  EMIT(0x58);  // opcode byte used for addss
  emit_sse_operand(dst, src);
}
// Emits SUBSS dst, src as the byte sequence F3 0F 5C followed by the operand
// encoding produced by emit_sse_operand().
void Assembler::subss(XMMRegister dst, const Operand& src) {
  EnsureSpace space_guard(this);
  EMIT(0xF3);  // prefix byte
  EMIT(0x0F);  // two-byte opcode escape
  EMIT(0x5C);  // opcode byte used for subss
  emit_sse_operand(dst, src);
}
// Emits MULSS dst, src as the byte sequence F3 0F 59 followed by the operand
// encoding produced by emit_sse_operand().
void Assembler::mulss(XMMRegister dst, const Operand& src) {
  EnsureSpace space_guard(this);
  EMIT(0xF3);  // prefix byte
  EMIT(0x0F);  // two-byte opcode escape
  EMIT(0x59);  // opcode byte used for mulss
  emit_sse_operand(dst, src);
}
// Emits DIVSS dst, src as the byte sequence F3 0F 5E followed by the operand
// encoding produced by emit_sse_operand().
void Assembler::divss(XMMRegister dst, const Operand& src) {
  EnsureSpace space_guard(this);
  EMIT(0xF3);  // prefix byte
  EMIT(0x0F);  // two-byte opcode escape
  EMIT(0x5E);  // opcode byte used for divss
  emit_sse_operand(dst, src);
}
// Emits UCOMISS dst, src as the byte sequence 0F 2E (no prefix byte, unlike
// the other *ss emitters in this file) followed by the operand encoding
// produced by emit_sse_operand().
void Assembler::ucomiss(XMMRegister dst, const Operand& src) {
  EnsureSpace space_guard(this);
  EMIT(0x0f);  // two-byte opcode escape
  EMIT(0x2e);  // opcode byte used for ucomiss
  emit_sse_operand(dst, src);
}
// AVX instructions
// Common emitter for the FMA3 "...sd" instructions.
//   op   - opcode byte selecting the concrete instruction (supplied by the
//          inline vf*sd wrappers).
//   dst  - passed with src2 to emit_sse_operand().
//   src1 - passed to emit_vex_prefix() (presumably lands in VEX.vvvv; the
//          helper is not shown here).
//   src2 - register-or-memory operand.
// Debug builds assert that FMA3 has been enabled on this assembler.
void Assembler::vfmasd(byte op, XMMRegister dst, XMMRegister src1,
                       const Operand& src2) {
  DCHECK(IsEnabled(FMA3));
  EnsureSpace space_guard(this);
  // The "sd" forms request kW1; the "ss" twin (vfmass) requests kW0.
  emit_vex_prefix(src1, kLIG, k66, k0F38, kW1);
  EMIT(op);
  emit_sse_operand(dst, src2);
}
// Common emitter for the FMA3 "...ss" instructions.
//   op   - opcode byte selecting the concrete instruction (supplied by the
//          inline vf*ss wrappers).
//   dst  - passed with src2 to emit_sse_operand().
//   src1 - passed to emit_vex_prefix() (presumably lands in VEX.vvvv; the
//          helper is not shown here).
//   src2 - register-or-memory operand.
// Debug builds assert that FMA3 has been enabled on this assembler.
void Assembler::vfmass(byte op, XMMRegister dst, XMMRegister src1,
                       const Operand& src2) {
  DCHECK(IsEnabled(FMA3));
  EnsureSpace space_guard(this);
  // The "ss" forms request kW0; the "sd" twin (vfmasd) requests kW1.
  emit_vex_prefix(src1, kLIG, k66, k0F38, kW0);
  EMIT(op);
  emit_sse_operand(dst, src2);
}
void Assembler::vsd(byte op, XMMRegister dst, XMMRegister src1,
const Operand& src2) {
DCHECK(IsEnabled(AVX));
......
......@@ -928,6 +928,17 @@ class Assembler : public AssemblerBase {
void cpuid();
// SSE instructions
// addss / subss / mulss / divss / ucomiss.
// For each instruction the Operand overload is the out-of-line emitter; the
// register-register overload wraps src in an Operand and forwards to it.
void addss(XMMRegister dst, const Operand& src);
void addss(XMMRegister dst, XMMRegister src) {
  addss(dst, Operand(src));
}

void subss(XMMRegister dst, const Operand& src);
void subss(XMMRegister dst, XMMRegister src) {
  subss(dst, Operand(src));
}

void mulss(XMMRegister dst, const Operand& src);
void mulss(XMMRegister dst, XMMRegister src) {
  mulss(dst, Operand(src));
}

void divss(XMMRegister dst, const Operand& src);
void divss(XMMRegister dst, XMMRegister src) {
  divss(dst, Operand(src));
}

void ucomiss(XMMRegister dst, const Operand& src);
void ucomiss(XMMRegister dst, XMMRegister src) {
  ucomiss(dst, Operand(src));
}
void movaps(XMMRegister dst, XMMRegister src);
void shufps(XMMRegister dst, XMMRegister src, byte imm8);
......@@ -1053,6 +1064,154 @@ class Assembler : public AssemblerBase {
void movntdq(const Operand& dst, XMMRegister src);
// AVX instructions
// vfmadd{132,213,231}sd.
// The Operand overload of each form hands its opcode byte (0x99 / 0xa9 /
// 0xb9) to vfmasd(); the register overload wraps src2 in an Operand and
// forwards to the Operand overload.
void vfmadd132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
  vfmasd(0x99, dst, src1, src2);
}
void vfmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  vfmadd132sd(dst, src1, Operand(src2));
}

void vfmadd213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
  vfmasd(0xa9, dst, src1, src2);
}
void vfmadd213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  vfmadd213sd(dst, src1, Operand(src2));
}

void vfmadd231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
  vfmasd(0xb9, dst, src1, src2);
}
void vfmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  vfmadd231sd(dst, src1, Operand(src2));
}
// vfmsub{132,213,231}sd.
// The Operand overload of each form hands its opcode byte (0x9b / 0xab /
// 0xbb) to vfmasd(); the register overload wraps src2 in an Operand and
// forwards to the Operand overload.
void vfmsub132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
  vfmasd(0x9b, dst, src1, src2);
}
void vfmsub132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  vfmsub132sd(dst, src1, Operand(src2));
}

void vfmsub213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
  vfmasd(0xab, dst, src1, src2);
}
void vfmsub213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  vfmsub213sd(dst, src1, Operand(src2));
}

void vfmsub231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
  vfmasd(0xbb, dst, src1, src2);
}
void vfmsub231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  vfmsub231sd(dst, src1, Operand(src2));
}
// vfnmadd{132,213,231}sd.
// The Operand overload of each form hands its opcode byte (0x9d / 0xad /
// 0xbd) to vfmasd(); the register overload wraps src2 in an Operand and
// forwards to the Operand overload.
void vfnmadd132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
  vfmasd(0x9d, dst, src1, src2);
}
void vfnmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  vfnmadd132sd(dst, src1, Operand(src2));
}

void vfnmadd213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
  vfmasd(0xad, dst, src1, src2);
}
void vfnmadd213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  vfnmadd213sd(dst, src1, Operand(src2));
}

void vfnmadd231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
  vfmasd(0xbd, dst, src1, src2);
}
void vfnmadd231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  vfnmadd231sd(dst, src1, Operand(src2));
}
// vfnmsub{132,213,231}sd.
// The Operand overload of each form hands its opcode byte (0x9f / 0xaf /
// 0xbf) to vfmasd(); the register overload wraps src2 in an Operand and
// forwards to the Operand overload.
void vfnmsub132sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
  vfmasd(0x9f, dst, src1, src2);
}
void vfnmsub132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  vfnmsub132sd(dst, src1, Operand(src2));
}

void vfnmsub213sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
  vfmasd(0xaf, dst, src1, src2);
}
void vfnmsub213sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  vfnmsub213sd(dst, src1, Operand(src2));
}

void vfnmsub231sd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
  vfmasd(0xbf, dst, src1, src2);
}
void vfnmsub231sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  vfnmsub231sd(dst, src1, Operand(src2));
}
// Out-of-line emitter shared by the vf{,n}m{add,sub}{132,213,231}sd wrappers;
// `op` is the opcode byte each wrapper passes in.
void vfmasd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
// vfmadd{132,213,231}ss.
// The Operand overload of each form hands its opcode byte (0x99 / 0xa9 /
// 0xb9) to vfmass(); the register overload wraps src2 in an Operand and
// forwards to the Operand overload.
void vfmadd132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
  vfmass(0x99, dst, src1, src2);
}
void vfmadd132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  vfmadd132ss(dst, src1, Operand(src2));
}

void vfmadd213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
  vfmass(0xa9, dst, src1, src2);
}
void vfmadd213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  vfmadd213ss(dst, src1, Operand(src2));
}

void vfmadd231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
  vfmass(0xb9, dst, src1, src2);
}
void vfmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  vfmadd231ss(dst, src1, Operand(src2));
}
// vfmsub{132,213,231}ss.
// The Operand overload of each form hands its opcode byte (0x9b / 0xab /
// 0xbb) to vfmass(); the register overload wraps src2 in an Operand and
// forwards to the Operand overload.
void vfmsub132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
  vfmass(0x9b, dst, src1, src2);
}
void vfmsub132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  vfmsub132ss(dst, src1, Operand(src2));
}

void vfmsub213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
  vfmass(0xab, dst, src1, src2);
}
void vfmsub213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  vfmsub213ss(dst, src1, Operand(src2));
}

void vfmsub231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
  vfmass(0xbb, dst, src1, src2);
}
void vfmsub231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  vfmsub231ss(dst, src1, Operand(src2));
}
// vfnmadd{132,213,231}ss.
// The Operand overload of each form hands its opcode byte (0x9d / 0xad /
// 0xbd) to vfmass(); the register overload wraps src2 in an Operand and
// forwards to the Operand overload.
void vfnmadd132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
  vfmass(0x9d, dst, src1, src2);
}
void vfnmadd132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  vfnmadd132ss(dst, src1, Operand(src2));
}

void vfnmadd213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
  vfmass(0xad, dst, src1, src2);
}
void vfnmadd213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  vfnmadd213ss(dst, src1, Operand(src2));
}

void vfnmadd231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
  vfmass(0xbd, dst, src1, src2);
}
void vfnmadd231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  vfnmadd231ss(dst, src1, Operand(src2));
}
// vfnmsub{132,213,231}ss.
// The Operand overload of each form hands its opcode byte (0x9f / 0xaf /
// 0xbf) to vfmass(); the register overload wraps src2 in an Operand and
// forwards to the Operand overload.
void vfnmsub132ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
  vfmass(0x9f, dst, src1, src2);
}
void vfnmsub132ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  vfnmsub132ss(dst, src1, Operand(src2));
}

void vfnmsub213ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
  vfmass(0xaf, dst, src1, src2);
}
void vfnmsub213ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  vfnmsub213ss(dst, src1, Operand(src2));
}

void vfnmsub231ss(XMMRegister dst, XMMRegister src1, const Operand& src2) {
  vfmass(0xbf, dst, src1, src2);
}
void vfnmsub231ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  vfnmsub231ss(dst, src1, Operand(src2));
}
// Out-of-line emitter shared by the vf{,n}m{add,sub}{132,213,231}ss wrappers;
// `op` is the opcode byte each wrapper passes in.
void vfmass(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
// Register-operand form of vaddsd: wraps src2 in an Operand and forwards to
// the Operand overload (declared outside this excerpt).
void vaddsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
  const Operand src2_operand(src2);
  vaddsd(dst, src1, src2_operand);
}
......
......@@ -319,7 +319,7 @@ class DisassemblerIA32 {
// Reports whether the W bit of the current VEX prefix is set.
// Returns false when the first prefix byte is 0xc5; otherwise tests bit 7
// (mask 0x80) of vex_byte2_.
bool vex_w() {
  if (vex_byte0_ == 0xc5) return false;
  // Compare against zero: masking with 0x80 yields 0 or 0x80, so the former
  // `== 1` comparison could never be true and W was always reported as clear.
  return (vex_byte2_ & 0x80) != 0;
}
bool vex_0f() {
......@@ -740,7 +740,74 @@ int DisassemblerIA32::CMov(byte* data) {
int DisassemblerIA32::AVXInstruction(byte* data) {
byte opcode = *data;
byte* current = data + 1;
if (vex_f2() && vex_0f()) {
if (vex_66() && vex_0f38()) {
int mod, regop, rm, vvvv = vex_vreg();
get_modrm(*current, &mod, &regop, &rm);
switch (opcode) {
case 0x99:
AppendToBuffer("vfmadd132s%c %s,%s,", float_size_code(),
NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0xa9:
AppendToBuffer("vfmadd213s%c %s,%s,", float_size_code(),
NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0xb9:
AppendToBuffer("vfmadd231s%c %s,%s,", float_size_code(),
NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0x9b:
AppendToBuffer("vfmsub132s%c %s,%s,", float_size_code(),
NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0xab:
AppendToBuffer("vfmsub213s%c %s,%s,", float_size_code(),
NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0xbb:
AppendToBuffer("vfmsub231s%c %s,%s,", float_size_code(),
NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0x9d:
AppendToBuffer("vfnmadd132s%c %s,%s,", float_size_code(),
NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0xad:
AppendToBuffer("vfnmadd213s%c %s,%s,", float_size_code(),
NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0xbd:
AppendToBuffer("vfnmadd231s%c %s,%s,", float_size_code(),
NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0x9f:
AppendToBuffer("vfnmsub132s%c %s,%s,", float_size_code(),
NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0xaf:
AppendToBuffer("vfnmsub213s%c %s,%s,", float_size_code(),
NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0xbf:
AppendToBuffer("vfnmsub231s%c %s,%s,", float_size_code(),
NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
default:
UnimplementedInstruction();
}
} else if (vex_f2() && vex_0f()) {
int mod, regop, rm, vvvv = vex_vreg();
get_modrm(*current, &mod, &regop, &rm);
switch (opcode) {
......@@ -1159,6 +1226,12 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data++;
} else if (f0byte == 0x2e) {
data += 2;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("ucomiss %s,", NameOfXMMRegister(regop));
data += PrintRightXMMOperand(data);
} else if (f0byte >= 0x53 && f0byte <= 0x5F) {
const char* const pseudo_op[] = {
"rcpps",
......@@ -1729,12 +1802,36 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("cvttss2si %s,", NameOfCPURegister(regop));
data += PrintRightXMMOperand(data);
} else if (b2 == 0x58) {
data += 3;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("addss %s,", NameOfXMMRegister(regop));
data += PrintRightXMMOperand(data);
} else if (b2 == 0x59) {
data += 3;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("mulss %s,", NameOfXMMRegister(regop));
data += PrintRightXMMOperand(data);
} else if (b2 == 0x5A) {
data += 3;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("cvtss2sd %s,", NameOfXMMRegister(regop));
data += PrintRightXMMOperand(data);
} else if (b2 == 0x5c) {
data += 3;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("subss %s,", NameOfXMMRegister(regop));
data += PrintRightXMMOperand(data);
} else if (b2 == 0x5e) {
data += 3;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("divss %s,", NameOfXMMRegister(regop));
data += PrintRightXMMOperand(data);
} else if (b2 == 0x6F) {
data += 3;
int mod, regop, rm;
......
This diff is collapsed.
......@@ -51,7 +51,7 @@ TEST(DisasmIa320) {
CcTest::InitializeVM();
Isolate* isolate = CcTest::i_isolate();
HandleScope scope(isolate);
v8::internal::byte buffer[2048];
v8::internal::byte buffer[4096];
Assembler assm(isolate, buffer, sizeof buffer);
DummyStaticFunction(NULL); // just bloody use it (DELETE; debugging)
......@@ -401,6 +401,14 @@ TEST(DisasmIa320) {
__ xorps(xmm0, Operand(ebx, ecx, times_4, 10000));
// Arithmetic operation
__ addss(xmm1, xmm0);
__ addss(xmm1, Operand(ebx, ecx, times_4, 10000));
__ mulss(xmm1, xmm0);
__ mulss(xmm1, Operand(ebx, ecx, times_4, 10000));
__ subss(xmm1, xmm0);
__ subss(xmm1, Operand(ebx, ecx, times_4, 10000));
__ divss(xmm1, xmm0);
__ divss(xmm1, Operand(ebx, ecx, times_4, 10000));
__ addps(xmm1, xmm0);
__ addps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ subps(xmm1, xmm0);
......@@ -409,6 +417,9 @@ TEST(DisasmIa320) {
__ mulps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ divps(xmm1, xmm0);
__ divps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ ucomiss(xmm0, xmm1);
__ ucomiss(xmm0, Operand(ebx, ecx, times_4, 10000));
}
{
__ cvttss2si(edx, Operand(ebx, ecx, times_4, 10000));
......@@ -486,6 +497,68 @@ TEST(DisasmIa320) {
}
}
// FMA3 instruction
{
if (CpuFeatures::IsSupported(FMA3)) {
CpuFeatureScope scope(&assm, FMA3);
__ vfmadd132sd(xmm0, xmm1, xmm2);
__ vfmadd132sd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vfmadd213sd(xmm0, xmm1, xmm2);
__ vfmadd213sd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vfmadd231sd(xmm0, xmm1, xmm2);
__ vfmadd231sd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vfmsub132sd(xmm0, xmm1, xmm2);
__ vfmsub132sd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vfmsub213sd(xmm0, xmm1, xmm2);
__ vfmsub213sd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vfmsub231sd(xmm0, xmm1, xmm2);
__ vfmsub231sd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vfnmadd132sd(xmm0, xmm1, xmm2);
__ vfnmadd132sd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vfnmadd213sd(xmm0, xmm1, xmm2);
__ vfnmadd213sd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vfnmadd231sd(xmm0, xmm1, xmm2);
__ vfnmadd231sd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vfnmsub132sd(xmm0, xmm1, xmm2);
__ vfnmsub132sd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vfnmsub213sd(xmm0, xmm1, xmm2);
__ vfnmsub213sd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vfnmsub231sd(xmm0, xmm1, xmm2);
__ vfnmsub231sd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vfmadd132ss(xmm0, xmm1, xmm2);
__ vfmadd132ss(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vfmadd213ss(xmm0, xmm1, xmm2);
__ vfmadd213ss(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vfmadd231ss(xmm0, xmm1, xmm2);
__ vfmadd231ss(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vfmsub132ss(xmm0, xmm1, xmm2);
__ vfmsub132ss(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vfmsub213ss(xmm0, xmm1, xmm2);
__ vfmsub213ss(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vfmsub231ss(xmm0, xmm1, xmm2);
__ vfmsub231ss(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vfnmadd132ss(xmm0, xmm1, xmm2);
__ vfnmadd132ss(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vfnmadd213ss(xmm0, xmm1, xmm2);
__ vfnmadd213ss(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vfnmadd231ss(xmm0, xmm1, xmm2);
__ vfnmadd231ss(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vfnmsub132ss(xmm0, xmm1, xmm2);
__ vfnmsub132ss(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vfnmsub213ss(xmm0, xmm1, xmm2);
__ vfnmsub213ss(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vfnmsub231ss(xmm0, xmm1, xmm2);
__ vfnmsub231ss(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
}
}
// xchg.
{
__ xchg(eax, eax);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment