Commit 3800ba6c authored by jing.bao, committed by Commit bot

[ia32] Add some SSE2, SSE4 instructions and AVX version for SIMD

Add instructions in the following formats:
instr(xmm, xmm/mem)
vinstr(xmm, xmm, xmm/mem)

BUG=

Review-Url: https://codereview.chromium.org/2744643004
Cr-Commit-Position: refs/heads/master@{#43760}
parent 118c376f
...@@ -2341,33 +2341,6 @@ void Assembler::movmskps(Register dst, XMMRegister src) { ...@@ -2341,33 +2341,6 @@ void Assembler::movmskps(Register dst, XMMRegister src) {
} }
// Emits the XMM register-to-register form of pcmpeqd: the bytes 0x66, 0x0F,
// 0x76 in that order, followed by whatever emit_sse_operand(dst, src) writes
// for the register pair.
void Assembler::pcmpeqd(XMMRegister dst, XMMRegister src) {
EnsureSpace ensure_space(this);
EMIT(0x66);  // prefix byte
EMIT(0x0F);  // escape byte
EMIT(0x76);  // opcode byte
emit_sse_operand(dst, src);
}
// Emits the XMM register-to-register form of punpckldq: the bytes 0x66, 0x0F,
// 0x62 in that order, followed by whatever emit_sse_operand(dst, src) writes
// for the register pair.
void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
EnsureSpace ensure_space(this);
EMIT(0x66);  // prefix byte
EMIT(0x0F);  // escape byte
EMIT(0x62);  // opcode byte
emit_sse_operand(dst, src);
}
// Emits the XMM register-to-register form of punpckhdq: the bytes 0x66, 0x0F,
// 0x6A in that order, followed by whatever emit_sse_operand(dst, src) writes
// for the register pair.
void Assembler::punpckhdq(XMMRegister dst, XMMRegister src) {
EnsureSpace ensure_space(this);
EMIT(0x66);  // prefix byte
EMIT(0x0F);  // escape byte
EMIT(0x6A);  // opcode byte
emit_sse_operand(dst, src);
}
void Assembler::maxsd(XMMRegister dst, const Operand& src) { void Assembler::maxsd(XMMRegister dst, const Operand& src) {
EnsureSpace ensure_space(this); EnsureSpace ensure_space(this);
EMIT(0xF2); EMIT(0xF2);
...@@ -2548,33 +2521,6 @@ void Assembler::extractps(Register dst, XMMRegister src, byte imm8) { ...@@ -2548,33 +2521,6 @@ void Assembler::extractps(Register dst, XMMRegister src, byte imm8) {
} }
// Emits the XMM register-to-register form of pand: the bytes 0x66, 0x0F, 0xDB
// in that order, followed by whatever emit_sse_operand(dst, src) writes for
// the register pair.
void Assembler::pand(XMMRegister dst, XMMRegister src) {
EnsureSpace ensure_space(this);
EMIT(0x66);  // prefix byte
EMIT(0x0F);  // escape byte
EMIT(0xDB);  // opcode byte
emit_sse_operand(dst, src);
}
// Emits the XMM register-to-register form of pxor: the bytes 0x66, 0x0F, 0xEF
// in that order, followed by whatever emit_sse_operand(dst, src) writes for
// the register pair.
void Assembler::pxor(XMMRegister dst, XMMRegister src) {
EnsureSpace ensure_space(this);
EMIT(0x66);  // prefix byte
EMIT(0x0F);  // escape byte
EMIT(0xEF);  // opcode byte
emit_sse_operand(dst, src);
}
// Emits the XMM register-to-register form of por: the bytes 0x66, 0x0F, 0xEB
// in that order, followed by whatever emit_sse_operand(dst, src) writes for
// the register pair.
void Assembler::por(XMMRegister dst, XMMRegister src) {
EnsureSpace ensure_space(this);
EMIT(0x66);  // prefix byte
EMIT(0x0F);  // escape byte
EMIT(0xEB);  // opcode byte
emit_sse_operand(dst, src);
}
void Assembler::ptest(XMMRegister dst, XMMRegister src) { void Assembler::ptest(XMMRegister dst, XMMRegister src) {
DCHECK(IsEnabled(SSE4_1)); DCHECK(IsEnabled(SSE4_1));
EnsureSpace ensure_space(this); EnsureSpace ensure_space(this);
...@@ -2888,6 +2834,17 @@ void Assembler::sse2_instr(XMMRegister dst, const Operand& src, byte prefix, ...@@ -2888,6 +2834,17 @@ void Assembler::sse2_instr(XMMRegister dst, const Operand& src, byte prefix,
emit_sse_operand(dst, src); emit_sse_operand(dst, src);
} }
// Shared emitter for SSE4 instructions whose encoding is four fixed bytes
// followed by an operand.
//
// Asserts via DCHECK that SSE4_1 is enabled, then emits |prefix|, |escape1|,
// |escape2| and |opcode| in exactly that order, and finally whatever
// emit_sse_operand(dst, src) writes for the destination register and the
// source operand.
void Assembler::sse4_instr(XMMRegister dst, const Operand& src, byte prefix,
byte escape1, byte escape2, byte opcode) {
DCHECK(IsEnabled(SSE4_1));
EnsureSpace ensure_space(this);
// The byte order below is the instruction encoding; do not reorder.
EMIT(prefix);
EMIT(escape1);
EMIT(escape2);
EMIT(opcode);
emit_sse_operand(dst, src);
}
void Assembler::vinstr(byte op, XMMRegister dst, XMMRegister src1, void Assembler::vinstr(byte op, XMMRegister dst, XMMRegister src1,
const Operand& src2, SIMDPrefix pp, LeadingOpcode m, const Operand& src2, SIMDPrefix pp, LeadingOpcode m,
VexW w) { VexW w) {
......
...@@ -1027,10 +1027,6 @@ class Assembler : public AssemblerBase { ...@@ -1027,10 +1027,6 @@ class Assembler : public AssemblerBase {
void movmskps(Register dst, XMMRegister src); void movmskps(Register dst, XMMRegister src);
void cmpltsd(XMMRegister dst, XMMRegister src); void cmpltsd(XMMRegister dst, XMMRegister src);
void pcmpeqd(XMMRegister dst, XMMRegister src);
void punpckldq(XMMRegister dst, XMMRegister src);
void punpckhdq(XMMRegister dst, XMMRegister src);
void maxsd(XMMRegister dst, XMMRegister src) { maxsd(dst, Operand(src)); } void maxsd(XMMRegister dst, XMMRegister src) { maxsd(dst, Operand(src)); }
void maxsd(XMMRegister dst, const Operand& src); void maxsd(XMMRegister dst, const Operand& src);
...@@ -1063,9 +1059,6 @@ class Assembler : public AssemblerBase { ...@@ -1063,9 +1059,6 @@ class Assembler : public AssemblerBase {
void movss(XMMRegister dst, XMMRegister src) { movss(dst, Operand(src)); } void movss(XMMRegister dst, XMMRegister src) { movss(dst, Operand(src)); }
void extractps(Register dst, XMMRegister src, byte imm8); void extractps(Register dst, XMMRegister src, byte imm8);
void pand(XMMRegister dst, XMMRegister src);
void pxor(XMMRegister dst, XMMRegister src);
void por(XMMRegister dst, XMMRegister src);
void ptest(XMMRegister dst, XMMRegister src); void ptest(XMMRegister dst, XMMRegister src);
void pslld(XMMRegister reg, int8_t shift); void pslld(XMMRegister reg, int8_t shift);
...@@ -1445,6 +1438,31 @@ class Assembler : public AssemblerBase { ...@@ -1445,6 +1438,31 @@ class Assembler : public AssemblerBase {
SSE2_INSTRUCTION_LIST(DECLARE_SSE2_AVX_INSTRUCTION) SSE2_INSTRUCTION_LIST(DECLARE_SSE2_AVX_INSTRUCTION)
#undef DECLARE_SSE2_AVX_INSTRUCTION #undef DECLARE_SSE2_AVX_INSTRUCTION
// Generates two overloads per row of SSE4_INSTRUCTION_LIST, both named after
// the row's instruction column:
//   instruction(XMMRegister, XMMRegister)    wraps src in an Operand and
//                                            forwards to the overload below.
//   instruction(XMMRegister, const Operand&) calls sse4_instr, passing the
//                                            row's prefix, escape1, escape2
//                                            and opcode columns, each pasted
//                                            onto "0x" to form a hex literal.
// The helper macro is undefined again immediately after the list is expanded.
#define DECLARE_SSE4_INSTRUCTION(instruction, prefix, escape1, escape2, \
opcode) \
void instruction(XMMRegister dst, XMMRegister src) { \
instruction(dst, Operand(src)); \
} \
void instruction(XMMRegister dst, const Operand& src) { \
sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
}
SSE4_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
#undef DECLARE_SSE4_INSTRUCTION
// Generates the three-operand "v"-prefixed overloads for every row of
// SSE4_INSTRUCTION_LIST:
//   v<instruction>(XMMRegister, XMMRegister, XMMRegister)
//       wraps src2 in an Operand and forwards to the overload below.
//   v<instruction>(XMMRegister, XMMRegister, const Operand&)
//       calls vinstr with the row's opcode as a hex literal, the identifier
//       formed by pasting "k" onto the prefix column, the identifier formed by
//       pasting "k" onto escape1 and escape2 together, and kW0.
// The helper macro is undefined again immediately after the list is expanded.
#define DECLARE_SSE4_AVX_INSTRUCTION(instruction, prefix, escape1, escape2, \
opcode) \
void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
v##instruction(dst, src1, Operand(src2)); \
} \
void v##instruction(XMMRegister dst, XMMRegister src1, \
const Operand& src2) { \
vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \
}
SSE4_INSTRUCTION_LIST(DECLARE_SSE4_AVX_INSTRUCTION)
#undef DECLARE_SSE4_AVX_INSTRUCTION
// Prefetch src position into cache level. // Prefetch src position into cache level.
// Level 1, 2 or 3 specifies CPU cache level. Level 0 specifies a // Level 1, 2 or 3 specifies CPU cache level. Level 0 specifies a
// non-temporal // non-temporal
...@@ -1577,6 +1595,8 @@ class Assembler : public AssemblerBase { ...@@ -1577,6 +1595,8 @@ class Assembler : public AssemblerBase {
void sse2_instr(XMMRegister dst, const Operand& src, byte prefix, byte escape, void sse2_instr(XMMRegister dst, const Operand& src, byte prefix, byte escape,
byte opcode); byte opcode);
void sse4_instr(XMMRegister dst, const Operand& src, byte prefix,
byte escape1, byte escape2, byte opcode);
void vinstr(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2, void vinstr(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2,
SIMDPrefix pp, LeadingOpcode m, VexW w); SIMDPrefix pp, LeadingOpcode m, VexW w);
// Most BMI instructions are similar. // Most BMI instructions are similar.
......
...@@ -802,6 +802,17 @@ int DisassemblerIA32::AVXInstruction(byte* data) { ...@@ -802,6 +802,17 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
current += PrintRightOperand(current); current += PrintRightOperand(current);
AppendToBuffer(",%s", NameOfCPURegister(vvvv)); AppendToBuffer(",%s", NameOfCPURegister(vvvv));
break; break;
// Expands to one switch case per SSE4_INSTRUCTION_LIST row, keyed on the row's
// opcode column pasted onto "0x". Each case appends
// "v<instruction> <regop>,<vvvv>," to the output buffer using XMM register
// names, then prints the right-hand XMM operand and advances |current| by the
// value PrintRightXMMOperand returns. Only the instruction and opcode columns
// are used; the three middle columns are ignored.
#define DECLARE_SSE_AVX_DIS_CASE(instruction, notUsed1, notUsed2, notUsed3, \
opcode) \
case 0x##opcode: { \
AppendToBuffer("v" #instruction " %s,%s,", NameOfXMMRegister(regop), \
NameOfXMMRegister(vvvv)); \
current += PrintRightXMMOperand(current); \
break; \
}
SSE4_INSTRUCTION_LIST(DECLARE_SSE_AVX_DIS_CASE)
#undef DECLARE_SSE_AVX_DIS_CASE
default: default:
UnimplementedInstruction(); UnimplementedInstruction();
} }
...@@ -1696,19 +1707,27 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer, ...@@ -1696,19 +1707,27 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
data++; data++;
if (*data == 0x38) { if (*data == 0x38) {
data++; data++;
if (*data == 0x17) { byte op = *data;
data++; data++;
int mod, regop, rm; int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm); get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("ptest %s,%s", switch (op) {
NameOfXMMRegister(regop), case 0x17:
NameOfXMMRegister(rm)); AppendToBuffer("ptest %s,%s", NameOfXMMRegister(regop),
data++; NameOfXMMRegister(rm));
} else if (*data == 0x2A) { data++;
// movntdqa break;
UnimplementedInstruction(); #define SSE4_DIS_CASE(instruction, notUsed1, notUsed2, notUsed3, opcode) \
} else { case 0x##opcode: { \
UnimplementedInstruction(); AppendToBuffer(#instruction " %s,", NameOfXMMRegister(regop)); \
data += PrintRightXMMOperand(data); \
break; \
}
SSE4_INSTRUCTION_LIST(SSE4_DIS_CASE)
#undef SSE4_DIS_CASE
default:
UnimplementedInstruction();
} }
} else if (*data == 0x3A) { } else if (*data == 0x3A) {
data++; data++;
...@@ -1831,28 +1850,6 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer, ...@@ -1831,28 +1850,6 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
NameOfXMMRegister(rm), NameOfXMMRegister(rm),
static_cast<int>(imm8)); static_cast<int>(imm8));
data += 2; data += 2;
} else if (*data == 0x62) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("punpckldq %s,%s", NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data++;
} else if (*data == 0x6A) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("punpckhdq %s,%s", NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data++;
} else if (*data == 0x76) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("pcmpeqd %s,%s",
NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data++;
} else if (*data == 0x90) { } else if (*data == 0x90) {
data++; data++;
AppendToBuffer("nop"); // 2 byte nop. AppendToBuffer("nop"); // 2 byte nop.
...@@ -1914,14 +1911,6 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer, ...@@ -1914,14 +1911,6 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
data += PrintRightOperand(data); data += PrintRightOperand(data);
AppendToBuffer(",%d", *reinterpret_cast<int8_t*>(data)); AppendToBuffer(",%d", *reinterpret_cast<int8_t*>(data));
data++; data++;
} else if (*data == 0xDB) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("pand %s,%s",
NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data++;
} else if (*data == 0xE7) { } else if (*data == 0xE7) {
data++; data++;
int mod, regop, rm; int mod, regop, rm;
...@@ -1932,39 +1921,27 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer, ...@@ -1932,39 +1921,27 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
} else { } else {
UnimplementedInstruction(); UnimplementedInstruction();
} }
} else if (*data == 0xEF) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("pxor %s,%s",
NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data++;
} else if (*data == 0xEB) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("por %s,%s",
NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data++;
} else if (*data == 0xFA) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("psubd %s,", NameOfXMMRegister(regop));
data += PrintRightXMMOperand(data);
} else if (*data == 0xFE) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("paddd %s,", NameOfXMMRegister(regop));
data += PrintRightXMMOperand(data);
} else if (*data == 0xB1) { } else if (*data == 0xB1) {
data++; data++;
data += PrintOperands("cmpxchg_w", OPER_REG_OP_ORDER, data); data += PrintOperands("cmpxchg_w", OPER_REG_OP_ORDER, data);
} else { } else {
UnimplementedInstruction(); byte op = *data;
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
switch (op) {
#define SSE2_DIS_CASE(instruction, notUsed1, notUsed2, opcode) \
case 0x##opcode: { \
AppendToBuffer(#instruction " %s,", NameOfXMMRegister(regop)); \
data += PrintRightXMMOperand(data); \
break; \
}
SSE2_INSTRUCTION_LIST(SSE2_DIS_CASE)
#undef SSE2_DIS_CASE
default:
UnimplementedInstruction();
}
} }
} else { } else {
UnimplementedInstruction(); UnimplementedInstruction();
......
...@@ -6,7 +6,52 @@ ...@@ -6,7 +6,52 @@
#define V8_SSE_INSTR_H_ #define V8_SSE_INSTR_H_
// X-macro table of SSE2 packed-integer instructions.
//
// Each row is V(name, prefix, escape, opcode). The last three columns are
// bare hex digits without a "0x" prefix; consumers paste "0x" onto them
// (e.g. `case 0x##opcode:`), so every row must keep that form.
//
// Every instruction must appear exactly once: consumers expand the list into
// switch cases and member declarations, so a repeated row yields duplicate
// case labels or redefinitions.
#define SSE2_INSTRUCTION_LIST(V) \
  V(paddb, 66, 0F, FC) \
  V(paddw, 66, 0F, FD) \
  V(paddd, 66, 0F, FE) \
  V(paddsb, 66, 0F, EC) \
  V(paddsw, 66, 0F, ED) \
  V(paddusb, 66, 0F, DC) \
  V(paddusw, 66, 0F, DD) \
  V(pand, 66, 0F, DB) \
  V(pcmpeqb, 66, 0F, 74) \
  V(pcmpeqw, 66, 0F, 75) \
  V(pcmpeqd, 66, 0F, 76) \
  V(pcmpgtb, 66, 0F, 64) \
  V(pcmpgtw, 66, 0F, 65) \
  V(pcmpgtd, 66, 0F, 66) \
  V(pmaxsw, 66, 0F, EE) \
  V(pmaxub, 66, 0F, DE) \
  V(pminsw, 66, 0F, EA) \
  V(pminub, 66, 0F, DA) \
  V(pmullw, 66, 0F, D5) \
  V(por, 66, 0F, EB) \
  V(psllw, 66, 0F, F1) \
  V(pslld, 66, 0F, F2) \
  V(psraw, 66, 0F, E1) \
  V(psrad, 66, 0F, E2) \
  V(psrlw, 66, 0F, D1) \
  V(psrld, 66, 0F, D2) \
  V(psubb, 66, 0F, F8) \
  V(psubw, 66, 0F, F9) \
  V(psubd, 66, 0F, FA) \
  V(psubsb, 66, 0F, E8) \
  V(psubsw, 66, 0F, E9) \
  V(psubusb, 66, 0F, D8) \
  V(psubusw, 66, 0F, D9) \
  V(punpckhdq, 66, 0F, 6A) \
  V(punpckldq, 66, 0F, 62) \
  V(pxor, 66, 0F, EF)
// X-macro table of SSE4 packed-integer min/max/multiply instructions.
//
// Each row is V(name, prefix, escape1, escape2, opcode). The last four columns
// are bare hex digits without a "0x" prefix; consumers paste "0x" (or "k")
// onto them, so every row must keep that form. All rows here share the
// 66 0F 38 leading bytes and differ only in the final opcode column.
#define SSE4_INSTRUCTION_LIST(V) \
V(pminsb, 66, 0F, 38, 38) \
V(pminsd, 66, 0F, 38, 39) \
V(pminuw, 66, 0F, 38, 3A) \
V(pminud, 66, 0F, 38, 3B) \
V(pmaxsb, 66, 0F, 38, 3C) \
V(pmaxsd, 66, 0F, 38, 3D) \
V(pmaxuw, 66, 0F, 38, 3E) \
V(pmaxud, 66, 0F, 38, 3F) \
V(pmulld, 66, 0F, 38, 40)
#endif // V8_SSE_INSTR_H_ #endif // V8_SSE_INSTR_H_
...@@ -462,12 +462,6 @@ TEST(DisasmIa320) { ...@@ -462,12 +462,6 @@ TEST(DisasmIa320) {
__ psllq(xmm0, xmm1); __ psllq(xmm0, xmm1);
__ psrlq(xmm0, 17); __ psrlq(xmm0, 17);
__ psrlq(xmm0, xmm1); __ psrlq(xmm0, xmm1);
__ por(xmm0, xmm1);
__ pcmpeqd(xmm1, xmm0);
__ punpckldq(xmm1, xmm6);
__ punpckhdq(xmm7, xmm5);
__ pinsrw(xmm5, edx, 5); __ pinsrw(xmm5, edx, 5);
__ pinsrw(xmm5, Operand(edx, 4), 5); __ pinsrw(xmm5, Operand(edx, 4), 5);
...@@ -506,6 +500,13 @@ TEST(DisasmIa320) { ...@@ -506,6 +500,13 @@ TEST(DisasmIa320) {
__ pextrd(eax, xmm0, 1); __ pextrd(eax, xmm0, 1);
__ pinsrd(xmm1, eax, 0); __ pinsrd(xmm1, eax, 0);
__ extractps(eax, xmm1, 0); __ extractps(eax, xmm1, 0);
// For every SSE4_INSTRUCTION_LIST row, assemble the instruction twice: once
// with two XMM registers (xmm5, xmm1) and once with xmm5 and an
// Operand(edx, 4) source, covering both generated overloads. Only the
// instruction-name column of each row is used.
#define EMIT_SSE4_INSTR(instruction, notUsed1, notUsed2, notUsed3, notUsed4) \
__ instruction(xmm5, xmm1); \
__ instruction(xmm5, Operand(edx, 4));
SSE4_INSTRUCTION_LIST(EMIT_SSE4_INSTR)
#undef EMIT_SSE4_INSTR
} }
} }
...@@ -555,6 +556,14 @@ TEST(DisasmIa320) { ...@@ -555,6 +556,14 @@ TEST(DisasmIa320) {
SSE2_INSTRUCTION_LIST(EMIT_SSE2_AVXINSTR) SSE2_INSTRUCTION_LIST(EMIT_SSE2_AVXINSTR)
#undef EMIT_SSE2_AVXINSTR #undef EMIT_SSE2_AVXINSTR
// For every SSE4_INSTRUCTION_LIST row, assemble the "v"-prefixed variant
// twice: once with three XMM registers (xmm7, xmm5, xmm1) and once with
// xmm7, xmm5 and an Operand(edx, 4) source, covering both generated
// overloads. Only the instruction-name column of each row is used.
#define EMIT_SSE4_AVXINSTR(instruction, notUsed1, notUsed2, notUsed3, \
notUsed4) \
__ v##instruction(xmm7, xmm5, xmm1); \
__ v##instruction(xmm7, xmm5, Operand(edx, 4));
SSE4_INSTRUCTION_LIST(EMIT_SSE4_AVXINSTR)
#undef EMIT_SSE4_AVXINSTR
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment