Commit 3800ba6c authored by jing.bao's avatar jing.bao Committed by Commit bot

[ia32] Add some SSE2, SSE4 instructions and AVX version for SIMD

Add instructions in the following formats:
instr(xmm, xmm/mem)
vinstr(xmm, xmm, xmm/mem)

BUG=

Review-Url: https://codereview.chromium.org/2744643004
Cr-Commit-Position: refs/heads/master@{#43760}
parent 118c376f
......@@ -2341,33 +2341,6 @@ void Assembler::movmskps(Register dst, XMMRegister src) {
}
// Emits the register-to-register form of pcmpeqd: the bytes 66 0F 76
// followed by the SSE operand encoding for (dst, src).
void Assembler::pcmpeqd(XMMRegister dst, XMMRegister src) {
  const int kPrefix = 0x66;
  const int kEscape = 0x0F;
  const int kOpcode = 0x76;

  EnsureSpace ensure_space(this);
  EMIT(kPrefix);
  EMIT(kEscape);
  EMIT(kOpcode);
  emit_sse_operand(dst, src);
}
// Emits the register-to-register form of punpckldq: the bytes 66 0F 62
// followed by the SSE operand encoding for (dst, src).
void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
  const int kPrefix = 0x66;
  const int kEscape = 0x0F;
  const int kOpcode = 0x62;

  EnsureSpace ensure_space(this);
  EMIT(kPrefix);
  EMIT(kEscape);
  EMIT(kOpcode);
  emit_sse_operand(dst, src);
}
// Emits the register-to-register form of punpckhdq: the bytes 66 0F 6A
// followed by the SSE operand encoding for (dst, src).
void Assembler::punpckhdq(XMMRegister dst, XMMRegister src) {
  const int kPrefix = 0x66;
  const int kEscape = 0x0F;
  const int kOpcode = 0x6A;

  EnsureSpace ensure_space(this);
  EMIT(kPrefix);
  EMIT(kEscape);
  EMIT(kOpcode);
  emit_sse_operand(dst, src);
}
void Assembler::maxsd(XMMRegister dst, const Operand& src) {
EnsureSpace ensure_space(this);
EMIT(0xF2);
......@@ -2548,33 +2521,6 @@ void Assembler::extractps(Register dst, XMMRegister src, byte imm8) {
}
// Emits the register-to-register form of pand: the bytes 66 0F DB
// followed by the SSE operand encoding for (dst, src).
void Assembler::pand(XMMRegister dst, XMMRegister src) {
  const int kPrefix = 0x66;
  const int kEscape = 0x0F;
  const int kOpcode = 0xDB;

  EnsureSpace ensure_space(this);
  EMIT(kPrefix);
  EMIT(kEscape);
  EMIT(kOpcode);
  emit_sse_operand(dst, src);
}
// Emits the register-to-register form of pxor: the bytes 66 0F EF
// followed by the SSE operand encoding for (dst, src).
void Assembler::pxor(XMMRegister dst, XMMRegister src) {
  const int kPrefix = 0x66;
  const int kEscape = 0x0F;
  const int kOpcode = 0xEF;

  EnsureSpace ensure_space(this);
  EMIT(kPrefix);
  EMIT(kEscape);
  EMIT(kOpcode);
  emit_sse_operand(dst, src);
}
// Emits the register-to-register form of por: the bytes 66 0F EB
// followed by the SSE operand encoding for (dst, src).
void Assembler::por(XMMRegister dst, XMMRegister src) {
  const int kPrefix = 0x66;
  const int kEscape = 0x0F;
  const int kOpcode = 0xEB;

  EnsureSpace ensure_space(this);
  EMIT(kPrefix);
  EMIT(kEscape);
  EMIT(kOpcode);
  emit_sse_operand(dst, src);
}
void Assembler::ptest(XMMRegister dst, XMMRegister src) {
DCHECK(IsEnabled(SSE4_1));
EnsureSpace ensure_space(this);
......@@ -2888,6 +2834,17 @@ void Assembler::sse2_instr(XMMRegister dst, const Operand& src, byte prefix,
emit_sse_operand(dst, src);
}
// Shared emitter for SSE4 instructions with a three-byte opcode sequence.
// Writes prefix, escape1, escape2 and opcode in that order, then the SSE
// operand encoding for (dst, src). DCHECKs that SSE4_1 is enabled.
void Assembler::sse4_instr(XMMRegister dst, const Operand& src, byte prefix,
                           byte escape1, byte escape2, byte opcode) {
  DCHECK(IsEnabled(SSE4_1));
  EnsureSpace ensure_space(this);

  // Leading bytes of the instruction, in emission order.
  const byte leading_bytes[] = {prefix, escape1, escape2, opcode};
  for (byte value : leading_bytes) {
    EMIT(value);
  }

  emit_sse_operand(dst, src);
}
void Assembler::vinstr(byte op, XMMRegister dst, XMMRegister src1,
const Operand& src2, SIMDPrefix pp, LeadingOpcode m,
VexW w) {
......
......@@ -1027,10 +1027,6 @@ class Assembler : public AssemblerBase {
void movmskps(Register dst, XMMRegister src);
void cmpltsd(XMMRegister dst, XMMRegister src);
// Register-to-register forms. The implementations emit 66 0F followed by
// opcode 76 (pcmpeqd), 62 (punpckldq) or 6A (punpckhdq) and the operand
// encoding for (dst, src).
void pcmpeqd(XMMRegister dst, XMMRegister src);
void punpckldq(XMMRegister dst, XMMRegister src);
void punpckhdq(XMMRegister dst, XMMRegister src);
// Register form of maxsd: wraps src in an Operand and forwards to the
// Operand overload declared below.
void maxsd(XMMRegister dst, XMMRegister src) { maxsd(dst, Operand(src)); }
void maxsd(XMMRegister dst, const Operand& src);
......@@ -1063,9 +1059,6 @@ class Assembler : public AssemblerBase {
// Register form of movss: wraps src in an Operand and forwards to the
// Operand overload.
void movss(XMMRegister dst, XMMRegister src) { movss(dst, Operand(src)); }
void extractps(Register dst, XMMRegister src, byte imm8);
// Register-to-register forms. The implementations emit 66 0F followed by
// opcode DB (pand), EF (pxor) or EB (por) and the operand encoding for
// (dst, src).
void pand(XMMRegister dst, XMMRegister src);
void pxor(XMMRegister dst, XMMRegister src);
void por(XMMRegister dst, XMMRegister src);
// The implementation DCHECKs that SSE4_1 is enabled.
void ptest(XMMRegister dst, XMMRegister src);
void pslld(XMMRegister reg, int8_t shift);
......@@ -1445,6 +1438,31 @@ class Assembler : public AssemblerBase {
SSE2_INSTRUCTION_LIST(DECLARE_SSE2_AVX_INSTRUCTION)
#undef DECLARE_SSE2_AVX_INSTRUCTION
// For every entry of SSE4_INSTRUCTION_LIST, declares two inline overloads
// named after the instruction:
//   instruction(XMMRegister dst, XMMRegister src)   - wraps src in an Operand
//                                                     and forwards to the
//                                                     overload below.
//   instruction(XMMRegister dst, const Operand& src) - calls sse4_instr with
//                                                     the list's prefix,
//                                                     escape1, escape2 and
//                                                     opcode columns.
// The list columns are bare hex digits; "0x" is token-pasted in front of each
// one to form the byte literals passed to sse4_instr.
#define DECLARE_SSE4_INSTRUCTION(instruction, prefix, escape1, escape2, \
opcode) \
void instruction(XMMRegister dst, XMMRegister src) { \
instruction(dst, Operand(src)); \
} \
void instruction(XMMRegister dst, const Operand& src) { \
sse4_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
}
SSE4_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
#undef DECLARE_SSE4_INSTRUCTION
// For every entry of SSE4_INSTRUCTION_LIST, declares two inline overloads
// named v<instruction> taking (dst, src1, src2):
//   - the XMMRegister src2 form wraps src2 in an Operand and forwards to the
//     Operand form;
//   - the Operand src2 form calls vinstr with the list's opcode column
//     (prefixed with "0x"), the SIMD prefix constant k<prefix>, the leading
//     opcode constant k<escape1><escape2> (both names are built by token
//     pasting), and kW0.
#define DECLARE_SSE4_AVX_INSTRUCTION(instruction, prefix, escape1, escape2, \
opcode) \
void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
v##instruction(dst, src1, Operand(src2)); \
} \
void v##instruction(XMMRegister dst, XMMRegister src1, \
const Operand& src2) { \
vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \
}
SSE4_INSTRUCTION_LIST(DECLARE_SSE4_AVX_INSTRUCTION)
#undef DECLARE_SSE4_AVX_INSTRUCTION
// Prefetch src position into cache level.
// Level 1, 2 or 3 specifies CPU cache level. Level 0 specifies a
// non-temporal
......@@ -1577,6 +1595,8 @@ class Assembler : public AssemblerBase {
// Emitter taking a prefix byte, a single escape byte and an opcode byte for
// an instruction with operands (dst, src).
void sse2_instr(XMMRegister dst, const Operand& src, byte prefix, byte escape,
byte opcode);
// Emitter for instructions with two escape bytes: writes prefix, escape1,
// escape2 and opcode in that order, then the SSE operand encoding for
// (dst, src). The implementation DCHECKs that SSE4_1 is enabled.
void sse4_instr(XMMRegister dst, const Operand& src, byte prefix,
byte escape1, byte escape2, byte opcode);
// Emitter used by the v<instruction> wrappers; takes the opcode byte plus the
// SIMD prefix, leading-opcode and VEX.W selectors.
void vinstr(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2,
SIMDPrefix pp, LeadingOpcode m, VexW w);
// Most BMI instructions are similar.
......
......@@ -802,6 +802,17 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
current += PrintRightOperand(current);
AppendToBuffer(",%s", NameOfCPURegister(vvvv));
break;
// Expands SSE4_INSTRUCTION_LIST into one switch case per opcode column. Each
// case prints "v<instruction> <xmm regop>,<xmm vvvv>," and then the right-hand
// XMM operand, advancing `current` by the byte count PrintRightXMMOperand
// returns. The prefix and escape columns are not used here.
#define DECLARE_SSE_AVX_DIS_CASE(instruction, notUsed1, notUsed2, notUsed3, \
opcode) \
case 0x##opcode: { \
AppendToBuffer("v" #instruction " %s,%s,", NameOfXMMRegister(regop), \
NameOfXMMRegister(vvvv)); \
current += PrintRightXMMOperand(current); \
break; \
}
SSE4_INSTRUCTION_LIST(DECLARE_SSE_AVX_DIS_CASE)
#undef DECLARE_SSE_AVX_DIS_CASE
default:
UnimplementedInstruction();
}
......@@ -1696,19 +1707,27 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
data++;
if (*data == 0x38) {
data++;
if (*data == 0x17) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("ptest %s,%s",
NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data++;
} else if (*data == 0x2A) {
// movntdqa
UnimplementedInstruction();
} else {
UnimplementedInstruction();
byte op = *data;
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
switch (op) {
case 0x17:
AppendToBuffer("ptest %s,%s", NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data++;
break;
// Expands SSE4_INSTRUCTION_LIST into one switch case per opcode column. Each
// case prints "<instruction> <xmm regop>," and then the right-hand XMM
// operand, advancing `data` by the byte count PrintRightXMMOperand returns.
// The prefix and escape columns are not used here.
#define SSE4_DIS_CASE(instruction, notUsed1, notUsed2, notUsed3, opcode) \
case 0x##opcode: { \
AppendToBuffer(#instruction " %s,", NameOfXMMRegister(regop)); \
data += PrintRightXMMOperand(data); \
break; \
}
SSE4_INSTRUCTION_LIST(SSE4_DIS_CASE)
#undef SSE4_DIS_CASE
default:
UnimplementedInstruction();
}
} else if (*data == 0x3A) {
data++;
......@@ -1831,28 +1850,6 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
NameOfXMMRegister(rm),
static_cast<int>(imm8));
data += 2;
} else if (*data == 0x62) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("punpckldq %s,%s", NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data++;
} else if (*data == 0x6A) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("punpckhdq %s,%s", NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data++;
} else if (*data == 0x76) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("pcmpeqd %s,%s",
NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data++;
} else if (*data == 0x90) {
data++;
AppendToBuffer("nop"); // 2 byte nop.
......@@ -1914,14 +1911,6 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
data += PrintRightOperand(data);
AppendToBuffer(",%d", *reinterpret_cast<int8_t*>(data));
data++;
} else if (*data == 0xDB) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("pand %s,%s",
NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data++;
} else if (*data == 0xE7) {
data++;
int mod, regop, rm;
......@@ -1932,39 +1921,27 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
} else {
UnimplementedInstruction();
}
} else if (*data == 0xEF) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("pxor %s,%s",
NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data++;
} else if (*data == 0xEB) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("por %s,%s",
NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data++;
} else if (*data == 0xFA) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("psubd %s,", NameOfXMMRegister(regop));
data += PrintRightXMMOperand(data);
} else if (*data == 0xFE) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("paddd %s,", NameOfXMMRegister(regop));
data += PrintRightXMMOperand(data);
} else if (*data == 0xB1) {
data++;
data += PrintOperands("cmpxchg_w", OPER_REG_OP_ORDER, data);
} else {
UnimplementedInstruction();
byte op = *data;
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
switch (op) {
// Expands SSE2_INSTRUCTION_LIST into one switch case per opcode column. Each
// case prints "<instruction> <xmm regop>," and then the right-hand XMM
// operand, advancing `data` by the byte count PrintRightXMMOperand returns.
// The prefix and escape columns are not used here.
#define SSE2_DIS_CASE(instruction, notUsed1, notUsed2, opcode) \
case 0x##opcode: { \
AppendToBuffer(#instruction " %s,", NameOfXMMRegister(regop)); \
data += PrintRightXMMOperand(data); \
break; \
}
SSE2_INSTRUCTION_LIST(SSE2_DIS_CASE)
#undef SSE2_DIS_CASE
default:
UnimplementedInstruction();
}
}
} else {
UnimplementedInstruction();
......
......@@ -6,7 +6,52 @@
#define V8_SSE_INSTR_H_
// X-macro table of SSE2 instructions. Each entry is
//   V(instruction, prefix, escape, opcode)
// where the three byte columns are bare hex digits (consumers paste "0x" in
// front of them). Every line except the last must end with a line
// continuation, and each instruction/opcode must appear exactly once:
// consumers expand this list into member declarations and switch cases, so a
// duplicate entry would not compile.
#define SSE2_INSTRUCTION_LIST(V) \
  V(paddb, 66, 0F, FC)           \
  V(paddw, 66, 0F, FD)           \
  V(paddd, 66, 0F, FE)           \
  V(paddsb, 66, 0F, EC)          \
  V(paddsw, 66, 0F, ED)          \
  V(paddusb, 66, 0F, DC)         \
  V(paddusw, 66, 0F, DD)         \
  V(pand, 66, 0F, DB)            \
  V(pcmpeqb, 66, 0F, 74)         \
  V(pcmpeqw, 66, 0F, 75)         \
  V(pcmpeqd, 66, 0F, 76)         \
  V(pcmpgtb, 66, 0F, 64)         \
  V(pcmpgtw, 66, 0F, 65)         \
  V(pcmpgtd, 66, 0F, 66)         \
  V(pmaxsw, 66, 0F, EE)          \
  V(pmaxub, 66, 0F, DE)          \
  V(pminsw, 66, 0F, EA)          \
  V(pminub, 66, 0F, DA)          \
  V(pmullw, 66, 0F, D5)          \
  V(por, 66, 0F, EB)             \
  V(psllw, 66, 0F, F1)           \
  V(pslld, 66, 0F, F2)           \
  V(psraw, 66, 0F, E1)           \
  V(psrad, 66, 0F, E2)           \
  V(psrlw, 66, 0F, D1)           \
  V(psrld, 66, 0F, D2)           \
  V(psubb, 66, 0F, F8)           \
  V(psubw, 66, 0F, F9)           \
  V(psubd, 66, 0F, FA)           \
  V(psubsb, 66, 0F, E8)          \
  V(psubsw, 66, 0F, E9)          \
  V(psubusb, 66, 0F, D8)         \
  V(psubusw, 66, 0F, D9)         \
  V(punpckhdq, 66, 0F, 6A)       \
  V(punpckldq, 66, 0F, 62)       \
  V(pxor, 66, 0F, EF)
// X-macro table of SSE4 instructions. Each entry is
//   V(instruction, prefix, escape1, escape2, opcode)
// where the four byte columns are bare hex digits (consumers paste "0x" or a
// "k" constant prefix in front of them). Every entry listed here uses prefix
// 66 and the escape pair 0F 38.
#define SSE4_INSTRUCTION_LIST(V) \
V(pminsb, 66, 0F, 38, 38) \
V(pminsd, 66, 0F, 38, 39) \
V(pminuw, 66, 0F, 38, 3A) \
V(pminud, 66, 0F, 38, 3B) \
V(pmaxsb, 66, 0F, 38, 3C) \
V(pmaxsd, 66, 0F, 38, 3D) \
V(pmaxuw, 66, 0F, 38, 3E) \
V(pmaxud, 66, 0F, 38, 3F) \
V(pmulld, 66, 0F, 38, 40)
#endif // V8_SSE_INSTR_H_
......@@ -462,12 +462,6 @@ TEST(DisasmIa320) {
__ psllq(xmm0, xmm1);
__ psrlq(xmm0, 17);
__ psrlq(xmm0, xmm1);
__ por(xmm0, xmm1);
__ pcmpeqd(xmm1, xmm0);
__ punpckldq(xmm1, xmm6);
__ punpckhdq(xmm7, xmm5);
__ pinsrw(xmm5, edx, 5);
__ pinsrw(xmm5, Operand(edx, 4), 5);
......@@ -506,6 +500,13 @@ TEST(DisasmIa320) {
__ pextrd(eax, xmm0, 1);
__ pinsrd(xmm1, eax, 0);
__ extractps(eax, xmm1, 0);
// Assembles every instruction in SSE4_INSTRUCTION_LIST twice: once with a
// register source (xmm5, xmm1) and once with a memory source
// (xmm5, Operand(edx, 4)). Only the instruction name column is used.
#define EMIT_SSE4_INSTR(instruction, notUsed1, notUsed2, notUsed3, notUsed4) \
__ instruction(xmm5, xmm1); \
__ instruction(xmm5, Operand(edx, 4));
SSE4_INSTRUCTION_LIST(EMIT_SSE4_INSTR)
#undef EMIT_SSE4_INSTR
}
}
......@@ -555,6 +556,14 @@ TEST(DisasmIa320) {
SSE2_INSTRUCTION_LIST(EMIT_SSE2_AVXINSTR)
#undef EMIT_SSE2_AVXINSTR
// Assembles the v<instruction> form of every SSE4_INSTRUCTION_LIST entry
// twice: once with a register second source (xmm7, xmm5, xmm1) and once with
// a memory second source (xmm7, xmm5, Operand(edx, 4)). Only the instruction
// name column is used.
#define EMIT_SSE4_AVXINSTR(instruction, notUsed1, notUsed2, notUsed3, \
notUsed4) \
__ v##instruction(xmm7, xmm5, xmm1); \
__ v##instruction(xmm7, xmm5, Operand(edx, 4));
SSE4_INSTRUCTION_LIST(EMIT_SSE4_AVXINSTR)
#undef EMIT_SSE4_AVXINSTR
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment