Commit 9359dc4d authored by jing.bao, committed by Commit Bot

[ia32] Add psignb/w/d and AVX version

Reimplement pshufb using the SSSE3 instruction-list macro.

Bug: 
Change-Id: I5556ce1108378fc7a7658443cd09c3f676c16aa7
Reviewed-on: https://chromium-review.googlesource.com/603907
Reviewed-by: Bill Budge <bbudge@chromium.org>
Reviewed-by: Benedikt Meurer <bmeurer@chromium.org>
Commit-Queue: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#47228}
parent fc574b16
...@@ -2707,16 +2707,6 @@ void Assembler::psrlq(XMMRegister dst, XMMRegister src) { ...@@ -2707,16 +2707,6 @@ void Assembler::psrlq(XMMRegister dst, XMMRegister src) {
emit_sse_operand(dst, src); emit_sse_operand(dst, src);
} }
// Emits the SSSE3 pshufb instruction: the fixed byte sequence 66 0F 38 00
// followed by the operand encoding for (dst, src).
// SSSE3 must be enabled on this assembler (checked with DCHECK).
void Assembler::pshufb(XMMRegister dst, const Operand& src) {
DCHECK(IsEnabled(SSSE3));
EnsureSpace ensure_space(this);
// The four bytes below must be emitted in exactly this order.
EMIT(0x66);
EMIT(0x0F);
EMIT(0x38);
EMIT(0x00);
emit_sse_operand(dst, src);
}
void Assembler::pshuflw(XMMRegister dst, const Operand& src, uint8_t shuffle) { void Assembler::pshuflw(XMMRegister dst, const Operand& src, uint8_t shuffle) {
EnsureSpace ensure_space(this); EnsureSpace ensure_space(this);
EMIT(0xF2); EMIT(0xF2);
...@@ -3070,6 +3060,17 @@ void Assembler::sse2_instr(XMMRegister dst, const Operand& src, byte prefix, ...@@ -3070,6 +3060,17 @@ void Assembler::sse2_instr(XMMRegister dst, const Operand& src, byte prefix,
emit_sse_operand(dst, src); emit_sse_operand(dst, src);
} }
// Shared emitter behind the instruction methods generated from
// SSSE3_INSTRUCTION_LIST. Writes the bytes prefix, escape1, escape2 and
// opcode in that order, then the operand encoding for (dst, src).
// SSSE3 must be enabled on this assembler (checked with DCHECK).
void Assembler::ssse3_instr(XMMRegister dst, const Operand& src, byte prefix,
byte escape1, byte escape2, byte opcode) {
DCHECK(IsEnabled(SSSE3));
EnsureSpace ensure_space(this);
// Byte order is the instruction encoding; do not reorder.
EMIT(prefix);
EMIT(escape1);
EMIT(escape2);
EMIT(opcode);
emit_sse_operand(dst, src);
}
void Assembler::sse4_instr(XMMRegister dst, const Operand& src, byte prefix, void Assembler::sse4_instr(XMMRegister dst, const Operand& src, byte prefix,
byte escape1, byte escape2, byte opcode) { byte escape1, byte escape2, byte opcode) {
DCHECK(IsEnabled(SSE4_1)); DCHECK(IsEnabled(SSE4_1));
......
...@@ -1141,9 +1141,6 @@ class Assembler : public AssemblerBase { ...@@ -1141,9 +1141,6 @@ class Assembler : public AssemblerBase {
void psrlq(XMMRegister reg, int8_t shift); void psrlq(XMMRegister reg, int8_t shift);
void psrlq(XMMRegister dst, XMMRegister src); void psrlq(XMMRegister dst, XMMRegister src);
// pshufb is SSSE3 instruction
void pshufb(XMMRegister dst, XMMRegister src) { pshufb(dst, Operand(src)); }
void pshufb(XMMRegister dst, const Operand& src);
void pshuflw(XMMRegister dst, XMMRegister src, uint8_t shuffle) { void pshuflw(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
pshuflw(dst, Operand(src), shuffle); pshuflw(dst, Operand(src), shuffle);
} }
...@@ -1423,12 +1420,6 @@ class Assembler : public AssemblerBase { ...@@ -1423,12 +1420,6 @@ class Assembler : public AssemblerBase {
void vpsraw(XMMRegister dst, XMMRegister src, int8_t imm8); void vpsraw(XMMRegister dst, XMMRegister src, int8_t imm8);
void vpsrad(XMMRegister dst, XMMRegister src, int8_t imm8); void vpsrad(XMMRegister dst, XMMRegister src, int8_t imm8);
void vpshufb(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vpshufb(dst, src1, Operand(src2));
}
void vpshufb(XMMRegister dst, XMMRegister src1, const Operand& src2) {
vinstr(0x00, dst, src1, src2, k66, k0F38, kW0);
}
void vpshuflw(XMMRegister dst, XMMRegister src, uint8_t shuffle) { void vpshuflw(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
vpshuflw(dst, Operand(src), shuffle); vpshuflw(dst, Operand(src), shuffle);
} }
...@@ -1647,6 +1638,18 @@ class Assembler : public AssemblerBase { ...@@ -1647,6 +1638,18 @@ class Assembler : public AssemblerBase {
SSE2_INSTRUCTION_LIST(DECLARE_SSE2_AVX_INSTRUCTION) SSE2_INSTRUCTION_LIST(DECLARE_SSE2_AVX_INSTRUCTION)
#undef DECLARE_SSE2_AVX_INSTRUCTION #undef DECLARE_SSE2_AVX_INSTRUCTION
// For every entry of SSSE3_INSTRUCTION_LIST, defines two overloads named
// after the instruction: an (XMMRegister, XMMRegister) form that wraps src in
// an Operand and forwards, and an (XMMRegister, const Operand&) form that
// calls ssse3_instr. The list supplies bare hex digits (e.g. 66, 0F, 38, 00);
// pasting them after "0x" turns them into the byte arguments.
#define DECLARE_SSSE3_INSTRUCTION(instruction, prefix, escape1, escape2, \
opcode) \
void instruction(XMMRegister dst, XMMRegister src) { \
instruction(dst, Operand(src)); \
} \
void instruction(XMMRegister dst, const Operand& src) { \
ssse3_instr(dst, src, 0x##prefix, 0x##escape1, 0x##escape2, 0x##opcode); \
}
SSSE3_INSTRUCTION_LIST(DECLARE_SSSE3_INSTRUCTION)
#undef DECLARE_SSSE3_INSTRUCTION
#define DECLARE_SSE4_INSTRUCTION(instruction, prefix, escape1, escape2, \ #define DECLARE_SSE4_INSTRUCTION(instruction, prefix, escape1, escape2, \
opcode) \ opcode) \
void instruction(XMMRegister dst, XMMRegister src) { \ void instruction(XMMRegister dst, XMMRegister src) { \
...@@ -1659,8 +1662,8 @@ class Assembler : public AssemblerBase { ...@@ -1659,8 +1662,8 @@ class Assembler : public AssemblerBase {
SSE4_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION) SSE4_INSTRUCTION_LIST(DECLARE_SSE4_INSTRUCTION)
#undef DECLARE_SSE4_INSTRUCTION #undef DECLARE_SSE4_INSTRUCTION
#define DECLARE_SSE4_AVX_INSTRUCTION(instruction, prefix, escape1, escape2, \ #define DECLARE_SSE34_AVX_INSTRUCTION(instruction, prefix, escape1, escape2, \
opcode) \ opcode) \
void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \ void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
v##instruction(dst, src1, Operand(src2)); \ v##instruction(dst, src1, Operand(src2)); \
} \ } \
...@@ -1669,8 +1672,9 @@ class Assembler : public AssemblerBase { ...@@ -1669,8 +1672,9 @@ class Assembler : public AssemblerBase {
vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \ vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape1##escape2, kW0); \
} }
SSE4_INSTRUCTION_LIST(DECLARE_SSE4_AVX_INSTRUCTION) SSSE3_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
#undef DECLARE_SSE4_AVX_INSTRUCTION SSE4_INSTRUCTION_LIST(DECLARE_SSE34_AVX_INSTRUCTION)
#undef DECLARE_SSE34_AVX_INSTRUCTION
// Prefetch src position into cache level. // Prefetch src position into cache level.
// Level 1, 2 or 3 specifies CPU cache level. Level 0 specifies a // Level 1, 2 or 3 specifies CPU cache level. Level 0 specifies a
...@@ -1800,6 +1804,8 @@ class Assembler : public AssemblerBase { ...@@ -1800,6 +1804,8 @@ class Assembler : public AssemblerBase {
void sse2_instr(XMMRegister dst, const Operand& src, byte prefix, byte escape, void sse2_instr(XMMRegister dst, const Operand& src, byte prefix, byte escape,
byte opcode); byte opcode);
void ssse3_instr(XMMRegister dst, const Operand& src, byte prefix,
byte escape1, byte escape2, byte opcode);
void sse4_instr(XMMRegister dst, const Operand& src, byte prefix, void sse4_instr(XMMRegister dst, const Operand& src, byte prefix,
byte escape1, byte escape2, byte opcode); byte escape1, byte escape2, byte opcode);
void vinstr(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2, void vinstr(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2,
......
...@@ -738,11 +738,6 @@ int DisassemblerIA32::AVXInstruction(byte* data) { ...@@ -738,11 +738,6 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
int mod, regop, rm, vvvv = vex_vreg(); int mod, regop, rm, vvvv = vex_vreg();
get_modrm(*current, &mod, &regop, &rm); get_modrm(*current, &mod, &regop, &rm);
switch (opcode) { switch (opcode) {
case 0x00:
AppendToBuffer("vpshufb %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0x99: case 0x99:
AppendToBuffer("vfmadd132s%c %s,%s,", float_size_code(), AppendToBuffer("vfmadd132s%c %s,%s,", float_size_code(),
NameOfXMMRegister(regop), NameOfXMMRegister(vvvv)); NameOfXMMRegister(regop), NameOfXMMRegister(vvvv));
...@@ -817,6 +812,7 @@ int DisassemblerIA32::AVXInstruction(byte* data) { ...@@ -817,6 +812,7 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
break; \ break; \
} }
SSSE3_INSTRUCTION_LIST(DECLARE_SSE_AVX_DIS_CASE)
SSE4_INSTRUCTION_LIST(DECLARE_SSE_AVX_DIS_CASE) SSE4_INSTRUCTION_LIST(DECLARE_SSE_AVX_DIS_CASE)
#undef DECLARE_SSE_AVX_DIS_CASE #undef DECLARE_SSE_AVX_DIS_CASE
default: default:
...@@ -1885,24 +1881,21 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer, ...@@ -1885,24 +1881,21 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
int mod, regop, rm; int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm); get_modrm(*data, &mod, &regop, &rm);
switch (op) { switch (op) {
case 0x00:
AppendToBuffer("pshufb %s,", NameOfXMMRegister(regop));
data += PrintRightXMMOperand(data);
break;
case 0x17: case 0x17:
AppendToBuffer("ptest %s,%s", NameOfXMMRegister(regop), AppendToBuffer("ptest %s,%s", NameOfXMMRegister(regop),
NameOfXMMRegister(rm)); NameOfXMMRegister(rm));
data++; data++;
break; break;
#define SSE4_DIS_CASE(instruction, notUsed1, notUsed2, notUsed3, opcode) \ #define SSE34_DIS_CASE(instruction, notUsed1, notUsed2, notUsed3, opcode) \
case 0x##opcode: { \ case 0x##opcode: { \
AppendToBuffer(#instruction " %s,", NameOfXMMRegister(regop)); \ AppendToBuffer(#instruction " %s,", NameOfXMMRegister(regop)); \
data += PrintRightXMMOperand(data); \ data += PrintRightXMMOperand(data); \
break; \ break; \
} }
SSE4_INSTRUCTION_LIST(SSE4_DIS_CASE) SSSE3_INSTRUCTION_LIST(SSE34_DIS_CASE)
#undef SSE4_DIS_CASE SSE4_INSTRUCTION_LIST(SSE34_DIS_CASE)
#undef SSE34_DIS_CASE
default: default:
UnimplementedInstruction(); UnimplementedInstruction();
} }
......
...@@ -43,6 +43,12 @@ ...@@ -43,6 +43,12 @@
V(punpckldq, 66, 0F, 62) \ V(punpckldq, 66, 0F, 62) \
V(pxor, 66, 0F, EF) V(pxor, 66, 0F, EF)
// SSSE3 instructions, one V(name, prefix, escape1, escape2, opcode) per entry.
// The four encoding fields are bare hex digits with no 0x, because consumers
// token-paste them (after "0x" for byte values, after "k" for the vinstr
// arguments). Every entry here shares the 66 0F 38 prefix/escape bytes.
#define SSSE3_INSTRUCTION_LIST(V) \
V(pshufb, 66, 0F, 38, 00) \
V(psignb, 66, 0F, 38, 08) \
V(psignw, 66, 0F, 38, 09) \
V(psignd, 66, 0F, 38, 0A)
#define SSE4_INSTRUCTION_LIST(V) \ #define SSE4_INSTRUCTION_LIST(V) \
V(pminsb, 66, 0F, 38, 38) \ V(pminsb, 66, 0F, 38, 38) \
V(pminsd, 66, 0F, 38, 39) \ V(pminsd, 66, 0F, 38, 39) \
......
...@@ -530,11 +530,14 @@ TEST(DisasmIa320) { ...@@ -530,11 +530,14 @@ TEST(DisasmIa320) {
__ cmov(greater, eax, Operand(edx, 3)); __ cmov(greater, eax, Operand(edx, 3));
} }
// Test helper applied to both SSSE3_INSTRUCTION_LIST and
// SSE4_INSTRUCTION_LIST: assembles each listed instruction twice with xmm5 as
// destination, once with xmm1 as source and once with Operand(edx, 4).
// Only the instruction name is used; the encoding fields are ignored.
#define EMIT_SSE34_INSTR(instruction, notUsed1, notUsed2, notUsed3, notUsed4) \
__ instruction(xmm5, xmm1); \
__ instruction(xmm5, Operand(edx, 4));
{ {
if (CpuFeatures::IsSupported(SSSE3)) { if (CpuFeatures::IsSupported(SSSE3)) {
CpuFeatureScope scope(&assm, SSSE3); CpuFeatureScope scope(&assm, SSSE3);
__ pshufb(xmm5, xmm1); SSSE3_INSTRUCTION_LIST(EMIT_SSE34_INSTR)
__ pshufb(xmm5, Operand(edx, 4));
} }
} }
...@@ -553,14 +556,10 @@ TEST(DisasmIa320) { ...@@ -553,14 +556,10 @@ TEST(DisasmIa320) {
__ pinsrd(xmm1, Operand(edx, 4), 0); __ pinsrd(xmm1, Operand(edx, 4), 0);
__ extractps(eax, xmm1, 0); __ extractps(eax, xmm1, 0);
#define EMIT_SSE4_INSTR(instruction, notUsed1, notUsed2, notUsed3, notUsed4) \ SSE4_INSTRUCTION_LIST(EMIT_SSE34_INSTR)
__ instruction(xmm5, xmm1); \
__ instruction(xmm5, Operand(edx, 4));
SSE4_INSTRUCTION_LIST(EMIT_SSE4_INSTR)
#undef EMIT_SSE4_INSTR
} }
} }
#undef EMIT_SSE34_INSTR
// AVX instruction // AVX instruction
{ {
...@@ -646,8 +645,6 @@ TEST(DisasmIa320) { ...@@ -646,8 +645,6 @@ TEST(DisasmIa320) {
__ vpsraw(xmm0, xmm7, 21); __ vpsraw(xmm0, xmm7, 21);
__ vpsrad(xmm0, xmm7, 21); __ vpsrad(xmm0, xmm7, 21);
__ vpshufb(xmm5, xmm0, xmm1);
__ vpshufb(xmm5, xmm0, Operand(edx, 4));
__ vpshuflw(xmm5, xmm1, 5); __ vpshuflw(xmm5, xmm1, 5);
__ vpshuflw(xmm5, Operand(edx, 4), 5); __ vpshuflw(xmm5, Operand(edx, 4), 5);
__ vpshufd(xmm5, xmm1, 5); __ vpshufd(xmm5, xmm1, 5);
...@@ -681,13 +678,14 @@ TEST(DisasmIa320) { ...@@ -681,13 +678,14 @@ TEST(DisasmIa320) {
SSE2_INSTRUCTION_LIST(EMIT_SSE2_AVXINSTR) SSE2_INSTRUCTION_LIST(EMIT_SSE2_AVXINSTR)
#undef EMIT_SSE2_AVXINSTR #undef EMIT_SSE2_AVXINSTR
#define EMIT_SSE4_AVXINSTR(instruction, notUsed1, notUsed2, notUsed3, \ #define EMIT_SSE34_AVXINSTR(instruction, notUsed1, notUsed2, notUsed3, \
notUsed4) \ notUsed4) \
__ v##instruction(xmm7, xmm5, xmm1); \ __ v##instruction(xmm7, xmm5, xmm1); \
__ v##instruction(xmm7, xmm5, Operand(edx, 4)); __ v##instruction(xmm7, xmm5, Operand(edx, 4));
SSE4_INSTRUCTION_LIST(EMIT_SSE4_AVXINSTR) SSSE3_INSTRUCTION_LIST(EMIT_SSE34_AVXINSTR)
#undef EMIT_SSE4_AVXINSTR SSE4_INSTRUCTION_LIST(EMIT_SSE34_AVXINSTR)
#undef EMIT_SSE34_AVXINSTR
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment