Commit b9614d4b authored by jing.bao's avatar jing.bao Committed by Commit bot

Add several SIMD opcodes to IA32

CreateInt32x4, Int32x4ExtractLane, Int32x4ReplaceLane
Int32x4Add, Int32x4Sub

Also add paddd, psubd, vpaddd, vpsubd, pinsrw to ia32-assembler

BUG=

Review-Url: https://codereview.chromium.org/2695613004
Cr-Original-Commit-Position: refs/heads/master@{#43483}
Committed: https://chromium.googlesource.com/v8/v8/+/4deb9ffdecf121c69a3db7eae6698eae23a80a15
Review-Url: https://codereview.chromium.org/2695613004
Cr-Commit-Position: refs/heads/master@{#43708}
parent fd5b3e75
......@@ -1889,6 +1889,7 @@ v8_source_set("v8_base") {
"src/ia32/macro-assembler-ia32.h",
"src/ia32/simulator-ia32.cc",
"src/ia32/simulator-ia32.h",
"src/ia32/sse-instr.h",
"src/ic/ia32/access-compiler-ia32.cc",
"src/ic/ia32/handler-compiler-ia32.cc",
"src/ic/ia32/ic-ia32.cc",
......
......@@ -1614,10 +1614,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
break;
case kSSEFloat64InsertLowWord32:
__ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
__ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0, true);
break;
case kSSEFloat64InsertHighWord32:
__ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
__ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1, true);
break;
case kSSEFloat64LoadLowWord32:
__ movd(i.OutputDoubleRegister(), i.InputOperand(0));
......@@ -1888,6 +1888,40 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
break;
}
case kIA32Int32x4Splat: {
  // Splat: replicate the scalar input into all four int32 lanes.
  XMMRegister dst = i.OutputSimd128Register();
  // Load the scalar into lane 0 ...
  __ movd(dst, i.InputOperand(0));
  // ... then broadcast lane 0 to every lane (shuffle control 0b00000000).
  __ pshufd(dst, dst, 0x0);
  break;
}
case kIA32Int32x4ExtractLane: {
  // Move the lane selected by the immediate (input 1) into a GP register.
  __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
  break;
}
case kIA32Int32x4ReplaceLane: {
  // Overwrite one lane (index in input 1) with the scalar in input 2.
  // The selector defines the output same-as-first, so the destination
  // already holds the source vector.
  __ Pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
  break;
}
case kSSEInt32x4Add: {
  // SSE2 two-operand form: dst is also the first source (same-as-first).
  __ paddd(i.OutputSimd128Register(), i.InputOperand(1));
  break;
}
case kSSEInt32x4Sub: {
  __ psubd(i.OutputSimd128Register(), i.InputOperand(1));
  break;
}
case kAVXInt32x4Add: {
  // AVX three-operand form: dst = src0 + src1, no same-as-first constraint.
  CpuFeatureScope avx_scope(masm(), AVX);
  __ vpaddd(i.OutputSimd128Register(), i.InputSimd128Register(0),
            i.InputOperand(1));
  break;
}
case kAVXInt32x4Sub: {
  CpuFeatureScope avx_scope(masm(), AVX);
  __ vpsubd(i.OutputSimd128Register(), i.InputSimd128Register(0),
            i.InputOperand(1));
  break;
}
case kCheckedLoadInt8:
ASSEMBLE_CHECKED_LOAD_INTEGER(movsx_b);
break;
......
......@@ -110,7 +110,14 @@ namespace compiler {
V(IA32PushFloat32) \
V(IA32PushFloat64) \
V(IA32Poke) \
V(IA32StackCheck)
V(IA32StackCheck) \
V(IA32Int32x4Splat) \
V(IA32Int32x4ExtractLane) \
V(IA32Int32x4ReplaceLane) \
V(SSEInt32x4Add) \
V(SSEInt32x4Sub) \
V(AVXInt32x4Add) \
V(AVXInt32x4Sub)
// Addressing modes represent the "shape" of inputs to an instruction.
// Many instructions support multiple addressing modes. Addressing modes
......
......@@ -97,6 +97,13 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXFloat32Neg:
case kIA32BitcastFI:
case kIA32BitcastIF:
// The new IA32 SIMD opcodes get the same generic treatment as the other
// arithmetic opcodes in this group: no flags when they only touch
// registers, otherwise conservatively treated as a load with side effects.
case kIA32Int32x4Splat:
case kIA32Int32x4ExtractLane:
case kIA32Int32x4ReplaceLane:
case kSSEInt32x4Add:
case kSSEInt32x4Sub:
case kAVXInt32x4Add:
case kAVXInt32x4Sub:
  return (instr->addressing_mode() == kMode_None)
             ? kNoOpcodeFlags
             : kIsLoadOperation | kHasSideEffect;
......
......@@ -873,7 +873,9 @@ void InstructionSelector::VisitWord32Ror(Node* node) {
V(Float32Mul, kAVXFloat32Mul, kSSEFloat32Mul) \
V(Float64Mul, kAVXFloat64Mul, kSSEFloat64Mul) \
V(Float32Div, kAVXFloat32Div, kSSEFloat32Div) \
V(Float64Div, kAVXFloat64Div, kSSEFloat64Div)
V(Float64Div, kAVXFloat64Div, kSSEFloat64Div) \
V(Int32x4Add, kAVXInt32x4Add, kSSEInt32x4Add) \
V(Int32x4Sub, kAVXInt32x4Sub, kSSEInt32x4Sub)
#define FLOAT_UNOP_LIST(V) \
V(Float32Abs, kAVXFloat32Abs, kSSEFloat32Abs) \
......@@ -1756,6 +1758,25 @@ void InstructionSelector::VisitAtomicExchange(Node* node) {
Emit(code, 1, outputs, input_count, inputs);
}
// Lowers an Int32x4Splat node to kIA32Int32x4Splat via the generic
// one-input (register output, any-operand input) helper.
void InstructionSelector::VisitInt32x4Splat(Node* node) {
  VisitRO(this, node, kIA32Int32x4Splat);
}
// Lowers an Int32x4 lane extract to kIA32Int32x4ExtractLane: the vector is
// kept in a register and the lane index travels as an immediate operand.
void InstructionSelector::VisitInt32x4ExtractLane(Node* node) {
  IA32OperandGenerator gen(this);
  const int32_t lane_index = OpParameter<int32_t>(node);
  InstructionOperand result = gen.DefineAsRegister(node);
  InstructionOperand vector = gen.UseRegister(node->InputAt(0));
  InstructionOperand lane = gen.UseImmediate(lane_index);
  Emit(kIA32Int32x4ExtractLane, result, vector, lane);
}
// Lowers an Int32x4 lane replace to kIA32Int32x4ReplaceLane. The output
// aliases the incoming vector (same-as-first), the lane index is an
// immediate, and the replacement scalar may live anywhere (gen.Use).
void InstructionSelector::VisitInt32x4ReplaceLane(Node* node) {
  IA32OperandGenerator gen(this);
  const int32_t lane_index = OpParameter<int32_t>(node);
  InstructionOperand result = gen.DefineSameAsFirst(node);
  InstructionOperand vector = gen.UseRegister(node->InputAt(0));
  InstructionOperand lane = gen.UseImmediate(lane_index);
  InstructionOperand replacement = gen.Use(node->InputAt(1));
  Emit(kIA32Int32x4ReplaceLane, result, vector, lane, replacement);
}
// static
MachineOperatorBuilder::Flags
InstructionSelector::SupportedMachineOperatorFlags() {
......
......@@ -2031,7 +2031,7 @@ void InstructionSelector::VisitWord32PairShr(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitWord32PairSar(Node* node) { UNIMPLEMENTED(); }
#endif // V8_TARGET_ARCH_64_BIT
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitInt32x4Splat(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitInt32x4ExtractLane(Node* node) {
......@@ -2045,7 +2045,9 @@ void InstructionSelector::VisitInt32x4ReplaceLane(Node* node) {
void InstructionSelector::VisitInt32x4Add(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitInt32x4Sub(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_IA32
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM
void InstructionSelector::VisitSimd128Zero(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitSimd1x4Zero(Node* node) { UNIMPLEMENTED(); }
......
......@@ -48,7 +48,7 @@ namespace internal {
bool CpuFeatures::SupportsCrankshaft() { return true; }
bool CpuFeatures::SupportsSimd128() { return false; }
bool CpuFeatures::SupportsSimd128() { return true; }
static const byte kCallOpcode = 0xE8;
static const int kNoCodeAgeSequenceLength = 5;
......
......@@ -2665,6 +2665,15 @@ void Assembler::pextrd(const Operand& dst, XMMRegister src, int8_t offset) {
EMIT(offset);
}
// PINSRW: insert the low 16 bits of |src| into the 16-bit lane of |dst|
// selected by |offset|. Encoding: 66 0F C4 /r ib (SSE2).
void Assembler::pinsrw(XMMRegister dst, const Operand& src, int8_t offset) {
  // NOTE(review): |offset| is int8_t, so is_uint8 here only rejects
  // negative values; valid lane indices are 0..7 — confirm callers.
  DCHECK(is_uint8(offset));
  EnsureSpace ensure_space(this);
  EMIT(0x66);  // operand-size prefix selecting the XMM form
  EMIT(0x0F);  // two-byte opcode escape
  EMIT(0xC4);  // PINSRW opcode
  emit_sse_operand(dst, src);
  EMIT(offset);  // imm8 lane index
}
void Assembler::pinsrd(XMMRegister dst, const Operand& src, int8_t offset) {
DCHECK(IsEnabled(SSE4_1));
......@@ -2870,6 +2879,24 @@ void Assembler::rorx(Register dst, const Operand& src, byte imm8) {
EMIT(imm8);
}
// Generic emitter for SSE2 instructions of the form
//   prefix escape opcode /r   (e.g. paddd = 66 0F FE),
// used by the methods generated from SSE2_INSTRUCTION_LIST in the header.
void Assembler::sse2_instr(XMMRegister dst, const Operand& src, byte prefix,
                           byte escape, byte opcode) {
  EnsureSpace ensure_space(this);
  EMIT(prefix);
  EMIT(escape);
  EMIT(opcode);
  emit_sse_operand(dst, src);
}
// Generic emitter for VEX-encoded (AVX) three-operand instructions:
// dst = op(src1, src2). |pp|, |m| and |w| select the VEX fields standing in
// for the legacy mandatory prefix, the opcode escape, and the W bit.
void Assembler::vinstr(byte op, XMMRegister dst, XMMRegister src1,
                       const Operand& src2, SIMDPrefix pp, LeadingOpcode m,
                       VexW w) {
  DCHECK(IsEnabled(AVX));
  EnsureSpace ensure_space(this);
  // kL128: every instruction routed through here uses 128-bit vectors.
  // src1 is encoded in VEX.vvvv; dst goes into the ModRM reg field below.
  emit_vex_prefix(src1, kL128, pp, m, w);
  EMIT(op);
  emit_sse_operand(dst, src2);
}
void Assembler::emit_sse_operand(XMMRegister reg, const Operand& adr) {
Register ireg = { reg.code() };
......
......@@ -40,6 +40,7 @@
#include <deque>
#include "src/assembler.h"
#include "src/ia32/sse-instr.h"
#include "src/isolate.h"
#include "src/utils.h"
......@@ -1078,6 +1079,10 @@ class Assembler : public AssemblerBase {
pextrd(Operand(dst), src, offset);
}
void pextrd(const Operand& dst, XMMRegister src, int8_t offset);
// Insert the low 16 bits of a GP register into 16-bit lane |offset| of dst;
// forwards to the Operand overload below.
void pinsrw(XMMRegister dst, Register src, int8_t offset) {
  pinsrw(dst, Operand(src), offset);
}
void pinsrw(XMMRegister dst, const Operand& src, int8_t offset);
void pinsrd(XMMRegister dst, Register src, int8_t offset) {
pinsrd(dst, Operand(src), offset);
}
......@@ -1416,6 +1421,30 @@ class Assembler : public AssemblerBase {
void vpd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
void vpd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
// Other SSE and AVX instructions
// Expand SSE2_INSTRUCTION_LIST into reg,reg and reg,operand overloads that
// forward to the generic sse2_instr() emitter with each instruction's
// prefix/escape/opcode bytes (declared as hex digits in the list).
#define DECLARE_SSE2_INSTRUCTION(instruction, prefix, escape, opcode) \
  void instruction(XMMRegister dst, XMMRegister src) {                \
    instruction(dst, Operand(src));                                   \
  }                                                                   \
  void instruction(XMMRegister dst, const Operand& src) {             \
    sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode);         \
  }

SSE2_INSTRUCTION_LIST(DECLARE_SSE2_INSTRUCTION)
#undef DECLARE_SSE2_INSTRUCTION

// Same list expanded as the AVX three-operand forms ("v"-prefixed names),
// forwarding to the generic VEX emitter vinstr() with W fixed to 0.
#define DECLARE_SSE2_AVX_INSTRUCTION(instruction, prefix, escape, opcode)    \
  void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
    v##instruction(dst, src1, Operand(src2));                                \
  }                                                                          \
  void v##instruction(XMMRegister dst, XMMRegister src1,                     \
                      const Operand& src2) {                                 \
    vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0);          \
  }

SSE2_INSTRUCTION_LIST(DECLARE_SSE2_AVX_INSTRUCTION)
#undef DECLARE_SSE2_AVX_INSTRUCTION
// Prefetch src position into cache level.
// Level 1, 2 or 3 specifies CPU cache level. Level 0 specifies a
// non-temporal
......@@ -1546,6 +1575,10 @@ class Assembler : public AssemblerBase {
inline void emit_disp(Label* L, Displacement::Type type);
inline void emit_near_disp(Label* L);
// Shared low-level emitters backing the SSE2/AVX instruction-list macros.
void sse2_instr(XMMRegister dst, const Operand& src, byte prefix, byte escape,
                byte opcode);
void vinstr(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2,
            SIMDPrefix pp, LeadingOpcode m, VexW w);
// Most BMI instructions are similiar.
void bmi1(byte op, Register reg, Register vreg, const Operand& rm);
void bmi2(SIMDPrefix pp, byte op, Register reg, Register vreg,
......
......@@ -10,6 +10,7 @@
#include "src/base/compiler-specific.h"
#include "src/disasm.h"
#include "src/ia32/sse-instr.h"
namespace disasm {
......@@ -1002,6 +1003,16 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
// Decode the VEX-encoded counterparts of SSE2_INSTRUCTION_LIST
// (vpaddd, vpsubd, ...): print "v<name> reg,vvvv," then the rhs operand.
#define DECLARE_SSE_AVX_DIS_CASE(instruction, notUsed1, notUsed2, opcode) \
  case 0x##opcode: {                                                      \
    AppendToBuffer("v" #instruction " %s,%s,", NameOfXMMRegister(regop),  \
                   NameOfXMMRegister(vvvv));                              \
    current += PrintRightXMMOperand(current);                             \
    break;                                                                \
  }
  SSE2_INSTRUCTION_LIST(DECLARE_SSE_AVX_DIS_CASE)
#undef DECLARE_SSE_AVX_DIS_CASE
default:
UnimplementedInstruction();
}
......@@ -1895,6 +1906,14 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
AppendToBuffer("movd ");
data += PrintRightOperand(data);
AppendToBuffer(",%s", NameOfXMMRegister(regop));
} else if (*data == 0xC4) {
  // 66 0F C4 /r ib: pinsrw xmm, r/m32, imm8.
  data++;
  int mod, regop, rm;
  get_modrm(*data, &mod, &regop, &rm);
  AppendToBuffer("pinsrw %s,", NameOfXMMRegister(regop));
  data += PrintRightOperand(data);
  // The trailing byte is the imm8 lane index.
  AppendToBuffer(",%d", *reinterpret_cast<int8_t*>(data));
  data++;
} else if (*data == 0xDB) {
data++;
int mod, regop, rm;
......@@ -1929,6 +1948,18 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data++;
} else if (*data == 0xFA) {
  // 66 0F FA: psubd xmm, xmm/m128.
  data++;
  int mod, regop, rm;
  get_modrm(*data, &mod, &regop, &rm);
  AppendToBuffer("psubd %s,", NameOfXMMRegister(regop));
  data += PrintRightXMMOperand(data);
} else if (*data == 0xFE) {
  // 66 0F FE: paddd xmm, xmm/m128.
  data++;
  int mod, regop, rm;
  get_modrm(*data, &mod, &regop, &rm);
  AppendToBuffer("paddd %s,", NameOfXMMRegister(regop));
  data += PrintRightXMMOperand(data);
} else if (*data == 0xB1) {
data++;
data += PrintOperands("cmpxchg_w", OPER_REG_OP_ORDER, data);
......
......@@ -2270,32 +2270,41 @@ void MacroAssembler::Pextrd(Register dst, XMMRegister src, int8_t imm8) {
movd(dst, src);
return;
}
DCHECK_EQ(1, imm8);
if (CpuFeatures::IsSupported(SSE4_1)) {
CpuFeatureScope sse_scope(this, SSE4_1);
pextrd(dst, src, imm8);
return;
}
pshufd(xmm0, src, 1);
DCHECK_LT(imm8, 4);
pshufd(xmm0, src, imm8);
movd(dst, xmm0);
}
void MacroAssembler::Pinsrd(XMMRegister dst, const Operand& src, int8_t imm8) {
DCHECK(imm8 == 0 || imm8 == 1);
void MacroAssembler::Pinsrd(XMMRegister dst, const Operand& src, int8_t imm8,
bool is_64_bits) {
if (CpuFeatures::IsSupported(SSE4_1)) {
CpuFeatureScope sse_scope(this, SSE4_1);
pinsrd(dst, src, imm8);
return;
}
movd(xmm0, src);
if (imm8 == 1) {
punpckldq(dst, xmm0);
if (is_64_bits) {
movd(xmm0, src);
if (imm8 == 1) {
punpckldq(dst, xmm0);
} else {
DCHECK_EQ(0, imm8);
psrlq(dst, 32);
punpckldq(xmm0, dst);
movaps(dst, xmm0);
}
} else {
DCHECK_EQ(0, imm8);
psrlq(dst, 32);
punpckldq(xmm0, dst);
movaps(dst, xmm0);
DCHECK_LT(imm8, 4);
push(eax);
mov(eax, src);
pinsrw(dst, eax, imm8 * 2);
shr(eax, 16);
pinsrw(dst, eax, imm8 * 2 + 1);
pop(eax);
}
}
......
......@@ -752,10 +752,12 @@ class MacroAssembler: public Assembler {
// Non-SSE2 instructions.
void Pextrd(Register dst, XMMRegister src, int8_t imm8);
void Pinsrd(XMMRegister dst, Register src, int8_t imm8) {
Pinsrd(dst, Operand(src), imm8);
void Pinsrd(XMMRegister dst, Register src, int8_t imm8,
bool is_64_bits = false) {
Pinsrd(dst, Operand(src), imm8, is_64_bits);
}
void Pinsrd(XMMRegister dst, const Operand& src, int8_t imm8);
void Pinsrd(XMMRegister dst, const Operand& src, int8_t imm8,
bool is_64_bits = false);
void Lzcnt(Register dst, Register src) { Lzcnt(dst, Operand(src)); }
void Lzcnt(Register dst, const Operand& src);
......
// Copyright 2012 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_SSE_INSTR_H_
#define V8_SSE_INSTR_H_

// Table of SSE2 integer instructions that also have AVX (VEX-encoded)
// forms. Columns: mnemonic, mandatory prefix, opcode escape, opcode byte
// (all hex). Consumed by the assembler (to generate emitter methods) and
// by the disassembler (to generate decode cases).
#define SSE2_INSTRUCTION_LIST(V) \
  V(paddd, 66, 0F, FE)           \
  V(psubd, 66, 0F, FA)

#endif  // V8_SSE_INSTR_H_
......@@ -1486,6 +1486,7 @@
'ia32/macro-assembler-ia32.h',
'ia32/simulator-ia32.cc',
'ia32/simulator-ia32.h',
'ia32/sse-instr.h',
'builtins/ia32/builtins-ia32.cc',
'compiler/ia32/code-generator-ia32.cc',
'compiler/ia32/instruction-codes-ia32.h',
......
......@@ -468,6 +468,16 @@ TEST(DisasmIa320) {
__ punpckldq(xmm1, xmm6);
__ punpckhdq(xmm7, xmm5);
__ pinsrw(xmm5, edx, 5);
__ pinsrw(xmm5, Operand(edx, 4), 5);
#define EMIT_SSE2_INSTR(instruction, notUsed1, notUsed2, notUsed3) \
__ instruction(xmm5, xmm1); \
__ instruction(xmm5, Operand(edx, 4));
SSE2_INSTRUCTION_LIST(EMIT_SSE2_INSTR)
#undef EMIT_SSE2_INSTR
}
// cmov.
......@@ -538,6 +548,13 @@ TEST(DisasmIa320) {
__ vandpd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vxorpd(xmm0, xmm1, xmm2);
__ vxorpd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
#define EMIT_SSE2_AVXINSTR(instruction, notUsed1, notUsed2, notUsed3) \
__ v##instruction(xmm7, xmm5, xmm1); \
__ v##instruction(xmm7, xmm5, Operand(edx, 4));
SSE2_INSTRUCTION_LIST(EMIT_SSE2_AVXINSTR)
#undef EMIT_SSE2_AVXINSTR
}
}
......
......@@ -31,11 +31,11 @@ typedef int8_t (*Int8BinOp)(int8_t, int8_t);
typedef int (*Int8CompareOp)(int8_t, int8_t);
typedef int8_t (*Int8ShiftOp)(int8_t, int);
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_X64
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
#define SIMD_LOWERING_TARGET 1
#else
#define SIMD_LOWERING_TARGET 0
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_X64
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
// Generic expected value functions.
template <typename T>
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment