Commit 4deb9ffd authored by jing.bao, committed by Commit bot

Add several SIMD opcodes to IA32

CreateInt32x4, Int32x4ExtractLane, Int32x4ReplaceLane
Int32x4Add, Int32x4Sub

Also add paddd and psubd to ia32-assembler

BUG=

Review-Url: https://codereview.chromium.org/2695613004
Cr-Commit-Position: refs/heads/master@{#43483}
parent 63afdb00
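
For orientation, a rough summary (not part of the patch text) of how the new machine opcodes are lowered on IA32, based on the code-generator cases below:

  // kIA32Int32x4Splat        -> movd + pshufd dst, dst, 0x0
  // kIA32Int32x4ExtractLane  -> Pextrd (pextrd with SSE4.1, else pshufd + movd fallback)
  // kIA32Int32x4ReplaceLane  -> pinsrd (requires SSE4.1)
  // kSSEInt32x4Add / Sub     -> paddd / psubd (SSE2)
  // kAVXInt32x4Add / Sub     -> vpaddd / vpsubd (AVX, three-operand form)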
......@@ -1875,6 +1875,7 @@ v8_source_set("v8_base") {
"src/ia32/macro-assembler-ia32.h",
"src/ia32/simulator-ia32.cc",
"src/ia32/simulator-ia32.h",
"src/ia32/sse-instr.h",
"src/ic/ia32/access-compiler-ia32.cc",
"src/ic/ia32/handler-compiler-ia32.cc",
"src/ic/ia32/ic-ia32.cc",
......
......@@ -1906,6 +1906,41 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ xchg(i.InputRegister(index), operand);
break;
}
case kIA32Int32x4Splat: {
XMMRegister dst = i.OutputSimd128Register();
__ movd(dst, i.InputOperand(0));
__ pshufd(dst, dst, 0x0);
break;
}
case kIA32Int32x4ExtractLane: {
__ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
break;
}
case kIA32Int32x4ReplaceLane: {
CpuFeatureScope sse_scope(masm(), SSE4_1);
__ pinsrd(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
break;
}
case kSSEInt32x4Add: {
__ paddd(i.OutputSimd128Register(), i.InputOperand(1));
break;
}
case kSSEInt32x4Sub: {
__ psubd(i.OutputSimd128Register(), i.InputOperand(1));
break;
}
case kAVXInt32x4Add: {
CpuFeatureScope avx_scope(masm(), AVX);
__ vpaddd(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
break;
}
case kAVXInt32x4Sub: {
CpuFeatureScope avx_scope(masm(), AVX);
__ vpsubd(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputOperand(1));
break;
}
case kCheckedLoadInt8:
ASSEMBLE_CHECKED_LOAD_INTEGER(movsx_b);
break;
......
......@@ -113,7 +113,14 @@ namespace compiler {
V(IA32StackCheck) \
V(IA32Xchgb) \
V(IA32Xchgw) \
V(IA32Xchgl)
V(IA32Xchgl) \
V(IA32Int32x4Splat) \
V(IA32Int32x4ExtractLane) \
V(IA32Int32x4ReplaceLane) \
V(SSEInt32x4Add) \
V(SSEInt32x4Sub) \
V(AVXInt32x4Add) \
V(AVXInt32x4Sub)
// Addressing modes represent the "shape" of inputs to an instruction.
// Many instructions support multiple addressing modes. Addressing modes
......
......@@ -97,6 +97,13 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXFloat32Neg:
case kIA32BitcastFI:
case kIA32BitcastIF:
case kIA32Int32x4Splat:
case kIA32Int32x4ExtractLane:
case kIA32Int32x4ReplaceLane:
case kSSEInt32x4Add:
case kSSEInt32x4Sub:
case kAVXInt32x4Add:
case kAVXInt32x4Sub:
return (instr->addressing_mode() == kMode_None)
? kNoOpcodeFlags
: kIsLoadOperation | kHasSideEffect;
......
......@@ -873,7 +873,9 @@ void InstructionSelector::VisitWord32Ror(Node* node) {
V(Float32Mul, kAVXFloat32Mul, kSSEFloat32Mul) \
V(Float64Mul, kAVXFloat64Mul, kSSEFloat64Mul) \
V(Float32Div, kAVXFloat32Div, kSSEFloat32Div) \
V(Float64Div, kAVXFloat64Div, kSSEFloat64Div)
V(Float64Div, kAVXFloat64Div, kSSEFloat64Div) \
V(Int32x4Add, kAVXInt32x4Add, kSSEInt32x4Add) \
V(Int32x4Sub, kAVXInt32x4Sub, kSSEInt32x4Sub)
#define FLOAT_UNOP_LIST(V) \
V(Float32Abs, kAVXFloat32Abs, kSSEFloat32Abs) \
......@@ -1716,6 +1718,25 @@ void InstructionSelector::VisitAtomicStore(Node* node) {
Emit(code, 0, nullptr, input_count, inputs);
}
void InstructionSelector::VisitInt32x4Splat(Node* node) {
VisitRO(this, node, kIA32Int32x4Splat);
}
void InstructionSelector::VisitInt32x4ExtractLane(Node* node) {
IA32OperandGenerator g(this);
int32_t lane = OpParameter<int32_t>(node);
Emit(kIA32Int32x4ExtractLane, g.DefineAsRegister(node),
g.UseRegister(node->InputAt(0)), g.UseImmediate(lane));
}
void InstructionSelector::VisitInt32x4ReplaceLane(Node* node) {
IA32OperandGenerator g(this);
int32_t lane = OpParameter<int32_t>(node);
Emit(kIA32Int32x4ReplaceLane, g.DefineSameAsFirst(node),
g.UseRegister(node->InputAt(0)), g.UseImmediate(lane),
g.Use(node->InputAt(1)));
}
// static
MachineOperatorBuilder::Flags
InstructionSelector::SupportedMachineOperatorFlags() {
......
......@@ -1966,7 +1966,7 @@ void InstructionSelector::VisitWord32PairShr(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitWord32PairSar(Node* node) { UNIMPLEMENTED(); }
#endif // V8_TARGET_ARCH_64_BIT
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitInt32x4Splat(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitInt32x4ExtractLane(Node* node) {
......@@ -1980,7 +1980,9 @@ void InstructionSelector::VisitInt32x4ReplaceLane(Node* node) {
void InstructionSelector::VisitInt32x4Add(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitInt32x4Sub(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_IA32
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM
void InstructionSelector::VisitSimd128Zero(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitSimd1x4Zero(Node* node) { UNIMPLEMENTED(); }
......
......@@ -48,7 +48,7 @@ namespace internal {
bool CpuFeatures::SupportsCrankshaft() { return true; }
bool CpuFeatures::SupportsSimd128() { return false; }
bool CpuFeatures::SupportsSimd128() { return true; }
static const byte kCallOpcode = 0xE8;
static const int kNoCodeAgeSequenceLength = 5;
......
......@@ -2864,6 +2864,24 @@ void Assembler::rorx(Register dst, const Operand& src, byte imm8) {
EMIT(imm8);
}
void Assembler::sse2_instr(XMMRegister dst, const Operand& src, byte prefix,
byte escape, byte opcode) {
EnsureSpace ensure_space(this);
EMIT(prefix);
EMIT(escape);
EMIT(opcode);
emit_sse_operand(dst, src);
}
void Assembler::vinstr(byte op, XMMRegister dst, XMMRegister src1,
const Operand& src2, SIMDPrefix pp, LeadingOpcode m,
VexW w) {
DCHECK(IsEnabled(AVX));
EnsureSpace ensure_space(this);
emit_vex_prefix(src1, kL128, pp, m, w);
EMIT(op);
emit_sse_operand(dst, src2);
}
void Assembler::emit_sse_operand(XMMRegister reg, const Operand& adr) {
Register ireg = { reg.code() };
......
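As a sanity check on the two emit helpers above, this is roughly what the generated assembler methods produce for the register-register forms (byte values per the Intel SDM; illustrative sketch, not output quoted from the patch):

  // paddd(xmm1, xmm2)         emits 66 0F FE CA
  // vpaddd(xmm0, xmm1, xmm2)  emits C5 F1 FE C2   (2-byte VEX prefix, L=0, pp=66)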
......@@ -40,6 +40,7 @@
#include <deque>
#include "src/assembler.h"
#include "src/ia32/sse-instr.h"
#include "src/isolate.h"
#include "src/utils.h"
......@@ -1418,6 +1419,30 @@ class Assembler : public AssemblerBase {
void vpd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
void vpd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
// Other SSE and AVX instructions
#define DECLARE_SSE2_INSTRUCTION(instruction, prefix, escape, opcode) \
void instruction(XMMRegister dst, XMMRegister src) { \
instruction(dst, Operand(src)); \
} \
void instruction(XMMRegister dst, const Operand& src) { \
sse2_instr(dst, src, 0x##prefix, 0x##escape, 0x##opcode); \
}
SSE2_INSTRUCTION_LIST(DECLARE_SSE2_INSTRUCTION)
#undef DECLARE_SSE2_INSTRUCTION
#define DECLARE_SSE2_AVX_INSTRUCTION(instruction, prefix, escape, opcode) \
void v##instruction(XMMRegister dst, XMMRegister src1, XMMRegister src2) { \
v##instruction(dst, src1, Operand(src2)); \
} \
void v##instruction(XMMRegister dst, XMMRegister src1, \
const Operand& src2) { \
vinstr(0x##opcode, dst, src1, src2, k##prefix, k##escape, kW0); \
}
SSE2_INSTRUCTION_LIST(DECLARE_SSE2_AVX_INSTRUCTION)
#undef DECLARE_SSE2_AVX_INSTRUCTION
// Prefetch src position into cache level.
// Level 1, 2 or 3 specifies CPU cache level. Level 0 specifies a
// non-temporal
......@@ -1548,6 +1573,10 @@ class Assembler : public AssemblerBase {
inline void emit_disp(Label* L, Displacement::Type type);
inline void emit_near_disp(Label* L);
void sse2_instr(XMMRegister dst, const Operand& src, byte prefix, byte escape,
byte opcode);
void vinstr(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2,
SIMDPrefix pp, LeadingOpcode m, VexW w);
// Most BMI instructions are similar.
void bmi1(byte op, Register reg, Register vreg, const Operand& rm);
void bmi2(SIMDPrefix pp, byte op, Register reg, Register vreg,
......
......@@ -10,6 +10,7 @@
#include "src/base/compiler-specific.h"
#include "src/disasm.h"
#include "src/ia32/sse-instr.h"
namespace disasm {
......@@ -1002,6 +1003,16 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
#define DECLARE_SSE_AVX_DIS_CASE(instruction, notUsed1, notUsed2, opcode) \
case 0x##opcode: { \
AppendToBuffer("v" #instruction " %s,%s,", NameOfXMMRegister(regop), \
NameOfXMMRegister(vvvv)); \
current += PrintRightXMMOperand(current); \
break; \
}
SSE2_INSTRUCTION_LIST(DECLARE_SSE_AVX_DIS_CASE)
#undef DECLARE_SSE_AVX_DIS_CASE
default:
UnimplementedInstruction();
}
......@@ -1929,6 +1940,18 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
NameOfXMMRegister(regop),
NameOfXMMRegister(rm));
data++;
} else if (*data == 0xFA) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("psubd %s,", NameOfXMMRegister(regop));
data += PrintRightXMMOperand(data);
} else if (*data == 0xFE) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("paddd %s,", NameOfXMMRegister(regop));
data += PrintRightXMMOperand(data);
} else if (*data == 0xB1) {
data++;
data += PrintOperands("cmpxchg_w", OPER_REG_OP_ORDER, data);
......
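If the new disassembler cases behave like the neighbouring ones, the output for the added instructions should look roughly like this (illustrative, derived from the AppendToBuffer format strings above):

  paddd xmm5,xmm1
  psubd xmm5,[edx+0x4]
  vpaddd xmm7,xmm5,xmm1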
......@@ -2270,19 +2270,18 @@ void MacroAssembler::Pextrd(Register dst, XMMRegister src, int8_t imm8) {
movd(dst, src);
return;
}
DCHECK_EQ(1, imm8);
if (CpuFeatures::IsSupported(SSE4_1)) {
CpuFeatureScope sse_scope(this, SSE4_1);
pextrd(dst, src, imm8);
return;
}
pshufd(xmm0, src, 1);
DCHECK_LT(imm8, 4);
pshufd(xmm0, src, imm8);
movd(dst, xmm0);
}
void MacroAssembler::Pinsrd(XMMRegister dst, const Operand& src, int8_t imm8) {
DCHECK(imm8 == 0 || imm8 == 1);
if (CpuFeatures::IsSupported(SSE4_1)) {
CpuFeatureScope sse_scope(this, SSE4_1);
pinsrd(dst, src, imm8);
......
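The relaxed DCHECK works because the non-SSE4.1 path only ever reads the low doubleword of the shuffle result; a sketch of what Pextrd(eax, xmm1, 2) expands to on such CPUs (register choices illustrative):

  pshufd xmm0, xmm1, 0x02   ; low 2 bits of the immediate select lane 2 for the low doubleword
  movd eax, xmm0            ; copy only that low doubleword to the GP register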
// Copyright 2012 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_SSE_INSTR_H_
#define V8_SSE_INSTR_H_
#define SSE2_INSTRUCTION_LIST(V) \
V(paddd, 66, 0F, FE) \
V(psubd, 66, 0F, FA)
#endif // V8_SSE_INSTR_H_
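
Each list entry is (mnemonic, prefix, escape, opcode); for example, the DECLARE_SSE2_INSTRUCTION macro in assembler-ia32.h turns the paddd entry into roughly:

  void paddd(XMMRegister dst, XMMRegister src) { paddd(dst, Operand(src)); }
  void paddd(XMMRegister dst, const Operand& src) {
    sse2_instr(dst, src, 0x66, 0x0F, 0xFE);
  }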
......@@ -1483,6 +1483,7 @@
'ia32/macro-assembler-ia32.h',
'ia32/simulator-ia32.cc',
'ia32/simulator-ia32.h',
'ia32/sse-instr.h',
'builtins/ia32/builtins-ia32.cc',
'compiler/ia32/code-generator-ia32.cc',
'compiler/ia32/instruction-codes-ia32.h',
......
......@@ -468,6 +468,13 @@ TEST(DisasmIa320) {
__ punpckldq(xmm1, xmm6);
__ punpckhdq(xmm7, xmm5);
#define EMIT_SSE2_INSTR(instruction, notUsed1, notUsed2, notUsed3) \
__ instruction(xmm5, xmm1); \
__ instruction(xmm5, Operand(edx, 4));
SSE2_INSTRUCTION_LIST(EMIT_SSE2_INSTR)
#undef EMIT_SSE2_INSTR
}
// cmov.
......@@ -538,6 +545,13 @@ TEST(DisasmIa320) {
__ vandpd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vxorpd(xmm0, xmm1, xmm2);
__ vxorpd(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
#define EMIT_SSE2_AVXINSTR(instruction, notUsed1, notUsed2, notUsed3) \
__ v##instruction(xmm7, xmm5, xmm1); \
__ v##instruction(xmm7, xmm5, Operand(edx, 4));
SSE2_INSTRUCTION_LIST(EMIT_SSE2_AVXINSTR)
#undef EMIT_SSE2_AVXINSTR
}
}
......
......@@ -214,11 +214,11 @@ T Not(T a) {
} // namespace
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_X64
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
#define SIMD_LOWERING_TARGET 1
#else
#define SIMD_LOWERING_TARGET 0
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_X64
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
// TODO(gdeepti): These are tests using sample values to verify functional
// correctness of opcodes, add more tests for a range of values and macroize
......