Commit 3cc75409, authored by jing.bao, committed by Commit Bot

[ia32][wasm] Add F32x4 Splat/ExtractLane/ReplaceLane.

Add vshufps, vmovaps, insertps/vinsertps

Bug: 
Change-Id: I178cde529b5309a5221086ab916e5485a8b67d5a
Reviewed-on: https://chromium-review.googlesource.com/802999
Reviewed-by: Benedikt Meurer <bmeurer@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Jing Bao <jing.bao@intel.com>
Cr-Commit-Position: refs/heads/master@{#49880}
parent 663b55aa
......@@ -2005,6 +2005,54 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
break;
}
case kSSEF32x4Splat: {
  // The SSE encoding is destructive, so the selector pinned the output to
  // the same register as input 0; the scalar therefore already lives in the
  // register being shuffled.
  DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
  XMMRegister splat_reg = i.OutputSimd128Register();
  // An all-zero shuffle control is used to fill the vector from its low float.
  __ shufps(splat_reg, splat_reg, 0x0);
  break;
}
case kAVXF32x4Splat: {
  // Three-operand AVX form: the scalar input does not have to share a
  // register with the result.
  CpuFeatureScope avx_scope(tasm(), AVX);
  XMMRegister scalar = i.InputFloatRegister(0);
  XMMRegister result = i.OutputSimd128Register();
  __ vshufps(result, scalar, scalar, 0x0);
  break;
}
case kSSEF32x4ExtractLane: {
  // Destructive SSE form: output and input 0 are the same register.
  DCHECK_EQ(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
  XMMRegister reg = i.OutputFloatRegister();
  int8_t lane_index = i.InputInt8(1);
  // Lane 0 is already the low float of the register; nothing to emit.
  if (lane_index == 0) break;
  DCHECK_LT(lane_index, 4);
  // The lane index is used as the shuffle control byte, bringing the
  // requested lane into the low position.
  __ shufps(reg, reg, lane_index);
  break;
}
case kAVXF32x4ExtractLane: {
  CpuFeatureScope avx_scope(tasm(), AVX);
  XMMRegister result = i.OutputFloatRegister();
  XMMRegister vector = i.InputSimd128Register(0);
  int8_t lane_index = i.InputInt8(1);
  if (lane_index != 0) {
    DCHECK_LT(lane_index, 4);
    // The lane index is used as the shuffle control byte, bringing the
    // requested lane into the low position of the result.
    __ vshufps(result, vector, vector, lane_index);
  } else if (result != vector) {
    // Lane 0 is already in the low position; only a register move is needed,
    // and only when the allocator chose distinct registers.
    __ vmovaps(result, vector);
  }
  break;
}
case kSSEF32x4ReplaceLane: {
  // Destructive SSE form: the vector being modified is also the output.
  DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
  CpuFeatureScope sse_scope(tasm(), SSE4_1);
  XMMRegister vector = i.OutputSimd128Register();
  Operand replacement = i.InputOperand(2);
  // The lane index is shifted into bits [5:4] of the insertps immediate.
  int8_t lane_select = i.InputInt8(1) << 4;
  __ insertps(vector, replacement, lane_select);
  break;
}
case kAVXF32x4ReplaceLane: {
  CpuFeatureScope avx_scope(tasm(), AVX);
  XMMRegister result = i.OutputSimd128Register();
  XMMRegister vector = i.InputSimd128Register(0);
  Operand replacement = i.InputOperand(2);
  // The lane index is shifted into bits [5:4] of the vinsertps immediate.
  int8_t lane_select = i.InputInt8(1) << 4;
  __ vinsertps(result, vector, replacement, lane_select);
  break;
}
case kIA32I32x4Splat: {
XMMRegister dst = i.OutputSimd128Register();
__ Movd(dst, i.InputOperand(0));
......
......@@ -111,6 +111,12 @@ namespace compiler {
V(IA32PushFloat64) \
V(IA32Poke) \
V(IA32StackCheck) \
V(SSEF32x4Splat) \
V(AVXF32x4Splat) \
V(SSEF32x4ExtractLane) \
V(AVXF32x4ExtractLane) \
V(SSEF32x4ReplaceLane) \
V(AVXF32x4ReplaceLane) \
V(IA32I32x4Splat) \
V(IA32I32x4ExtractLane) \
V(SSEI32x4ReplaceLane) \
......
......@@ -97,6 +97,12 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXFloat32Neg:
case kIA32BitcastFI:
case kIA32BitcastIF:
case kSSEF32x4Splat:
case kAVXF32x4Splat:
case kSSEF32x4ExtractLane:
case kAVXF32x4ExtractLane:
case kSSEF32x4ReplaceLane:
case kAVXF32x4ReplaceLane:
case kIA32I32x4Splat:
case kIA32I32x4ExtractLane:
case kSSEI32x4ReplaceLane:
......
......@@ -1897,9 +1897,9 @@ VISIT_ATOMIC_BINOP(Or)
VISIT_ATOMIC_BINOP(Xor)
#undef VISIT_ATOMIC_BINOP
#define SIMD_TYPES(V) \
V(I32x4) \
V(I16x8) \
#define SIMD_INT_TYPES(V) \
V(I32x4) \
V(I16x8) \
V(I8x16)
#define SIMD_BINOP_LIST(V) \
......@@ -1966,6 +1966,27 @@ VISIT_ATOMIC_BINOP(Xor)
V(I16x8ShrS) \
V(I16x8ShrU)
// Selects the F32x4 splat instruction. With AVX the result may live in any
// register; the SSE fallback requires the result to reuse the input register.
void InstructionSelector::VisitF32x4Splat(Node* node) {
  IA32OperandGenerator gen(this);
  InstructionOperand scalar = gen.UseRegister(node->InputAt(0));
  if (!IsSupported(AVX)) {
    Emit(kSSEF32x4Splat, gen.DefineSameAsFirst(node), scalar);
    return;
  }
  Emit(kAVXF32x4Splat, gen.DefineAsRegister(node), scalar);
}
// Selects the F32x4 extract-lane instruction. The lane index comes from the
// node's int32 operator parameter and is passed as an immediate. With AVX the
// result may live in any register; the SSE fallback reuses the input register.
void InstructionSelector::VisitF32x4ExtractLane(Node* node) {
  IA32OperandGenerator gen(this);
  InstructionOperand vector = gen.UseRegister(node->InputAt(0));
  InstructionOperand lane = gen.UseImmediate(OpParameter<int32_t>(node));
  if (!IsSupported(AVX)) {
    Emit(kSSEF32x4ExtractLane, gen.DefineSameAsFirst(node), vector, lane);
    return;
  }
  Emit(kAVXF32x4ExtractLane, gen.DefineAsRegister(node), vector, lane);
}
void InstructionSelector::VisitS128Zero(Node* node) {
IA32OperandGenerator g(this);
Emit(kIA32S128Zero, g.DefineAsRegister(node));
......@@ -1981,7 +2002,7 @@ void InstructionSelector::VisitS128Not(Node* node) {
void InstructionSelector::Visit##Type##Splat(Node* node) { \
VisitRO(this, node, kIA32##Type##Splat); \
}
SIMD_TYPES(VISIT_SIMD_SPLAT)
SIMD_INT_TYPES(VISIT_SIMD_SPLAT)
#undef VISIT_SIMD_SPLAT
#define VISIT_SIMD_EXTRACT_LANE(Type) \
......@@ -1991,7 +2012,7 @@ SIMD_TYPES(VISIT_SIMD_SPLAT)
Emit(kIA32##Type##ExtractLane, g.DefineAsRegister(node), \
g.UseRegister(node->InputAt(0)), g.UseImmediate(lane)); \
}
SIMD_TYPES(VISIT_SIMD_EXTRACT_LANE)
SIMD_INT_TYPES(VISIT_SIMD_EXTRACT_LANE)
#undef VISIT_SIMD_EXTRACT_LANE
#define VISIT_SIMD_REPLACE_LANE(Type) \
......@@ -2008,7 +2029,8 @@ SIMD_TYPES(VISIT_SIMD_EXTRACT_LANE)
operand1, operand2); \
} \
}
SIMD_TYPES(VISIT_SIMD_REPLACE_LANE)
SIMD_INT_TYPES(VISIT_SIMD_REPLACE_LANE)
VISIT_SIMD_REPLACE_LANE(F32x4)
#undef VISIT_SIMD_REPLACE_LANE
#define VISIT_SIMD_SHIFT(Opcode) \
......
......@@ -2084,13 +2084,17 @@ void InstructionSelector::VisitWord32PairSar(Node* node) { UNIMPLEMENTED(); }
#endif // V8_TARGET_ARCH_64_BIT
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64
!V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
// Placeholder visitors for targets excluded by the surrounding #if: each one
// simply hits UNIMPLEMENTED() if the corresponding F32x4 node is reached.
void InstructionSelector::VisitF32x4Splat(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4ExtractLane(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4ReplaceLane(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
// && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64
// Placeholder visitor for targets selected by the surrounding #if; reaching it
// hits UNIMPLEMENTED().
void InstructionSelector::VisitF32x4SConvertI32x4(Node* node) {
UNIMPLEMENTED();
}
......
......@@ -2828,6 +2828,17 @@ void Assembler::pextrd(const Operand& dst, XMMRegister src, int8_t offset) {
EMIT(offset);
}
// Emits insertps dst, src, offset: the byte sequence 66 0F 3A 21, followed by
// the operand encoding for (dst, src) and the immediate byte |offset|.
// SSE4_1 must be enabled on this assembler (debug-checked).
void Assembler::insertps(XMMRegister dst, const Operand& src, int8_t offset) {
DCHECK(IsEnabled(SSE4_1));
EnsureSpace ensure_space(this);
// Prefix and opcode bytes; the order is part of the encoding.
EMIT(0x66);
EMIT(0x0F);
EMIT(0x3A);
EMIT(0x21);
emit_sse_operand(dst, src);
// Trailing immediate byte.
EMIT(offset);
}
void Assembler::pinsrb(XMMRegister dst, const Operand& src, int8_t offset) {
DCHECK(IsEnabled(SSE4_1));
EnsureSpace ensure_space(this);
......@@ -2982,6 +2993,13 @@ void Assembler::vcmpps(XMMRegister dst, XMMRegister src1, const Operand& src2,
EMIT(cmp);
}
// Emits vshufps dst, src1, src2, imm8: opcode 0xC6 is encoded through vps(),
// then the shuffle control byte |imm8| is appended as the trailing immediate.
void Assembler::vshufps(XMMRegister dst, XMMRegister src1, const Operand& src2,
byte imm8) {
DCHECK(is_uint8(imm8));
vps(0xC6, dst, src1, src2);
EMIT(imm8);
}
void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int8_t imm8) {
XMMRegister iop = XMMRegister::from_code(6);
vinstr(0x71, iop, dst, Operand(src), k66, k0F, kWIG);
......@@ -3043,6 +3061,12 @@ void Assembler::vpextrd(const Operand& dst, XMMRegister src, int8_t offset) {
EMIT(offset);
}
// Emits vinsertps dst, src1, src2, offset: opcode 0x21 is encoded through
// vinstr() with the k66 / k0F3A / kWIG selectors, then |offset| is appended as
// the trailing immediate byte.
void Assembler::vinsertps(XMMRegister dst, XMMRegister src1,
const Operand& src2, int8_t offset) {
vinstr(0x21, dst, src1, src2, k66, k0F3A, kWIG);
EMIT(offset);
}
void Assembler::vpinsrb(XMMRegister dst, XMMRegister src1, const Operand& src2,
int8_t offset) {
vinstr(0x20, dst, src1, src2, k66, k0F3A, kWIG);
......
......@@ -1149,6 +1149,10 @@ class Assembler : public AssemblerBase {
}
void pextrd(const Operand& dst, XMMRegister src, int8_t offset);
// insertps with a register source: wraps |src| in an Operand and forwards to
// the Operand overload declared below.
void insertps(XMMRegister dst, XMMRegister src, int8_t offset) {
insertps(dst, Operand(src), offset);
}
// insertps with a general (register or memory) source operand; |offset| is
// emitted as the instruction's immediate byte.
void insertps(XMMRegister dst, const Operand& src, int8_t offset);
void pinsrb(XMMRegister dst, Register src, int8_t offset) {
pinsrb(dst, Operand(src), offset);
}
......@@ -1397,6 +1401,14 @@ class Assembler : public AssemblerBase {
void vrsqrtps(XMMRegister dst, const Operand& src) {
vinstr(0x52, dst, xmm0, src, kNone, k0F, kWIG);
}
// Register-to-register vmovaps, encoded through vps() with opcode 0x28.
// NOTE(review): xmm0 is passed in the first-source slot; presumably it is a
// placeholder because this move has only one source -- confirm against vps().
void vmovaps(XMMRegister dst, XMMRegister src) {
vps(0x28, dst, xmm0, Operand(src));
}
// vshufps with a register second source: wraps |src2| in an Operand and
// forwards to the Operand overload declared below.
void vshufps(XMMRegister dst, XMMRegister src1, XMMRegister src2, byte imm8) {
vshufps(dst, src1, Operand(src2), imm8);
}
// vshufps with a general (register or memory) second source; |imm8| is the
// shuffle control byte emitted after the operands.
void vshufps(XMMRegister dst, XMMRegister src1, const Operand& src2,
byte imm8);
void vpsllw(XMMRegister dst, XMMRegister src, int8_t imm8);
void vpslld(XMMRegister dst, XMMRegister src, int8_t imm8);
......@@ -1427,6 +1439,12 @@ class Assembler : public AssemblerBase {
}
void vpextrd(const Operand& dst, XMMRegister src, int8_t offset);
// vinsertps with a register second source: wraps |src2| in an Operand and
// forwards to the Operand overload declared below.
void vinsertps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
int8_t offset) {
vinsertps(dst, src1, Operand(src2), offset);
}
// vinsertps with a general (register or memory) second source; |offset| is
// emitted as the instruction's immediate byte.
void vinsertps(XMMRegister dst, XMMRegister src1, const Operand& src2,
int8_t offset);
void vpinsrb(XMMRegister dst, XMMRegister src1, Register src2,
int8_t offset) {
vpinsrb(dst, src1, Operand(src2), offset);
......
......@@ -847,6 +847,13 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
AppendToBuffer(",%d", *reinterpret_cast<int8_t*>(current));
current++;
break;
case 0x21:
// vinsertps: print the regop and vvvv registers, then the xmm/memory operand
// (advancing the cursor by its encoded length), then the trailing immediate
// byte as a signed decimal.
AppendToBuffer("vinsertps %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
AppendToBuffer(",%d", *reinterpret_cast<int8_t*>(current));
// Step over the immediate byte.
current++;
break;
case 0x22:
AppendToBuffer("vpinsrd %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
......@@ -1038,6 +1045,10 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
int mod, regop, rm, vvvv = vex_vreg();
get_modrm(*current, &mod, &regop, &rm);
switch (opcode) {
case 0x28:
// vmovaps: print the regop register, then the xmm/memory operand, advancing
// the cursor by the operand's encoded length. No immediate follows.
AppendToBuffer("vmovaps %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
break;
case 0x52:
AppendToBuffer("vrsqrtps %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
......@@ -1100,6 +1111,13 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
current++;
break;
}
case 0xC6:
  // vshufps: print the regop and vvvv registers, then the xmm/memory operand
  // (advancing the cursor by its encoded length), then the immediate byte.
  AppendToBuffer("vshufps %s,%s,", NameOfXMMRegister(regop),
                 NameOfXMMRegister(vvvv));
  current += PrintRightXMMOperand(current);
  // Print the complete shuffle control byte. It carries a 2-bit selector for
  // each of the four lanes, so masking it with 3 would show only the first
  // selector and make distinct instructions disassemble identically.
  AppendToBuffer(", %d", static_cast<int>(*current));
  // Step over the immediate byte.
  current += 1;
  break;
default:
UnimplementedInstruction();
}
......@@ -1961,6 +1979,14 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
data += PrintRightOperand(data);
AppendToBuffer(",%d", *reinterpret_cast<int8_t*>(data));
data++;
} else if (*data == 0x21) {
// insertps: skip the opcode byte, decode the ModRM byte to name the
// destination register, print the xmm/memory operand (advancing by its
// encoded length), then print the trailing immediate as a signed decimal.
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
AppendToBuffer("insertps %s,", NameOfXMMRegister(regop));
data += PrintRightXMMOperand(data);
AppendToBuffer(",%d", *reinterpret_cast<int8_t*>(data));
// Step over the immediate byte.
data++;
} else if (*data == 0x22) {
data++;
int mod, regop, rm;
......
......@@ -550,6 +550,8 @@ TEST(DisasmIa320) {
__ pextrw(Operand(edx, 4), xmm0, 1);
__ pextrd(eax, xmm0, 1);
__ pextrd(Operand(edx, 4), xmm0, 1);
__ insertps(xmm1, xmm2, 0);
__ insertps(xmm1, Operand(edx, 4), 0);
__ pinsrb(xmm1, eax, 0);
__ pinsrb(xmm1, Operand(edx, 4), 0);
__ pinsrd(xmm1, eax, 0);
......@@ -611,6 +613,9 @@ TEST(DisasmIa320) {
__ vrcpps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ vrsqrtps(xmm1, xmm0);
__ vrsqrtps(xmm1, Operand(ebx, ecx, times_4, 10000));
__ vmovaps(xmm0, xmm1);
__ vshufps(xmm0, xmm1, xmm2, 3);
__ vshufps(xmm0, xmm1, Operand(edx, 4), 3);
__ vcmpeqps(xmm5, xmm4, xmm1);
__ vcmpeqps(xmm5, xmm4, Operand(ebx, ecx, times_4, 10000));
......@@ -655,6 +660,8 @@ TEST(DisasmIa320) {
__ vpextrw(Operand(edx, 4), xmm0, 1);
__ vpextrd(eax, xmm0, 1);
__ vpextrd(Operand(edx, 4), xmm0, 1);
__ vinsertps(xmm0, xmm1, xmm2, 0);
__ vinsertps(xmm0, xmm1, Operand(edx, 4), 0);
__ vpinsrb(xmm0, xmm1, eax, 0);
__ vpinsrb(xmm0, xmm1, Operand(edx, 4), 0);
__ vpinsrw(xmm0, xmm1, eax, 0);
......
......@@ -406,7 +406,7 @@ bool SkipFPValue(float x) {
// Returns true when |x| should not be used as an expected result: it is
// either NaN or rejected by SkipFPValue().
bool SkipFPExpectedValue(float x) {
  if (std::isnan(x)) return true;
  return SkipFPValue(x);
}
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
V8_TARGET_ARCH_MIPS64
V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
WASM_SIMD_TEST(F32x4Splat) {
WasmRunner<int32_t, float> r(execution_mode);
byte lane_val = 0;
......@@ -446,7 +446,11 @@ WASM_SIMD_TEST(F32x4ReplaceLane) {
CHECK_EQ(1, r.Call(3.14159f, -1.5f));
}
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
// V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
V8_TARGET_ARCH_MIPS64
// Tests both signed and unsigned conversion.
WASM_SIMD_TEST(F32x4ConvertI32x4) {
WasmRunner<int32_t, int32_t, float, float> r(execution_mode);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment