Commit 7fd5949c authored by Deepti Gandluri, committed by Commit Bot

[wasm] Implement wasm SIMD F32x4 Ops

This patch implements the following F32x4 Ops:
F32x4Splat, F32x4ExtractLane, F32x4ReplaceLane
F32x4RecipApprox, F32x4RecipSqrtApprox
F32x4Add, F32x4Sub, F32x4Mul, F32x4Min, F32x4Max,
F32x4Eq, F32x4Ne, F32x4Gt, F32x4Ge

BUG=v8:6020

Change-Id: I8267734d336f4bae6fed008d7b1f5faa428574df
Reviewed-on: https://chromium-review.googlesource.com/816734
Reviewed-by: Bill Budge <bbudge@chromium.org>
Reviewed-by: Mircea Trofin <mtrofin@chromium.org>
Commit-Queue: Deepti Gandluri <gdeepti@chromium.org>
Cr-Commit-Position: refs/heads/master@{#50014}
parent d3ed12b4
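The ops are lane-wise over four 32-bit floats. As a minimal semantics sketch (plain standalone C++, not V8 API; all names below are illustrative): splat broadcasts a scalar, the arithmetic ops apply per lane, and comparisons produce a per-lane all-ones or all-zero 32-bit mask, mirroring what cmpps produces on x64.

#include <array>
#include <cstdint>

using F32x4 = std::array<float, 4>;
using Mask32x4 = std::array<uint32_t, 4>;

// f32x4 splat: broadcast one scalar into all four lanes.
F32x4 F32x4Splat(float v) { return {v, v, v, v}; }

// f32x4 add: lane-wise addition (sub/mul/min/max are analogous).
F32x4 F32x4Add(const F32x4& a, const F32x4& b) {
  F32x4 r;
  for (int i = 0; i < 4; ++i) r[i] = a[i] + b[i];
  return r;
}

// f32x4 eq: each lane becomes an all-ones mask if equal, else zero
// (ne/gt/ge are analogous).
Mask32x4 F32x4Eq(const F32x4& a, const F32x4& b) {
  Mask32x4 r;
  for (int i = 0; i < 4; ++i) r[i] = (a[i] == b[i]) ? 0xFFFFFFFFu : 0u;
  return r;
}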
@@ -2083,16 +2083,6 @@ void InstructionSelector::VisitWord32PairShr(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitWord32PairSar(Node* node) { UNIMPLEMENTED(); }
#endif // V8_TARGET_ARCH_64_BIT
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitF32x4Splat(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4ExtractLane(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4ReplaceLane(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
// && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64
void InstructionSelector::VisitF32x4SConvertI32x4(Node* node) {
@@ -2106,15 +2096,27 @@ void InstructionSelector::VisitF32x4UConvertI32x4(Node* node) {
void InstructionSelector::VisitF32x4Abs(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Neg(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
// && !V8_TARGET_ARCH_MIPS64
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_X64
void InstructionSelector::VisitF32x4RecipSqrtApprox(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitF32x4Add(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
// && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_X64
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64
void InstructionSelector::VisitF32x4AddHoriz(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
// && !V8_TARGET_ARCH_MIPS64
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_X64
void InstructionSelector::VisitF32x4Sub(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Mul(Node* node) { UNIMPLEMENTED(); }
@@ -2125,19 +2127,7 @@ void InstructionSelector::VisitF32x4Min(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4RecipApprox(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
// && !V8_TARGET_ARCH_MIPS64
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitF32x4Eq(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Ne(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Lt(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Le(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
// && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
// && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_X64
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64 && \
!V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
......
@@ -2239,6 +2239,84 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
break;
}
// TODO(gdeepti): Get rid of redundant moves for F32x4Splat/Extract below
case kX64F32x4Splat: {
XMMRegister dst = i.OutputSimd128Register();
if (instr->InputAt(0)->IsFPRegister()) {
__ Movss(dst, i.InputDoubleRegister(0));
} else {
__ Movss(dst, i.InputOperand(0));
}
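// With both operands equal to dst, shufps imm8 0x0 replicates element 0
// into all four lanes, broadcasting the scalar loaded by Movss above.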
__ shufps(dst, dst, 0x0);
break;
}
case kX64F32x4ExtractLane: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
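// extractps stores the selected 32-bit lane into a GPR; movd then moves it
// back into an XMM register (the redundant move the TODO above refers to).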
__ extractps(kScratchRegister, i.InputSimd128Register(0), i.InputInt8(1));
__ movd(i.OutputDoubleRegister(), kScratchRegister);
break;
}
case kX64F32x4ReplaceLane: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
// The insertps instruction uses imm8[5:4] to indicate the lane
// that needs to be replaced.
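// e.g. lane 2: (2 << 4) & 0x30 == 0x20, i.e. imm8[5:4] == 0b10.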
byte select = i.InputInt8(1) << 4 & 0x30;
__ insertps(i.OutputSimd128Register(), i.InputDoubleRegister(2), select);
break;
}
case kX64F32x4RecipApprox: {
__ rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kX64F32x4RecipSqrtApprox: {
__ rsqrtps(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
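// Per Intel's documentation, rcpps/rsqrtps return approximations with a
// relative error of at most 1.5 * 2^-12, hence the "Approx" in the opcode
// names and the error tolerance in the tests below.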
case kX64F32x4Add: {
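// Like the other binary ops below, this uses the destructive two-operand
// SSE form: dst must alias the first input (presumably enforced via a
// DefineSameAsFirst constraint in the instruction selector).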
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ addps(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64F32x4Sub: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ subps(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64F32x4Mul: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ mulps(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64F32x4Min: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ minps(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64F32x4Max: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ maxps(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
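// For the comparisons, the cmpps imm8 encodes the predicate: 0x0 is EQ
// (cmpeqps) and 0x4 is NEQ (cmpneqps); cmpltps/cmpleps below are the
// assembler mnemonics for predicates 0x1 (LT) and 0x2 (LE).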
case kX64F32x4Eq: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x0);
break;
}
case kX64F32x4Ne: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ cmpps(i.OutputSimd128Register(), i.InputSimd128Register(1), 0x4);
break;
}
case kX64F32x4Lt: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ cmpltps(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
case kX64F32x4Le: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ cmpleps(i.OutputSimd128Register(), i.InputSimd128Register(1));
break;
}
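// Note: the commit message lists F32x4Gt/F32x4Ge, yet only Lt/Le are
// handled here; SSE has no direct greater-than compare, so Gt/Ge are
// presumably lowered to Lt/Le with swapped operands before selection.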
case kX64I32x4Splat: {
XMMRegister dst = i.OutputSimd128Register();
__ movd(dst, i.InputRegister(0));
......
@@ -144,6 +144,20 @@ namespace compiler {
V(X64Push) \
V(X64Poke) \
V(X64StackCheck) \
V(X64F32x4Splat) \
V(X64F32x4ExtractLane) \
V(X64F32x4ReplaceLane) \
V(X64F32x4RecipApprox) \
V(X64F32x4RecipSqrtApprox) \
V(X64F32x4Add) \
V(X64F32x4Sub) \
V(X64F32x4Mul) \
V(X64F32x4Min) \
V(X64F32x4Max) \
V(X64F32x4Eq) \
V(X64F32x4Ne) \
V(X64F32x4Lt) \
V(X64F32x4Le) \
V(X64I32x4Splat) \
V(X64I32x4ExtractLane) \
V(X64I32x4ReplaceLane) \
......
@@ -123,6 +123,20 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64Lea:
case kX64Dec32:
case kX64Inc32:
case kX64F32x4Splat:
case kX64F32x4ExtractLane:
case kX64F32x4ReplaceLane:
case kX64F32x4RecipApprox:
case kX64F32x4RecipSqrtApprox:
case kX64F32x4Add:
case kX64F32x4Sub:
case kX64F32x4Mul:
case kX64F32x4Min:
case kX64F32x4Max:
case kX64F32x4Eq:
case kX64F32x4Ne:
case kX64F32x4Lt:
case kX64F32x4Le:
case kX64I32x4Splat:
case kX64I32x4ExtractLane:
case kX64I32x4ReplaceLane:
......
@@ -1369,6 +1369,7 @@ void VisitFloatUnop(InstructionSelector* selector, Node* node, Node* input,
}
RO_OP_LIST(RO_VISITOR)
#undef RO_VISITOR
#undef RO_OP_LIST
#define RR_VISITOR(Name, opcode) \
void InstructionSelector::Visit##Name(Node* node) { \
@@ -1376,6 +1377,7 @@ RO_OP_LIST(RO_VISITOR)
}
RR_OP_LIST(RR_VISITOR)
#undef RR_VISITOR
#undef RR_OP_LIST
void InstructionSelector::VisitTruncateFloat64ToWord32(Node* node) {
VisitRR(this, node, kArchTruncateDoubleToI);
@@ -2442,16 +2444,21 @@ VISIT_ATOMIC_BINOP(Xor)
#undef VISIT_ATOMIC_BINOP
#define SIMD_TYPES(V) \
V(F32x4) \
V(I32x4) \
V(I16x8) \
V(I8x16)
#define SIMD_FORMAT_LIST(V) \
V(32x4) \
V(16x8) \
V(8x16)
#define SIMD_BINOP_LIST(V) \
V(F32x4Add) \
V(F32x4Sub) \
V(F32x4Mul) \
V(F32x4Min) \
V(F32x4Max) \
V(F32x4Eq) \
V(F32x4Ne) \
V(F32x4Lt) \
V(F32x4Le) \
V(I32x4Add) \
V(I32x4AddHoriz) \
V(I32x4Sub) \
@@ -2505,6 +2512,8 @@ VISIT_ATOMIC_BINOP(Xor)
V(S128Xor)
#define SIMD_UNOP_LIST(V) \
V(F32x4RecipApprox) \
V(F32x4RecipSqrtApprox) \
V(I32x4Neg) \
V(I16x8Neg) \
V(I8x16Neg) \
@@ -2580,6 +2589,10 @@ SIMD_UNOP_LIST(VISIT_SIMD_UNOP)
}
SIMD_BINOP_LIST(VISIT_SIMD_BINOP)
#undef VISIT_SIMD_BINOP
#undef SIMD_TYPES
#undef SIMD_BINOP_LIST
#undef SIMD_UNOP_LIST
#undef SIMD_SHIFT_OPCODES
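// For reference, a sketch of how VISIT_SIMD_BINOP plausibly expands for one
// entry of SIMD_BINOP_LIST (the macro body lies outside this hunk, and
// DefineSameAsFirst is an assumption matching the code generator's
// DCHECK_EQ of output and first input):
//
//   void InstructionSelector::VisitF32x4Add(Node* node) {
//     X64OperandGenerator g(this);
//     Emit(kX64F32x4Add, g.DefineSameAsFirst(node),
//          g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1)));
//   }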
void InstructionSelector::VisitS128Select(Node* node) {
X64OperandGenerator g(this);
......
@@ -405,8 +405,6 @@ bool SkipFPValue(float x) {
// doesn't handle NaNs. Also skip extreme values.
bool SkipFPExpectedValue(float x) { return std::isnan(x) || SkipFPValue(x); }
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
WASM_SIMD_TEST(F32x4Splat) {
WasmRunner<int32_t, float> r(execution_mode);
byte lane_val = 0;
@@ -446,8 +444,6 @@ WASM_SIMD_TEST(F32x4ReplaceLane) {
CHECK_EQ(1, r.Call(3.14159f, -1.5f));
}
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
// V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
V8_TARGET_ARCH_MIPS64
@@ -474,6 +470,8 @@ WASM_SIMD_TEST(F32x4ConvertI32x4) {
static_cast<float>(static_cast<uint32_t>(*i))));
}
}
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
// V8_TARGET_ARCH_MIPS64
void RunF32x4UnOpTest(WasmExecutionMode execution_mode, WasmOpcode simd_op,
FloatUnOp expected_op, float error = 0.0f) {
@@ -496,13 +494,19 @@ void RunF32x4UnOpTest(WasmExecutionMode execution_mode, WasmOpcode simd_op,
}
}
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
V8_TARGET_ARCH_MIPS64
WASM_SIMD_TEST(F32x4Abs) {
RunF32x4UnOpTest(execution_mode, kExprF32x4Abs, std::abs);
}
WASM_SIMD_TEST(F32x4Neg) {
RunF32x4UnOpTest(execution_mode, kExprF32x4Neg, Negate);
}
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
// V8_TARGET_ARCH_MIPS64
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_X64
static const float kApproxError = 0.01f;
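// A 1% tolerance is loose compared to the hardware's ~1.5 * 2^-12 bound
// for rcpps/rsqrtps, but keeps the tests stable across architectures.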
WASM_SIMD_COMPILED_TEST(F32x4RecipApprox) {
@@ -555,10 +559,8 @@ WASM_SIMD_TEST(F32x4_Max) {
RunF32x4BinOpTest(execution_mode, kExprF32x4Max, JSMax);
}
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
// V8_TARGET_ARCH_MIPS64
// V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_X64
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
void RunF32x4CompareOpTest(WasmExecutionMode execution_mode, WasmOpcode simd_op,
FloatCompareOp expected_op) {
WasmRunner<int32_t, float, float, int32_t> r(execution_mode);
@@ -607,8 +609,6 @@ WASM_SIMD_TEST(F32x4Lt) {
WASM_SIMD_TEST(F32x4Le) {
RunF32x4CompareOpTest(execution_mode, kExprF32x4Le, LessEqual);
}
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
// V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
WASM_SIMD_TEST(I32x4Splat) {
// Store SIMD value in a local variable, use extract lane to check lane values
......