Commit 3363e519 authored by Deepti Gandluri, committed by Commit Bot

[wasm] Add F32x4{Abs, Neg, AddHoriz}

 - Remove redundant instruction from I16x8Splat
 - Force F32x4Splat to use movss, as using the MacroAssembler can mix SSE/AVX instructions

Bug: v8:6020
Change-Id: I781c22adecf892a79b6a38c3d83fc4022f9067de
Reviewed-on: https://chromium-review.googlesource.com/898429
Reviewed-by: Bill Budge <bbudge@chromium.org>
Reviewed-by: Jaroslav Sevcik <jarin@chromium.org>
Commit-Queue: Deepti Gandluri <gdeepti@chromium.org>
Cr-Commit-Position: refs/heads/master@{#51123}
parent cf9b4873
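A note on the second bullet: V8's MacroAssembler wrappers such as `Movss` emit the VEX-encoded AVX form (`vmovss`) when AVX is available, while the neighboring `shufps` in the splat sequence is always emitted as legacy SSE; mixing VEX and non-VEX instructions that touch the same XMM state can incur SSE/AVX transition penalties on Intel hardware. Calling the lowercase assembler `movss` directly keeps the whole sequence in one encoding domain. A rough sketch of the dispatch being bypassed — simplified, with a hypothetical helper name, not the literal V8 macro:

```cpp
// Sketch only (assumed shape of the MacroAssembler dispatch, hypothetical
// helper name): with AVX enabled the wrapper picks the VEX-encoded form,
// otherwise the legacy SSE form.
void EmitMovss(MacroAssembler* masm, XMMRegister dst, XMMRegister src) {
  if (CpuFeatures::IsSupported(AVX)) {
    masm->vmovss(dst, dst, src);  // VEX encoding (AVX form)
  } else {
    masm->movss(dst, src);        // legacy SSE encoding
  }
}
```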
src/compiler/instruction-selector.cc

@@ -2131,33 +2131,15 @@ void InstructionSelector::VisitF32x4UConvertI32x4(Node* node) {
 #endif  // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
         // && !V8_TARGET_ARCH_MIPS64
 
-#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
-    !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
-void InstructionSelector::VisitF32x4Abs(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitF32x4Neg(Node* node) { UNIMPLEMENTED(); }
-#endif  // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
-        // && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
-
 #if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
     !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_X64
+void InstructionSelector::VisitF32x4RecipApprox(Node* node) { UNIMPLEMENTED(); }
+
 void InstructionSelector::VisitF32x4RecipSqrtApprox(Node* node) {
   UNIMPLEMENTED();
 }
-#endif  // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
-        // && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_X64
 
-#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
-    !V8_TARGET_ARCH_MIPS64
 void InstructionSelector::VisitF32x4AddHoriz(Node* node) { UNIMPLEMENTED(); }
-#endif  // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
-        // && !V8_TARGET_ARCH_MIPS64
-
-#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
-    !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_X64
-void InstructionSelector::VisitF32x4RecipApprox(Node* node) { UNIMPLEMENTED(); }
-
-void InstructionSelector::VisitI32x4AddHoriz(Node* node) { UNIMPLEMENTED(); }
 #endif  // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_X64
         // && !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
@@ -2170,7 +2152,17 @@ void InstructionSelector::VisitI32x4SConvertF32x4(Node* node) {
 void InstructionSelector::VisitI32x4UConvertF32x4(Node* node) {
   UNIMPLEMENTED();
 }
+#endif  // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
+        // && !V8_TARGET_ARCH_MIPS64
+
+#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
+    !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_X64
+void InstructionSelector::VisitI32x4AddHoriz(Node* node) { UNIMPLEMENTED(); }
+#endif  // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
+        // && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_X64
 
+#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
+    !V8_TARGET_ARCH_MIPS64
 void InstructionSelector::VisitI32x4SConvertI16x8Low(Node* node) {
   UNIMPLEMENTED();
 }
src/compiler/x64/code-generator-x64.cc

@@ -2066,9 +2066,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     case kX64F32x4Splat: {
       XMMRegister dst = i.OutputSimd128Register();
       if (instr->InputAt(0)->IsFPRegister()) {
-        __ Movss(dst, i.InputDoubleRegister(0));
+        __ movss(dst, i.InputDoubleRegister(0));
       } else {
-        __ Movss(dst, i.InputOperand(0));
+        __ movss(dst, i.InputOperand(0));
       }
       __ shufps(dst, dst, 0x0);
       break;
@@ -2087,6 +2087,34 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ insertps(i.OutputSimd128Register(), i.InputDoubleRegister(2), select);
       break;
     }
+    case kX64F32x4Abs: {
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src = i.InputSimd128Register(0);
+      if (dst == src) {
+        __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+        __ psrld(kScratchDoubleReg, 1);
+        __ andps(i.OutputSimd128Register(), kScratchDoubleReg);
+      } else {
+        __ pcmpeqd(dst, dst);
+        __ psrld(dst, 1);
+        __ andps(dst, i.InputSimd128Register(0));
+      }
+      break;
+    }
+    case kX64F32x4Neg: {
+      XMMRegister dst = i.OutputSimd128Register();
+      XMMRegister src = i.InputSimd128Register(0);
+      if (dst == src) {
+        __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
+        __ pslld(kScratchDoubleReg, 31);
+        __ xorps(i.OutputSimd128Register(), kScratchDoubleReg);
+      } else {
+        __ pcmpeqd(dst, dst);
+        __ pslld(dst, 31);
+        __ xorps(dst, i.InputSimd128Register(0));
+      }
+      break;
+    }
     case kX64F32x4RecipApprox: {
       __ rcpps(i.OutputSimd128Register(), i.InputSimd128Register(0));
       break;
@@ -2100,6 +2128,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ addps(i.OutputSimd128Register(), i.InputSimd128Register(1));
       break;
     }
+    case kX64F32x4AddHoriz: {
+      DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
+      __ haddps(i.OutputSimd128Register(), i.InputSimd128Register(1));
+      break;
+    }
     case kX64F32x4Sub: {
       DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
       __ subps(i.OutputSimd128Register(), i.InputSimd128Register(1));
@@ -2273,7 +2306,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       XMMRegister dst = i.OutputSimd128Register();
       __ movd(dst, i.InputRegister(0));
       __ pshuflw(dst, dst, 0x0);
-      __ pshufhw(dst, dst, 0x0);
       __ pshufd(dst, dst, 0x0);
       break;
     }
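For reference, the new `kX64F32x4Abs`/`kX64F32x4Neg` sequences build their constants in-register: `pcmpeqd x, x` sets all 128 bits, a logical right shift of each 32-bit lane by 1 then yields the abs mask `0x7FFFFFFF` (clear the sign bit), and a left shift by 31 yields the neg mask `0x80000000` (flip the sign bit). A scalar C++ illustration of the same bit trick — not part of the commit:

```cpp
#include <cstdint>
#include <cstring>

// Scalar model of what the SIMD code does to each 32-bit lane.
float FloatAbs(float x) {
  uint32_t bits;
  std::memcpy(&bits, &x, sizeof(bits));  // view the float as raw bits
  bits &= 0x7FFFFFFFu;                   // all-ones >> 1: clear sign bit
  std::memcpy(&x, &bits, sizeof(x));
  return x;
}

float FloatNeg(float x) {
  uint32_t bits;
  std::memcpy(&bits, &x, sizeof(bits));
  bits ^= 0x80000000u;                   // all-ones << 31: flip sign bit
  std::memcpy(&x, &bits, sizeof(x));
  return x;
}
```

The `pshufhw` removed from the I16x8Splat sequence above was dead work: after `pshuflw` broadcasts word 0 across the low four words, `pshufd dst, dst, 0x0` already replicates doubleword 0 (two copies of that word) into all four doublewords, covering the high half as well.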
src/compiler/x64/instruction-codes-x64.h

@@ -149,9 +149,12 @@ namespace compiler {
   V(X64F32x4Splat)                 \
   V(X64F32x4ExtractLane)           \
   V(X64F32x4ReplaceLane)           \
+  V(X64F32x4Abs)                   \
+  V(X64F32x4Neg)                   \
   V(X64F32x4RecipApprox)           \
   V(X64F32x4RecipSqrtApprox)       \
   V(X64F32x4Add)                   \
+  V(X64F32x4AddHoriz)              \
   V(X64F32x4Sub)                   \
   V(X64F32x4Mul)                   \
   V(X64F32x4Min)                   \
src/compiler/x64/instruction-scheduler-x64.cc

@@ -128,7 +128,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kX64F32x4ReplaceLane:
     case kX64F32x4RecipApprox:
    case kX64F32x4RecipSqrtApprox:
+    case kX64F32x4Abs:
+    case kX64F32x4Neg:
     case kX64F32x4Add:
+    case kX64F32x4AddHoriz:
     case kX64F32x4Sub:
     case kX64F32x4Mul:
     case kX64F32x4Min:
src/compiler/x64/instruction-selector-x64.cc

@@ -2374,6 +2374,7 @@ VISIT_ATOMIC_BINOP(Xor)
 #define SIMD_BINOP_LIST(V) \
   V(F32x4Add)              \
+  V(F32x4AddHoriz)         \
   V(F32x4Sub)              \
   V(F32x4Mul)              \
   V(F32x4Min)              \
@@ -2435,6 +2436,8 @@ VISIT_ATOMIC_BINOP(Xor)
   V(S128Xor)
 #define SIMD_UNOP_LIST(V) \
+  V(F32x4Abs)             \
+  V(F32x4Neg)             \
   V(F32x4RecipApprox)     \
   V(F32x4RecipSqrtApprox) \
   V(I32x4Neg)             \
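`SIMD_BINOP_LIST` and `SIMD_UNOP_LIST` are X-macro lists: adding `V(F32x4AddHoriz)`, `V(F32x4Abs)`, and `V(F32x4Neg)` here is enough for the expansion sites elsewhere in the file to generate the corresponding visitor bodies. A generic sketch of the pattern, with hypothetical expansion macros rather than V8's actual ones:

```cpp
// X-macro pattern: define the list once...
#define MY_SIMD_BINOP_LIST(V) \
  V(F32x4Add)                 \
  V(F32x4AddHoriz)            \
  V(F32x4Sub)

// ...expand it once as an enum of opcodes...
#define AS_ENUM(Name) k##Name,
enum class SimdBinop { MY_SIMD_BINOP_LIST(AS_ENUM) };
#undef AS_ENUM

// ...and again as one stub visitor per entry, the way the instruction
// selector generates its Visit* functions from the list.
#define AS_VISITOR(Name) \
  void Visit##Name() { /* emit the matching machine instruction */ }
MY_SIMD_BINOP_LIST(AS_VISITOR)
#undef AS_VISITOR
```

The payoff is that a new opcode touches one line per list instead of a handful of hand-written switch cases and declarations.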
test/cctest/wasm/test-run-wasm-simd.cc

@@ -494,16 +494,12 @@ void RunF32x4UnOpTest(LowerSimd lower_simd, WasmOpcode simd_op,
   }
 }
 
-#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
-    V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
 WASM_SIMD_TEST(F32x4Abs) {
   RunF32x4UnOpTest(lower_simd, kExprF32x4Abs, std::abs);
 }
 WASM_SIMD_TEST(F32x4Neg) {
   RunF32x4UnOpTest(lower_simd, kExprF32x4Neg, Negate);
 }
-#endif  // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
-        // V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
 
 #if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
     V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_X64
@@ -1621,16 +1617,16 @@ WASM_SIMD_COMPILED_TEST(I16x8AddHoriz) {
   RunBinaryLaneOpTest<int16_t>(lower_simd, kExprI16x8AddHoriz,
                                {{1, 5, 9, 13, 17, 21, 25, 29}});
 }
-#endif  // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_X64 ||
-        // V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64
 
-#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
-    V8_TARGET_ARCH_MIPS64
 WASM_SIMD_COMPILED_TEST(F32x4AddHoriz) {
   RunBinaryLaneOpTest<float>(lower_simd, kExprF32x4AddHoriz,
                              {{1.0f, 5.0f, 9.0f, 13.0f}});
 }
+#endif  // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_X64 ||
+        // V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64
 
+#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
+    V8_TARGET_ARCH_MIPS64
 // Test some regular shuffles that may have special handling on some targets.
 // Test a normal and unary versions (where second operand isn't used).
 WASM_SIMD_COMPILED_TEST(S32x4Dup) {
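The expectations in these tests encode the lane semantics of horizontal add: `haddps` sums adjacent pairs, taking the low two result lanes from the first operand and the high two from the second. Assuming `RunBinaryLaneOpTest` fills the operands with lane indices (0..3 and 4..7 for the float case — an inference from the expected values, not shown in this diff), a small reference model reproduces the numbers:

```cpp
#include <array>
#include <cstdio>

// Reference model of F32x4AddHoriz (not from the commit): pairwise sums,
// low half of the result from operand a, high half from operand b.
std::array<float, 4> F32x4AddHoriz(const std::array<float, 4>& a,
                                   const std::array<float, 4>& b) {
  return {a[0] + a[1], a[2] + a[3], b[0] + b[1], b[2] + b[3]};
}

int main() {
  auto r = F32x4AddHoriz({0, 1, 2, 3}, {4, 5, 6, 7});
  std::printf("%g %g %g %g\n", r[0], r[1], r[2], r[3]);  // prints: 1 5 9 13
  return 0;
}
```

The I16x8 expectation `{1, 5, 9, 13, 17, 21, 25, 29}` follows the same pairwise pattern with eight 16-bit lanes per operand.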