Commit 4559bd69 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][arm64] Prototype f32x4 and f64x2 rounding

Implements f32x4 and f64x2 ceil, floor, trunc, and nearestint on arm64.

Bug: v8:10553
Change-Id: I346c6e60719ea953ff1adc9d8791768838e57cb9
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2213083
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#68309}
parent dbc19aff
...@@ -1924,6 +1924,22 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -1924,6 +1924,22 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B()); __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B());
break; break;
} }
case kArm64F64x2RoundUp:
__ Frintp(i.OutputSimd128Register().V2D(),
i.InputSimd128Register(0).V2D());
break;
case kArm64F64x2RoundDown:
__ Frintm(i.OutputSimd128Register().V2D(),
i.InputSimd128Register(0).V2D());
break;
case kArm64F64x2RoundTruncate:
__ Frintz(i.OutputSimd128Register().V2D(),
i.InputSimd128Register(0).V2D());
break;
case kArm64F64x2RoundTiesEven:
__ Frintn(i.OutputSimd128Register().V2D(),
i.InputSimd128Register(0).V2D());
break;
case kArm64F32x4Splat: { case kArm64F32x4Splat: {
__ Dup(i.OutputSimd128Register().V4S(), i.InputSimd128Register(0).S(), 0); __ Dup(i.OutputSimd128Register().V4S(), i.InputSimd128Register(0).S(), 0);
break; break;
...@@ -1997,6 +2013,22 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -1997,6 +2013,22 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B()); __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B());
break; break;
} }
case kArm64F32x4RoundUp:
__ Frintp(i.OutputSimd128Register().V4S(),
i.InputSimd128Register(0).V4S());
break;
case kArm64F32x4RoundDown:
__ Frintm(i.OutputSimd128Register().V4S(),
i.InputSimd128Register(0).V4S());
break;
case kArm64F32x4RoundTruncate:
__ Frintz(i.OutputSimd128Register().V4S(),
i.InputSimd128Register(0).V4S());
break;
case kArm64F32x4RoundTiesEven:
__ Frintn(i.OutputSimd128Register().V4S(),
i.InputSimd128Register(0).V4S());
break;
case kArm64I64x2Splat: { case kArm64I64x2Splat: {
__ Dup(i.OutputSimd128Register().V2D(), i.InputRegister64(0)); __ Dup(i.OutputSimd128Register().V2D(), i.InputRegister64(0));
break; break;
......
...@@ -188,6 +188,10 @@ namespace compiler { ...@@ -188,6 +188,10 @@ namespace compiler {
V(Arm64F64x2Qfms) \ V(Arm64F64x2Qfms) \
V(Arm64F64x2Pmin) \ V(Arm64F64x2Pmin) \
V(Arm64F64x2Pmax) \ V(Arm64F64x2Pmax) \
V(Arm64F64x2RoundUp) \
V(Arm64F64x2RoundDown) \
V(Arm64F64x2RoundTruncate) \
V(Arm64F64x2RoundTiesEven) \
V(Arm64F32x4Splat) \ V(Arm64F32x4Splat) \
V(Arm64F32x4ExtractLane) \ V(Arm64F32x4ExtractLane) \
V(Arm64F32x4ReplaceLane) \ V(Arm64F32x4ReplaceLane) \
...@@ -213,6 +217,10 @@ namespace compiler { ...@@ -213,6 +217,10 @@ namespace compiler {
V(Arm64F32x4Qfms) \ V(Arm64F32x4Qfms) \
V(Arm64F32x4Pmin) \ V(Arm64F32x4Pmin) \
V(Arm64F32x4Pmax) \ V(Arm64F32x4Pmax) \
V(Arm64F32x4RoundUp) \
V(Arm64F32x4RoundDown) \
V(Arm64F32x4RoundTruncate) \
V(Arm64F32x4RoundTiesEven) \
V(Arm64I64x2Splat) \ V(Arm64I64x2Splat) \
V(Arm64I64x2ExtractLane) \ V(Arm64I64x2ExtractLane) \
V(Arm64I64x2ReplaceLane) \ V(Arm64I64x2ReplaceLane) \
......
...@@ -158,6 +158,10 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -158,6 +158,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64F64x2Qfms: case kArm64F64x2Qfms:
case kArm64F64x2Pmin: case kArm64F64x2Pmin:
case kArm64F64x2Pmax: case kArm64F64x2Pmax:
case kArm64F64x2RoundUp:
case kArm64F64x2RoundDown:
case kArm64F64x2RoundTruncate:
case kArm64F64x2RoundTiesEven:
case kArm64F32x4Splat: case kArm64F32x4Splat:
case kArm64F32x4ExtractLane: case kArm64F32x4ExtractLane:
case kArm64F32x4ReplaceLane: case kArm64F32x4ReplaceLane:
...@@ -183,6 +187,10 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -183,6 +187,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64F32x4Qfms: case kArm64F32x4Qfms:
case kArm64F32x4Pmin: case kArm64F32x4Pmin:
case kArm64F32x4Pmax: case kArm64F32x4Pmax:
case kArm64F32x4RoundUp:
case kArm64F32x4RoundDown:
case kArm64F32x4RoundTruncate:
case kArm64F32x4RoundTiesEven:
case kArm64I64x2Splat: case kArm64I64x2Splat:
case kArm64I64x2ExtractLane: case kArm64I64x2ExtractLane:
case kArm64I64x2ReplaceLane: case kArm64I64x2ReplaceLane:
......
...@@ -1365,7 +1365,15 @@ void InstructionSelector::VisitWord64Ror(Node* node) { ...@@ -1365,7 +1365,15 @@ void InstructionSelector::VisitWord64Ror(Node* node) {
V(Float64RoundTiesEven, kArm64Float64RoundTiesEven) \ V(Float64RoundTiesEven, kArm64Float64RoundTiesEven) \
V(Float64ExtractLowWord32, kArm64Float64ExtractLowWord32) \ V(Float64ExtractLowWord32, kArm64Float64ExtractLowWord32) \
V(Float64ExtractHighWord32, kArm64Float64ExtractHighWord32) \ V(Float64ExtractHighWord32, kArm64Float64ExtractHighWord32) \
V(Float64SilenceNaN, kArm64Float64SilenceNaN) V(Float64SilenceNaN, kArm64Float64SilenceNaN) \
V(F32x4Ceil, kArm64F32x4RoundUp) \
V(F32x4Floor, kArm64F32x4RoundDown) \
V(F32x4Trunc, kArm64F32x4RoundTruncate) \
V(F32x4NearestInt, kArm64F32x4RoundTiesEven) \
V(F64x2Ceil, kArm64F64x2RoundUp) \
V(F64x2Floor, kArm64F64x2RoundDown) \
V(F64x2Trunc, kArm64F64x2RoundTruncate) \
V(F64x2NearestInt, kArm64F64x2RoundTiesEven)
#define RRR_OP_LIST(V) \ #define RRR_OP_LIST(V) \
V(Int32Div, kArm64Idiv32) \ V(Int32Div, kArm64Idiv32) \
......
...@@ -2685,7 +2685,7 @@ void InstructionSelector::VisitF64x2Pmax(Node* node) { UNIMPLEMENTED(); } ...@@ -2685,7 +2685,7 @@ void InstructionSelector::VisitF64x2Pmax(Node* node) { UNIMPLEMENTED(); }
// && !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X && // && !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X &&
// !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64 // !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
#if !V8_TARGET_ARCH_X64 #if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64
// TODO(v8:10553) Prototyping floating point rounding instructions. // TODO(v8:10553) Prototyping floating point rounding instructions.
void InstructionSelector::VisitF64x2Ceil(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Ceil(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Floor(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Floor(Node* node) { UNIMPLEMENTED(); }
...@@ -2695,7 +2695,7 @@ void InstructionSelector::VisitF32x4Ceil(Node* node) { UNIMPLEMENTED(); } ...@@ -2695,7 +2695,7 @@ void InstructionSelector::VisitF32x4Ceil(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Floor(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF32x4Floor(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Trunc(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF32x4Trunc(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4NearestInt(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF32x4NearestInt(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 #endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); } void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
......
...@@ -691,7 +691,7 @@ WASM_SIMD_TEST(F32x4RecipSqrtApprox) { ...@@ -691,7 +691,7 @@ WASM_SIMD_TEST(F32x4RecipSqrtApprox) {
} }
// TODO(v8:10553) Prototyping floating-point rounding instructions. // TODO(v8:10553) Prototyping floating-point rounding instructions.
#if V8_TARGET_ARCH_X64 #if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
WASM_SIMD_TEST_NO_LOWERING(F32x4Ceil) { WASM_SIMD_TEST_NO_LOWERING(F32x4Ceil) {
FLAG_SCOPE(wasm_simd_post_mvp); FLAG_SCOPE(wasm_simd_post_mvp);
RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4Ceil, ceilf, true); RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4Ceil, ceilf, true);
...@@ -712,7 +712,7 @@ WASM_SIMD_TEST_NO_LOWERING(F32x4NearestInt) { ...@@ -712,7 +712,7 @@ WASM_SIMD_TEST_NO_LOWERING(F32x4NearestInt) {
RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4NearestInt, nearbyintf, RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4NearestInt, nearbyintf,
true); true);
} }
#endif // V8_TARGET_ARCH_X64 #endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
void RunF32x4BinOpTest(ExecutionTier execution_tier, LowerSimd lower_simd, void RunF32x4BinOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, FloatBinOp expected_op) { WasmOpcode opcode, FloatBinOp expected_op) {
...@@ -1326,7 +1326,7 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Sqrt) { ...@@ -1326,7 +1326,7 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Sqrt) {
} }
// TODO(v8:10553) Prototyping floating-point rounding instructions. // TODO(v8:10553) Prototyping floating-point rounding instructions.
#if V8_TARGET_ARCH_X64 #if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
WASM_SIMD_TEST_NO_LOWERING(F64x2Ceil) { WASM_SIMD_TEST_NO_LOWERING(F64x2Ceil) {
FLAG_SCOPE(wasm_simd_post_mvp); FLAG_SCOPE(wasm_simd_post_mvp);
RunF64x2UnOpTest(execution_tier, lower_simd, kExprF64x2Ceil, ceil, true); RunF64x2UnOpTest(execution_tier, lower_simd, kExprF64x2Ceil, ceil, true);
...@@ -1347,7 +1347,7 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2NearestInt) { ...@@ -1347,7 +1347,7 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2NearestInt) {
RunF64x2UnOpTest(execution_tier, lower_simd, kExprF64x2NearestInt, nearbyint, RunF64x2UnOpTest(execution_tier, lower_simd, kExprF64x2NearestInt, nearbyint,
true); true);
} }
#endif // V8_TARGET_ARCH_X64 #endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
void RunF64x2BinOpTest(ExecutionTier execution_tier, LowerSimd lower_simd, void RunF64x2BinOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, DoubleBinOp expected_op) { WasmOpcode opcode, DoubleBinOp expected_op) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment