Commit 6b24d5d7 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][arm] Prototype f64x2.ceil

Prototype f64x2.ceil on ARM for both ARM v7 and ARM v8. ARM v8 has
support for vrintp; on ARM v7 we fall back to a C runtime call.
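
For reference, f64x2.ceil rounds each 64-bit lane up independently. A
minimal scalar model of the semantics (the function name here is
hypothetical, for illustration only):

  #include <cmath>

  // f64x2.ceil: apply ceil to each of the two double lanes in place.
  void F64x2CeilModel(double lanes[2]) {
    lanes[0] = std::ceil(lanes[0]);
    lanes[1] = std::ceil(lanes[1]);
  }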

Since ARM v8 uses vrintp, the same instruction used for the scalar
Float64RoundUp, wasm-compiler reuses the Float64RoundUp support check
to pick between the machine instruction and the runtime fallback, as
sketched below.
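
The dispatch in WasmGraphBuilder::SimdOp (mirrored in the
wasm-compiler hunk below) takes this shape:

  // vrintp is an ARM v8 instruction, and ARM v8 support is exactly what
  // gates the scalar Float64RoundUp, so its IsSupported() check doubles
  // as the F64x2Ceil check.
  if (!mcgraph()->machine()->Float64RoundUp().IsSupported())
    return BuildF64x2Ceil(inputs[0]);  // C runtime fallback (ARM v7)
  return graph()->NewNode(mcgraph()->machine()->F64x2Ceil(), inputs[0]);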

Bug: v8:10553
Change-Id: I5841c6a06f260debe8ae90d331bdcc2a0fa3278c
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2258813
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#68553}
parent 1e4282db
@@ -297,6 +297,7 @@ FUNCTION_REFERENCE(wasm_word32_rol, wasm::word32_rol_wrapper)
 FUNCTION_REFERENCE(wasm_word32_ror, wasm::word32_ror_wrapper)
 FUNCTION_REFERENCE(wasm_word64_rol, wasm::word64_rol_wrapper)
 FUNCTION_REFERENCE(wasm_word64_ror, wasm::word64_ror_wrapper)
+FUNCTION_REFERENCE(wasm_f64x2_ceil, wasm::f64x2_ceil_wrapper)
 FUNCTION_REFERENCE(wasm_f32x4_ceil, wasm::f32x4_ceil_wrapper)
 FUNCTION_REFERENCE(wasm_f32x4_floor, wasm::f32x4_floor_wrapper)
 FUNCTION_REFERENCE(wasm_f32x4_trunc, wasm::f32x4_trunc_wrapper)
......
@@ -206,6 +206,7 @@ class StatsCounter;
   V(wasm_word64_ror, "wasm::word64_ror") \
   V(wasm_word64_ctz, "wasm::word64_ctz") \
   V(wasm_word64_popcnt, "wasm::word64_popcnt") \
+  V(wasm_f64x2_ceil, "wasm::f64x2_ceil_wrapper") \
   V(wasm_f32x4_ceil, "wasm::f32x4_ceil_wrapper") \
   V(wasm_f32x4_floor, "wasm::f32x4_floor_wrapper") \
   V(wasm_f32x4_trunc, "wasm::f32x4_trunc_wrapper") \
......
@@ -2035,6 +2035,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ vmov(dst.high(), rhs.high(), gt);
       break;
     }
+    case kArmF64x2Ceil: {
+      CpuFeatureScope scope(tasm(), ARMv8);
+      Simd128Register dst = i.OutputSimd128Register();
+      Simd128Register src = i.InputSimd128Register(0);
+      __ vrintp(dst.low(), src.low());
+      __ vrintp(dst.high(), src.high());
+      break;
+    }
     case kArmI64x2SplatI32Pair: {
       Simd128Register dst = i.OutputSimd128Register();
       __ vdup(Neon32, dst, i.InputRegister(0));
......
@@ -146,6 +146,7 @@ namespace compiler {
   V(ArmF64x2Le) \
   V(ArmF64x2Pmin) \
   V(ArmF64x2Pmax) \
+  V(ArmF64x2Ceil) \
   V(ArmF32x4Splat) \
   V(ArmF32x4ExtractLane) \
   V(ArmF32x4ReplaceLane) \
......
@@ -126,6 +126,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kArmF64x2Le:
     case kArmF64x2Pmin:
    case kArmF64x2Pmax:
+    case kArmF64x2Ceil:
     case kArmF32x4Splat:
     case kArmF32x4ExtractLane:
     case kArmF32x4ReplaceLane:
......
@@ -1496,6 +1496,7 @@ void InstructionSelector::VisitUint32Mod(Node* node) {
   V(Float64RoundTiesAway, kArmVrintaF64) \
   V(Float32RoundTiesEven, kArmVrintnF32) \
   V(Float64RoundTiesEven, kArmVrintnF64) \
+  V(F64x2Ceil, kArmF64x2Ceil) \
   V(F32x4Ceil, kArmVrintpF32) \
   V(F32x4Floor, kArmVrintmF32) \
   V(F32x4Trunc, kArmVrintzF32) \
......
@@ -2693,12 +2693,12 @@ void InstructionSelector::VisitF64x2Pmax(Node* node) { UNIMPLEMENTED(); }
 // TODO(zhin): Temporary convoluted way to handle unimplemented opcodes on ARM
 // as we are implementing them one at a time.
 #if !V8_TARGET_ARCH_ARM
+void InstructionSelector::VisitF64x2Ceil(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF32x4Ceil(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF32x4Floor(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF32x4Trunc(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF32x4NearestInt(Node* node) { UNIMPLEMENTED(); }
 #endif  // !V8_TARGET_ARCH_ARM
-void InstructionSelector::VisitF64x2Ceil(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF64x2Floor(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF64x2Trunc(Node* node) { UNIMPLEMENTED(); }
 void InstructionSelector::VisitF64x2NearestInt(Node* node) { UNIMPLEMENTED(); }
......
@@ -4040,6 +4040,12 @@ Node* WasmGraphBuilder::BuildAsmjsStoreMem(MachineType type, Node* index,
   return val;
 }
 
+Node* WasmGraphBuilder::BuildF64x2Ceil(Node* input) {
+  MachineType type = MachineType::Simd128();
+  ExternalReference ref = ExternalReference::wasm_f64x2_ceil();
+  return BuildCFuncInstruction(ref, type, input);
+}
+
 Node* WasmGraphBuilder::BuildF32x4Ceil(Node* input) {
   MachineType type = MachineType::Simd128();
   ExternalReference ref = ExternalReference::wasm_f32x4_ceil();
@@ -4225,6 +4231,9 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
       return graph()->NewNode(mcgraph()->machine()->F64x2Pmax(), inputs[0],
                               inputs[1]);
     case wasm::kExprF64x2Ceil:
+      // Architecture support for F64x2Ceil and Float64RoundUp is the same.
+      if (!mcgraph()->machine()->Float64RoundUp().IsSupported())
+        return BuildF64x2Ceil(inputs[0]);
       return graph()->NewNode(mcgraph()->machine()->F64x2Ceil(), inputs[0]);
     case wasm::kExprF64x2Floor:
       return graph()->NewNode(mcgraph()->machine()->F64x2Floor(), inputs[0]);
......
@@ -557,6 +557,7 @@ class WasmGraphBuilder {
   Node* BuildAsmjsStoreMem(MachineType type, Node* index, Node* val);
 
   // Wasm SIMD.
+  Node* BuildF64x2Ceil(Node* input);
   Node* BuildF32x4Ceil(Node* input);
   Node* BuildF32x4Floor(Node* input);
   Node* BuildF32x4Trunc(Node* input);
......
@@ -411,6 +411,10 @@ void simd_float_round_wrapper(Address data) {
   }
 }
 
+void f64x2_ceil_wrapper(Address data) {
+  simd_float_round_wrapper<double, &ceil>(data);
+}
+
 void f32x4_ceil_wrapper(Address data) {
   simd_float_round_wrapper<float, &ceilf>(data);
 }
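
The new wrapper reuses the existing simd_float_round_wrapper template,
which, as its use here suggests, applies the rounding function to each
lane of the 16-byte buffer at data in place. A minimal sketch of that
shape, with the helper's internals assumed rather than taken from this
diff:

  // Assumed shape of the shared lane-wise rounding helper: read each
  // lane, round it, and write it back in place.
  template <typename T, T (*float_round_op)(T)>
  void simd_float_round_wrapper(Address data) {
    constexpr int lanes = kSimd128Size / sizeof(T);  // 2 doubles or 4 floats
    for (int i = 0; i < lanes; i++) {
      T value = ReadUnalignedValue<T>(data + i * sizeof(T));
      WriteUnalignedValue<T>(data + i * sizeof(T), float_round_op(value));
    }
  }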
......
@@ -79,6 +79,8 @@ V8_EXPORT_PRIVATE void word64_ror_wrapper(Address data);
 V8_EXPORT_PRIVATE void float64_pow_wrapper(Address data);
 
+V8_EXPORT_PRIVATE void f64x2_ceil_wrapper(Address data);
+
 V8_EXPORT_PRIVATE void f32x4_ceil_wrapper(Address data);
 V8_EXPORT_PRIVATE void f32x4_floor_wrapper(Address data);
......
@@ -1328,12 +1328,15 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Sqrt) {
 }
 
 // TODO(v8:10553) Prototyping floating-point rounding instructions.
-#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X
+#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X || \
+    V8_TARGET_ARCH_ARM
 WASM_SIMD_TEST_NO_LOWERING(F64x2Ceil) {
   FLAG_SCOPE(wasm_simd_post_mvp);
   RunF64x2UnOpTest(execution_tier, lower_simd, kExprF64x2Ceil, ceil, true);
 }
+#endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X ||
+        // V8_TARGET_ARCH_ARM
 
+#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X
 WASM_SIMD_TEST_NO_LOWERING(F64x2Floor) {
   FLAG_SCOPE(wasm_simd_post_mvp);
   RunF64x2UnOpTest(execution_tier, lower_simd, kExprF64x2Floor, floor, true);
......