Commit b7d14790 authored by Ng Zhi An's avatar Ng Zhi An Committed by Commit Bot

[wasm-simd] Prototype f32x4 rounding instructions

Implements f32x4 ceil, floor, trunc, nearestint, for interpreter and
x64.

Bug: v8:10553
Change-Id: Iab747cbd2a872aa6cd4ad23c5b8334d5c8e4da61
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2212435Reviewed-by: 's avatarDeepti Gandluri <gdeepti@chromium.org>
Reviewed-by: 's avatarTobias Tebbi <tebbi@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#68054}
parent 9036662f
......@@ -3424,6 +3424,13 @@ void Assembler::roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode) {
emit(static_cast<byte>(mode) | 0x8);
}
void Assembler::roundps(XMMRegister dst, XMMRegister src, RoundingMode mode) {
DCHECK(!IsEnabled(AVX));
sse4_instr(dst, src, 0x66, 0x0F, 0x3A, 0x08);
// Mask precision exception.
emit(static_cast<byte>(mode) | 0x8);
}
void Assembler::movmskpd(Register dst, XMMRegister src) {
EnsureSpace ensure_space(this);
emit(0x66);
......
......@@ -1141,6 +1141,7 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void roundss(XMMRegister dst, XMMRegister src, RoundingMode mode);
void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);
void roundps(XMMRegister dst, XMMRegister src, RoundingMode mode);
void cmpps(XMMRegister dst, XMMRegister src, int8_t cmp);
void cmpps(XMMRegister dst, Operand src, int8_t cmp);
......@@ -1358,6 +1359,11 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
vinstr(0x0b, dst, src1, src2, k66, k0F3A, kWIG);
emit(static_cast<byte>(mode) | 0x8); // Mask precision exception.
}
void vroundps(XMMRegister dst, XMMRegister src1, XMMRegister src2,
RoundingMode mode) {
vinstr(0x08, dst, src1, src2, k66, k0F3A, kWIG);
emit(static_cast<byte>(mode) | 0x8); // Mask precision exception.
}
void vsd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vinstr(op, dst, src1, src2, kF2, k0F, kWIG);
......
......@@ -176,6 +176,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP(Cmpnlepd, cmpnlepd)
AVX_OP(Roundss, roundss)
AVX_OP(Roundsd, roundsd)
AVX_OP(Roundps, roundps)
AVX_OP(Sqrtss, sqrtss)
AVX_OP(Sqrtsd, sqrtsd)
AVX_OP(Sqrtps, sqrtps)
......
......@@ -1939,6 +1939,14 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitF32x4Pmin(node);
case IrOpcode::kF32x4Pmax:
return MarkAsSimd128(node), VisitF32x4Pmax(node);
case IrOpcode::kF32x4Ceil:
return MarkAsSimd128(node), VisitF32x4Ceil(node);
case IrOpcode::kF32x4Floor:
return MarkAsSimd128(node), VisitF32x4Floor(node);
case IrOpcode::kF32x4Trunc:
return MarkAsSimd128(node), VisitF32x4Trunc(node);
case IrOpcode::kF32x4NearestInt:
return MarkAsSimd128(node), VisitF32x4NearestInt(node);
case IrOpcode::kI64x2Splat:
return MarkAsSimd128(node), VisitI64x2Splat(node);
case IrOpcode::kI64x2SplatI32Pair:
......@@ -2662,6 +2670,14 @@ void InstructionSelector::VisitF64x2Pmax(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_IA32
// && !V8_TARGET_ARCH_X64
#if !V8_TARGET_ARCH_X64
// TODO(v8:10553) Prototyping floating point rounding instructions.
void InstructionSelector::VisitF32x4Ceil(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Floor(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Trunc(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4NearestInt(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
void InstructionSelector::VisitParameter(Node* node) {
......
......@@ -2619,6 +2619,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Maxps(dst, i.InputSimd128Register(1));
break;
}
case kX64F32x4Round: {
RoundingMode const mode =
static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
__ Roundps(i.OutputSimd128Register(), i.InputSimd128Register(0), mode);
break;
}
case kX64F64x2Pmin: {
XMMRegister dst = i.OutputSimd128Register();
DCHECK_EQ(dst, i.InputSimd128Register(0));
......
......@@ -199,6 +199,7 @@ namespace compiler {
V(X64F32x4Qfms) \
V(X64F32x4Pmin) \
V(X64F32x4Pmax) \
V(X64F32x4Round) \
V(X64I64x2Splat) \
V(X64I64x2ExtractLane) \
V(X64I64x2ReplaceLane) \
......
......@@ -171,6 +171,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64F32x4Qfms:
case kX64F32x4Pmin:
case kX64F32x4Pmax:
case kX64F32x4Round:
case kX64I64x2Splat:
case kX64I64x2ExtractLane:
case kX64I64x2ReplaceLane:
......
......@@ -1461,7 +1461,12 @@ void VisitFloatUnop(InstructionSelector* selector, Node* node, Node* input,
V(Float64RoundTruncate, kSSEFloat64Round | MiscField::encode(kRoundToZero)) \
V(Float32RoundTiesEven, \
kSSEFloat32Round | MiscField::encode(kRoundToNearest)) \
V(Float64RoundTiesEven, kSSEFloat64Round | MiscField::encode(kRoundToNearest))
V(Float64RoundTiesEven, \
kSSEFloat64Round | MiscField::encode(kRoundToNearest)) \
V(F32x4Ceil, kX64F32x4Round | MiscField::encode(kRoundUp)) \
V(F32x4Floor, kX64F32x4Round | MiscField::encode(kRoundDown)) \
V(F32x4Trunc, kX64F32x4Round | MiscField::encode(kRoundToZero)) \
V(F32x4NearestInt, kX64F32x4Round | MiscField::encode(kRoundToNearest))
#define RO_VISITOR(Name, opcode) \
void InstructionSelector::Visit##Name(Node* node) { \
......
......@@ -362,6 +362,10 @@ ShiftKind ShiftKindOf(Operator const* op) {
V(F32x4Qfms, Operator::kNoProperties, 3, 0, 1) \
V(F32x4Pmin, Operator::kNoProperties, 2, 0, 1) \
V(F32x4Pmax, Operator::kNoProperties, 2, 0, 1) \
V(F32x4Ceil, Operator::kNoProperties, 1, 0, 1) \
V(F32x4Floor, Operator::kNoProperties, 1, 0, 1) \
V(F32x4Trunc, Operator::kNoProperties, 1, 0, 1) \
V(F32x4NearestInt, Operator::kNoProperties, 1, 0, 1) \
V(I64x2Splat, Operator::kNoProperties, 1, 0, 1) \
V(I64x2SplatI32Pair, Operator::kNoProperties, 2, 0, 1) \
V(I64x2Neg, Operator::kNoProperties, 1, 0, 1) \
......
......@@ -602,6 +602,10 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* F32x4Qfms();
const Operator* F32x4Pmin();
const Operator* F32x4Pmax();
const Operator* F32x4Ceil();
const Operator* F32x4Floor();
const Operator* F32x4Trunc();
const Operator* F32x4NearestInt();
const Operator* I64x2Splat();
const Operator* I64x2SplatI32Pair();
......
......@@ -792,6 +792,10 @@
V(F32x4Qfms) \
V(F32x4Pmin) \
V(F32x4Pmax) \
V(F32x4Ceil) \
V(F32x4Floor) \
V(F32x4Trunc) \
V(F32x4NearestInt) \
V(I64x2Splat) \
V(I64x2SplatI32Pair) \
V(I64x2ExtractLane) \
......
......@@ -4254,6 +4254,15 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
case wasm::kExprF32x4Pmax:
return graph()->NewNode(mcgraph()->machine()->F32x4Pmax(), inputs[0],
inputs[1]);
case wasm::kExprF32x4Ceil:
return graph()->NewNode(mcgraph()->machine()->F32x4Ceil(), inputs[0]);
case wasm::kExprF32x4Floor:
return graph()->NewNode(mcgraph()->machine()->F32x4Floor(), inputs[0]);
case wasm::kExprF32x4Trunc:
return graph()->NewNode(mcgraph()->machine()->F32x4Trunc(), inputs[0]);
case wasm::kExprF32x4NearestInt:
return graph()->NewNode(mcgraph()->machine()->F32x4NearestInt(),
inputs[0]);
case wasm::kExprI64x2Splat:
return graph()->NewNode(mcgraph()->machine()->I64x2Splat(), inputs[0]);
case wasm::kExprI64x2Neg:
......
......@@ -2341,6 +2341,10 @@ class ThreadImpl {
UNOP_CASE(F32x4Sqrt, f32x4, float4, 4, std::sqrt(a))
UNOP_CASE(F32x4RecipApprox, f32x4, float4, 4, base::Recip(a))
UNOP_CASE(F32x4RecipSqrtApprox, f32x4, float4, 4, base::RecipSqrt(a))
UNOP_CASE(F32x4Ceil, f32x4, float4, 4, ceilf(a))
UNOP_CASE(F32x4Floor, f32x4, float4, 4, floorf(a))
UNOP_CASE(F32x4Trunc, f32x4, float4, 4, truncf(a))
UNOP_CASE(F32x4NearestInt, f32x4, float4, 4, nearbyintf(a))
UNOP_CASE(I64x2Neg, i64x2, int2, 2, base::NegateWithWraparound(a))
UNOP_CASE(I32x4Neg, i32x4, int4, 4, base::NegateWithWraparound(a))
UNOP_CASE(I32x4Abs, i32x4, int4, 4, std::abs(a))
......
......@@ -326,6 +326,11 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_F64x2_OP(Pmin, "pmin")
CASE_F64x2_OP(Pmax, "pmax")
CASE_F32x4_OP(Ceil, "ceil")
CASE_F32x4_OP(Floor, "floor")
CASE_F32x4_OP(Trunc, "trunc")
CASE_F32x4_OP(NearestInt, "nearest")
// Atomic operations.
CASE_OP(AtomicNotify, "atomic.notify")
CASE_INT_OP(AtomicWait, "atomic.wait")
......
......@@ -424,6 +424,10 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, const WasmFeatures&);
V(F32x4Div, 0xfde7, s_ss) \
V(F32x4Min, 0xfde8, s_ss) \
V(F32x4Max, 0xfde9, s_ss) \
V(F32x4Ceil, 0xfdda, s_s) \
V(F32x4Floor, 0xfddb, s_s) \
V(F32x4Trunc, 0xfddc, s_s) \
V(F32x4NearestInt, 0xfdde, s_s) \
V(F64x2Abs, 0xfdec, s_s) \
V(F64x2Neg, 0xfded, s_s) \
V(F64x2Sqrt, 0xfdef, s_s) \
......
......@@ -690,6 +690,30 @@ WASM_SIMD_TEST(F32x4RecipSqrtApprox) {
base::RecipSqrt, false /* !exact */);
}
// TODO(v8:10553) Prototyping floating-point rounding instructions.
#if V8_TARGET_ARCH_X64
WASM_SIMD_TEST_NO_LOWERING(F32x4Ceil) {
FLAG_SCOPE(wasm_simd_post_mvp);
RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4Ceil, ceilf, true);
}
WASM_SIMD_TEST_NO_LOWERING(F32x4Floor) {
FLAG_SCOPE(wasm_simd_post_mvp);
RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4Floor, floorf, true);
}
WASM_SIMD_TEST_NO_LOWERING(F32x4Trunc) {
FLAG_SCOPE(wasm_simd_post_mvp);
RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4Trunc, truncf, true);
}
WASM_SIMD_TEST_NO_LOWERING(F32x4NearestInt) {
FLAG_SCOPE(wasm_simd_post_mvp);
RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4NearestInt, nearbyintf,
true);
}
#endif // V8_TARGET_ARCH_X64
void RunF32x4BinOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, FloatBinOp expected_op) {
WasmRunner<int32_t, float, float> r(execution_tier, lower_simd);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment