Commit 667fafce authored by Ng Zhi An, committed by Commit Bot

Reland "[wasm-simd] Prototype f64x2 rounding instructions"

This is a reland of f7f72b7b

This was reverted because of a test timing out on slow_path
variant (https://crrev.com/c/2237131 for details). Turns out
the test is just really slow, and was skipped on this variant
in https://crrev.com/c/2237628. Relanding without changes.


Original change's description:
> [wasm-simd] Prototype f64x2 rounding instructions
>
> Implements f64x2 ceil, floor, trunc, nearestint, for interpreter and
> x64.
>
> Bug: v8:10553
> Change-Id: I12a260a3b1d728368e5525d317d30fc9581cae04
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2213082
> Commit-Queue: Zhi An Ng <zhin@chromium.org>
> Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
> Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#68241}

Tbr: tebbi@chromium.org
Bug: v8:10553
Change-Id: I4cdc23d0556f11310d32fa066f40b057fd49d2d7
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2237350
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Reviewed-by: Adam Klein <adamk@chromium.org>
Cr-Commit-Position: refs/heads/master@{#68304}
parent 414522e0
......@@ -3316,6 +3316,13 @@ void Assembler::roundps(XMMRegister dst, XMMRegister src, RoundingMode mode) {
emit(static_cast<byte>(mode) | 0x8);
}
// Emits the non-AVX roundpd encoding: sse4_instr with prefix 0x66 and opcode
// bytes 0x0F 0x3A 0x09, followed by a one-byte rounding immediate.
// Debug-asserts that AVX is not enabled.
void Assembler::roundpd(XMMRegister dst, XMMRegister src, RoundingMode mode) {
  DCHECK(!IsEnabled(AVX));
  // The immediate is the rounding mode with bit 3 (0x8) set, which masks the
  // precision exception.
  const byte rounding_imm = static_cast<byte>(mode) | 0x8;
  sse4_instr(dst, src, 0x66, 0x0F, 0x3A, 0x09);
  emit(rounding_imm);
}
void Assembler::movmskpd(Register dst, XMMRegister src) {
EnsureSpace ensure_space(this);
emit(0x66);
......
......@@ -1216,6 +1216,7 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void roundss(XMMRegister dst, XMMRegister src, RoundingMode mode);
void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);
void roundps(XMMRegister dst, XMMRegister src, RoundingMode mode);
void roundpd(XMMRegister dst, XMMRegister src, RoundingMode mode);
void cmpps(XMMRegister dst, XMMRegister src, int8_t cmp);
void cmpps(XMMRegister dst, Operand src, int8_t cmp);
......@@ -1437,6 +1438,10 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
vinstr(0x08, dst, xmm0, src, k66, k0F3A, kWIG);
emit(static_cast<byte>(mode) | 0x8); // Mask precision exception.
}
// AVX counterpart of roundpd: issues opcode 0x09 through vinstr (k66, k0F3A,
// kWIG) with xmm0 in the middle register slot, then appends the rounding
// immediate.
void vroundpd(XMMRegister dst, XMMRegister src, RoundingMode mode) {
  // Bit 3 (0x8) of the immediate masks the precision exception.
  const byte rounding_imm = static_cast<byte>(mode) | 0x8;
  vinstr(0x09, dst, xmm0, src, k66, k0F3A, kWIG);
  emit(rounding_imm);
}
void vsd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vinstr(op, dst, src1, src2, kF2, k0F, kWIG);
......
......@@ -282,6 +282,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP_SSE4_1(Pmovzxdq, pmovzxdq)
AVX_OP_SSE4_1(Pextrq, pextrq)
AVX_OP_SSE4_1(Roundps, roundps)
AVX_OP_SSE4_1(Roundpd, roundpd)
AVX_OP_SSE4_1(Roundss, roundss)
AVX_OP_SSE4_1(Roundsd, roundsd)
AVX_OP_SSE4_2(Pcmpgtq, pcmpgtq)
......
......@@ -1889,6 +1889,14 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitF64x2Pmin(node);
case IrOpcode::kF64x2Pmax:
return MarkAsSimd128(node), VisitF64x2Pmax(node);
case IrOpcode::kF64x2Ceil:
return MarkAsSimd128(node), VisitF64x2Ceil(node);
case IrOpcode::kF64x2Floor:
return MarkAsSimd128(node), VisitF64x2Floor(node);
case IrOpcode::kF64x2Trunc:
return MarkAsSimd128(node), VisitF64x2Trunc(node);
case IrOpcode::kF64x2NearestInt:
return MarkAsSimd128(node), VisitF64x2NearestInt(node);
case IrOpcode::kF32x4Splat:
return MarkAsSimd128(node), VisitF32x4Splat(node);
case IrOpcode::kF32x4ExtractLane:
......@@ -2679,6 +2687,10 @@ void InstructionSelector::VisitF64x2Pmax(Node* node) { UNIMPLEMENTED(); }
#if !V8_TARGET_ARCH_X64
// TODO(v8:10553) Prototyping floating point rounding instructions.
// f64x2 ceil/floor/trunc/nearest have no instruction selection here: these
// stubs are compiled only when !V8_TARGET_ARCH_X64 and abort via
// UNIMPLEMENTED() if such a node is ever visited.
void InstructionSelector::VisitF64x2Ceil(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Floor(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Trunc(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2NearestInt(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Ceil(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Floor(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Trunc(Node* node) { UNIMPLEMENTED(); }
......
......@@ -2707,6 +2707,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Roundps(i.OutputSimd128Register(), i.InputSimd128Register(0), mode);
break;
}
case kX64F64x2Round: {
  // The rounding mode travels in the opcode's MiscField; decode it and pass
  // it straight through to Roundpd along with the output/input registers.
  XMMRegister dst = i.OutputSimd128Register();
  XMMRegister src = i.InputSimd128Register(0);
  const auto rounding =
      static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
  __ Roundpd(dst, src, rounding);
  break;
}
case kX64F64x2Pmin: {
XMMRegister dst = i.OutputSimd128Register();
DCHECK_EQ(dst, i.InputSimd128Register(0));
......
......@@ -174,6 +174,7 @@ namespace compiler {
V(X64F64x2Qfms) \
V(X64F64x2Pmin) \
V(X64F64x2Pmax) \
V(X64F64x2Round) \
V(X64F32x4Splat) \
V(X64F32x4ExtractLane) \
V(X64F32x4ReplaceLane) \
......
......@@ -146,6 +146,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64F64x2Qfms:
case kX64F64x2Pmin:
case kX64F64x2Pmax:
case kX64F64x2Round:
case kX64F32x4Splat:
case kX64F32x4ExtractLane:
case kX64F32x4ReplaceLane:
......
......@@ -1466,7 +1466,11 @@ void VisitFloatUnop(InstructionSelector* selector, Node* node, Node* input,
V(F32x4Ceil, kX64F32x4Round | MiscField::encode(kRoundUp)) \
V(F32x4Floor, kX64F32x4Round | MiscField::encode(kRoundDown)) \
V(F32x4Trunc, kX64F32x4Round | MiscField::encode(kRoundToZero)) \
V(F32x4NearestInt, kX64F32x4Round | MiscField::encode(kRoundToNearest))
V(F32x4NearestInt, kX64F32x4Round | MiscField::encode(kRoundToNearest)) \
V(F64x2Ceil, kX64F64x2Round | MiscField::encode(kRoundUp)) \
V(F64x2Floor, kX64F64x2Round | MiscField::encode(kRoundDown)) \
V(F64x2Trunc, kX64F64x2Round | MiscField::encode(kRoundToZero)) \
V(F64x2NearestInt, kX64F64x2Round | MiscField::encode(kRoundToNearest))
#define RO_VISITOR(Name, opcode) \
void InstructionSelector::Visit##Name(Node* node) { \
......
......@@ -339,6 +339,10 @@ ShiftKind ShiftKindOf(Operator const* op) {
V(F64x2Qfms, Operator::kNoProperties, 3, 0, 1) \
V(F64x2Pmin, Operator::kNoProperties, 2, 0, 1) \
V(F64x2Pmax, Operator::kNoProperties, 2, 0, 1) \
V(F64x2Ceil, Operator::kNoProperties, 1, 0, 1) \
V(F64x2Floor, Operator::kNoProperties, 1, 0, 1) \
V(F64x2Trunc, Operator::kNoProperties, 1, 0, 1) \
V(F64x2NearestInt, Operator::kNoProperties, 1, 0, 1) \
V(F32x4Splat, Operator::kNoProperties, 1, 0, 1) \
V(F32x4SConvertI32x4, Operator::kNoProperties, 1, 0, 1) \
V(F32x4UConvertI32x4, Operator::kNoProperties, 1, 0, 1) \
......
......@@ -576,6 +576,10 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* F64x2Qfms();
const Operator* F64x2Pmin();
const Operator* F64x2Pmax();
const Operator* F64x2Ceil();
const Operator* F64x2Floor();
const Operator* F64x2Trunc();
const Operator* F64x2NearestInt();
const Operator* F32x4Splat();
const Operator* F32x4ExtractLane(int32_t);
......
......@@ -765,6 +765,10 @@
V(F64x2Qfms) \
V(F64x2Pmin) \
V(F64x2Pmax) \
V(F64x2Ceil) \
V(F64x2Floor) \
V(F64x2Trunc) \
V(F64x2NearestInt) \
V(F32x4Splat) \
V(F32x4ExtractLane) \
V(F32x4ReplaceLane) \
......
......@@ -4206,6 +4206,15 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
case wasm::kExprF64x2Pmax:
return graph()->NewNode(mcgraph()->machine()->F64x2Pmax(), inputs[0],
inputs[1]);
case wasm::kExprF64x2Ceil:
return graph()->NewNode(mcgraph()->machine()->F64x2Ceil(), inputs[0]);
case wasm::kExprF64x2Floor:
return graph()->NewNode(mcgraph()->machine()->F64x2Floor(), inputs[0]);
case wasm::kExprF64x2Trunc:
return graph()->NewNode(mcgraph()->machine()->F64x2Trunc(), inputs[0]);
case wasm::kExprF64x2NearestInt:
return graph()->NewNode(mcgraph()->machine()->F64x2NearestInt(),
inputs[0]);
case wasm::kExprF32x4Splat:
return graph()->NewNode(mcgraph()->machine()->F32x4Splat(), inputs[0]);
case wasm::kExprF32x4SConvertI32x4:
......
......@@ -952,6 +952,11 @@ int DisassemblerX64::AVXInstruction(byte* data) {
current += PrintRightXMMOperand(current);
AppendToBuffer(",0x%x", *current++);
break;
case 0x09:
// vroundpd: print the destination register (regop), then the r/m source
// operand, then the byte that follows the operand as a hex immediate.
AppendToBuffer("vroundpd %s,", NameOfXMMRegister(regop));
// PrintRightXMMOperand returns how many bytes it consumed.
current += PrintRightXMMOperand(current);
// The immediate is printed unmasked and `current` steps past it.
AppendToBuffer(",0x%x", *current++);
break;
case 0x0A:
AppendToBuffer("vroundss %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
......@@ -1851,6 +1856,12 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
current += PrintRightXMMOperand(current);
AppendToBuffer(",0x%x", (*current) & 3);
current += 1;
} else if (third_byte == 0x09) {
// roundpd: decode the ModR/M byte, print the destination register and the
// r/m source operand, then the trailing immediate.
get_modrm(*current, &mod, &regop, &rm);
AppendToBuffer("roundpd %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
// Only the low two bits of the immediate are shown (masked with 3).
AppendToBuffer(",0x%x", (*current) & 3);
// Step past the immediate byte.
current += 1;
} else if (third_byte == 0x0A) {
get_modrm(*current, &mod, &regop, &rm);
AppendToBuffer("roundss %s,", NameOfXMMRegister(regop));
......
......@@ -2261,6 +2261,10 @@ class WasmInterpreterInternals {
UNOP_CASE(F64x2Abs, f64x2, float2, 2, std::abs(a))
UNOP_CASE(F64x2Neg, f64x2, float2, 2, -a)
UNOP_CASE(F64x2Sqrt, f64x2, float2, 2, std::sqrt(a))
UNOP_CASE(F64x2Ceil, f64x2, float2, 2, ceil(a))
UNOP_CASE(F64x2Floor, f64x2, float2, 2, floor(a))
UNOP_CASE(F64x2Trunc, f64x2, float2, 2, trunc(a))
UNOP_CASE(F64x2NearestInt, f64x2, float2, 2, nearbyint(a))
UNOP_CASE(F32x4Abs, f32x4, float4, 4, std::abs(a))
UNOP_CASE(F32x4Neg, f32x4, float4, 4, -a)
UNOP_CASE(F32x4Sqrt, f32x4, float4, 4, std::sqrt(a))
......
......@@ -330,6 +330,10 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_F32x4_OP(Floor, "floor")
CASE_F32x4_OP(Trunc, "trunc")
CASE_F32x4_OP(NearestInt, "nearest")
CASE_F64x2_OP(Ceil, "ceil")
CASE_F64x2_OP(Floor, "floor")
CASE_F64x2_OP(Trunc, "trunc")
CASE_F64x2_OP(NearestInt, "nearest")
// Atomic operations.
CASE_OP(AtomicNotify, "atomic.notify")
......
......@@ -434,6 +434,10 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, const WasmFeatures&);
V(F64x2Div, 0xfdf3, s_ss) \
V(F64x2Min, 0xfdf4, s_ss) \
V(F64x2Max, 0xfdf5, s_ss) \
V(F64x2Ceil, 0xfddf, s_s) \
V(F64x2Floor, 0xfde2, s_s) \
V(F64x2Trunc, 0xfdee, s_s) \
V(F64x2NearestInt, 0xfdbe, s_s) \
V(I32x4SConvertF32x4, 0xfdf8, s_s) \
V(I32x4UConvertF32x4, 0xfdf9, s_s) \
V(F32x4SConvertI32x4, 0xfdfa, s_s) \
......
......@@ -589,6 +589,7 @@ TEST(DisasmX64) {
__ blendvpd(xmm5, Operand(rdx, 4));
__ roundps(xmm8, xmm3, kRoundUp);
__ roundpd(xmm8, xmm3, kRoundToNearest);
__ roundss(xmm8, xmm3, kRoundDown);
__ roundsd(xmm8, xmm3, kRoundDown);
......@@ -650,6 +651,7 @@ TEST(DisasmX64) {
__ vmovdqu(Operand(rbx, rcx, times_4, 10000), xmm0);
__ vroundps(xmm9, xmm2, kRoundUp);
__ vroundpd(xmm9, xmm2, kRoundToNearest);
__ vroundss(xmm9, xmm1, xmm2, kRoundDown);
__ vroundsd(xmm8, xmm3, xmm0, kRoundDown);
__ vucomisd(xmm9, xmm1);
......
......@@ -1325,6 +1325,30 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Sqrt) {
RunF64x2UnOpTest(execution_tier, lower_simd, kExprF64x2Sqrt, Sqrt);
}
// TODO(v8:10553) Prototyping floating-point rounding instructions.
#if V8_TARGET_ARCH_X64
// Runs kExprF64x2Ceil through RunF64x2UnOpTest with ceil as the reference
// function.
WASM_SIMD_TEST_NO_LOWERING(F64x2Ceil) {
// Presumably turns on the wasm_simd_post_mvp flag for this test's scope —
// confirm against the FLAG_SCOPE macro.
FLAG_SCOPE(wasm_simd_post_mvp);
// NOTE(review): the meaning of the trailing `true` is not visible here;
// check RunF64x2UnOpTest's signature.
RunF64x2UnOpTest(execution_tier, lower_simd, kExprF64x2Ceil, ceil, true);
}
// Runs kExprF64x2Floor through RunF64x2UnOpTest with floor as the reference
// function.
WASM_SIMD_TEST_NO_LOWERING(F64x2Floor) {
// Presumably turns on the wasm_simd_post_mvp flag for this test's scope —
// confirm against the FLAG_SCOPE macro.
FLAG_SCOPE(wasm_simd_post_mvp);
// NOTE(review): the meaning of the trailing `true` is not visible here;
// check RunF64x2UnOpTest's signature.
RunF64x2UnOpTest(execution_tier, lower_simd, kExprF64x2Floor, floor, true);
}
// Runs kExprF64x2Trunc through RunF64x2UnOpTest with trunc as the reference
// function.
WASM_SIMD_TEST_NO_LOWERING(F64x2Trunc) {
// Presumably turns on the wasm_simd_post_mvp flag for this test's scope —
// confirm against the FLAG_SCOPE macro.
FLAG_SCOPE(wasm_simd_post_mvp);
// NOTE(review): the meaning of the trailing `true` is not visible here;
// check RunF64x2UnOpTest's signature.
RunF64x2UnOpTest(execution_tier, lower_simd, kExprF64x2Trunc, trunc, true);
}
// Runs kExprF64x2NearestInt through RunF64x2UnOpTest with nearbyint as the
// reference function.
WASM_SIMD_TEST_NO_LOWERING(F64x2NearestInt) {
// Presumably turns on the wasm_simd_post_mvp flag for this test's scope —
// confirm against the FLAG_SCOPE macro.
FLAG_SCOPE(wasm_simd_post_mvp);
// NOTE(review): the meaning of the trailing `true` is not visible here;
// check RunF64x2UnOpTest's signature.
RunF64x2UnOpTest(execution_tier, lower_simd, kExprF64x2NearestInt, nearbyint,
true);
}
#endif // V8_TARGET_ARCH_X64
void RunF64x2BinOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, DoubleBinOp expected_op) {
WasmRunner<int32_t, double, double> r(execution_tier, lower_simd);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment