Commit d81888bc authored by Ng Zhi An, committed by V8 LUCI CQ

[relaxed-simd][wasm] Prototype relaxed int-float trunc

4 instructions, int32x4.trunc_f32x4_{s,u},
int32x4.trunc_f64x2_{s,u}_zero.

Drive-by cleanup to wasm-interpreter to use saturated_cast.

The machine ops are named <int>Trunc<float>, dropping the "sat" since
these don't do any saturation anymore.

Bug: v8:12284
Change-Id: I2d4d6a61b819b287fee69e3eea03dd3151cfa10d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3223166
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/main@{#77598}
parent 715d5a3a
......@@ -242,6 +242,7 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
AVX_OP(Cvtps2pd, cvtps2pd)
AVX_OP(Cvtsd2ss, cvtsd2ss)
AVX_OP(Cvtss2sd, cvtss2sd)
AVX_OP(Cvttpd2dq, cvttpd2dq)
AVX_OP(Cvttps2dq, cvttps2dq)
AVX_OP(Cvttsd2si, cvttsd2si)
AVX_OP(Cvttss2si, cvttss2si)
......@@ -728,6 +729,68 @@ class V8_EXPORT_PRIVATE SharedTurboAssemblerBase : public SharedTurboAssembler {
}
}
// Truncates each f64 lane of |src| to an unsigned i32, packing the two
// results into the low two i32 lanes of |dst| and zeroing the high lanes.
// "Relaxed" semantics: unlike I32x4TruncSatF64x2UZero there is no
// saturation, so NaNs and out-of-uint32-range lanes produce
// implementation-defined results.
// |tmp| is a GP register used to address an external reference; |scratch|
// is an extra XMM register for the final shuffle.
void I32x4TruncF64x2UZero(XMMRegister dst, XMMRegister src, Register tmp,
XMMRegister scratch) {
// TODO(zhin): call this from I32x4TruncSatF64x2UZero.
ASM_CODE_COMMENT(this);
// Non-AVX binary ops below are destructive on their first operand, so copy
// src into dst up front and operate on dst in place.
if (dst != src && !CpuFeatures::IsSupported(AVX)) {
movaps(dst, src);
src = dst;
}
// Same as I32x4TruncSatF64x2UZero but without the saturation.
// Round toward zero to an integral-valued double (i.e. truncate).
Roundpd(dst, src, kRoundToZero);
// Add to special double where significant bits == uint32.
// Adding 2^52 places the integer value in the low 32 bits of each
// double's significand (for values that fit in a uint32).
Addpd(dst, dst,
ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_double_2_power_52(), tmp));
// Extract low 32 bits of each double's significand, zero top lanes.
// dst = [dst[0], dst[2], 0, 0]
Shufps(dst, dst, scratch, 0x88);
}
// Truncates each f32 lane of |src| to an unsigned i32 in |dst|. "Relaxed"
// semantics: NaNs and lanes outside uint32 range give implementation-defined
// results rather than saturating.
// |scratch| is a GP register used to address an external reference; |tmp| is
// a second XMM register used to split convertible from overflowing lanes.
void I32x4TruncF32x4U(XMMRegister dst, XMMRegister src, Register scratch,
XMMRegister tmp) {
ASM_CODE_COMMENT(this);
// int32_overflow_as_float is INT32_MAX + 1 (2147483648.0f); lanes strictly
// below it convert correctly via the signed cvttps2dq.
Operand int32_overflow_op = ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_int32_overflow_as_float(), scratch);
// tmp = per-lane all-ones mask where src < INT32_MAX + 1, else all-zeros.
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(this, AVX);
vcmpltps(tmp, src, int32_overflow_op);
} else {
movaps(tmp, src);
cmpltps(tmp, int32_overflow_op);
}
// In tmp, lanes < INT32_MAX are left alone, other lanes are zeroed.
Pand(tmp, src);
// tmp = src with all the valid conversions
if (dst != src) {
Movaps(dst, src);
}
// In dst, lanes < INT32_MAX are zeroed, other lanes left alone.
Pxor(dst, tmp);
// tmp contains only lanes which can be converted correctly (<INT32_MAX)
Cvttps2dq(tmp, tmp);
// Bit-trick follows:
// All integers from INT32_MAX to UINT32_MAX that are representable as
// floats lie between [0x4f00'0000,0x4f80'0000).
// The bit representation of the integers is actually shifted right by 8.
// For example given 2147483904.0f (which fits in UINT32_MAX):
//
// 01001111 00000000 00000000 00000001 (float 0x4f00'0001)
//         ^^^^^^^^^^^^^^^^^^^^^^^^^^^
// the implicit leading 1 plus these 23 mantissa bits are exactly the
// top 24 bits of the int representation,
// but the top bit still needs to be flipped:
// 10000000 00000000 00000001 00000000 (int 0x8000'0100)
//
// So what needs to be done is to flip bit 23, which is the lowest bit of
// the exponent, which means multiply by 2 (or addps to itself).
Addps(dst, dst, dst);
// Then shift to get the bit representation of the int.
Pslld(dst, byte{8});
// Merge the converted lanes and bit shifted lanes.
Paddd(dst, tmp);
}
void I32x4ExtAddPairwiseI16x8S(XMMRegister dst, XMMRegister src,
Register scratch) {
ASM_CODE_COMMENT(this);
......
......@@ -2368,6 +2368,14 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitF64x2RelaxedMin(node);
case IrOpcode::kF64x2RelaxedMax:
return MarkAsSimd128(node), VisitF64x2RelaxedMax(node);
case IrOpcode::kI32x4RelaxedTruncF64x2SZero:
return MarkAsSimd128(node), VisitI32x4RelaxedTruncF64x2SZero(node);
case IrOpcode::kI32x4RelaxedTruncF64x2UZero:
return MarkAsSimd128(node), VisitI32x4RelaxedTruncF64x2UZero(node);
case IrOpcode::kI32x4RelaxedTruncF32x4S:
return MarkAsSimd128(node), VisitI32x4RelaxedTruncF32x4S(node);
case IrOpcode::kI32x4RelaxedTruncF32x4U:
return MarkAsSimd128(node), VisitI32x4RelaxedTruncF32x4U(node);
default:
FATAL("Unexpected operator #%d:%s @ node #%d", node->opcode(),
node->op()->mnemonic(), node->id());
......@@ -2798,6 +2806,18 @@ void InstructionSelector::VisitF32x4RelaxedMin(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4RelaxedMax(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2RelaxedMin(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2RelaxedMax(Node* node) { UNIMPLEMENTED(); }
// Relaxed int<-float truncation stubs for architectures that do not yet
// implement these instructions; this block sits inside the
// !V8_TARGET_ARCH_X64 section (see the #endif below), x64 provides real
// implementations in its own instruction-selector file.
void InstructionSelector::VisitI32x4RelaxedTruncF64x2SZero(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI32x4RelaxedTruncF64x2UZero(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI32x4RelaxedTruncF32x4S(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI32x4RelaxedTruncF32x4U(Node* node) {
UNIMPLEMENTED();
}
#endif // !V8_TARGET_ARCH_X64
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
......
......@@ -4074,6 +4074,25 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1), i.InputSimd128Register(2));
break;
}
case kX64I32x4TruncF64x2UZero: {
__ I32x4TruncF64x2UZero(i.OutputSimd128Register(),
i.InputSimd128Register(0), kScratchRegister,
kScratchDoubleReg);
break;
}
case kX64I32x4TruncF32x4U: {
__ I32x4TruncF32x4U(i.OutputSimd128Register(), i.InputSimd128Register(0),
kScratchRegister, kScratchDoubleReg);
break;
}
case kX64Cvttps2dq: {
__ Cvttps2dq(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kX64Cvttpd2dq: {
__ Cvttpd2dq(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kAtomicStoreWord8: {
ASSEMBLE_SEQ_CST_STORE(MachineRepresentation::kWord8);
break;
......
......@@ -174,6 +174,10 @@ namespace compiler {
V(X64Push) \
V(X64Poke) \
V(X64Peek) \
V(X64Cvttps2dq) \
V(X64Cvttpd2dq) \
V(X64I32x4TruncF64x2UZero) \
V(X64I32x4TruncF32x4U) \
V(X64F64x2Splat) \
V(X64F64x2ExtractLane) \
V(X64F64x2ReplaceLane) \
......
......@@ -126,6 +126,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64Pinsrw:
case kX64Pinsrd:
case kX64Pinsrq:
case kX64Cvttps2dq:
case kX64Cvttpd2dq:
case kX64I32x4TruncF64x2UZero:
case kX64I32x4TruncF32x4U:
case kX64F64x2Splat:
case kX64F64x2ExtractLane:
case kX64F64x2ReplaceLane:
......
......@@ -3872,6 +3872,22 @@ void InstructionSelector::VisitI32x4TruncSatF64x2UZero(Node* node) {
Emit(kX64I32x4TruncSatF64x2UZero, dst, g.UseRegister(node->InputAt(0)));
}
// x64 selection for the relaxed int<-float truncations. The signed variants
// lower directly to the single cvttpd2dq/cvttps2dq machine instructions
// (no saturation fixup, per relaxed semantics); the unsigned variants use
// the dedicated TruncF64x2UZero/TruncF32x4U macro-assembler sequences.
void InstructionSelector::VisitI32x4RelaxedTruncF64x2SZero(Node* node) {
VisitFloatUnop(this, node, node->InputAt(0), kX64Cvttpd2dq);
}
void InstructionSelector::VisitI32x4RelaxedTruncF64x2UZero(Node* node) {
VisitFloatUnop(this, node, node->InputAt(0), kX64I32x4TruncF64x2UZero);
}
void InstructionSelector::VisitI32x4RelaxedTruncF32x4S(Node* node) {
VisitFloatUnop(this, node, node->InputAt(0), kX64Cvttps2dq);
}
void InstructionSelector::VisitI32x4RelaxedTruncF32x4U(Node* node) {
VisitFloatUnop(this, node, node->InputAt(0), kX64I32x4TruncF32x4U);
}
void InstructionSelector::VisitI64x2GtS(Node* node) {
X64OperandGenerator g(this);
if (CpuFeatures::IsSupported(AVX)) {
......
......@@ -605,7 +605,11 @@ std::ostream& operator<<(std::ostream& os, TruncateKind kind) {
V(F32x4RelaxedMin, Operator::kNoProperties, 2, 0, 1) \
V(F32x4RelaxedMax, Operator::kNoProperties, 2, 0, 1) \
V(F64x2RelaxedMin, Operator::kNoProperties, 2, 0, 1) \
V(F64x2RelaxedMax, Operator::kNoProperties, 2, 0, 1)
V(F64x2RelaxedMax, Operator::kNoProperties, 2, 0, 1) \
V(I32x4RelaxedTruncF32x4S, Operator::kNoProperties, 1, 0, 1) \
V(I32x4RelaxedTruncF32x4U, Operator::kNoProperties, 1, 0, 1) \
V(I32x4RelaxedTruncF64x2SZero, Operator::kNoProperties, 1, 0, 1) \
V(I32x4RelaxedTruncF64x2UZero, Operator::kNoProperties, 1, 0, 1)
// The format is:
// V(Name, properties, value_input_count, control_input_count, output_count)
......
......@@ -923,6 +923,10 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* F32x4RelaxedMax();
const Operator* F64x2RelaxedMin();
const Operator* F64x2RelaxedMax();
const Operator* I32x4RelaxedTruncF32x4S();
const Operator* I32x4RelaxedTruncF32x4U();
const Operator* I32x4RelaxedTruncF64x2SZero();
const Operator* I32x4RelaxedTruncF64x2UZero();
// load [base + index]
const Operator* Load(LoadRepresentation rep);
......
This diff is collapsed.
......@@ -5158,6 +5158,18 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
case wasm::kExprF64x2RelaxedMax:
return graph()->NewNode(mcgraph()->machine()->F64x2RelaxedMax(),
inputs[0], inputs[1]);
case wasm::kExprI32x4RelaxedTruncF64x2SZero:
return graph()->NewNode(
mcgraph()->machine()->I32x4RelaxedTruncF64x2SZero(), inputs[0]);
case wasm::kExprI32x4RelaxedTruncF64x2UZero:
return graph()->NewNode(
mcgraph()->machine()->I32x4RelaxedTruncF64x2UZero(), inputs[0]);
case wasm::kExprI32x4RelaxedTruncF32x4S:
return graph()->NewNode(mcgraph()->machine()->I32x4RelaxedTruncF32x4S(),
inputs[0]);
case wasm::kExprI32x4RelaxedTruncF32x4U:
return graph()->NewNode(mcgraph()->machine()->I32x4RelaxedTruncF32x4U(),
inputs[0]);
default:
FATAL_UNSUPPORTED_OPCODE(opcode);
}
......
......@@ -374,6 +374,10 @@ constexpr const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_I64x2_OP(RelaxedLaneSelect, "relaxed_laneselect");
CASE_SIMDF_OP(RelaxedMin, "relaxed_min");
CASE_SIMDF_OP(RelaxedMax, "relaxed_max");
CASE_I32x4_OP(RelaxedTruncF32x4S, "relaxed_trunc_f32x4_s");
CASE_I32x4_OP(RelaxedTruncF32x4U, "relaxed_trunc_f32x4_u");
CASE_I32x4_OP(RelaxedTruncF64x2SZero, "relaxed_trunc_f64x2_s_zero");
CASE_I32x4_OP(RelaxedTruncF64x2UZero, "relaxed_trunc_f64x2_u_zero");
// Atomic operations.
CASE_OP(AtomicNotify, "atomic.notify")
......
......@@ -514,22 +514,26 @@ bool V8_EXPORT_PRIVATE IsJSCompatibleSignature(const FunctionSig* sig,
V(F64x2ConvertLowI32x4S, 0xfdfe, s_s) \
V(F64x2ConvertLowI32x4U, 0xfdff, s_s)
#define FOREACH_RELAXED_SIMD_OPCODE(V) \
V(I8x16RelaxedSwizzle, 0xfda2, s_ss) \
V(I8x16RelaxedLaneSelect, 0xfdb2, s_sss) \
V(I16x8RelaxedLaneSelect, 0xfdb3, s_sss) \
V(I32x4RelaxedLaneSelect, 0xfdd2, s_sss) \
V(I64x2RelaxedLaneSelect, 0xfdd3, s_sss) \
V(F32x4RelaxedMin, 0xfdb4, s_ss) \
V(F32x4RelaxedMax, 0xfde2, s_ss) \
V(F64x2RelaxedMin, 0xfdd4, s_ss) \
V(F64x2RelaxedMax, 0xfdee, s_ss) \
V(F32x4Qfma, 0xfdaf, s_sss) \
V(F32x4Qfms, 0xfdb0, s_sss) \
V(F64x2Qfma, 0xfdcf, s_sss) \
V(F64x2Qfms, 0xfdd0, s_sss) \
V(F32x4RecipApprox, 0xfda5, s_s) \
V(F32x4RecipSqrtApprox, 0xfda6, s_s)
#define FOREACH_RELAXED_SIMD_OPCODE(V) \
V(I8x16RelaxedSwizzle, 0xfda2, s_ss) \
V(I8x16RelaxedLaneSelect, 0xfdb2, s_sss) \
V(I16x8RelaxedLaneSelect, 0xfdb3, s_sss) \
V(I32x4RelaxedLaneSelect, 0xfdd2, s_sss) \
V(I64x2RelaxedLaneSelect, 0xfdd3, s_sss) \
V(F32x4Qfma, 0xfdaf, s_sss) \
V(F32x4Qfms, 0xfdb0, s_sss) \
V(F64x2Qfma, 0xfdcf, s_sss) \
V(F64x2Qfms, 0xfdd0, s_sss) \
V(F32x4RelaxedMin, 0xfdb4, s_ss) \
V(F32x4RelaxedMax, 0xfde2, s_ss) \
V(F64x2RelaxedMin, 0xfdd4, s_ss) \
V(F64x2RelaxedMax, 0xfdee, s_ss) \
V(I32x4RelaxedTruncF32x4S, 0xfda5, s_s) \
V(I32x4RelaxedTruncF32x4U, 0xfda6, s_s) \
V(I32x4RelaxedTruncF64x2SZero, 0xfdc5, s_s) \
V(I32x4RelaxedTruncF64x2UZero, 0xfdc6, s_s) \
V(F32x4RecipApprox, 0xfdbb, s_s) \
V(F32x4RecipSqrtApprox, 0xfdc2, s_s)
#define FOREACH_SIMD_1_OPERAND_1_PARAM_OPCODE(V) \
V(I8x16ExtractLaneS, 0xfd15, _) \
......
......@@ -146,6 +146,7 @@ class ValueHelper {
797056.0f,
1.77219e+09f,
2147483648.0f, // INT32_MAX + 1
2147483904.0f, // INT32_MAX + 1 and significand = 1.
4294967296.0f, // UINT32_MAX + 1
1.51116e+11f,
4.18193e+13f,
......
......@@ -2,7 +2,10 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <type_traits>
#include "src/base/overflowing-math.h"
#include "src/base/safe_conversions.h"
#include "src/common/globals.h"
#include "src/wasm/compilation-environment.h"
#include "test/cctest/cctest.h"
......@@ -348,6 +351,62 @@ WASM_RELAXED_SIMD_TEST(F64x2RelaxedMin) {
WASM_RELAXED_SIMD_TEST(F64x2RelaxedMax) {
RunF64x2BinOpTest(execution_tier, kExprF64x2RelaxedMax, Maximum);
}
namespace {
// For relaxed trunc instructions, don't test out of range values.
// FloatType comes later so caller can rely on template argument deduction and
// just pass IntType.
// Returns true when |x| must be skipped: NaN and out-of-range inputs have
// implementation-defined results under relaxed semantics, and inputs the
// platform cannot represent are untestable.
template <typename IntType, typename FloatType>
typename std::enable_if<std::is_floating_point<FloatType>::value, bool>::type
ShouldSkipTestingConstant(FloatType x) {
return std::isnan(x) || !base::IsValueInRangeForNumericType<IntType>(x) ||
!PlatformCanRepresent(x);
}
// Shared driver for all four relaxed trunc tests: builds a module that
// splats a FloatType scalar into a vector (splat_op), truncates it
// (trunc_op) into global 0, then checks every IntType lane equals the
// scalar truncation of the input.
template <typename IntType, typename FloatType>
void IntRelaxedTruncFloatTest(TestExecutionTier execution_tier,
WasmOpcode trunc_op, WasmOpcode splat_op) {
WasmRunner<int, FloatType> r(execution_tier);
IntType* g0 = r.builder().template AddGlobal<IntType>(kWasmS128);
constexpr int lanes = kSimd128Size / sizeof(FloatType);
// global[0] = trunc(splat(local[0])).
BUILD(r,
WASM_GLOBAL_SET(
0, WASM_SIMD_UNOP(trunc_op,
WASM_SIMD_UNOP(splat_op, WASM_LOCAL_GET(0)))),
WASM_ONE);
for (FloatType x : compiler::ValueHelper::GetVector<FloatType>()) {
if (ShouldSkipTestingConstant<IntType>(x)) continue;
CHECK_EQ(1, r.Call(x));
// checked_cast is safe here: out-of-range x was skipped above.
IntType expected = base::checked_cast<IntType>(x);
for (int i = 0; i < lanes; i++) {
CHECK_EQ(expected, LANE(g0, i));
}
}
}
} // namespace
// End-to-end tests for the four relaxed truncation opcodes, one per
// (signedness, source float type) combination; each pairs the trunc opcode
// with the matching splat opcode and delegates to the shared driver above.
WASM_RELAXED_SIMD_TEST(I32x4RelaxedTruncF64x2SZero) {
IntRelaxedTruncFloatTest<int32_t, double>(
execution_tier, kExprI32x4RelaxedTruncF64x2SZero, kExprF64x2Splat);
}
WASM_RELAXED_SIMD_TEST(I32x4RelaxedTruncF64x2UZero) {
IntRelaxedTruncFloatTest<uint32_t, double>(
execution_tier, kExprI32x4RelaxedTruncF64x2UZero, kExprF64x2Splat);
}
WASM_RELAXED_SIMD_TEST(I32x4RelaxedTruncF32x4S) {
IntRelaxedTruncFloatTest<int32_t, float>(
execution_tier, kExprI32x4RelaxedTruncF32x4S, kExprF32x4Splat);
}
WASM_RELAXED_SIMD_TEST(I32x4RelaxedTruncF32x4U) {
IntRelaxedTruncFloatTest<uint32_t, float>(
execution_tier, kExprI32x4RelaxedTruncF32x4U, kExprF32x4Splat);
}
#endif // V8_TARGET_ARCH_X64
#undef WASM_RELAXED_SIMD_TEST
......
......@@ -2664,16 +2664,14 @@ class WasmInterpreterInternals {
static_cast<float>(a))
CONVERT_CASE(F32x4UConvertI32x4, int4, i32x4, float4, 4, 0, uint32_t,
static_cast<float>(a))
CONVERT_CASE(I32x4SConvertF32x4, float4, f32x4, int4, 4, 0, double,
std::isnan(a) ? 0
: a<kMinInt ? kMinInt : a> kMaxInt
? kMaxInt
: static_cast<int32_t>(a))
CONVERT_CASE(I32x4UConvertF32x4, float4, f32x4, int4, 4, 0, double,
std::isnan(a)
? 0
: a<0 ? 0 : a> kMaxUInt32 ? kMaxUInt32
: static_cast<uint32_t>(a))
CONVERT_CASE(I32x4SConvertF32x4, float4, f32x4, int4, 4, 0, float,
base::saturated_cast<int32_t>(a))
CONVERT_CASE(I32x4UConvertF32x4, float4, f32x4, int4, 4, 0, float,
base::saturated_cast<uint32_t>(a))
CONVERT_CASE(I32x4RelaxedTruncF32x4S, float4, f32x4, int4, 4, 0, float,
base::saturated_cast<int32_t>(a))
CONVERT_CASE(I32x4RelaxedTruncF32x4U, float4, f32x4, int4, 4, 0, float,
base::saturated_cast<uint32_t>(a))
CONVERT_CASE(I64x2SConvertI32x4Low, int4, i32x4, int2, 2, 0, int32_t, a)
CONVERT_CASE(I64x2SConvertI32x4High, int4, i32x4, int2, 2, 2, int32_t,
a)
......@@ -2703,6 +2701,10 @@ class WasmInterpreterInternals {
base::saturated_cast<int32_t>(a))
CONVERT_CASE(I32x4TruncSatF64x2UZero, float2, f64x2, int4, 2, 0, double,
base::saturated_cast<uint32_t>(a))
CONVERT_CASE(I32x4RelaxedTruncF64x2SZero, float2, f64x2, int4, 2, 0,
double, base::saturated_cast<int32_t>(a))
CONVERT_CASE(I32x4RelaxedTruncF64x2UZero, float2, f64x2, int4, 2, 0,
double, base::saturated_cast<uint32_t>(a))
CONVERT_CASE(F32x4DemoteF64x2Zero, float2, f64x2, float4, 2, 0, float,
DoubleToFloat32(a))
CONVERT_CASE(F64x2PromoteLowF32x4, float4, f32x4, float2, 2, 0, float,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment