Commit c3f346b7 authored by Ng Zhi An, committed by V8 LUCI CQ

[wasm-relaxed-simd][x64] Prototype relaxed min and max

Relaxed f32x4 and f64x2 min and max.

These instructions only guarantee deterministic results when neither input
is NaN and the inputs are not zeros of opposite signs.

Reuse existing float binop testing harnesses and add special checks for
such constants when relaxed operations are being tested.

Drive-by rename of x64 instruction codes to be Minps/Maxps/Minpd/Maxpd
since they map down exactly to a single instruction.

Bug: v8:12284
Change-Id: I1449dbfa87935a96d7d260db22667ab7b9e86601
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3218196
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/main@{#77484}
parent 24af42e8
......@@ -2360,6 +2360,14 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitI32x4RelaxedLaneSelect(node);
case IrOpcode::kI64x2RelaxedLaneSelect:
return MarkAsSimd128(node), VisitI64x2RelaxedLaneSelect(node);
case IrOpcode::kF32x4RelaxedMin:
return MarkAsSimd128(node), VisitF32x4RelaxedMin(node);
case IrOpcode::kF32x4RelaxedMax:
return MarkAsSimd128(node), VisitF32x4RelaxedMax(node);
case IrOpcode::kF64x2RelaxedMin:
return MarkAsSimd128(node), VisitF64x2RelaxedMin(node);
case IrOpcode::kF64x2RelaxedMax:
return MarkAsSimd128(node), VisitF64x2RelaxedMax(node);
default:
FATAL("Unexpected operator #%d:%s @ node #%d", node->opcode(),
node->op()->mnemonic(), node->id());
......@@ -2786,6 +2794,10 @@ void InstructionSelector::VisitI32x4RelaxedLaneSelect(Node* node) {
// Stub compiled only when the target is not x64 (it sits inside the
// !V8_TARGET_ARCH_X64 region): i64x2 relaxed lane select is left unimplemented.
void InstructionSelector::VisitI64x2RelaxedLaneSelect(Node* node) {
UNIMPLEMENTED();
}
// Stub compiled only when the target is not x64: f32x4 relaxed min is left
// unimplemented there.
void InstructionSelector::VisitF32x4RelaxedMin(Node* node) {
  UNIMPLEMENTED();
}
// Stub compiled only when the target is not x64: f32x4 relaxed max is left
// unimplemented there.
void InstructionSelector::VisitF32x4RelaxedMax(Node* node) {
  UNIMPLEMENTED();
}
// Stub compiled only when the target is not x64: f64x2 relaxed min is left
// unimplemented there.
void InstructionSelector::VisitF64x2RelaxedMin(Node* node) {
  UNIMPLEMENTED();
}
// Stub compiled only when the target is not x64: f64x2 relaxed max is left
// unimplemented there.
void InstructionSelector::VisitF64x2RelaxedMax(Node* node) {
  UNIMPLEMENTED();
}
#endif // !V8_TARGET_ARCH_X64
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
......
......@@ -2864,11 +2864,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
kScratchDoubleReg);
break;
}
case kX64F32x4Pmin: {
case kX64Minps: {
ASSEMBLE_SIMD_BINOP(minps);
break;
}
case kX64F32x4Pmax: {
case kX64Maxps: {
ASSEMBLE_SIMD_BINOP(maxps);
break;
}
......@@ -2884,11 +2884,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Roundpd(i.OutputSimd128Register(), i.InputSimd128Register(0), mode);
break;
}
case kX64F64x2Pmin: {
case kX64Minpd: {
ASSEMBLE_SIMD_BINOP(minpd);
break;
}
case kX64F64x2Pmax: {
case kX64Maxpd: {
ASSEMBLE_SIMD_BINOP(maxpd);
break;
}
......
......@@ -192,8 +192,8 @@ namespace compiler {
V(X64F64x2Le) \
V(X64F64x2Qfma) \
V(X64F64x2Qfms) \
V(X64F64x2Pmin) \
V(X64F64x2Pmax) \
V(X64Minpd) \
V(X64Maxpd) \
V(X64F64x2Round) \
V(X64F64x2ConvertLowI32x4S) \
V(X64F64x2ConvertLowI32x4U) \
......@@ -219,8 +219,8 @@ namespace compiler {
V(X64F32x4Le) \
V(X64F32x4Qfma) \
V(X64F32x4Qfms) \
V(X64F32x4Pmin) \
V(X64F32x4Pmax) \
V(X64Minps) \
V(X64Maxps) \
V(X64F32x4Round) \
V(X64F32x4DemoteF64x2Zero) \
V(X64I64x2Splat) \
......
......@@ -144,8 +144,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64F64x2Le:
case kX64F64x2Qfma:
case kX64F64x2Qfms:
case kX64F64x2Pmin:
case kX64F64x2Pmax:
case kX64Minpd:
case kX64Maxpd:
case kX64F64x2Round:
case kX64F64x2ConvertLowI32x4S:
case kX64F64x2ConvertLowI32x4U:
......@@ -172,8 +172,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64F32x4Le:
case kX64F32x4Qfma:
case kX64F32x4Qfms:
case kX64F32x4Pmin:
case kX64F32x4Pmax:
case kX64Minps:
case kX64Maxps:
case kX64F32x4Round:
case kX64F32x4DemoteF64x2Zero:
case kX64I64x2Splat:
......
......@@ -3756,33 +3756,55 @@ void InstructionSelector::VisitI64x2RelaxedLaneSelect(Node* node) {
#endif // V8_ENABLE_WEBASSEMBLY
namespace {
void VisitPminOrPmax(InstructionSelector* selector, Node* node,
ArchOpcode opcode) {
// Due to the way minps/minpd work, we want the dst to be same as the second
// input: b = pmin(a, b) directly maps to minps b a.
// Used for pmin/pmax and relaxed min/max.
void VisitMinOrMax(InstructionSelector* selector, Node* node, ArchOpcode opcode,
bool flip_inputs) {
X64OperandGenerator g(selector);
InstructionOperand dst = selector->IsSupported(AVX)
? g.DefineAsRegister(node)
: g.DefineSameAsFirst(node);
selector->Emit(opcode, dst, g.UseRegister(node->InputAt(1)),
g.UseRegister(node->InputAt(0)));
if (flip_inputs) {
// Due to the way minps/minpd work, we want the dst to be same as the second
// input: b = pmin(a, b) directly maps to minps b a.
selector->Emit(opcode, dst, g.UseRegister(node->InputAt(1)),
g.UseRegister(node->InputAt(0)));
} else {
selector->Emit(opcode, dst, g.UseRegister(node->InputAt(0)),
g.UseRegister(node->InputAt(1)));
}
}
} // namespace
void InstructionSelector::VisitF32x4Pmin(Node* node) {
VisitPminOrPmax(this, node, kX64F32x4Pmin);
VisitMinOrMax(this, node, kX64Minps, true);
}
void InstructionSelector::VisitF32x4Pmax(Node* node) {
VisitPminOrPmax(this, node, kX64F32x4Pmax);
VisitMinOrMax(this, node, kX64Maxps, true);
}
void InstructionSelector::VisitF64x2Pmin(Node* node) {
VisitPminOrPmax(this, node, kX64F64x2Pmin);
VisitMinOrMax(this, node, kX64Minpd, true);
}
void InstructionSelector::VisitF64x2Pmax(Node* node) {
VisitPminOrPmax(this, node, kX64F64x2Pmax);
VisitMinOrMax(this, node, kX64Maxpd, true);
}
// Selects f32x4 relaxed min as kX64Minps, with the node's inputs passed in
// their original order (no flip).
void InstructionSelector::VisitF32x4RelaxedMin(Node* node) {
  constexpr bool kFlipInputs = false;
  VisitMinOrMax(this, node, kX64Minps, kFlipInputs);
}
// Selects f32x4 relaxed max as kX64Maxps, with the node's inputs passed in
// their original order (no flip).
void InstructionSelector::VisitF32x4RelaxedMax(Node* node) {
  constexpr bool kFlipInputs = false;
  VisitMinOrMax(this, node, kX64Maxps, kFlipInputs);
}
// Selects f64x2 relaxed min as kX64Minpd, with the node's inputs passed in
// their original order (no flip).
void InstructionSelector::VisitF64x2RelaxedMin(Node* node) {
  constexpr bool kFlipInputs = false;
  VisitMinOrMax(this, node, kX64Minpd, kFlipInputs);
}
// Selects f64x2 relaxed max as kX64Maxpd, with the node's inputs passed in
// their original order (no flip).
void InstructionSelector::VisitF64x2RelaxedMax(Node* node) {
  constexpr bool kFlipInputs = false;
  VisitMinOrMax(this, node, kX64Maxpd, kFlipInputs);
}
void InstructionSelector::VisitI32x4ExtAddPairwiseI16x8S(Node* node) {
......
......@@ -601,7 +601,11 @@ std::ostream& operator<<(std::ostream& os, TruncateKind kind) {
V(I8x16RelaxedLaneSelect, Operator::kNoProperties, 3, 0, 1) \
V(I16x8RelaxedLaneSelect, Operator::kNoProperties, 3, 0, 1) \
V(I32x4RelaxedLaneSelect, Operator::kNoProperties, 3, 0, 1) \
V(I64x2RelaxedLaneSelect, Operator::kNoProperties, 3, 0, 1)
V(I64x2RelaxedLaneSelect, Operator::kNoProperties, 3, 0, 1) \
V(F32x4RelaxedMin, Operator::kNoProperties, 2, 0, 1) \
V(F32x4RelaxedMax, Operator::kNoProperties, 2, 0, 1) \
V(F64x2RelaxedMin, Operator::kNoProperties, 2, 0, 1) \
V(F64x2RelaxedMax, Operator::kNoProperties, 2, 0, 1)
// The format is:
// V(Name, properties, value_input_count, control_input_count, output_count)
......
......@@ -919,6 +919,10 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* I16x8RelaxedLaneSelect();
const Operator* I32x4RelaxedLaneSelect();
const Operator* I64x2RelaxedLaneSelect();
const Operator* F32x4RelaxedMin();
const Operator* F32x4RelaxedMax();
const Operator* F64x2RelaxedMin();
const Operator* F64x2RelaxedMax();
// load [base + index]
const Operator* Load(LoadRepresentation rep);
......
......@@ -987,6 +987,10 @@
V(I16x8RelaxedLaneSelect) \
V(I32x4RelaxedLaneSelect) \
V(I64x2RelaxedLaneSelect) \
V(F32x4RelaxedMin) \
V(F32x4RelaxedMax) \
V(F64x2RelaxedMin) \
V(F64x2RelaxedMax) \
V(I8x16Shuffle) \
V(V128AnyTrue) \
V(I64x2AllTrue) \
......
......@@ -5128,6 +5128,18 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
case wasm::kExprI64x2RelaxedLaneSelect:
return graph()->NewNode(mcgraph()->machine()->I64x2RelaxedLaneSelect(),
inputs[0], inputs[1], inputs[2]);
case wasm::kExprF32x4RelaxedMin:
return graph()->NewNode(mcgraph()->machine()->F32x4RelaxedMin(),
inputs[0], inputs[1]);
case wasm::kExprF32x4RelaxedMax:
return graph()->NewNode(mcgraph()->machine()->F32x4RelaxedMax(),
inputs[0], inputs[1]);
case wasm::kExprF64x2RelaxedMin:
return graph()->NewNode(mcgraph()->machine()->F64x2RelaxedMin(),
inputs[0], inputs[1]);
case wasm::kExprF64x2RelaxedMax:
return graph()->NewNode(mcgraph()->machine()->F64x2RelaxedMax(),
inputs[0], inputs[1]);
default:
FATAL_UNSUPPORTED_OPCODE(opcode);
}
......
......@@ -372,6 +372,8 @@ constexpr const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_I16x8_OP(RelaxedLaneSelect, "relaxed_laneselect");
CASE_I32x4_OP(RelaxedLaneSelect, "relaxed_laneselect");
CASE_I64x2_OP(RelaxedLaneSelect, "relaxed_laneselect");
CASE_SIMDF_OP(RelaxedMin, "relaxed_min");
CASE_SIMDF_OP(RelaxedMax, "relaxed_max");
// Atomic operations.
CASE_OP(AtomicNotify, "atomic.notify")
......
......@@ -520,6 +520,10 @@ bool V8_EXPORT_PRIVATE IsJSCompatibleSignature(const FunctionSig* sig,
V(I16x8RelaxedLaneSelect, 0xfdb3, s_sss) \
V(I32x4RelaxedLaneSelect, 0xfdd2, s_sss) \
V(I64x2RelaxedLaneSelect, 0xfdd3, s_sss) \
V(F32x4RelaxedMin, 0xfdb4, s_ss) \
V(F32x4RelaxedMax, 0xfde2, s_ss) \
V(F64x2RelaxedMin, 0xfdd4, s_ss) \
V(F64x2RelaxedMax, 0xfdee, s_ss) \
V(F32x4Qfma, 0xfdaf, s_sss) \
V(F32x4Qfms, 0xfdb0, s_sss) \
V(F64x2Qfma, 0xfdcf, s_sss) \
......
......@@ -332,6 +332,22 @@ WASM_RELAXED_SIMD_TEST(I64x2RelaxedLaneSelect) {
RelaxedLaneSelectTest<uint64_t, kElems>(execution_tier, v1, v2, s, expected,
kExprI64x2RelaxedLaneSelect);
}
// Runs the f32x4 binop harness on relaxed min, with Minimum as the reference
// operation.
WASM_RELAXED_SIMD_TEST(F32x4RelaxedMin) {
  constexpr auto kOpcode = kExprF32x4RelaxedMin;
  RunF32x4BinOpTest(execution_tier, kOpcode, Minimum);
}
// Runs the f32x4 binop harness on relaxed max, with Maximum as the reference
// operation.
WASM_RELAXED_SIMD_TEST(F32x4RelaxedMax) {
  constexpr auto kOpcode = kExprF32x4RelaxedMax;
  RunF32x4BinOpTest(execution_tier, kOpcode, Maximum);
}
// Runs the f64x2 binop harness on relaxed min, with Minimum as the reference
// operation.
WASM_RELAXED_SIMD_TEST(F64x2RelaxedMin) {
  constexpr auto kOpcode = kExprF64x2RelaxedMin;
  RunF64x2BinOpTest(execution_tier, kOpcode, Minimum);
}
// Runs the f64x2 binop harness on relaxed max, with Maximum as the reference
// operation.
WASM_RELAXED_SIMD_TEST(F64x2RelaxedMax) {
  constexpr auto kOpcode = kExprF64x2RelaxedMax;
  RunF64x2BinOpTest(execution_tier, kOpcode, Maximum);
}
#endif // V8_TARGET_ARCH_X64
#undef WASM_RELAXED_SIMD_TEST
......
......@@ -85,16 +85,6 @@ T Mul(T a, T b) {
return a * b;
}
// Returns the smaller of a and b. When b is not less than a (the two compare
// equal, or the comparison is unordered), a is returned.
template <typename T>
T Minimum(T a, T b) {
  return (b < a) ? b : a;
}
// Returns the larger of a and b. When a is not less than b (the two compare
// equal, or the comparison is unordered), a is returned.
template <typename T>
T Maximum(T a, T b) {
  return (a < b) ? b : a;
}
template <typename T>
T UnsignedMinimum(T a, T b) {
using UnsignedT = typename std::make_unsigned<T>::type;
......
......@@ -5,12 +5,14 @@
#include "test/cctest/wasm/wasm-simd-utils.h"
#include <cmath>
#include <type_traits>
#include "src/base/logging.h"
#include "src/base/memory.h"
#include "src/common/globals.h"
#include "src/wasm/compilation-environment.h"
#include "src/wasm/value-type.h"
#include "src/wasm/wasm-opcodes-inl.h"
#include "src/wasm/wasm-opcodes.h"
#include "test/cctest/compiler/c-signature.h"
#include "test/cctest/compiler/value-helper.h"
......@@ -481,6 +483,21 @@ void RunF32x4UnOpTest(TestExecutionTier execution_tier, WasmOpcode opcode,
}
}
namespace {
// Relaxed-simd operations only produce deterministic results for some input
// values. Returns true for an input pair that must be excluded from testing:
// the opcode is a relaxed-simd opcode and either operand is NaN, or the
// operands are zeros of opposite signs. Currently this is only used for f32x4,
// f64x2 relaxed min and max.
template <typename T>
typename std::enable_if<std::is_floating_point<T>::value, bool>::type
ShouldSkipTestingConstants(WasmOpcode opcode, T lhs, T rhs) {
  // Only relaxed-simd opcodes ever cause a pair to be skipped.
  if (!WasmOpcodes::IsRelaxedSimdOpcode(opcode)) return false;
  if (std::isnan(lhs) || std::isnan(rhs)) return true;
  // +0 and -0 compare equal, so the sign bit is what tells them apart.
  const bool both_zero = (lhs == 0) && (rhs == 0);
  return both_zero && (std::signbit(lhs) != std::signbit(rhs));
}
} // namespace
void RunF32x4BinOpTest(TestExecutionTier execution_tier, WasmOpcode opcode,
FloatBinOp expected_op) {
WasmRunner<int32_t, float, float> r(execution_tier);
......@@ -500,6 +517,7 @@ void RunF32x4BinOpTest(TestExecutionTier execution_tier, WasmOpcode opcode,
if (!PlatformCanRepresent(x)) continue;
FOR_FLOAT32_INPUTS(y) {
if (!PlatformCanRepresent(y)) continue;
if (ShouldSkipTestingConstants(opcode, x, y)) continue;
float expected = expected_op(x, y);
if (!PlatformCanRepresent(expected)) continue;
r.Call(x, y);
......@@ -516,6 +534,7 @@ void RunF32x4BinOpTest(TestExecutionTier execution_tier, WasmOpcode opcode,
FOR_FLOAT32_NAN_INPUTS(j) {
float y = bit_cast<float>(nan_test_array[j]);
if (!PlatformCanRepresent(y)) continue;
if (ShouldSkipTestingConstants(opcode, x, y)) continue;
float expected = expected_op(x, y);
if (!PlatformCanRepresent(expected)) continue;
r.Call(x, y);
......@@ -664,6 +683,7 @@ void RunF64x2BinOpTest(TestExecutionTier execution_tier, WasmOpcode opcode,
if (!PlatformCanRepresent(x)) continue;
FOR_FLOAT64_INPUTS(y) {
if (!PlatformCanRepresent(x)) continue;
if (ShouldSkipTestingConstants(opcode, x, y)) continue;
double expected = expected_op(x, y);
if (!PlatformCanRepresent(expected)) continue;
r.Call(x, y);
......@@ -681,6 +701,7 @@ void RunF64x2BinOpTest(TestExecutionTier execution_tier, WasmOpcode opcode,
double y = bit_cast<double>(double_nan_test_array[j]);
double expected = expected_op(x, y);
if (!PlatformCanRepresent(expected)) continue;
if (ShouldSkipTestingConstants(opcode, x, y)) continue;
r.Call(x, y);
for (int i = 0; i < 2; i++) {
double actual = LANE(g, i);
......
......@@ -81,6 +81,16 @@ T Negate(T a) {
return -a;
}
// Returns the smaller of a and b. When b is not less than a (the two compare
// equal, or the comparison is unordered), a is returned.
template <typename T>
T Minimum(T a, T b) {
  return (b < a) ? b : a;
}
// Returns the larger of a and b. When a is not less than b (the two compare
// equal, or the comparison is unordered), a is returned.
template <typename T>
T Maximum(T a, T b) {
  return (a < b) ? b : a;
}
#if V8_OS_AIX
template <typename T>
bool MightReverseSign(T float_op) {
......
......@@ -2338,6 +2338,10 @@ class WasmInterpreterInternals {
BINOP_CASE(F64x2Max, f64x2, float2, 2, JSMax(a, b))
BINOP_CASE(F64x2Pmin, f64x2, float2, 2, std::min(a, b))
BINOP_CASE(F64x2Pmax, f64x2, float2, 2, std::max(a, b))
BINOP_CASE(F32x4RelaxedMin, f32x4, float4, 4, std::min(a, b))
BINOP_CASE(F32x4RelaxedMax, f32x4, float4, 4, std::max(a, b))
BINOP_CASE(F64x2RelaxedMin, f64x2, float2, 2, std::min(a, b))
BINOP_CASE(F64x2RelaxedMax, f64x2, float2, 2, std::max(a, b))
BINOP_CASE(F32x4Add, f32x4, float4, 4, a + b)
BINOP_CASE(F32x4Sub, f32x4, float4, 4, a - b)
BINOP_CASE(F32x4Mul, f32x4, float4, 4, a * b)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment