Commit 4f1dbc72, authored by Ng Zhi An, committed by V8 LUCI CQ

[wasm-relaxed-simd][ia32] Prototype relaxed int-float trunc

4 instructions, int32x4.trunc_f32x4_{s,u},
int32x4.trunc_f64x2_{s,u}_zero.

ia32 port of a7b208739d2dabe11ca4e792085aed3455e879d5.

Bug: v8:12284
Change-Id: Ia94ff572b63f9fd8e3bfe2dd8e5fa41212f84a76
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3255661
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/main@{#77807}
parent ef1640b8
......@@ -3440,6 +3440,25 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1), i.InputSimd128Register(2));
break;
}
case kIA32I32x4TruncF64x2UZero: {
  // Forwards to the I32x4TruncF64x2UZero macro-assembler helper. Besides the
  // output and input SIMD registers it is handed the instruction's first
  // temp register and kScratchDoubleReg.
  XMMRegister dst = i.OutputSimd128Register();
  XMMRegister src = i.InputSimd128Register(0);
  Register gp_temp = i.TempRegister(0);
  __ I32x4TruncF64x2UZero(dst, src, gp_temp, kScratchDoubleReg);
  break;
}
case kIA32I32x4TruncF32x4U: {
  // Forwards to the I32x4TruncF32x4U macro-assembler helper, passing the
  // instruction's first temp register and kScratchDoubleReg in addition to
  // the output and input SIMD registers.
  XMMRegister dst = i.OutputSimd128Register();
  XMMRegister src = i.InputSimd128Register(0);
  Register gp_temp = i.TempRegister(0);
  __ I32x4TruncF32x4U(dst, src, gp_temp, kScratchDoubleReg);
  break;
}
case kIA32Cvttps2dq: {
  // Plain register-to-register Cvttps2dq; no temps are involved.
  XMMRegister dst = i.OutputSimd128Register();
  XMMRegister src = i.InputSimd128Register(0);
  __ Cvttps2dq(dst, src);
  break;
}
case kIA32Cvttpd2dq: {
  // Plain register-to-register Cvttpd2dq; no temps are involved.
  XMMRegister dst = i.OutputSimd128Register();
  XMMRegister src = i.InputSimd128Register(0);
  __ Cvttpd2dq(dst, src);
  break;
}
case kIA32Word32AtomicPairLoad: {
__ movq(kScratchDoubleReg, i.MemoryOperand());
__ Pextrd(i.OutputRegister(0), kScratchDoubleReg, 0);
......
......@@ -106,6 +106,10 @@ namespace compiler {
V(IA32Push) \
V(IA32Poke) \
V(IA32Peek) \
V(IA32Cvttps2dq) \
V(IA32Cvttpd2dq) \
V(IA32I32x4TruncF32x4U) \
V(IA32I32x4TruncF64x2UZero) \
V(IA32F64x2Splat) \
V(F64x2ExtractLane) \
V(F64x2ReplaceLane) \
......
......@@ -90,6 +90,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32BitcastFI:
case kIA32BitcastIF:
case kIA32Pblendvb:
case kIA32Cvttps2dq:
case kIA32Cvttpd2dq:
case kIA32I32x4TruncF32x4U:
case kIA32I32x4TruncF64x2UZero:
case kIA32F64x2Splat:
case kF64x2ExtractLane:
case kF64x2ReplaceLane:
......
......@@ -330,10 +330,14 @@ void VisitRROFloat(InstructionSelector* selector, Node* node,
}
}
// For float unary operations. Also allocates a temporary general register for
// use in external operands. If a temp is not required, use VisitRRSimd (since
// float and SIMD registers are the same on IA32).
void VisitFloatUnop(InstructionSelector* selector, Node* node, Node* input,
ArchOpcode opcode) {
IA32OperandGenerator g(selector);
InstructionOperand temps[] = {g.TempRegister()};
// No need for unique because inputs are float but temp is general.
if (selector->IsSupported(AVX)) {
selector->Emit(opcode, g.DefineAsRegister(node), g.UseRegister(input),
arraysize(temps), temps);
......@@ -3158,6 +3162,22 @@ void InstructionSelector::VisitI32x4TruncSatF64x2UZero(Node* node) {
arraysize(temps), temps);
}
// Selects the relaxed signed f64x2 -> i32x4 truncation as a single
// kIA32Cvttpd2dq instruction via VisitRRSimd.
void InstructionSelector::VisitI32x4RelaxedTruncF64x2SZero(Node* node) {
VisitRRSimd(this, node, kIA32Cvttpd2dq);
}
// Selects the relaxed unsigned f64x2 -> i32x4 truncation as
// kIA32I32x4TruncF64x2UZero. Routed through VisitFloatUnop, which allocates a
// general temp register for the emitted instruction.
void InstructionSelector::VisitI32x4RelaxedTruncF64x2UZero(Node* node) {
  Node* const input = node->InputAt(0);
  VisitFloatUnop(this, node, input, kIA32I32x4TruncF64x2UZero);
}
// Selects the relaxed signed f32x4 -> i32x4 truncation as a single
// kIA32Cvttps2dq instruction via VisitRRSimd.
void InstructionSelector::VisitI32x4RelaxedTruncF32x4S(Node* node) {
VisitRRSimd(this, node, kIA32Cvttps2dq);
}
// Selects the relaxed unsigned f32x4 -> i32x4 truncation as
// kIA32I32x4TruncF32x4U. Routed through VisitFloatUnop, which allocates a
// general temp register for the emitted instruction.
void InstructionSelector::VisitI32x4RelaxedTruncF32x4U(Node* node) {
  Node* const input = node->InputAt(0);
  VisitFloatUnop(this, node, input, kIA32I32x4TruncF32x4U);
}
void InstructionSelector::VisitI64x2GtS(Node* node) {
IA32OperandGenerator g(this);
if (CpuFeatures::IsSupported(AVX)) {
......
......@@ -2789,21 +2789,6 @@ void InstructionSelector::VisitF32x4Qfms(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM64
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X && !V8_TARGET_ARCH_PPC64
#if !V8_TARGET_ARCH_X64
// Placeholder visitors for the four relaxed int-float truncation nodes.
// Each one simply invokes UNIMPLEMENTED(); they are compiled only under the
// enclosing preprocessor guard.
void InstructionSelector::VisitI32x4RelaxedTruncF64x2SZero(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI32x4RelaxedTruncF64x2UZero(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI32x4RelaxedTruncF32x4S(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI32x4RelaxedTruncF32x4U(Node* node) {
UNIMPLEMENTED();
}
#endif // !V8_TARGET_ARCH_X64
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitI8x16RelaxedLaneSelect(Node* node) {
UNIMPLEMENTED();
......@@ -2821,6 +2806,18 @@ void InstructionSelector::VisitF32x4RelaxedMin(Node* node) { UNIMPLEMENTED(); }
// Placeholder visitors for relaxed-SIMD nodes: each one simply invokes
// UNIMPLEMENTED(). They are compiled only under the enclosing preprocessor
// guard.
void InstructionSelector::VisitF32x4RelaxedMax(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2RelaxedMin(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2RelaxedMax(Node* node) { UNIMPLEMENTED(); }
// Relaxed int-float truncation placeholders.
void InstructionSelector::VisitI32x4RelaxedTruncF64x2SZero(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI32x4RelaxedTruncF64x2UZero(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI32x4RelaxedTruncF32x4S(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI32x4RelaxedTruncF32x4U(Node* node) {
UNIMPLEMENTED();
}
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
// FinishRegion nodes are handled by forwarding the node to EmitIdentity().
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
......
......@@ -329,30 +329,6 @@ WASM_RELAXED_SIMD_TEST(F64x2RelaxedMin) {
// Runs the shared F64x2 binary-op test driver for kExprF64x2RelaxedMax.
// Maximum is passed as the third argument (presumably the scalar reference
// the driver compares against; confirm in RunF64x2BinOpTest).
WASM_RELAXED_SIMD_TEST(F64x2RelaxedMax) {
RunF64x2BinOpTest(execution_tier, kExprF64x2RelaxedMax, Maximum);
}
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
#if V8_TARGET_ARCH_X64
// Checks kExprI8x16RelaxedSwizzle using three S128 globals (dst, src,
// indices) and in-range indices only.
WASM_RELAXED_SIMD_TEST(I8x16RelaxedSwizzle) {
// Output is only defined for indices in the range [0,15].
WasmRunner<int32_t> r(execution_tier);
// Number of byte lanes in one 128-bit vector.
static const int kElems = kSimd128Size / sizeof(uint8_t);
// Globals are presumably numbered 0 (dst), 1 (src), 2 (indices) in creation
// order, matching the WASM_GLOBAL_SET/GET indices below.
uint8_t* dst = r.builder().AddGlobal<uint8_t>(kWasmS128);
uint8_t* src = r.builder().AddGlobal<uint8_t>(kWasmS128);
uint8_t* indices = r.builder().AddGlobal<uint8_t>(kWasmS128);
// Function body: global 0 = swizzle(global 1, global 2); then yields
// WASM_ONE, which the CHECK_EQ(1, r.Call()) below relies on.
BUILD(r,
WASM_GLOBAL_SET(
0, WASM_SIMD_BINOP(kExprI8x16RelaxedSwizzle, WASM_GLOBAL_GET(1),
WASM_GLOBAL_GET(2))),
WASM_ONE);
// Fill both src and indices with the descending sequence kElems-1 .. 0.
for (int i = 0; i < kElems; i++) {
LANE(src, i) = kElems - i - 1;
LANE(indices, i) = kElems - i - 1;
}
CHECK_EQ(1, r.Call());
// Selecting a reversed source through reversed indices is expected to leave
// lane i of dst holding the value i.
for (int i = 0; i < kElems; i++) {
CHECK_EQ(LANE(dst, i), i);
}
}
namespace {
// For relaxed trunc instructions, don't test out of range values.
......@@ -409,6 +385,30 @@ WASM_RELAXED_SIMD_TEST(I32x4RelaxedTruncF32x4U) {
IntRelaxedTruncFloatTest<uint32_t, float>(
execution_tier, kExprI32x4RelaxedTruncF32x4U, kExprF32x4Splat);
}
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
#if V8_TARGET_ARCH_X64
// Checks kExprI8x16RelaxedSwizzle using three S128 globals (dst, src,
// indices) and in-range indices only.
WASM_RELAXED_SIMD_TEST(I8x16RelaxedSwizzle) {
// Output is only defined for indices in the range [0,15].
WasmRunner<int32_t> r(execution_tier);
// Number of byte lanes in one 128-bit vector.
static const int kElems = kSimd128Size / sizeof(uint8_t);
// Globals are presumably numbered 0 (dst), 1 (src), 2 (indices) in creation
// order, matching the WASM_GLOBAL_SET/GET indices below.
uint8_t* dst = r.builder().AddGlobal<uint8_t>(kWasmS128);
uint8_t* src = r.builder().AddGlobal<uint8_t>(kWasmS128);
uint8_t* indices = r.builder().AddGlobal<uint8_t>(kWasmS128);
// Function body: global 0 = swizzle(global 1, global 2); then yields
// WASM_ONE, which the CHECK_EQ(1, r.Call()) below relies on.
BUILD(r,
WASM_GLOBAL_SET(
0, WASM_SIMD_BINOP(kExprI8x16RelaxedSwizzle, WASM_GLOBAL_GET(1),
WASM_GLOBAL_GET(2))),
WASM_ONE);
// Fill both src and indices with the descending sequence kElems-1 .. 0.
for (int i = 0; i < kElems; i++) {
LANE(src, i) = kElems - i - 1;
LANE(indices, i) = kElems - i - 1;
}
CHECK_EQ(1, r.Call());
// Selecting a reversed source through reversed indices is expected to leave
// lane i of dst holding the value i.
for (int i = 0; i < kElems; i++) {
CHECK_EQ(LANE(dst, i), i);
}
}
#endif // V8_TARGET_ARCH_X64
#undef WASM_RELAXED_SIMD_TEST
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment