Commit 4f1dbc72 authored by Ng Zhi An, committed by V8 LUCI CQ

[wasm-relaxed-simd][ia32] Prototype relaxed int-float trunc

4 instructions, int32x4.trunc_f32x4_{s,u},
int32x4.trunc_f64x2_{s,u}_zero.

ia32 port of a7b208739d2dabe11ca4e792085aed3455e879d5.

Bug: v8:12284
Change-Id: Ia94ff572b63f9fd8e3bfe2dd8e5fa41212f84a76
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3255661
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/main@{#77807}
parent ef1640b8
...@@ -3440,6 +3440,25 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3440,6 +3440,25 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1), i.InputSimd128Register(2)); i.InputSimd128Register(1), i.InputSimd128Register(2));
break; break;
} }
case kIA32I32x4TruncF64x2UZero: {
__ I32x4TruncF64x2UZero(i.OutputSimd128Register(),
i.InputSimd128Register(0), i.TempRegister(0),
kScratchDoubleReg);
break;
}
case kIA32I32x4TruncF32x4U: {
__ I32x4TruncF32x4U(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.TempRegister(0), kScratchDoubleReg);
break;
}
case kIA32Cvttps2dq: {
__ Cvttps2dq(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kIA32Cvttpd2dq: {
__ Cvttpd2dq(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kIA32Word32AtomicPairLoad: { case kIA32Word32AtomicPairLoad: {
__ movq(kScratchDoubleReg, i.MemoryOperand()); __ movq(kScratchDoubleReg, i.MemoryOperand());
__ Pextrd(i.OutputRegister(0), kScratchDoubleReg, 0); __ Pextrd(i.OutputRegister(0), kScratchDoubleReg, 0);
......
...@@ -106,6 +106,10 @@ namespace compiler { ...@@ -106,6 +106,10 @@ namespace compiler {
V(IA32Push) \ V(IA32Push) \
V(IA32Poke) \ V(IA32Poke) \
V(IA32Peek) \ V(IA32Peek) \
V(IA32Cvttps2dq) \
V(IA32Cvttpd2dq) \
V(IA32I32x4TruncF32x4U) \
V(IA32I32x4TruncF64x2UZero) \
V(IA32F64x2Splat) \ V(IA32F64x2Splat) \
V(F64x2ExtractLane) \ V(F64x2ExtractLane) \
V(F64x2ReplaceLane) \ V(F64x2ReplaceLane) \
......
...@@ -90,6 +90,10 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -90,6 +90,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32BitcastFI: case kIA32BitcastFI:
case kIA32BitcastIF: case kIA32BitcastIF:
case kIA32Pblendvb: case kIA32Pblendvb:
case kIA32Cvttps2dq:
case kIA32Cvttpd2dq:
case kIA32I32x4TruncF32x4U:
case kIA32I32x4TruncF64x2UZero:
case kIA32F64x2Splat: case kIA32F64x2Splat:
case kF64x2ExtractLane: case kF64x2ExtractLane:
case kF64x2ReplaceLane: case kF64x2ReplaceLane:
......
...@@ -330,10 +330,14 @@ void VisitRROFloat(InstructionSelector* selector, Node* node, ...@@ -330,10 +330,14 @@ void VisitRROFloat(InstructionSelector* selector, Node* node,
} }
} }
// For float unary operations. Also allocates a temporary general register for
// use in external operands. If a temp is not required, use VisitRRSimd (since
// float and SIMD registers are the same on IA32).
void VisitFloatUnop(InstructionSelector* selector, Node* node, Node* input, void VisitFloatUnop(InstructionSelector* selector, Node* node, Node* input,
ArchOpcode opcode) { ArchOpcode opcode) {
IA32OperandGenerator g(selector); IA32OperandGenerator g(selector);
InstructionOperand temps[] = {g.TempRegister()}; InstructionOperand temps[] = {g.TempRegister()};
// No need for unique because inputs are float but temp is general.
if (selector->IsSupported(AVX)) { if (selector->IsSupported(AVX)) {
selector->Emit(opcode, g.DefineAsRegister(node), g.UseRegister(input), selector->Emit(opcode, g.DefineAsRegister(node), g.UseRegister(input),
arraysize(temps), temps); arraysize(temps), temps);
...@@ -3158,6 +3162,22 @@ void InstructionSelector::VisitI32x4TruncSatF64x2UZero(Node* node) { ...@@ -3158,6 +3162,22 @@ void InstructionSelector::VisitI32x4TruncSatF64x2UZero(Node* node) {
arraysize(temps), temps); arraysize(temps), temps);
} }
// Relaxed i32x4.trunc(f64x2)_s_zero: lowered to a bare cvttpd2dq through the
// generic RR SIMD path. Unlike the saturating (TruncSat) lowering, no temp
// register or fixup code is emitted — the relaxed opcode does not promise a
// particular result for out-of-range lanes (the tests deliberately avoid
// out-of-range inputs).
void InstructionSelector::VisitI32x4RelaxedTruncF64x2SZero(Node* node) {
VisitRRSimd(this, node, kIA32Cvttpd2dq);
}
// Relaxed i32x4.trunc(f64x2)_u_zero: the unsigned lowering
// (kIA32I32x4TruncF64x2UZero) consumes a general-purpose temp register in the
// code generator (i.TempRegister(0)), so go through VisitFloatUnop, which
// allocates that temp.
void InstructionSelector::VisitI32x4RelaxedTruncF64x2UZero(Node* node) {
VisitFloatUnop(this, node, node->InputAt(0), kIA32I32x4TruncF64x2UZero);
}
// Relaxed i32x4.trunc(f32x4)_s: lowered to a bare cvttps2dq through the
// generic RR SIMD path — no temp register or saturation fixup needed, since
// the relaxed opcode leaves out-of-range lane results unspecified.
void InstructionSelector::VisitI32x4RelaxedTruncF32x4S(Node* node) {
VisitRRSimd(this, node, kIA32Cvttps2dq);
}
// Relaxed i32x4.trunc(f32x4)_u: the unsigned lowering (kIA32I32x4TruncF32x4U)
// consumes a general-purpose temp register in the code generator
// (i.TempRegister(0)), so go through VisitFloatUnop, which allocates that temp.
void InstructionSelector::VisitI32x4RelaxedTruncF32x4U(Node* node) {
VisitFloatUnop(this, node, node->InputAt(0), kIA32I32x4TruncF32x4U);
}
void InstructionSelector::VisitI64x2GtS(Node* node) { void InstructionSelector::VisitI64x2GtS(Node* node) {
IA32OperandGenerator g(this); IA32OperandGenerator g(this);
if (CpuFeatures::IsSupported(AVX)) { if (CpuFeatures::IsSupported(AVX)) {
......
...@@ -2789,21 +2789,6 @@ void InstructionSelector::VisitF32x4Qfms(Node* node) { UNIMPLEMENTED(); } ...@@ -2789,21 +2789,6 @@ void InstructionSelector::VisitF32x4Qfms(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM64 #endif // !V8_TARGET_ARCH_ARM64
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X && !V8_TARGET_ARCH_PPC64 #endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X && !V8_TARGET_ARCH_PPC64
#if !V8_TARGET_ARCH_X64
void InstructionSelector::VisitI32x4RelaxedTruncF64x2SZero(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI32x4RelaxedTruncF64x2UZero(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI32x4RelaxedTruncF32x4S(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI32x4RelaxedTruncF32x4U(Node* node) {
UNIMPLEMENTED();
}
#endif // !V8_TARGET_ARCH_X64
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 #if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitI8x16RelaxedLaneSelect(Node* node) { void InstructionSelector::VisitI8x16RelaxedLaneSelect(Node* node) {
UNIMPLEMENTED(); UNIMPLEMENTED();
...@@ -2821,6 +2806,18 @@ void InstructionSelector::VisitF32x4RelaxedMin(Node* node) { UNIMPLEMENTED(); } ...@@ -2821,6 +2806,18 @@ void InstructionSelector::VisitF32x4RelaxedMin(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4RelaxedMax(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF32x4RelaxedMax(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2RelaxedMin(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2RelaxedMin(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2RelaxedMax(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2RelaxedMax(Node* node) { UNIMPLEMENTED(); }
// No relaxed-SIMD truncation lowering on this target (guarded by
// !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32); bail out.
void InstructionSelector::VisitI32x4RelaxedTruncF64x2SZero(Node* node) {
UNIMPLEMENTED();
}
// No relaxed-SIMD truncation lowering on this target; bail out.
void InstructionSelector::VisitI32x4RelaxedTruncF64x2UZero(Node* node) {
UNIMPLEMENTED();
}
// No relaxed-SIMD truncation lowering on this target; bail out.
void InstructionSelector::VisitI32x4RelaxedTruncF32x4S(Node* node) {
UNIMPLEMENTED();
}
// No relaxed-SIMD truncation lowering on this target; bail out.
void InstructionSelector::VisitI32x4RelaxedTruncF32x4U(Node* node) {
UNIMPLEMENTED();
}
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 #endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); } void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
......
...@@ -329,30 +329,6 @@ WASM_RELAXED_SIMD_TEST(F64x2RelaxedMin) { ...@@ -329,30 +329,6 @@ WASM_RELAXED_SIMD_TEST(F64x2RelaxedMin) {
WASM_RELAXED_SIMD_TEST(F64x2RelaxedMax) { WASM_RELAXED_SIMD_TEST(F64x2RelaxedMax) {
RunF64x2BinOpTest(execution_tier, kExprF64x2RelaxedMax, Maximum); RunF64x2BinOpTest(execution_tier, kExprF64x2RelaxedMax, Maximum);
} }
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
#if V8_TARGET_ARCH_X64
WASM_RELAXED_SIMD_TEST(I8x16RelaxedSwizzle) {
// Output is only defined for indices in the range [0,15].
WasmRunner<int32_t> r(execution_tier);
static const int kElems = kSimd128Size / sizeof(uint8_t);
uint8_t* dst = r.builder().AddGlobal<uint8_t>(kWasmS128);
uint8_t* src = r.builder().AddGlobal<uint8_t>(kWasmS128);
uint8_t* indices = r.builder().AddGlobal<uint8_t>(kWasmS128);
BUILD(r,
WASM_GLOBAL_SET(
0, WASM_SIMD_BINOP(kExprI8x16RelaxedSwizzle, WASM_GLOBAL_GET(1),
WASM_GLOBAL_GET(2))),
WASM_ONE);
for (int i = 0; i < kElems; i++) {
LANE(src, i) = kElems - i - 1;
LANE(indices, i) = kElems - i - 1;
}
CHECK_EQ(1, r.Call());
for (int i = 0; i < kElems; i++) {
CHECK_EQ(LANE(dst, i), i);
}
}
namespace { namespace {
// For relaxed trunc instructions, don't test out of range values. // For relaxed trunc instructions, don't test out of range values.
...@@ -409,6 +385,30 @@ WASM_RELAXED_SIMD_TEST(I32x4RelaxedTruncF32x4U) { ...@@ -409,6 +385,30 @@ WASM_RELAXED_SIMD_TEST(I32x4RelaxedTruncF32x4U) {
IntRelaxedTruncFloatTest<uint32_t, float>( IntRelaxedTruncFloatTest<uint32_t, float>(
execution_tier, kExprI32x4RelaxedTruncF32x4U, kExprF32x4Splat); execution_tier, kExprI32x4RelaxedTruncF32x4U, kExprF32x4Splat);
} }
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
#if V8_TARGET_ARCH_X64
// Exercises i8x16.relaxed_swizzle with in-range indices only — the relaxed
// variant leaves results for indices outside [0,15] unspecified.
WASM_RELAXED_SIMD_TEST(I8x16RelaxedSwizzle) {
// Output is only defined for indices in the range [0,15].
WasmRunner<int32_t> r(execution_tier);
static const int kElems = kSimd128Size / sizeof(uint8_t);
// Three S128 globals: index 0 = dst (result), 1 = src, 2 = indices.
uint8_t* dst = r.builder().AddGlobal<uint8_t>(kWasmS128);
uint8_t* src = r.builder().AddGlobal<uint8_t>(kWasmS128);
uint8_t* indices = r.builder().AddGlobal<uint8_t>(kWasmS128);
// Body: dst = relaxed_swizzle(src, indices); the function returns 1.
BUILD(r,
WASM_GLOBAL_SET(
0, WASM_SIMD_BINOP(kExprI8x16RelaxedSwizzle, WASM_GLOBAL_GET(1),
WASM_GLOBAL_GET(2))),
WASM_ONE);
// Fill both src and indices with 15..0, so dst[i] = src[indices[i]] = i.
for (int i = 0; i < kElems; i++) {
LANE(src, i) = kElems - i - 1;
LANE(indices, i) = kElems - i - 1;
}
CHECK_EQ(1, r.Call());
// Swizzling the reversed payload by the reversed indices yields the identity.
for (int i = 0; i < kElems; i++) {
CHECK_EQ(LANE(dst, i), i);
}
}
#endif // V8_TARGET_ARCH_X64 #endif // V8_TARGET_ARCH_X64
#undef WASM_RELAXED_SIMD_TEST #undef WASM_RELAXED_SIMD_TEST
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment