Commit 2cd26485 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][arm] Prototype f32x4.trunc

Prototype f32x4.trunc on ARM for both ARM v7 and ARM v8. ARM v8 has
support for vrintz, and for ARM v7 we fall back to the runtime.

Since ARM v8 uses vrintz, which is the same instruction used for F32
trunc (scalar), wasm-compiler reuses the Float32RoundTruncate check.

Bug: v8:10553
Change-Id: I65ddc36ccff21f8f0ff21a6e768184c084ffcfea
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2256770
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Cr-Commit-Position: refs/heads/master@{#68498}
parent 9394cef2
......@@ -3892,7 +3892,7 @@ void Assembler::vcvt_u32_f32(QwNeonRegister dst, QwNeonRegister src) {
emit(EncodeNeonVCVT(U32, dst, F32, src));
}
enum UnaryOp { VMVN, VSWP, VABS, VABSF, VNEG, VNEGF, VRINTM, VRINTP };
enum UnaryOp { VMVN, VSWP, VABS, VABSF, VNEG, VNEGF, VRINTM, VRINTP, VRINTZ };
static Instr EncodeNeonUnaryOp(UnaryOp op, NeonRegType reg_type, NeonSize size,
int dst_code, int src_code) {
......@@ -3926,6 +3926,9 @@ static Instr EncodeNeonUnaryOp(UnaryOp op, NeonRegType reg_type, NeonSize size,
case VRINTP:
op_encoding = B17 | 0xF * B7;
break;
case VRINTZ:
op_encoding = B17 | 0xB * B7;
break;
default:
UNREACHABLE();
}
......@@ -4584,7 +4587,7 @@ void Assembler::vpmax(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src1,
void Assembler::vrintm(NeonDataType dt, const QwNeonRegister dst,
                       const QwNeonRegister src) {
  // Advanced SIMD VRINTM on Q registers: round each floating-point element of
  // src to an integral value towards -Infinity and write the result to dst.
  // See ARM DDI 0487F.b, F6-5493.
  // The instruction is only available when ARMv8 is enabled (asserted below).
  DCHECK(IsEnabled(ARMv8));
  const NeonSize op_size = NeonSize(dt);
  emit(EncodeNeonUnaryOp(VRINTM, NEON_Q, op_size, dst.code(), src.code()));
}
......@@ -4592,11 +4595,19 @@ void Assembler::vrintm(NeonDataType dt, const QwNeonRegister dst,
void Assembler::vrintp(NeonDataType dt, const QwNeonRegister dst,
                       const QwNeonRegister src) {
  // Advanced SIMD VRINTP on Q registers: round each floating-point element of
  // src to an integral value towards +Infinity and write the result to dst.
  // See ARM DDI 0487F.b, F6-5501.
  // The instruction is only available when ARMv8 is enabled (asserted below).
  DCHECK(IsEnabled(ARMv8));
  const NeonSize op_size = NeonSize(dt);
  emit(EncodeNeonUnaryOp(VRINTP, NEON_Q, op_size, dst.code(), src.code()));
}
void Assembler::vrintz(NeonDataType dt, const QwNeonRegister dst,
                       const QwNeonRegister src) {
  // Advanced SIMD VRINTZ on Q registers: round each floating-point element of
  // src to an integral value towards Zero and write the result to dst.
  // See ARM DDI 0487F.b, F6-5511.
  // The instruction is only available when ARMv8 is enabled (asserted below).
  DCHECK(IsEnabled(ARMv8));
  const NeonSize op_size = NeonSize(dt);
  emit(EncodeNeonUnaryOp(VRINTZ, NEON_Q, op_size, dst.code(), src.code()));
}
void Assembler::vtst(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
QwNeonRegister src2) {
DCHECK(IsEnabled(NEON));
......
......@@ -914,6 +914,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
const QwNeonRegister src);
void vrintp(NeonDataType dt, const QwNeonRegister dst,
const QwNeonRegister src);
void vrintz(NeonDataType dt, const QwNeonRegister dst,
const QwNeonRegister src);
void vshl(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src, int shift);
void vshl(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src,
......
......@@ -299,6 +299,7 @@ FUNCTION_REFERENCE(wasm_word64_rol, wasm::word64_rol_wrapper)
FUNCTION_REFERENCE(wasm_word64_ror, wasm::word64_ror_wrapper)
FUNCTION_REFERENCE(wasm_f32x4_ceil, wasm::f32x4_ceil_wrapper)
FUNCTION_REFERENCE(wasm_f32x4_floor, wasm::f32x4_floor_wrapper)
FUNCTION_REFERENCE(wasm_f32x4_trunc, wasm::f32x4_trunc_wrapper)
FUNCTION_REFERENCE(wasm_memory_init, wasm::memory_init_wrapper)
FUNCTION_REFERENCE(wasm_memory_copy, wasm::memory_copy_wrapper)
FUNCTION_REFERENCE(wasm_memory_fill, wasm::memory_fill_wrapper)
......
......@@ -208,6 +208,7 @@ class StatsCounter;
V(wasm_word64_popcnt, "wasm::word64_popcnt") \
V(wasm_f32x4_ceil, "wasm::f32x4_ceil_wrapper") \
V(wasm_f32x4_floor, "wasm::f32x4_floor_wrapper") \
V(wasm_f32x4_trunc, "wasm::f32x4_trunc_wrapper") \
V(wasm_memory_init, "wasm::memory_init") \
V(wasm_memory_copy, "wasm::memory_copy") \
V(wasm_memory_fill, "wasm::memory_fill") \
......
......@@ -1486,7 +1486,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmVrintzF32: {
CpuFeatureScope scope(tasm(), ARMv8);
__ vrintz(i.OutputFloatRegister(), i.InputFloatRegister(0));
if (instr->InputAt(0)->IsSimd128Register()) {
__ vrintz(NeonS32, i.OutputSimd128Register(),
i.InputSimd128Register(0));
} else {
__ vrintz(i.OutputFloatRegister(), i.InputFloatRegister(0));
}
break;
}
case kArmVrintzF64: {
......
......@@ -1497,7 +1497,8 @@ void InstructionSelector::VisitUint32Mod(Node* node) {
V(Float32RoundTiesEven, kArmVrintnF32) \
V(Float64RoundTiesEven, kArmVrintnF64) \
V(F32x4Ceil, kArmVrintpF32) \
V(F32x4Floor, kArmVrintmF32)
V(F32x4Floor, kArmVrintmF32) \
V(F32x4Trunc, kArmVrintzF32)
#define RRR_OP_LIST(V) \
V(Int32MulHigh, kArmSmmul) \
......
......@@ -2695,12 +2695,12 @@ void InstructionSelector::VisitF64x2Pmax(Node* node) { UNIMPLEMENTED(); }
#if !V8_TARGET_ARCH_ARM
void InstructionSelector::VisitF32x4Ceil(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Floor(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Trunc(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM
void InstructionSelector::VisitF64x2Ceil(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Floor(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Trunc(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2NearestInt(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Trunc(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4NearestInt(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_S390X
// && !V8_TARGET_ARCH_IA32
......
......@@ -4052,6 +4052,12 @@ Node* WasmGraphBuilder::BuildF32x4Floor(Node* input) {
return BuildCFuncInstruction(ref, type, input);
}
Node* WasmGraphBuilder::BuildF32x4Trunc(Node* input) {
  // Builds a call to the external C function registered as wasm_f32x4_trunc,
  // passing |input| as a Simd128 value. SimdOp uses this path when the
  // machine does not support Float32RoundTruncate.
  const ExternalReference trunc_fn = ExternalReference::wasm_f32x4_trunc();
  const MachineType simd128 = MachineType::Simd128();
  return BuildCFuncInstruction(trunc_fn, simd128, input);
}
void WasmGraphBuilder::PrintDebugName(Node* node) {
  // Prints |node| as "#<id>:<operator mnemonic>".
  const auto node_id = node->id();
  const auto mnemonic = node->op()->mnemonic();
  PrintF("#%d:%s", node_id, mnemonic);
}
......@@ -4303,6 +4309,10 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
return BuildF32x4Floor(inputs[0]);
return graph()->NewNode(mcgraph()->machine()->F32x4Floor(), inputs[0]);
case wasm::kExprF32x4Trunc:
// Architecture support for F32x4Trunc and Float32RoundTruncate is the
// same.
if (!mcgraph()->machine()->Float32RoundTruncate().IsSupported())
return BuildF32x4Trunc(inputs[0]);
return graph()->NewNode(mcgraph()->machine()->F32x4Trunc(), inputs[0]);
case wasm::kExprF32x4NearestInt:
return graph()->NewNode(mcgraph()->machine()->F32x4NearestInt(),
......
......@@ -557,6 +557,7 @@ class WasmGraphBuilder {
// Wasm SIMD.
Node* BuildF32x4Ceil(Node* input);
Node* BuildF32x4Floor(Node* input);
Node* BuildF32x4Trunc(Node* input);
void BuildEncodeException32BitValue(Node* values_array, uint32_t* index,
Node* value);
......
......@@ -2275,10 +2275,17 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
SNPrintF(out_buffer_ + out_buffer_pos_, "%s.%c%i d%d, q%d", name,
type, size, Vd, Vm);
} else if (instr->Bits(17, 16) == 0x2 && instr->Bit(10) == 1) {
// NEON vrintm, vrintp
// NEON vrintm, vrintp, vrintz
bool dp_op = instr->Bit(6) == 0;
int rounding_mode = instr->Bits(9, 7);
switch (rounding_mode) {
case 3:
if (dp_op) {
Format(instr, "vrintz.f32 'Dd, 'Dm");
} else {
Format(instr, "vrintz.f32 'Qd, 'Qm");
}
break;
case 5:
if (dp_op) {
Format(instr, "vrintm.f32 'Dd, 'Dm");
......
......@@ -5375,7 +5375,8 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
} else {
UNIMPLEMENTED();
}
} else if (instr->Bits(19, 18) == 0x2 && instr->Bits(11, 8) == 0x5) {
} else if (instr->Bits(19, 18) == 0x2 && instr->Bits(17, 16) == 0x3 &&
instr->Bits(11, 8) == 0x5) {
// vrecpe/vrsqrte.f32 Qd, Qm.
int Vd = instr->VFPDRegValue(kSimd128Precision);
int Vm = instr->VFPMRegValue(kSimd128Precision);
......@@ -5450,6 +5451,9 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
int rounding_mode = instr->Bits(9, 7);
float (*fproundint)(float) = nullptr;
switch (rounding_mode) {
case 3:
fproundint = &truncf;
break;
case 5:
fproundint = &floorf;
break;
......
......@@ -419,6 +419,10 @@ void f32x4_floor_wrapper(Address data) {
simd_float_round_wrapper<float, &floorf>(data);
}
// C fallback for wasm f32x4.trunc: forwards |data| to
// simd_float_round_wrapper instantiated with float and truncf.
// NOTE(review): presumably |data| addresses the f32 lanes, which are rounded
// towards zero in place -- confirm against simd_float_round_wrapper.
void f32x4_trunc_wrapper(Address data) {
simd_float_round_wrapper<float, &truncf>(data);
}
namespace {
class ThreadNotInWasmScope {
// Asan on Windows triggers exceptions to allocate shadow memory lazily. When
......
......@@ -83,6 +83,8 @@ V8_EXPORT_PRIVATE void f32x4_ceil_wrapper(Address data);
V8_EXPORT_PRIVATE void f32x4_floor_wrapper(Address data);
V8_EXPORT_PRIVATE void f32x4_trunc_wrapper(Address data);
// The return type is {int32_t} instead of {bool} to enforce the compiler to
// zero-extend the result in the return register.
int32_t memory_init_wrapper(Address data);
......
......@@ -920,6 +920,7 @@ TEST(ARMv8_vrintX_disasm) {
// Advanced SIMD
COMPARE(vrintm(NeonS32, q0, q3), "f3ba06c6 vrintm.f32 q0, q3");
COMPARE(vrintp(NeonS32, q0, q3), "f3ba07c6 vrintp.f32 q0, q3");
COMPARE(vrintz(NeonS32, q0, q3), "f3ba05c6 vrintz.f32 q0, q3");
}
VERIFY_RUN();
......
......@@ -703,14 +703,14 @@ WASM_SIMD_TEST_NO_LOWERING(F32x4Floor) {
RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4Floor, floorf, true);
}
// TODO(zhin): Temporary convoluted way to exclude running these tests on ARM as
// we are implementing each opcode one at a time.
#if !V8_TARGET_ARCH_ARM
WASM_SIMD_TEST_NO_LOWERING(F32x4Trunc) {
FLAG_SCOPE(wasm_simd_post_mvp);
RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4Trunc, truncf, true);
}
// TODO(zhin): Temporary convoluted way to exclude running these tests on ARM as
// we are implementing each opcode one at a time.
#if !V8_TARGET_ARCH_ARM
WASM_SIMD_TEST_NO_LOWERING(F32x4NearestInt) {
FLAG_SCOPE(wasm_simd_post_mvp);
RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4NearestInt, nearbyintf,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment