Commit f332380e authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][arm] Prototype f32x4.nearest

Prototype f32x4.nearest on ARM for both ARM v7 and ARM v8. ARM v8 supports
vrintn; on ARM v7 we fall back to a runtime call.

Since ARM v8 uses vrintn, which is the same instruction used for
F32RoundTiesEven (scalar), wasm-compiler reuses the Float32RoundTiesEven
check.

Bug: v8:10553
Change-Id: I066b8c5f10fd86294afe1c530c516493deeb7b53
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2258037
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#68526}
parent e19c945b
......@@ -3892,7 +3892,18 @@ void Assembler::vcvt_u32_f32(QwNeonRegister dst, QwNeonRegister src) {
emit(EncodeNeonVCVT(U32, dst, F32, src));
}
enum UnaryOp { VMVN, VSWP, VABS, VABSF, VNEG, VNEGF, VRINTM, VRINTP, VRINTZ };
// Unary NEON operations. EncodeNeonUnaryOp switches on this value to pick the
// opcode bits of the emitted instruction.
enum UnaryOp {
VMVN,
VSWP,
VABS,
VABSF,
VNEG,
VNEGF,
// VRINT* are the ARMv8 NEON "round floating-point to integer" variants.
// NOTE(review): VRINTM is presumably round toward -Infinity (it backs
// F32x4Floor) — confirm against the ARM reference manual.
VRINTM,
// Round to nearest.
VRINTN,
// Round toward +Infinity.
VRINTP,
// Round toward zero (the simulator implements this mode with truncf).
VRINTZ
};
static Instr EncodeNeonUnaryOp(UnaryOp op, NeonRegType reg_type, NeonSize size,
int dst_code, int src_code) {
......@@ -3923,6 +3934,9 @@ static Instr EncodeNeonUnaryOp(UnaryOp op, NeonRegType reg_type, NeonSize size,
case VRINTM:
op_encoding = B17 | 0xD * B7;
break;
case VRINTN:
op_encoding = B17 | 0x8 * B7;
break;
case VRINTP:
op_encoding = B17 | 0xF * B7;
break;
......@@ -4592,6 +4606,14 @@ void Assembler::vrintm(NeonDataType dt, const QwNeonRegister dst,
emit(EncodeNeonUnaryOp(VRINTM, NEON_Q, NeonSize(dt), dst.code(), src.code()));
}
void Assembler::vrintn(NeonDataType dt, const QwNeonRegister dst,
                       const QwNeonRegister src) {
  // VRINTN (Advanced SIMD, Q-register form): round each floating-point lane
  // to an integral value using round-to-nearest.
  // Reference: ARM DDI 0487F.b, F6-5497.
  // Callers are expected to have the ARMv8 feature enabled (debug-checked).
  DCHECK(IsEnabled(ARMv8));
  const int dst_code = dst.code();
  const int src_code = src.code();
  emit(EncodeNeonUnaryOp(VRINTN, NEON_Q, NeonSize(dt), dst_code, src_code));
}
void Assembler::vrintp(NeonDataType dt, const QwNeonRegister dst,
const QwNeonRegister src) {
// SIMD vector round floating-point to integer towards +Infinity.
......
......@@ -912,6 +912,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
// ARMv8 rounding instructions (NEON).
void vrintm(NeonDataType dt, const QwNeonRegister dst,
const QwNeonRegister src);
void vrintn(NeonDataType dt, const QwNeonRegister dst,
const QwNeonRegister src);
void vrintp(NeonDataType dt, const QwNeonRegister dst,
const QwNeonRegister src);
void vrintz(NeonDataType dt, const QwNeonRegister dst,
......
......@@ -300,6 +300,7 @@ FUNCTION_REFERENCE(wasm_word64_ror, wasm::word64_ror_wrapper)
FUNCTION_REFERENCE(wasm_f32x4_ceil, wasm::f32x4_ceil_wrapper)
FUNCTION_REFERENCE(wasm_f32x4_floor, wasm::f32x4_floor_wrapper)
FUNCTION_REFERENCE(wasm_f32x4_trunc, wasm::f32x4_trunc_wrapper)
FUNCTION_REFERENCE(wasm_f32x4_nearest_int, wasm::f32x4_nearest_int_wrapper)
FUNCTION_REFERENCE(wasm_memory_init, wasm::memory_init_wrapper)
FUNCTION_REFERENCE(wasm_memory_copy, wasm::memory_copy_wrapper)
FUNCTION_REFERENCE(wasm_memory_fill, wasm::memory_fill_wrapper)
......
......@@ -209,6 +209,7 @@ class StatsCounter;
V(wasm_f32x4_ceil, "wasm::f32x4_ceil_wrapper") \
V(wasm_f32x4_floor, "wasm::f32x4_floor_wrapper") \
V(wasm_f32x4_trunc, "wasm::f32x4_trunc_wrapper") \
V(wasm_f32x4_nearest_int, "wasm::f32x4_nearest_int_wrapper") \
V(wasm_memory_init, "wasm::memory_init") \
V(wasm_memory_copy, "wasm::memory_copy") \
V(wasm_memory_fill, "wasm::memory_fill") \
......
......@@ -1506,7 +1506,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kArmVrintnF32: {
CpuFeatureScope scope(tasm(), ARMv8);
__ vrintn(i.OutputFloatRegister(), i.InputFloatRegister(0));
if (instr->InputAt(0)->IsSimd128Register()) {
__ vrintn(NeonS32, i.OutputSimd128Register(),
i.InputSimd128Register(0));
} else {
__ vrintn(i.OutputFloatRegister(), i.InputFloatRegister(0));
}
break;
}
case kArmVrintnF64: {
......
......@@ -1498,7 +1498,8 @@ void InstructionSelector::VisitUint32Mod(Node* node) {
V(Float64RoundTiesEven, kArmVrintnF64) \
V(F32x4Ceil, kArmVrintpF32) \
V(F32x4Floor, kArmVrintmF32) \
V(F32x4Trunc, kArmVrintzF32)
V(F32x4Trunc, kArmVrintzF32) \
V(F32x4NearestInt, kArmVrintnF32)
#define RRR_OP_LIST(V) \
V(Int32MulHigh, kArmSmmul) \
......
......@@ -2696,12 +2696,12 @@ void InstructionSelector::VisitF64x2Pmax(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Ceil(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Floor(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Trunc(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4NearestInt(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_ARM
void InstructionSelector::VisitF64x2Ceil(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Floor(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Trunc(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2NearestInt(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4NearestInt(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_S390X
// && !V8_TARGET_ARCH_IA32
......
......@@ -4058,6 +4058,12 @@ Node* WasmGraphBuilder::BuildF32x4Trunc(Node* input) {
return BuildCFuncInstruction(ref, type, input);
}
Node* WasmGraphBuilder::BuildF32x4NearestInt(Node* input) {
  // Lower f32x4.nearest to a call into the C helper registered as the
  // wasm_f32x4_nearest_int external reference; the value is passed as a
  // full 128-bit SIMD value.
  ExternalReference nearest_int_fn =
      ExternalReference::wasm_f32x4_nearest_int();
  MachineType simd128 = MachineType::Simd128();
  return BuildCFuncInstruction(nearest_int_fn, simd128, input);
}
void WasmGraphBuilder::PrintDebugName(Node* node) {
  // Emits "#<id>:<mnemonic>" for the given node via PrintF.
  const auto node_id = node->id();
  const auto op_mnemonic = node->op()->mnemonic();
  PrintF("#%d:%s", node_id, op_mnemonic);
}
......@@ -4315,6 +4321,10 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
return BuildF32x4Trunc(inputs[0]);
return graph()->NewNode(mcgraph()->machine()->F32x4Trunc(), inputs[0]);
case wasm::kExprF32x4NearestInt:
// Architecture support for F32x4NearestInt and Float32RoundTiesEven is
// the same.
if (!mcgraph()->machine()->Float32RoundTiesEven().IsSupported())
return BuildF32x4NearestInt(inputs[0]);
return graph()->NewNode(mcgraph()->machine()->F32x4NearestInt(),
inputs[0]);
case wasm::kExprI64x2Splat:
......
......@@ -560,6 +560,7 @@ class WasmGraphBuilder {
Node* BuildF32x4Ceil(Node* input);
Node* BuildF32x4Floor(Node* input);
Node* BuildF32x4Trunc(Node* input);
Node* BuildF32x4NearestInt(Node* input);
void BuildEncodeException32BitValue(Node* values_array, uint32_t* index,
Node* value);
......
......@@ -2275,10 +2275,17 @@ void Decoder::DecodeSpecialCondition(Instruction* instr) {
SNPrintF(out_buffer_ + out_buffer_pos_, "%s.%c%i d%d, q%d", name,
type, size, Vd, Vm);
} else if (instr->Bits(17, 16) == 0x2 && instr->Bit(10) == 1) {
// NEON vrintm, vrintp, vrintz
// NEON vrintm, vrintn, vrintp, vrintz.
bool dp_op = instr->Bit(6) == 0;
int rounding_mode = instr->Bits(9, 7);
switch (rounding_mode) {
case 0:
if (dp_op) {
Format(instr, "vrintn.f32 'Dd, 'Dm");
} else {
Format(instr, "vrintn.f32 'Qd, 'Qm");
}
break;
case 3:
if (dp_op) {
Format(instr, "vrintz.f32 'Dd, 'Dm");
......
......@@ -5451,6 +5451,9 @@ void Simulator::DecodeSpecialCondition(Instruction* instr) {
int rounding_mode = instr->Bits(9, 7);
float (*fproundint)(float) = nullptr;
switch (rounding_mode) {
case 0:
fproundint = &nearbyintf;
break;
case 3:
fproundint = &truncf;
break;
......
......@@ -423,6 +423,10 @@ void f32x4_trunc_wrapper(Address data) {
simd_float_round_wrapper<float, &truncf>(data);
}
// C implementation of f32x4.nearest, exposed to generated code through the
// wasm_f32x4_nearest_int external reference. Forwards |data| to
// simd_float_round_wrapper with nearbyintf as the rounding function.
// NOTE(review): nearbyintf honours the current floating-point rounding mode;
// this presumably relies on the default round-to-nearest-even — confirm.
void f32x4_nearest_int_wrapper(Address data) {
simd_float_round_wrapper<float, &nearbyintf>(data);
}
namespace {
class ThreadNotInWasmScope {
// Asan on Windows triggers exceptions to allocate shadow memory lazily. When
......
......@@ -85,6 +85,8 @@ V8_EXPORT_PRIVATE void f32x4_floor_wrapper(Address data);
V8_EXPORT_PRIVATE void f32x4_trunc_wrapper(Address data);
V8_EXPORT_PRIVATE void f32x4_nearest_int_wrapper(Address data);
// The return type is {int32_t} instead of {bool} to enforce the compiler to
// zero-extend the result in the return register.
int32_t memory_init_wrapper(Address data);
......
......@@ -919,6 +919,7 @@ TEST(ARMv8_vrintX_disasm) {
// Advanced SIMD
COMPARE(vrintm(NeonS32, q0, q3), "f3ba06c6 vrintm.f32 q0, q3");
COMPARE(vrintn(NeonS32, q0, q3), "f3ba0446 vrintn.f32 q0, q3");
COMPARE(vrintp(NeonS32, q0, q3), "f3ba07c6 vrintp.f32 q0, q3");
COMPARE(vrintz(NeonS32, q0, q3), "f3ba05c6 vrintz.f32 q0, q3");
}
......
......@@ -708,15 +708,11 @@ WASM_SIMD_TEST_NO_LOWERING(F32x4Trunc) {
RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4Trunc, truncf, true);
}
// TODO(zhin): Temporary convoluted way to exclude running these tests on ARM as
// we are implementing each opcode one at a time.
#if !V8_TARGET_ARCH_ARM
// Exercises the f32x4.nearest opcode (kExprF32x4NearestInt) through
// RunF32x4UnOpTest, passing nearbyintf as the function argument
// (presumably the scalar reference the results are compared against —
// confirm in RunF32x4UnOpTest).
WASM_SIMD_TEST_NO_LOWERING(F32x4NearestInt) {
// The opcode is behind the wasm_simd_post_mvp flag, so enable it for this
// test's scope.
FLAG_SCOPE(wasm_simd_post_mvp);
RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4NearestInt, nearbyintf,
true);
}
#endif // !V8_TARGET_ARCH_ARM
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X ||
// V8_TARGET_ARCH_IA32 || V8_TARGET_ARCH_ARM
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment