Commit 19e6ead0 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][arm64] Consolidate fp rounding opcodes

Remove 8 NEON rounding opcodes by merging them into the existing scalar
float rounding opcodes: the underlying instruction is the same for the
scalar and vector cases, only the register format differs, and that
format can be determined at codegen time. (A simplified sketch of this
dispatch pattern appears just before the diff below.)

Bug: v8:10930
Change-Id: Ice19c1e2a31f6913c748976fe3a021035a752d88
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2436617
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#70241}
parent 88dfe1c8
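A simplified, self-contained sketch of the dispatch pattern this change introduces, placed here before the diff for orientation. The MockAssembler, VRegister, and driver code below are illustrative stand-ins, not V8's actual TurboAssembler API; only the shape of EmitFpOrNeonUnop mirrors the helper added in the diff.

#include <cstdio>

// Register formats, named after the ones the diff uses: scalar single/double
// precision (S/D) and the packed NEON forms (4S/2D).
enum VectorFormat { kFormatS, kFormatD, kFormat4S, kFormat2D };

// Illustrative stand-in for V8's VRegister: a register code plus a format.
struct VRegister {
  int code;
  VectorFormat format;
};

// Illustrative stand-in for TurboAssembler. On arm64, one Frint* entry point
// covers both the scalar and the NEON encoding; the register format decides.
struct MockAssembler {
  void Frintm(VRegister out, VRegister in) {
    std::printf("frintm v%d <- v%d (format %d)\n", out.code, in.code,
                static_cast<int>(in.format));
  }
};

// Mirrors the shape of the new EmitFpOrNeonUnop: take a member-function
// pointer to the instruction, pick the scalar or vector format at codegen
// time, and emit through the single entry point.
template <typename Fn>
void EmitFpOrNeonUnop(MockAssembler* tasm, Fn fn, bool input_is_simd128,
                      int out_code, int in_code, VectorFormat scalar,
                      VectorFormat vector) {
  VectorFormat f = input_is_simd128 ? vector : scalar;
  (tasm->*fn)(VRegister{out_code, f}, VRegister{in_code, f});
}

int main() {
  MockAssembler masm;
  // Scalar Float32RoundDown: the helper picks kFormatS.
  EmitFpOrNeonUnop(&masm, &MockAssembler::Frintm, false, 0, 1, kFormatS,
                   kFormat4S);
  // F32x4 floor on a SIMD operand: same opcode and helper, picks kFormat4S.
  EmitFpOrNeonUnop(&masm, &MockAssembler::Frintm, true, 0, 1, kFormatS,
                   kFormat4S);
  return 0;
}

This is why the eight kArm64F64x2Round*/kArm64F32x4Round* opcodes can be deleted: the instruction selector reuses the scalar opcodes, and the codegen-time format check recovers the NEON encoding.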
--- a/src/compiler/backend/arm64/code-generator-arm64.cc
+++ b/src/compiler/backend/arm64/code-generator-arm64.cc
@@ -418,6 +418,18 @@ void EmitMaybePoisonedFPLoad(CodeGenerator* codegen, InstructionCode opcode,
   }
 }
 
+// Handles unary ops that work for float (scalar), double (scalar), or NEON.
+template <typename Fn>
+void EmitFpOrNeonUnop(TurboAssembler* tasm, Fn fn, Instruction* instr,
+                      Arm64OperandConverter i, VectorFormat scalar,
+                      VectorFormat vector) {
+  VectorFormat f = instr->InputAt(0)->IsSimd128Register() ? vector : scalar;
+  VRegister output = VRegister::Create(i.OutputDoubleRegister().code(), f);
+  VRegister input = VRegister::Create(i.InputDoubleRegister(0).code(), f);
+  (tasm->*fn)(output, input);
+}
+
 }  // namespace
 
 #define ASSEMBLE_SHIFT(asm_instr, width) \
@@ -1030,31 +1042,40 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       ASSEMBLE_IEEE754_UNOP(tanh);
       break;
     case kArm64Float32RoundDown:
-      __ Frintm(i.OutputFloat32Register(), i.InputFloat32Register(0));
+      EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frintm, instr, i, kFormatS,
+                       kFormat4S);
       break;
     case kArm64Float64RoundDown:
-      __ Frintm(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frintm, instr, i, kFormatD,
+                       kFormat2D);
       break;
     case kArm64Float32RoundUp:
-      __ Frintp(i.OutputFloat32Register(), i.InputFloat32Register(0));
+      EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frintp, instr, i, kFormatS,
+                       kFormat4S);
       break;
     case kArm64Float64RoundUp:
-      __ Frintp(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frintp, instr, i, kFormatD,
+                       kFormat2D);
       break;
     case kArm64Float64RoundTiesAway:
-      __ Frinta(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frinta, instr, i, kFormatD,
+                       kFormat2D);
       break;
     case kArm64Float32RoundTruncate:
-      __ Frintz(i.OutputFloat32Register(), i.InputFloat32Register(0));
+      EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frintz, instr, i, kFormatS,
+                       kFormat4S);
       break;
     case kArm64Float64RoundTruncate:
-      __ Frintz(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frintz, instr, i, kFormatD,
+                       kFormat2D);
       break;
     case kArm64Float32RoundTiesEven:
-      __ Frintn(i.OutputFloat32Register(), i.InputFloat32Register(0));
+      EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frintn, instr, i, kFormatS,
+                       kFormat4S);
       break;
     case kArm64Float64RoundTiesEven:
-      __ Frintn(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
+      EmitFpOrNeonUnop(tasm(), &TurboAssembler::Frintn, instr, i, kFormatD,
+                       kFormat2D);
       break;
     case kArm64Add:
       if (FlagsModeField::decode(opcode) != kFlags_none) {
@@ -1940,22 +1961,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B());
       break;
     }
-    case kArm64F64x2RoundUp:
-      __ Frintp(i.OutputSimd128Register().V2D(),
-                i.InputSimd128Register(0).V2D());
-      break;
-    case kArm64F64x2RoundDown:
-      __ Frintm(i.OutputSimd128Register().V2D(),
-                i.InputSimd128Register(0).V2D());
-      break;
-    case kArm64F64x2RoundTruncate:
-      __ Frintz(i.OutputSimd128Register().V2D(),
-                i.InputSimd128Register(0).V2D());
-      break;
-    case kArm64F64x2RoundTiesEven:
-      __ Frintn(i.OutputSimd128Register().V2D(),
-                i.InputSimd128Register(0).V2D());
-      break;
     case kArm64F32x4Splat: {
       __ Dup(i.OutputSimd128Register().V4S(), i.InputSimd128Register(0).S(), 0);
       break;
@@ -2029,22 +2034,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       __ Bsl(dst.V16B(), rhs.V16B(), lhs.V16B());
       break;
     }
-    case kArm64F32x4RoundUp:
-      __ Frintp(i.OutputSimd128Register().V4S(),
-                i.InputSimd128Register(0).V4S());
-      break;
-    case kArm64F32x4RoundDown:
-      __ Frintm(i.OutputSimd128Register().V4S(),
-                i.InputSimd128Register(0).V4S());
-      break;
-    case kArm64F32x4RoundTruncate:
-      __ Frintz(i.OutputSimd128Register().V4S(),
-                i.InputSimd128Register(0).V4S());
-      break;
-    case kArm64F32x4RoundTiesEven:
-      __ Frintn(i.OutputSimd128Register().V4S(),
-                i.InputSimd128Register(0).V4S());
-      break;
     case kArm64I64x2Splat: {
       __ Dup(i.OutputSimd128Register().V2D(), i.InputRegister64(0));
       break;
--- a/src/compiler/backend/arm64/instruction-codes-arm64.h
+++ b/src/compiler/backend/arm64/instruction-codes-arm64.h
@@ -188,10 +188,6 @@ namespace compiler {
   V(Arm64F64x2Qfms) \
   V(Arm64F64x2Pmin) \
   V(Arm64F64x2Pmax) \
-  V(Arm64F64x2RoundUp) \
-  V(Arm64F64x2RoundDown) \
-  V(Arm64F64x2RoundTruncate) \
-  V(Arm64F64x2RoundTiesEven) \
   V(Arm64F32x4Splat) \
   V(Arm64F32x4ExtractLane) \
   V(Arm64F32x4ReplaceLane) \
@@ -217,10 +213,6 @@ namespace compiler {
   V(Arm64F32x4Qfms) \
   V(Arm64F32x4Pmin) \
   V(Arm64F32x4Pmax) \
-  V(Arm64F32x4RoundUp) \
-  V(Arm64F32x4RoundDown) \
-  V(Arm64F32x4RoundTruncate) \
-  V(Arm64F32x4RoundTiesEven) \
   V(Arm64I64x2Splat) \
   V(Arm64I64x2ExtractLane) \
   V(Arm64I64x2ReplaceLane) \
--- a/src/compiler/backend/arm64/instruction-scheduler-arm64.cc
+++ b/src/compiler/backend/arm64/instruction-scheduler-arm64.cc
@@ -158,10 +158,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kArm64F64x2Qfms:
    case kArm64F64x2Pmin:
    case kArm64F64x2Pmax:
-    case kArm64F64x2RoundUp:
-    case kArm64F64x2RoundDown:
-    case kArm64F64x2RoundTruncate:
-    case kArm64F64x2RoundTiesEven:
     case kArm64F32x4Splat:
     case kArm64F32x4ExtractLane:
     case kArm64F32x4ReplaceLane:
@@ -187,10 +183,6 @@ int InstructionScheduler::GetTargetInstructionFlags(
     case kArm64F32x4Qfms:
     case kArm64F32x4Pmin:
     case kArm64F32x4Pmax:
-    case kArm64F32x4RoundUp:
-    case kArm64F32x4RoundDown:
-    case kArm64F32x4RoundTruncate:
-    case kArm64F32x4RoundTiesEven:
     case kArm64I64x2Splat:
     case kArm64I64x2ExtractLane:
     case kArm64I64x2ReplaceLane:
--- a/src/compiler/backend/arm64/instruction-selector-arm64.cc
+++ b/src/compiler/backend/arm64/instruction-selector-arm64.cc
@@ -1379,14 +1379,14 @@ void InstructionSelector::VisitWord64Ror(Node* node) {
   V(Float64ExtractLowWord32, kArm64Float64ExtractLowWord32) \
   V(Float64ExtractHighWord32, kArm64Float64ExtractHighWord32) \
   V(Float64SilenceNaN, kArm64Float64SilenceNaN) \
-  V(F32x4Ceil, kArm64F32x4RoundUp) \
-  V(F32x4Floor, kArm64F32x4RoundDown) \
-  V(F32x4Trunc, kArm64F32x4RoundTruncate) \
-  V(F32x4NearestInt, kArm64F32x4RoundTiesEven) \
-  V(F64x2Ceil, kArm64F64x2RoundUp) \
-  V(F64x2Floor, kArm64F64x2RoundDown) \
-  V(F64x2Trunc, kArm64F64x2RoundTruncate) \
-  V(F64x2NearestInt, kArm64F64x2RoundTiesEven)
+  V(F32x4Ceil, kArm64Float32RoundUp) \
+  V(F32x4Floor, kArm64Float32RoundDown) \
+  V(F32x4Trunc, kArm64Float32RoundTruncate) \
+  V(F32x4NearestInt, kArm64Float32RoundTiesEven) \
+  V(F64x2Ceil, kArm64Float64RoundUp) \
+  V(F64x2Floor, kArm64Float64RoundDown) \
+  V(F64x2Trunc, kArm64Float64RoundTruncate) \
+  V(F64x2NearestInt, kArm64Float64RoundTiesEven)
 
 #define RRR_OP_LIST(V) \
   V(Int32Div, kArm64Idiv32) \