Commit 723cee9a, authored by Sam Parker, committed by Commit Bot

[compiler][arm64] fabs(fsub(x, y)) to fabd(x, y)

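Introduce two arm64 machine opcodes for FABD (kArm64Float32Abd and
kArm64Float64Abd) and fold Float32Abs(Float32Sub(x, y)) and
Float64Abs(Float64Sub(x, y)) into them during instruction selection.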

This gives ~1% speed improvement of the Bullet physics engine
compiled as wasm.

Change-Id: Ifd985538e6ebb280bc0eaf11b0ebfc687891cf91
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2786854
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: Andreas Haas <ahaas@chromium.org>
Commit-Queue: Martyn Capewell <martyn.capewell@arm.com>
Cr-Commit-Position: refs/heads/master@{#73765}
parent f49f834f
......@@ -1572,6 +1572,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kArm64Float32Abs:
__ Fabs(i.OutputFloat32Register(), i.InputFloat32Register(0));
break;
case kArm64Float32Abd:
__ Fabd(i.OutputFloat32Register(), i.InputFloat32Register(0),
i.InputFloat32Register(1));
break;
case kArm64Float32Neg:
__ Fneg(i.OutputFloat32Register(), i.InputFloat32Register(0));
break;
......@@ -1642,6 +1646,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kArm64Float64Abs:
__ Fabs(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
break;
case kArm64Float64Abd:
__ Fabd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
i.InputDoubleRegister(1));
break;
case kArm64Float64Neg:
__ Fneg(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
break;
......
......@@ -99,6 +99,7 @@ namespace compiler {
V(Arm64Float32Mul) \
V(Arm64Float32Div) \
V(Arm64Float32Abs) \
V(Arm64Float32Abd) \
V(Arm64Float32Neg) \
V(Arm64Float32Sqrt) \
V(Arm64Float32Fnmul) \
......@@ -114,6 +115,7 @@ namespace compiler {
V(Arm64Float64Max) \
V(Arm64Float64Min) \
V(Arm64Float64Abs) \
V(Arm64Float64Abd) \
V(Arm64Float64Neg) \
V(Arm64Float64Sqrt) \
V(Arm64Float64Fnmul) \
......
......@@ -92,6 +92,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64Float32Mul:
case kArm64Float32Div:
case kArm64Float32Abs:
case kArm64Float32Abd:
case kArm64Float32Neg:
case kArm64Float32Sqrt:
case kArm64Float32Fnmul:
......@@ -106,6 +107,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64Float64Max:
case kArm64Float64Min:
case kArm64Float64Abs:
case kArm64Float64Abd:
case kArm64Float64Neg:
case kArm64Float64Sqrt:
case kArm64Float64Fnmul:
......
......@@ -1451,8 +1451,6 @@ void InstructionSelector::VisitWord64Ror(Node* node) {
V(BitcastFloat64ToInt64, kArm64U64MoveFloat64) \
V(BitcastInt32ToFloat32, kArm64Float64MoveU64) \
V(BitcastInt64ToFloat64, kArm64Float64MoveU64) \
V(Float32Abs, kArm64Float32Abs) \
V(Float64Abs, kArm64Float64Abs) \
V(Float32Sqrt, kArm64Float32Sqrt) \
V(Float64Sqrt, kArm64Float64Sqrt) \
V(Float32RoundDown, kArm64Float32RoundDown) \
......@@ -3055,6 +3053,30 @@ void InstructionSelector::VisitFloat32Mul(Node* node) {
return VisitRRR(this, kArm64Float32Mul, node);
}
// Selects the arm64 instruction for a Float32Abs node.
//
// If the node's operand is a Float32Sub and CanCover(node, operand) allows it,
// the pair is emitted as one kArm64Float32Abd that reads the subtraction's two
// inputs directly. In every other case a plain kArm64Float32Abs is selected.
void InstructionSelector::VisitFloat32Abs(Node* node) {
  Arm64OperandGenerator g(this);
  Node* operand = node->InputAt(0);
  const bool fold_into_abd =
      operand->opcode() == IrOpcode::kFloat32Sub && CanCover(node, operand);
  if (!fold_into_abd) {
    // No foldable subtraction feeding the abs: fall back to the unary form.
    VisitRR(this, kArm64Float32Abs, node);
    return;
  }
  // abs(x - y): the result is defined for the abs node itself, while the
  // inputs come from the covered subtraction.
  Emit(kArm64Float32Abd, g.DefineAsRegister(node),
       g.UseRegister(operand->InputAt(0)), g.UseRegister(operand->InputAt(1)));
}
// Selects the arm64 instruction for a Float64Abs node.
//
// If the node's operand is a Float64Sub and CanCover(node, operand) allows it,
// the pair is emitted as one kArm64Float64Abd that reads the subtraction's two
// inputs directly. In every other case a plain kArm64Float64Abs is selected.
void InstructionSelector::VisitFloat64Abs(Node* node) {
  Arm64OperandGenerator g(this);
  Node* operand = node->InputAt(0);
  const bool fold_into_abd =
      operand->opcode() == IrOpcode::kFloat64Sub && CanCover(node, operand);
  if (!fold_into_abd) {
    // No foldable subtraction feeding the abs: fall back to the unary form.
    VisitRR(this, kArm64Float64Abs, node);
    return;
  }
  // abs(x - y): the result is defined for the abs node itself, while the
  // inputs come from the covered subtraction.
  Emit(kArm64Float64Abd, g.DefineAsRegister(node),
       g.UseRegister(operand->InputAt(0)), g.UseRegister(operand->InputAt(1)));
}
void InstructionSelector::VisitFloat32Equal(Node* node) {
FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
VisitFloat32Compare(this, node, &cont);
......
......@@ -4724,6 +4724,42 @@ TEST_F(InstructionSelectorTest, Float64Abs) {
EXPECT_EQ(s.ToVreg(n), s.ToVreg(s[0]->Output()));
}
// Checks that Float32Abs(Float32Sub(p0, p1)) is selected as exactly one
// kArm64Float32Abd instruction whose inputs are the two parameters (in order)
// and whose output is the abs node.
TEST_F(InstructionSelectorTest, Float32Abd) {
  StreamBuilder m(this, MachineType::Float32(), MachineType::Float32(),
                  MachineType::Float32());
  Node* const lhs = m.Parameter(0);
  Node* const rhs = m.Parameter(1);
  Node* const abs_diff = m.Float32Abs(m.Float32Sub(lhs, rhs));
  m.Return(abs_diff);
  Stream s = m.Build();

  // The subtraction and the abs must collapse into a single instruction.
  ASSERT_EQ(1U, s.size());
  const auto& selected = s[0];
  EXPECT_EQ(kArm64Float32Abd, selected->arch_opcode());

  // Operands of the subtraction feed the instruction directly.
  ASSERT_EQ(2U, selected->InputCount());
  EXPECT_EQ(s.ToVreg(lhs), s.ToVreg(selected->InputAt(0)));
  EXPECT_EQ(s.ToVreg(rhs), s.ToVreg(selected->InputAt(1)));

  // The instruction defines the value of the abs node.
  ASSERT_EQ(1U, selected->OutputCount());
  EXPECT_EQ(s.ToVreg(abs_diff), s.ToVreg(selected->Output()));
}
// Checks that Float64Abs(Float64Sub(p0, p1)) is selected as exactly one
// kArm64Float64Abd instruction whose inputs are the two parameters (in order)
// and whose output is the abs node.
TEST_F(InstructionSelectorTest, Float64Abd) {
  StreamBuilder m(this, MachineType::Float64(), MachineType::Float64(),
                  MachineType::Float64());
  Node* const lhs = m.Parameter(0);
  Node* const rhs = m.Parameter(1);
  Node* const abs_diff = m.Float64Abs(m.Float64Sub(lhs, rhs));
  m.Return(abs_diff);
  Stream s = m.Build();

  // The subtraction and the abs must collapse into a single instruction.
  ASSERT_EQ(1U, s.size());
  const auto& selected = s[0];
  EXPECT_EQ(kArm64Float64Abd, selected->arch_opcode());

  // Operands of the subtraction feed the instruction directly.
  ASSERT_EQ(2U, selected->InputCount());
  EXPECT_EQ(s.ToVreg(lhs), s.ToVreg(selected->InputAt(0)));
  EXPECT_EQ(s.ToVreg(rhs), s.ToVreg(selected->InputAt(1)));

  // The instruction defines the value of the abs node.
  ASSERT_EQ(1U, selected->OutputCount());
  EXPECT_EQ(s.ToVreg(abs_diff), s.ToVreg(selected->Output()));
}
TEST_F(InstructionSelectorTest, Float64Max) {
StreamBuilder m(this, MachineType::Float64(), MachineType::Float64(),
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment