Commit 723cee9a authored by Sam Parker, committed by Commit Bot

[compiler][arm64] fabs(fsub(x, y)) to fabd(x, y)

Introduce two machine nodes for FABD and fold Float32/64 Abs,Sub
during instruction selection.

This gives ~1% speed improvement of the Bullet physics engine
compiled as wasm.

Change-Id: Ifd985538e6ebb280bc0eaf11b0ebfc687891cf91
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2786854Reviewed-by: 's avatarJakob Gruber <jgruber@chromium.org>
Reviewed-by: 's avatarAndreas Haas <ahaas@chromium.org>
Commit-Queue: Martyn Capewell <martyn.capewell@arm.com>
Cr-Commit-Position: refs/heads/master@{#73765}
parent f49f834f
...@@ -1572,6 +1572,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -1572,6 +1572,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kArm64Float32Abs: case kArm64Float32Abs:
__ Fabs(i.OutputFloat32Register(), i.InputFloat32Register(0)); __ Fabs(i.OutputFloat32Register(), i.InputFloat32Register(0));
break; break;
case kArm64Float32Abd:
__ Fabd(i.OutputFloat32Register(), i.InputFloat32Register(0),
i.InputFloat32Register(1));
break;
case kArm64Float32Neg: case kArm64Float32Neg:
__ Fneg(i.OutputFloat32Register(), i.InputFloat32Register(0)); __ Fneg(i.OutputFloat32Register(), i.InputFloat32Register(0));
break; break;
...@@ -1642,6 +1646,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -1642,6 +1646,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kArm64Float64Abs: case kArm64Float64Abs:
__ Fabs(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); __ Fabs(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
break; break;
case kArm64Float64Abd:
__ Fabd(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
i.InputDoubleRegister(1));
break;
case kArm64Float64Neg: case kArm64Float64Neg:
__ Fneg(i.OutputDoubleRegister(), i.InputDoubleRegister(0)); __ Fneg(i.OutputDoubleRegister(), i.InputDoubleRegister(0));
break; break;
......
...@@ -99,6 +99,7 @@ namespace compiler { ...@@ -99,6 +99,7 @@ namespace compiler {
V(Arm64Float32Mul) \ V(Arm64Float32Mul) \
V(Arm64Float32Div) \ V(Arm64Float32Div) \
V(Arm64Float32Abs) \ V(Arm64Float32Abs) \
V(Arm64Float32Abd) \
V(Arm64Float32Neg) \ V(Arm64Float32Neg) \
V(Arm64Float32Sqrt) \ V(Arm64Float32Sqrt) \
V(Arm64Float32Fnmul) \ V(Arm64Float32Fnmul) \
...@@ -114,6 +115,7 @@ namespace compiler { ...@@ -114,6 +115,7 @@ namespace compiler {
V(Arm64Float64Max) \ V(Arm64Float64Max) \
V(Arm64Float64Min) \ V(Arm64Float64Min) \
V(Arm64Float64Abs) \ V(Arm64Float64Abs) \
V(Arm64Float64Abd) \
V(Arm64Float64Neg) \ V(Arm64Float64Neg) \
V(Arm64Float64Sqrt) \ V(Arm64Float64Sqrt) \
V(Arm64Float64Fnmul) \ V(Arm64Float64Fnmul) \
......
...@@ -92,6 +92,7 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -92,6 +92,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64Float32Mul: case kArm64Float32Mul:
case kArm64Float32Div: case kArm64Float32Div:
case kArm64Float32Abs: case kArm64Float32Abs:
case kArm64Float32Abd:
case kArm64Float32Neg: case kArm64Float32Neg:
case kArm64Float32Sqrt: case kArm64Float32Sqrt:
case kArm64Float32Fnmul: case kArm64Float32Fnmul:
...@@ -106,6 +107,7 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -106,6 +107,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64Float64Max: case kArm64Float64Max:
case kArm64Float64Min: case kArm64Float64Min:
case kArm64Float64Abs: case kArm64Float64Abs:
case kArm64Float64Abd:
case kArm64Float64Neg: case kArm64Float64Neg:
case kArm64Float64Sqrt: case kArm64Float64Sqrt:
case kArm64Float64Fnmul: case kArm64Float64Fnmul:
......
...@@ -1451,8 +1451,6 @@ void InstructionSelector::VisitWord64Ror(Node* node) { ...@@ -1451,8 +1451,6 @@ void InstructionSelector::VisitWord64Ror(Node* node) {
V(BitcastFloat64ToInt64, kArm64U64MoveFloat64) \ V(BitcastFloat64ToInt64, kArm64U64MoveFloat64) \
V(BitcastInt32ToFloat32, kArm64Float64MoveU64) \ V(BitcastInt32ToFloat32, kArm64Float64MoveU64) \
V(BitcastInt64ToFloat64, kArm64Float64MoveU64) \ V(BitcastInt64ToFloat64, kArm64Float64MoveU64) \
V(Float32Abs, kArm64Float32Abs) \
V(Float64Abs, kArm64Float64Abs) \
V(Float32Sqrt, kArm64Float32Sqrt) \ V(Float32Sqrt, kArm64Float32Sqrt) \
V(Float64Sqrt, kArm64Float64Sqrt) \ V(Float64Sqrt, kArm64Float64Sqrt) \
V(Float32RoundDown, kArm64Float32RoundDown) \ V(Float32RoundDown, kArm64Float32RoundDown) \
...@@ -3055,6 +3053,30 @@ void InstructionSelector::VisitFloat32Mul(Node* node) { ...@@ -3055,6 +3053,30 @@ void InstructionSelector::VisitFloat32Mul(Node* node) {
return VisitRRR(this, kArm64Float32Mul, node); return VisitRRR(this, kArm64Float32Mul, node);
} }
// Selects arm64 code for a Float32Abs node. If the operand is a float32
// subtraction that only this node consumes (CanCover), the abs(sub(x, y))
// pair is folded into a single FABD (absolute difference) instruction;
// otherwise a plain FABS is emitted.
void InstructionSelector::VisitFloat32Abs(Node* node) {
  Arm64OperandGenerator g(this);
  Node* const input = node->InputAt(0);
  const bool can_fold_to_fabd =
      input->opcode() == IrOpcode::kFloat32Sub && CanCover(node, input);
  if (!can_fold_to_fabd) {
    VisitRR(this, kArm64Float32Abs, node);
    return;
  }
  Emit(kArm64Float32Abd, g.DefineAsRegister(node),
       g.UseRegister(input->InputAt(0)), g.UseRegister(input->InputAt(1)));
}
// Selects arm64 code for a Float64Abs node. Mirrors the float32 case:
// abs(sub(x, y)) with a coverable subtraction becomes one FABD
// instruction; every other operand falls back to FABS.
void InstructionSelector::VisitFloat64Abs(Node* node) {
  Arm64OperandGenerator g(this);
  Node* const input = node->InputAt(0);
  const bool can_fold_to_fabd =
      input->opcode() == IrOpcode::kFloat64Sub && CanCover(node, input);
  if (!can_fold_to_fabd) {
    VisitRR(this, kArm64Float64Abs, node);
    return;
  }
  Emit(kArm64Float64Abd, g.DefineAsRegister(node),
       g.UseRegister(input->InputAt(0)), g.UseRegister(input->InputAt(1)));
}
void InstructionSelector::VisitFloat32Equal(Node* node) { void InstructionSelector::VisitFloat32Equal(Node* node) {
FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node); FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
VisitFloat32Compare(this, node, &cont); VisitFloat32Compare(this, node, &cont);
......
...@@ -4724,6 +4724,42 @@ TEST_F(InstructionSelectorTest, Float64Abs) { ...@@ -4724,6 +4724,42 @@ TEST_F(InstructionSelectorTest, Float64Abs) {
EXPECT_EQ(s.ToVreg(n), s.ToVreg(s[0]->Output())); EXPECT_EQ(s.ToVreg(n), s.ToVreg(s[0]->Output()));
} }
// Checks that Float32Abs(Float32Sub(a, b)) is selected as one kArm64Float32Abd
// instruction whose inputs are the subtraction's operands.
TEST_F(InstructionSelectorTest, Float32Abd) {
  StreamBuilder m(this, MachineType::Float32(), MachineType::Float32(),
                  MachineType::Float32());
  Node* const lhs = m.Parameter(0);
  Node* const rhs = m.Parameter(1);
  Node* const abs_diff = m.Float32Abs(m.Float32Sub(lhs, rhs));
  m.Return(abs_diff);
  Stream s = m.Build();
  // Exactly one instruction must be emitted: the folded FABD.
  ASSERT_EQ(1U, s.size());
  EXPECT_EQ(kArm64Float32Abd, s[0]->arch_opcode());
  ASSERT_EQ(2U, s[0]->InputCount());
  ASSERT_EQ(1U, s[0]->OutputCount());
  EXPECT_EQ(s.ToVreg(lhs), s.ToVreg(s[0]->InputAt(0)));
  EXPECT_EQ(s.ToVreg(rhs), s.ToVreg(s[0]->InputAt(1)));
  EXPECT_EQ(s.ToVreg(abs_diff), s.ToVreg(s[0]->Output()));
}
// Checks that Float64Abs(Float64Sub(a, b)) is selected as one kArm64Float64Abd
// instruction whose inputs are the subtraction's operands.
TEST_F(InstructionSelectorTest, Float64Abd) {
  StreamBuilder m(this, MachineType::Float64(), MachineType::Float64(),
                  MachineType::Float64());
  Node* const lhs = m.Parameter(0);
  Node* const rhs = m.Parameter(1);
  Node* const abs_diff = m.Float64Abs(m.Float64Sub(lhs, rhs));
  m.Return(abs_diff);
  Stream s = m.Build();
  // Exactly one instruction must be emitted: the folded FABD.
  ASSERT_EQ(1U, s.size());
  EXPECT_EQ(kArm64Float64Abd, s[0]->arch_opcode());
  ASSERT_EQ(2U, s[0]->InputCount());
  ASSERT_EQ(1U, s[0]->OutputCount());
  EXPECT_EQ(s.ToVreg(lhs), s.ToVreg(s[0]->InputAt(0)));
  EXPECT_EQ(s.ToVreg(rhs), s.ToVreg(s[0]->InputAt(1)));
  EXPECT_EQ(s.ToVreg(abs_diff), s.ToVreg(s[0]->Output()));
}
TEST_F(InstructionSelectorTest, Float64Max) { TEST_F(InstructionSelectorTest, Float64Max) {
StreamBuilder m(this, MachineType::Float64(), MachineType::Float64(), StreamBuilder m(this, MachineType::Float64(), MachineType::Float64(),
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment