Commit 27bd1747 authored by pierre.langlois's avatar pierre.langlois Committed by Commit bot

[turbofan] ARM64: Match 64 bit compare with zero and branch

This patch enables the following transformations in the instruction
selector:

| Before           | After                  |
|------------------+------------------------|
| and x3, x1, #0x1 | tb{,n}z w1, #0, #+0x78 |
| cmp x3, #0x0     |                        |
| b.{eq,ne} #+0x80 |                        |
|------------------+------------------------|
| cmp x0, #0x0     | cb{,n}z x0, #+0x48     |
| b.{eq,ne} #+0x4c |                        |

I have not seen these patterns being generated by turbofan, however the
stubs hit these cases frequently. A particular reason is that we are
turning operations that check for a Smi into a single `tbz`.

As a consequence, the interpreter is affected thanks to inlining
turbofan stubs into its bytecode handlers. I have noticed the size of
the interpreter was reduced by 200 instructions.

BUG=

Review-Url: https://codereview.chromium.org/2022073002
Cr-Commit-Position: refs/heads/master@{#36632}
parent f2c0264a
...@@ -111,7 +111,6 @@ T ReverseBits(T value) { ...@@ -111,7 +111,6 @@ T ReverseBits(T value) {
return result; return result;
} }
// CountTrailingZeros32(value) returns the number of zero bits preceding the // CountTrailingZeros32(value) returns the number of zero bits preceding the
// least significant 1 bit in |value| if |value| is non-zero, otherwise it // least significant 1 bit in |value| if |value| is non-zero, otherwise it
// returns 32. // returns 32.
...@@ -147,6 +146,14 @@ inline unsigned CountTrailingZeros64(uint64_t value) { ...@@ -147,6 +146,14 @@ inline unsigned CountTrailingZeros64(uint64_t value) {
#endif #endif
} }
// Overloaded versions of CountTrailingZeros32/64. They let width-generic code
// (e.g. a template) pick the right implementation from the argument type.
// 32-bit overload: forwards |value| unchanged to CountTrailingZeros32 and
// returns its result.
inline unsigned CountTrailingZeros(uint32_t value) {
return CountTrailingZeros32(value);
}
// 64-bit overload: forwards |value| unchanged to CountTrailingZeros64 and
// returns its result.
inline unsigned CountTrailingZeros(uint64_t value) {
return CountTrailingZeros64(value);
}
// Returns true iff |value| is a power of 2. // Returns true iff |value| is a power of 2.
inline bool IsPowerOfTwo32(uint32_t value) { inline bool IsPowerOfTwo32(uint32_t value) {
......
...@@ -1038,6 +1038,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -1038,6 +1038,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
// Pseudo instructions turned into tbz/tbnz in AssembleArchBranch. // Pseudo instructions turned into tbz/tbnz in AssembleArchBranch.
break; break;
case kArm64CompareAndBranch32: case kArm64CompareAndBranch32:
case kArm64CompareAndBranch:
// Pseudo instruction turned into cbz/cbnz in AssembleArchBranch. // Pseudo instruction turned into cbz/cbnz in AssembleArchBranch.
break; break;
case kArm64ClaimCSP: { case kArm64ClaimCSP: {
...@@ -1503,6 +1504,17 @@ void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) { ...@@ -1503,6 +1504,17 @@ void CodeGenerator::AssembleArchBranch(Instruction* instr, BranchInfo* branch) {
default: default:
UNREACHABLE(); UNREACHABLE();
} }
} else if (opcode == kArm64CompareAndBranch) {
switch (condition) {
case kEqual:
__ Cbz(i.InputRegister64(0), tlabel);
break;
case kNotEqual:
__ Cbnz(i.InputRegister64(0), tlabel);
break;
default:
UNREACHABLE();
}
} else if (opcode == kArm64TestAndBranch32) { } else if (opcode == kArm64TestAndBranch32) {
switch (condition) { switch (condition) {
case kEqual: case kEqual:
......
...@@ -78,6 +78,7 @@ namespace compiler { ...@@ -78,6 +78,7 @@ namespace compiler {
V(Arm64TestAndBranch32) \ V(Arm64TestAndBranch32) \
V(Arm64TestAndBranch) \ V(Arm64TestAndBranch) \
V(Arm64CompareAndBranch32) \ V(Arm64CompareAndBranch32) \
V(Arm64CompareAndBranch) \
V(Arm64ClaimCSP) \ V(Arm64ClaimCSP) \
V(Arm64ClaimJSSP) \ V(Arm64ClaimJSSP) \
V(Arm64PokeCSP) \ V(Arm64PokeCSP) \
......
...@@ -136,6 +136,7 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -136,6 +136,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64TestAndBranch32: case kArm64TestAndBranch32:
case kArm64TestAndBranch: case kArm64TestAndBranch:
case kArm64CompareAndBranch32: case kArm64CompareAndBranch32:
case kArm64CompareAndBranch:
return kIsBlockTerminator; return kIsBlockTerminator;
case kArm64LdrS: case kArm64LdrS:
......
...@@ -1860,6 +1860,23 @@ void VisitWord64Test(InstructionSelector* selector, Node* node, ...@@ -1860,6 +1860,23 @@ void VisitWord64Test(InstructionSelector* selector, Node* node,
VisitWordTest(selector, node, kArm64Tst, cont); VisitWordTest(selector, node, kArm64Tst, cont);
} }
// Attempts to select a single test-bit-and-branch (tbz/tbnz) instruction for
// |node|, which callers pass as a Word32And/Word64And viewed through |Matcher|.
//
// The match succeeds only when |cont| is a branch and the right-hand operand
// is a constant with exactly one bit set. On success an instruction with
// opcode |kOpcode| is emitted, taking the left operand in a register, the
// index of the set bit as an immediate, and the true/false block labels; the
// function then returns true. Otherwise nothing is emitted and it returns
// false, leaving the caller to fall back to another selection.
template <typename Matcher, ArchOpcode kOpcode>
bool TryEmitTestAndBranch(InstructionSelector* selector, Node* node,
                          FlagsContinuation* cont) {
  Arm64OperandGenerator g(selector);
  Matcher m(node);

  // Only branches can be turned into tbz/tbnz.
  if (!cont->IsBranch()) return false;
  // The mask has to be a known constant...
  if (!m.right().HasValue()) return false;
  // ...with precisely one bit set.
  if (base::bits::CountPopulation(m.right().Value()) != 1) return false;

  // A single-bit test can only encode "bit is zero" / "bit is non-zero".
  DCHECK((cont->condition() == kEqual) || (cont->condition() == kNotEqual));
  selector->Emit(
      cont->Encode(kOpcode), g.NoOutput(), g.UseRegister(m.left().node()),
      g.TempImmediate(base::bits::CountTrailingZeros(m.right().Value())),
      g.Label(cont->true_block()), g.Label(cont->false_block()));
  return true;
}
// Shared routine for multiple float32 compare operations. // Shared routine for multiple float32 compare operations.
void VisitFloat32Compare(InstructionSelector* selector, Node* node, void VisitFloat32Compare(InstructionSelector* selector, Node* node,
...@@ -1904,6 +1921,8 @@ void VisitWordCompareZero(InstructionSelector* selector, Node* user, ...@@ -1904,6 +1921,8 @@ void VisitWordCompareZero(InstructionSelector* selector, Node* user,
while (selector->CanCover(user, value)) { while (selector->CanCover(user, value)) {
switch (value->opcode()) { switch (value->opcode()) {
case IrOpcode::kWord32Equal: { case IrOpcode::kWord32Equal: {
// Combine with comparisons against 0 by simply inverting the
// continuation.
Int32BinopMatcher m(value); Int32BinopMatcher m(value);
if (m.right().Is(0)) { if (m.right().Is(0)) {
user = value; user = value;
...@@ -1926,10 +1945,33 @@ void VisitWordCompareZero(InstructionSelector* selector, Node* user, ...@@ -1926,10 +1945,33 @@ void VisitWordCompareZero(InstructionSelector* selector, Node* user,
case IrOpcode::kUint32LessThanOrEqual: case IrOpcode::kUint32LessThanOrEqual:
cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual); cont->OverwriteAndNegateIfEqual(kUnsignedLessThanOrEqual);
return VisitWord32Compare(selector, value, cont); return VisitWord32Compare(selector, value, cont);
case IrOpcode::kWord64Equal: case IrOpcode::kWord64Equal: {
cont->OverwriteAndNegateIfEqual(kEqual); cont->OverwriteAndNegateIfEqual(kEqual);
Int64BinopMatcher m(value);
if (m.right().Is(0)) {
Node* const left = m.left().node();
if (selector->CanCover(value, left) &&
left->opcode() == IrOpcode::kWord64And) {
// Attempt to merge the Word64Equal(Word64And(x, y), 0) comparison
// into a tbz/tbnz instruction.
if (TryEmitTestAndBranch<Uint64BinopMatcher, kArm64TestAndBranch>(
selector, left, cont)) {
return;
}
return VisitWordCompare(selector, left, kArm64Tst, cont, true,
kLogical64Imm);
}
// Merge the Word64Equal(x, 0) comparison into a cbz instruction.
if (cont->IsBranch()) {
selector->Emit(cont->Encode(kArm64CompareAndBranch), g.NoOutput(),
g.UseRegister(left), g.Label(cont->true_block()),
g.Label(cont->false_block()));
return;
}
}
return VisitWordCompare(selector, value, kArm64Cmp, cont, false, return VisitWordCompare(selector, value, kArm64Cmp, cont, false,
kArithmeticImm); kArithmeticImm);
}
case IrOpcode::kInt64LessThan: case IrOpcode::kInt64LessThan:
cont->OverwriteAndNegateIfEqual(kSignedLessThan); cont->OverwriteAndNegateIfEqual(kSignedLessThan);
return VisitWordCompare(selector, value, kArm64Cmp, cont, false, return VisitWordCompare(selector, value, kArm64Cmp, cont, false,
...@@ -2004,42 +2046,20 @@ void VisitWordCompareZero(InstructionSelector* selector, Node* user, ...@@ -2004,42 +2046,20 @@ void VisitWordCompareZero(InstructionSelector* selector, Node* user,
kArithmeticImm); kArithmeticImm);
case IrOpcode::kInt32Sub: case IrOpcode::kInt32Sub:
return VisitWord32Compare(selector, value, cont); return VisitWord32Compare(selector, value, cont);
case IrOpcode::kWord32And: { case IrOpcode::kWord32And:
Int32BinopMatcher m(value); if (TryEmitTestAndBranch<Uint32BinopMatcher, kArm64TestAndBranch32>(
if (cont->IsBranch() && m.right().HasValue() && selector, value, cont)) {
(base::bits::CountPopulation32(m.right().Value()) == 1)) {
// If the mask has only one bit set, we can use tbz/tbnz.
DCHECK((cont->condition() == kEqual) ||
(cont->condition() == kNotEqual));
selector->Emit(
cont->Encode(kArm64TestAndBranch32), g.NoOutput(),
g.UseRegister(m.left().node()),
g.TempImmediate(
base::bits::CountTrailingZeros32(m.right().Value())),
g.Label(cont->true_block()), g.Label(cont->false_block()));
return; return;
} }
return VisitWordCompare(selector, value, kArm64Tst32, cont, true, return VisitWordCompare(selector, value, kArm64Tst32, cont, true,
kLogical32Imm); kLogical32Imm);
} case IrOpcode::kWord64And:
case IrOpcode::kWord64And: { if (TryEmitTestAndBranch<Uint64BinopMatcher, kArm64TestAndBranch>(
Int64BinopMatcher m(value); selector, value, cont)) {
if (cont->IsBranch() && m.right().HasValue() &&
(base::bits::CountPopulation64(m.right().Value()) == 1)) {
// If the mask has only one bit set, we can use tbz/tbnz.
DCHECK((cont->condition() == kEqual) ||
(cont->condition() == kNotEqual));
selector->Emit(
cont->Encode(kArm64TestAndBranch), g.NoOutput(),
g.UseRegister(m.left().node()),
g.TempImmediate(
base::bits::CountTrailingZeros64(m.right().Value())),
g.Label(cont->true_block()), g.Label(cont->false_block()));
return; return;
} }
return VisitWordCompare(selector, value, kArm64Tst, cont, true, return VisitWordCompare(selector, value, kArm64Tst, cont, true,
kLogical64Imm); kLogical64Imm);
}
default: default:
break; break;
} }
......
...@@ -1178,7 +1178,6 @@ TEST_F(InstructionSelectorTest, Word32AndBranchWithOneBitMaskOnRight) { ...@@ -1178,7 +1178,6 @@ TEST_F(InstructionSelectorTest, Word32AndBranchWithOneBitMaskOnRight) {
} }
} }
TEST_F(InstructionSelectorTest, Word32AndBranchWithOneBitMaskOnLeft) { TEST_F(InstructionSelectorTest, Word32AndBranchWithOneBitMaskOnLeft) {
TRACED_FORRANGE(int, bit, 0, 31) { TRACED_FORRANGE(int, bit, 0, 31) {
uint32_t mask = 1 << bit; uint32_t mask = 1 << bit;
...@@ -1261,6 +1260,91 @@ TEST_F(InstructionSelectorTest, Word64AndBranchWithOneBitMaskOnLeft) { ...@@ -1261,6 +1260,91 @@ TEST_F(InstructionSelectorTest, Word64AndBranchWithOneBitMaskOnLeft) {
} }
} }
// Checks that branching on Word32Equal / Word32NotEqual of
// Word32And(mask, x) against zero, where |mask| is a constant with a single
// bit set, is selected as exactly one kArm64TestAndBranch32 instruction whose
// second input is an immediate holding the index of the tested bit.
TEST_F(InstructionSelectorTest, Word32EqualZeroAndBranchWithOneBitMask) {
  // Word32Equal(Word32And(mask, x), 0) -> condition kEqual.
  TRACED_FORRANGE(int, bit, 0, 31) {
    // Shift an unsigned one: `1 << 31` would shift into the sign bit of a
    // signed int.
    uint32_t mask = 1u << bit;
    StreamBuilder m(this, MachineType::Int32(), MachineType::Int32());
    RawMachineLabel a, b;
    m.Branch(m.Word32Equal(m.Word32And(m.Int32Constant(mask), m.Parameter(0)),
                           m.Int32Constant(0)),
             &a, &b);
    m.Bind(&a);
    m.Return(m.Int32Constant(1));
    m.Bind(&b);
    m.Return(m.Int32Constant(0));
    Stream s = m.Build();
    ASSERT_EQ(1U, s.size());
    EXPECT_EQ(kArm64TestAndBranch32, s[0]->arch_opcode());
    EXPECT_EQ(kEqual, s[0]->flags_condition());
    // Inputs: tested register, bit index, true label, false label.
    EXPECT_EQ(4U, s[0]->InputCount());
    EXPECT_EQ(InstructionOperand::IMMEDIATE, s[0]->InputAt(1)->kind());
    EXPECT_EQ(bit, s.ToInt32(s[0]->InputAt(1)));
  }

  // Word32NotEqual(Word32And(mask, x), 0) -> condition kNotEqual.
  TRACED_FORRANGE(int, bit, 0, 31) {
    // Shift an unsigned one: `1 << 31` would shift into the sign bit of a
    // signed int.
    uint32_t mask = 1u << bit;
    StreamBuilder m(this, MachineType::Int32(), MachineType::Int32());
    RawMachineLabel a, b;
    m.Branch(
        m.Word32NotEqual(m.Word32And(m.Int32Constant(mask), m.Parameter(0)),
                         m.Int32Constant(0)),
        &a, &b);
    m.Bind(&a);
    m.Return(m.Int32Constant(1));
    m.Bind(&b);
    m.Return(m.Int32Constant(0));
    Stream s = m.Build();
    ASSERT_EQ(1U, s.size());
    EXPECT_EQ(kArm64TestAndBranch32, s[0]->arch_opcode());
    EXPECT_EQ(kNotEqual, s[0]->flags_condition());
    // Inputs: tested register, bit index, true label, false label.
    EXPECT_EQ(4U, s[0]->InputCount());
    EXPECT_EQ(InstructionOperand::IMMEDIATE, s[0]->InputAt(1)->kind());
    EXPECT_EQ(bit, s.ToInt32(s[0]->InputAt(1)));
  }
}
// 64-bit counterpart of the single-bit-mask branch test: comparing
// Word64And(mask, x) against zero with Word64Equal / Word64NotEqual must be
// selected as exactly one kArm64TestAndBranch instruction whose second input
// is an immediate holding the index of the tested bit.
TEST_F(InstructionSelectorTest, Word64EqualZeroAndBranchWithOneBitMask) {
  // Equality with zero -> condition kEqual.
  TRACED_FORRANGE(int, bit, 0, 63) {
    uint64_t single_bit = V8_UINT64_C(1) << bit;
    StreamBuilder m(this, MachineType::Int64(), MachineType::Int64());
    RawMachineLabel if_true, if_false;
    Node* const masked =
        m.Word64And(m.Int64Constant(single_bit), m.Parameter(0));
    Node* const is_clear = m.Word64Equal(masked, m.Int64Constant(0));
    m.Branch(is_clear, &if_true, &if_false);
    m.Bind(&if_true);
    m.Return(m.Int64Constant(1));
    m.Bind(&if_false);
    m.Return(m.Int64Constant(0));

    Stream stream = m.Build();
    ASSERT_EQ(1U, stream.size());
    EXPECT_EQ(kArm64TestAndBranch, stream[0]->arch_opcode());
    EXPECT_EQ(kEqual, stream[0]->flags_condition());
    EXPECT_EQ(4U, stream[0]->InputCount());
    EXPECT_EQ(InstructionOperand::IMMEDIATE, stream[0]->InputAt(1)->kind());
    EXPECT_EQ(bit, stream.ToInt64(stream[0]->InputAt(1)));
  }

  // Inequality with zero -> condition kNotEqual.
  TRACED_FORRANGE(int, bit, 0, 63) {
    uint64_t single_bit = V8_UINT64_C(1) << bit;
    StreamBuilder m(this, MachineType::Int64(), MachineType::Int64());
    RawMachineLabel if_true, if_false;
    Node* const masked =
        m.Word64And(m.Int64Constant(single_bit), m.Parameter(0));
    Node* const is_set = m.Word64NotEqual(masked, m.Int64Constant(0));
    m.Branch(is_set, &if_true, &if_false);
    m.Bind(&if_true);
    m.Return(m.Int64Constant(1));
    m.Bind(&if_false);
    m.Return(m.Int64Constant(0));

    Stream stream = m.Build();
    ASSERT_EQ(1U, stream.size());
    EXPECT_EQ(kArm64TestAndBranch, stream[0]->arch_opcode());
    EXPECT_EQ(kNotEqual, stream[0]->flags_condition());
    EXPECT_EQ(4U, stream[0]->InputCount());
    EXPECT_EQ(InstructionOperand::IMMEDIATE, stream[0]->InputAt(1)->kind());
    EXPECT_EQ(bit, stream.ToInt64(stream[0]->InputAt(1)));
  }
}
TEST_F(InstructionSelectorTest, CompareAgainstZeroAndBranch) { TEST_F(InstructionSelectorTest, CompareAgainstZeroAndBranch) {
{ {
...@@ -1298,6 +1382,75 @@ TEST_F(InstructionSelectorTest, CompareAgainstZeroAndBranch) { ...@@ -1298,6 +1382,75 @@ TEST_F(InstructionSelectorTest, CompareAgainstZeroAndBranch) {
} }
} }
// Branching directly on (x == 0) or (x != 0) must be selected as a single
// compare-and-branch instruction: kArm64CompareAndBranch32 for 32-bit
// operands and kArm64CompareAndBranch for 64-bit operands. The instruction
// takes the compared value plus the two branch targets as its three inputs.
TEST_F(InstructionSelectorTest, EqualZeroAndBranch) {
  // 32-bit, x == 0.
  {
    StreamBuilder m(this, MachineType::Int32(), MachineType::Int32());
    RawMachineLabel if_true, if_false;
    Node* param = m.Parameter(0);
    Node* cond = m.Word32Equal(param, m.Int32Constant(0));
    m.Branch(cond, &if_true, &if_false);
    m.Bind(&if_true);
    m.Return(m.Int32Constant(1));
    m.Bind(&if_false);
    m.Return(m.Int32Constant(0));

    Stream stream = m.Build();
    ASSERT_EQ(1U, stream.size());
    EXPECT_EQ(kArm64CompareAndBranch32, stream[0]->arch_opcode());
    EXPECT_EQ(kEqual, stream[0]->flags_condition());
    EXPECT_EQ(3U, stream[0]->InputCount());
    EXPECT_EQ(stream.ToVreg(param), stream.ToVreg(stream[0]->InputAt(0)));
  }

  // 32-bit, x != 0.
  {
    StreamBuilder m(this, MachineType::Int32(), MachineType::Int32());
    RawMachineLabel if_true, if_false;
    Node* param = m.Parameter(0);
    Node* cond = m.Word32NotEqual(param, m.Int32Constant(0));
    m.Branch(cond, &if_true, &if_false);
    m.Bind(&if_true);
    m.Return(m.Int32Constant(1));
    m.Bind(&if_false);
    m.Return(m.Int32Constant(0));

    Stream stream = m.Build();
    ASSERT_EQ(1U, stream.size());
    EXPECT_EQ(kArm64CompareAndBranch32, stream[0]->arch_opcode());
    EXPECT_EQ(kNotEqual, stream[0]->flags_condition());
    EXPECT_EQ(3U, stream[0]->InputCount());
    EXPECT_EQ(stream.ToVreg(param), stream.ToVreg(stream[0]->InputAt(0)));
  }

  // 64-bit, x == 0.
  {
    StreamBuilder m(this, MachineType::Int64(), MachineType::Int64());
    RawMachineLabel if_true, if_false;
    Node* param = m.Parameter(0);
    Node* cond = m.Word64Equal(param, m.Int64Constant(0));
    m.Branch(cond, &if_true, &if_false);
    m.Bind(&if_true);
    m.Return(m.Int64Constant(1));
    m.Bind(&if_false);
    m.Return(m.Int64Constant(0));

    Stream stream = m.Build();
    ASSERT_EQ(1U, stream.size());
    EXPECT_EQ(kArm64CompareAndBranch, stream[0]->arch_opcode());
    EXPECT_EQ(kEqual, stream[0]->flags_condition());
    EXPECT_EQ(3U, stream[0]->InputCount());
    EXPECT_EQ(stream.ToVreg(param), stream.ToVreg(stream[0]->InputAt(0)));
  }

  // 64-bit, x != 0.
  {
    StreamBuilder m(this, MachineType::Int64(), MachineType::Int64());
    RawMachineLabel if_true, if_false;
    Node* param = m.Parameter(0);
    Node* cond = m.Word64NotEqual(param, m.Int64Constant(0));
    m.Branch(cond, &if_true, &if_false);
    m.Bind(&if_true);
    m.Return(m.Int64Constant(1));
    m.Bind(&if_false);
    m.Return(m.Int64Constant(0));

    Stream stream = m.Build();
    ASSERT_EQ(1U, stream.size());
    EXPECT_EQ(kArm64CompareAndBranch, stream[0]->arch_opcode());
    EXPECT_EQ(kNotEqual, stream[0]->flags_condition());
    EXPECT_EQ(3U, stream[0]->InputCount());
    EXPECT_EQ(stream.ToVreg(param), stream.ToVreg(stream[0]->InputAt(0)));
  }
}
// ----------------------------------------------------------------------------- // -----------------------------------------------------------------------------
// Add and subtract instructions with overflow. // Add and subtract instructions with overflow.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment