Commit 77021584, authored by Pierre Langlois, committed by Commit Bot

[arm64] Generate TBNZ for 32-bit '(x & (1 << N)) == (1 << N)'

Add support for matching '(x & mask) == mask' when mask has a single bit set,
and translate this into a tbnz instruction. This patch only does this for 32-bit
operations; we can port it to 64-bit operations as a follow-up if we find
matches.

This transformation mostly touches the snapshot where we get ~120 hits. This pattern can
also show up in JavaScript when introduced by the EffectControlLinearizer pass.

Bug: 
Change-Id: Ib37c6e0bd3831b7c17709357b00ca53735621605
Reviewed-on: https://chromium-review.googlesource.com/803272
Reviewed-by: Benedikt Meurer <bmeurer@chromium.org>
Commit-Queue: Pierre Langlois <pierre.langlois@arm.com>
Cr-Commit-Position: refs/heads/master@{#49822}
parent e71b8022
......@@ -2007,19 +2007,17 @@ void EmitBranchOrDeoptimize(InstructionSelector* selector,
}
// Try to emit TBZ, TBNZ, CBZ or CBNZ for certain comparisons of {node}
// against zero, depending on the condition.
bool TryEmitCbzOrTbz(InstructionSelector* selector, Node* node, Node* user,
FlagsCondition cond, FlagsContinuation* cont) {
Int32BinopMatcher m_user(user);
USE(m_user);
DCHECK(m_user.right().Is(0) || m_user.left().Is(0));
// against {value}, depending on the condition.
bool TryEmitCbzOrTbz(InstructionSelector* selector, Node* node, uint32_t value,
Node* user, FlagsCondition cond, FlagsContinuation* cont) {
// Only handle branches and deoptimisations.
if (!cont->IsBranch() && !cont->IsDeoptimize()) return false;
switch (cond) {
case kSignedLessThan:
case kSignedGreaterThanOrEqual: {
// Here we handle sign tests, aka. comparisons with zero.
if (value != 0) return false;
// We don't generate TBZ/TBNZ for deoptimisations, as they have a
// shorter range than conditional branches and generating them for
// deoptimisations results in more veneers.
......@@ -2045,9 +2043,29 @@ bool TryEmitCbzOrTbz(InstructionSelector* selector, Node* node, Node* user,
return true;
}
case kEqual:
case kNotEqual:
case kNotEqual: {
if (node->opcode() == IrOpcode::kWord32And) {
// Emit a tbz/tbnz if we are comparing with a single-bit mask:
// Branch(Word32Equal(Word32And(x, 1 << N), 1 << N), true, false)
Int32BinopMatcher m_and(node);
if (cont->IsBranch() && base::bits::IsPowerOfTwo(value) &&
m_and.right().Is(value) && selector->CanCover(user, node)) {
Arm64OperandGenerator g(selector);
// In the code generator, Equal refers to a bit being cleared. We want
// the opposite here so negate the condition.
cont->Negate();
selector->Emit(cont->Encode(kArm64TestAndBranch32), g.NoOutput(),
g.UseRegister(m_and.left().node()),
g.TempImmediate(base::bits::CountTrailingZeros(value)),
g.Label(cont->true_block()),
g.Label(cont->false_block()));
return true;
}
}
} // Fall through.
case kUnsignedLessThanOrEqual:
case kUnsignedGreaterThan: {
if (value != 0) return false;
Arm64OperandGenerator g(selector);
cont->Overwrite(MapForCbz(cond));
EmitBranchOrDeoptimize(selector, kArm64CompareAndBranch32,
......@@ -2062,15 +2080,20 @@ bool TryEmitCbzOrTbz(InstructionSelector* selector, Node* node, Node* user,
void VisitWord32Compare(InstructionSelector* selector, Node* node,
FlagsContinuation* cont) {
Int32BinopMatcher m(node);
ArchOpcode opcode = kArm64Cmp32;
FlagsCondition cond = cont->condition();
if (m.right().Is(0)) {
if (TryEmitCbzOrTbz(selector, m.left().node(), node, cond, cont)) return;
} else if (m.left().Is(0)) {
if (m.right().HasValue()) {
if (TryEmitCbzOrTbz(selector, m.left().node(), m.right().Value(), node,
cond, cont)) {
return;
}
} else if (m.left().HasValue()) {
FlagsCondition commuted_cond = CommuteFlagsCondition(cond);
if (TryEmitCbzOrTbz(selector, m.right().node(), node, commuted_cond, cont))
if (TryEmitCbzOrTbz(selector, m.right().node(), m.left().Value(), node,
commuted_cond, cont)) {
return;
}
}
ArchOpcode opcode = kArm64Cmp32;
ImmediateMode immediate_mode = kArithmeticImm;
if (m.right().Is(0) && (m.left().IsInt32Add() || m.left().IsWord32And())) {
// Emit flag setting add/and instructions for comparisons against zero.
......@@ -2141,7 +2164,7 @@ bool TryEmitTestAndBranch(InstructionSelector* selector, Node* node,
Arm64OperandGenerator g(selector);
Matcher m(node);
if (cont->IsBranch() && m.right().HasValue() &&
(base::bits::CountPopulation(m.right().Value()) == 1)) {
base::bits::IsPowerOfTwo(m.right().Value())) {
// If the mask has only one bit set, we can use tbz/tbnz.
DCHECK((cont->condition() == kEqual) || (cont->condition() == kNotEqual));
selector->Emit(
......
......@@ -1165,87 +1165,126 @@ TEST_F(InstructionSelectorTest, AddBranchWithImmediateOnLeft) {
}
}
// One case for the parameterized test-and-branch tests.
//   mi:   a MachInst whose constructor callback takes a StreamBuilder, a node
//         and a uint32_t mask and returns the node to branch on.
//   cond: the flags condition the test expects on the selected instruction.
struct TestAndBranch {
MachInst<std::function<Node*(InstructionSelectorTest::StreamBuilder&, Node*,
uint32_t mask)>>
mi;
FlagsCondition cond;
};
TEST_F(InstructionSelectorTest, Word32AndBranchWithOneBitMaskOnRight) {
TRACED_FORRANGE(int, bit, 0, 31) {
uint32_t mask = 1 << bit;
StreamBuilder m(this, MachineType::Int32(), MachineType::Int32());
RawMachineLabel a, b;
m.Branch(m.Word32And(m.Parameter(0), m.Int32Constant(mask)), &a, &b);
m.Bind(&a);
m.Return(m.Int32Constant(1));
m.Bind(&b);
m.Return(m.Int32Constant(0));
Stream s = m.Build();
ASSERT_EQ(1U, s.size());
EXPECT_EQ(kArm64TestAndBranch32, s[0]->arch_opcode());
EXPECT_EQ(kNotEqual, s[0]->flags_condition());
EXPECT_EQ(4U, s[0]->InputCount());
EXPECT_EQ(InstructionOperand::IMMEDIATE, s[0]->InputAt(1)->kind());
EXPECT_EQ(bit, s.ToInt32(s[0]->InputAt(1)));
}
TRACED_FORRANGE(int, bit, 0, 31) {
uint32_t mask = 1 << bit;
StreamBuilder m(this, MachineType::Int32(), MachineType::Int32());
RawMachineLabel a, b;
m.Branch(
m.Word32BinaryNot(m.Word32And(m.Parameter(0), m.Int32Constant(mask))),
&a, &b);
m.Bind(&a);
m.Return(m.Int32Constant(1));
m.Bind(&b);
m.Return(m.Int32Constant(0));
Stream s = m.Build();
ASSERT_EQ(1U, s.size());
EXPECT_EQ(kArm64TestAndBranch32, s[0]->arch_opcode());
EXPECT_EQ(kEqual, s[0]->flags_condition());
EXPECT_EQ(4U, s[0]->InputCount());
EXPECT_EQ(InstructionOperand::IMMEDIATE, s[0]->InputAt(1)->kind());
EXPECT_EQ(bit, s.ToInt32(s[0]->InputAt(1)));
}
// Streams a TestAndBranch by forwarding its {mi} member to {os}; the {cond}
// member is not printed.
std::ostream& operator<<(std::ostream& os, const TestAndBranch& tb) {
return os << tb.mi;
}
TEST_F(InstructionSelectorTest, Word32AndBranchWithOneBitMaskOnLeft) {
TRACED_FORRANGE(int, bit, 0, 31) {
uint32_t mask = 1 << bit;
StreamBuilder m(this, MachineType::Int32(), MachineType::Int32());
RawMachineLabel a, b;
m.Branch(m.Word32And(m.Int32Constant(mask), m.Parameter(0)), &a, &b);
m.Bind(&a);
m.Return(m.Int32Constant(1));
m.Bind(&b);
m.Return(m.Int32Constant(0));
Stream s = m.Build();
ASSERT_EQ(1U, s.size());
EXPECT_EQ(kArm64TestAndBranch32, s[0]->arch_opcode());
EXPECT_EQ(kNotEqual, s[0]->flags_condition());
EXPECT_EQ(4U, s[0]->InputCount());
EXPECT_EQ(InstructionOperand::IMMEDIATE, s[0]->InputAt(1)->kind());
EXPECT_EQ(bit, s.ToInt32(s[0]->InputAt(1)));
}
// Table of 32-bit test-and-branch cases. Each entry pairs a lambda that builds
// the branch condition from a node {x} and a constant {mask} with the flags
// condition expected on the selected instruction. Every entry names
// kArm64TestAndBranch32 as the expected opcode; entries wrapped in
// Word32BinaryNot expect kEqual, the others expect kNotEqual.
const TestAndBranch kTestAndBranchMatchers32[] = {
// Branch on the result of Word32And directly.
{{[](InstructionSelectorTest::StreamBuilder& m, Node* x, uint32_t mask)
-> Node* { return m.Word32And(x, m.Int32Constant(mask)); },
"if (x and mask)", kArm64TestAndBranch32, MachineType::Int32()},
kNotEqual},
{{[](InstructionSelectorTest::StreamBuilder& m, Node* x,
uint32_t mask) -> Node* {
return m.Word32BinaryNot(m.Word32And(x, m.Int32Constant(mask)));
},
"if not (x and mask)", kArm64TestAndBranch32, MachineType::Int32()},
kEqual},
{{[](InstructionSelectorTest::StreamBuilder& m, Node* x, uint32_t mask)
-> Node* { return m.Word32And(m.Int32Constant(mask), x); },
"if (mask and x)", kArm64TestAndBranch32, MachineType::Int32()},
kNotEqual},
{{[](InstructionSelectorTest::StreamBuilder& m, Node* x,
uint32_t mask) -> Node* {
return m.Word32BinaryNot(m.Word32And(m.Int32Constant(mask), x));
},
"if not (mask and x)", kArm64TestAndBranch32, MachineType::Int32()},
kEqual},
// Branch on the result of '(x and mask) == mask'. This tests that a bit is
// set rather than cleared which is why conditions are inverted.
{{[](InstructionSelectorTest::StreamBuilder& m, Node* x,
uint32_t mask) -> Node* {
return m.Word32Equal(m.Word32And(x, m.Int32Constant(mask)),
m.Int32Constant(mask));
},
"if ((x and mask) == mask)", kArm64TestAndBranch32, MachineType::Int32()},
kNotEqual},
{{[](InstructionSelectorTest::StreamBuilder& m, Node* x,
uint32_t mask) -> Node* {
return m.Word32BinaryNot(m.Word32Equal(
m.Word32And(x, m.Int32Constant(mask)), m.Int32Constant(mask)));
},
"if ((x and mask) != mask)", kArm64TestAndBranch32, MachineType::Int32()},
kEqual},
{{[](InstructionSelectorTest::StreamBuilder& m, Node* x,
uint32_t mask) -> Node* {
return m.Word32Equal(m.Int32Constant(mask),
m.Word32And(x, m.Int32Constant(mask)));
},
"if (mask == (x and mask))", kArm64TestAndBranch32, MachineType::Int32()},
kNotEqual},
{{[](InstructionSelectorTest::StreamBuilder& m, Node* x,
uint32_t mask) -> Node* {
return m.Word32BinaryNot(m.Word32Equal(
m.Int32Constant(mask), m.Word32And(x, m.Int32Constant(mask))));
},
"if (mask != (x and mask))", kArm64TestAndBranch32, MachineType::Int32()},
kEqual},
// Same as above but swap 'mask' and 'x'.
{{[](InstructionSelectorTest::StreamBuilder& m, Node* x,
uint32_t mask) -> Node* {
return m.Word32Equal(m.Word32And(m.Int32Constant(mask), x),
m.Int32Constant(mask));
},
"if ((mask and x) == mask)", kArm64TestAndBranch32, MachineType::Int32()},
kNotEqual},
{{[](InstructionSelectorTest::StreamBuilder& m, Node* x,
uint32_t mask) -> Node* {
return m.Word32BinaryNot(m.Word32Equal(
m.Word32And(m.Int32Constant(mask), x), m.Int32Constant(mask)));
},
"if ((mask and x) != mask)", kArm64TestAndBranch32, MachineType::Int32()},
kEqual},
{{[](InstructionSelectorTest::StreamBuilder& m, Node* x,
uint32_t mask) -> Node* {
return m.Word32Equal(m.Int32Constant(mask),
m.Word32And(m.Int32Constant(mask), x));
},
"if (mask == (mask and x))", kArm64TestAndBranch32, MachineType::Int32()},
kNotEqual},
{{[](InstructionSelectorTest::StreamBuilder& m, Node* x,
uint32_t mask) -> Node* {
return m.Word32BinaryNot(m.Word32Equal(
m.Int32Constant(mask), m.Word32And(m.Int32Constant(mask), x)));
},
"if (mask != (mask and x))", kArm64TestAndBranch32, MachineType::Int32()},
kEqual}};
// Test fixture parameterized over TestAndBranch cases.
typedef InstructionSelectorTestWithParam<TestAndBranch>
InstructionSelectorTestAndBranchTest;
// For each bit position 0..31, builds a branch on the parameterized condition
// using a single-bit mask, then checks that exactly one instruction is
// selected, that it has four inputs, and that input 1 is an immediate equal to
// the bit index.
TEST_P(InstructionSelectorTestAndBranchTest, TestAndBranch32) {
const TestAndBranch inst = GetParam();
TRACED_FORRANGE(int, bit, 0, 31) {
uint32_t mask = 1 << bit;
StreamBuilder m(this, MachineType::Int32(), MachineType::Int32());
RawMachineLabel a, b;
// NOTE(review): this hard-coded Branch and the parameterized Branch right
// after it both appear here; they look like the pre-patch and post-patch
// forms of the same statement. Confirm that only the parameterized one
// should remain.
m.Branch(
m.Word32BinaryNot(m.Word32And(m.Int32Constant(mask), m.Parameter(0))),
&a, &b);
m.Branch(inst.mi.constructor(m, m.Parameter(0), mask), &a, &b);
m.Bind(&a);
m.Return(m.Int32Constant(1));
m.Bind(&b);
m.Return(m.Int32Constant(0));
Stream s = m.Build();
ASSERT_EQ(1U, s.size());
// NOTE(review): the next two fixed expectations are followed by their
// parameterized equivalents; the fixed kEqual check conflicts with table
// entries whose expected condition is kNotEqual. Presumably pre-patch
// residue; verify.
EXPECT_EQ(kArm64TestAndBranch32, s[0]->arch_opcode());
EXPECT_EQ(kEqual, s[0]->flags_condition());
EXPECT_EQ(inst.mi.arch_opcode, s[0]->arch_opcode());
EXPECT_EQ(inst.cond, s[0]->flags_condition());
EXPECT_EQ(4U, s[0]->InputCount());
EXPECT_EQ(InstructionOperand::IMMEDIATE, s[0]->InputAt(1)->kind());
EXPECT_EQ(bit, s.ToInt32(s[0]->InputAt(1)));
}
}
// Instantiates InstructionSelectorTestAndBranchTest with the entries of
// kTestAndBranchMatchers32 as its parameter values.
INSTANTIATE_TEST_CASE_P(InstructionSelectorTest,
InstructionSelectorTestAndBranchTest,
::testing::ValuesIn(kTestAndBranchMatchers32));
TEST_F(InstructionSelectorTest, Word64AndBranchWithOneBitMaskOnRight) {
TRACED_FORRANGE(int, bit, 0, 63) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment