Commit 317dc057 authored by georgia.kouveli's avatar georgia.kouveli Committed by Commit bot

[arm64] Generate adds/ands.

Perform the following transformation:

    | Before           | After               |
    |------------------+---------------------|
    | add w2, w0, w1   | adds w2, w0, w1     |
    | cmp w2, #0x0     | b.<cond'> <addr>    |
    | b.<cond> <addr>  |                     |
    |------------------+---------------------|
    | add w2, w0, w1   | adds w2, w0, w1     |
    | cmp #0x0, w2     | b.<cond'> <addr>    |
    | b.<cond> <addr>  |                     |

and the same for and instructions instead of add.  When the result of the
add/and is not used, generate cmn/tst instead. We need to take care with which
conditions we can handle and what new condition we map them to.

BUG=

Review-Url: https://codereview.chromium.org/2065243005
Cr-Commit-Position: refs/heads/master@{#37400}
parent f58dd088
......@@ -296,6 +296,10 @@ Condition FlagsConditionToCondition(FlagsCondition condition) {
return vs;
case kNotOverflow:
return vc;
case kPositiveOrZero:
return pl;
case kNegative:
return mi;
default:
break;
}
......
......@@ -394,6 +394,10 @@ Condition FlagsConditionToCondition(FlagsCondition condition) {
case kUnorderedEqual:
case kUnorderedNotEqual:
break;
case kPositiveOrZero:
return pl;
case kNegative:
return mi;
}
UNREACHABLE();
return nv;
......@@ -897,12 +901,34 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
break;
case kArm64And:
if (FlagsModeField::decode(opcode) != kFlags_none) {
// The ands instruction only sets N and Z, so only the following
// conditions make sense.
DCHECK(FlagsConditionField::decode(opcode) == kEqual ||
FlagsConditionField::decode(opcode) == kNotEqual ||
FlagsConditionField::decode(opcode) == kPositiveOrZero ||
FlagsConditionField::decode(opcode) == kNegative);
__ Ands(i.OutputRegister(), i.InputOrZeroRegister64(0),
i.InputOperand2_64(1));
} else {
__ And(i.OutputRegister(), i.InputOrZeroRegister64(0),
i.InputOperand2_64(1));
}
break;
case kArm64And32:
if (FlagsModeField::decode(opcode) != kFlags_none) {
// The ands instruction only sets N and Z, so only the following
// conditions make sense.
DCHECK(FlagsConditionField::decode(opcode) == kEqual ||
FlagsConditionField::decode(opcode) == kNotEqual ||
FlagsConditionField::decode(opcode) == kPositiveOrZero ||
FlagsConditionField::decode(opcode) == kNegative);
__ Ands(i.OutputRegister32(), i.InputOrZeroRegister32(0),
i.InputOperand2_32(1));
} else {
__ And(i.OutputRegister32(), i.InputOrZeroRegister32(0),
i.InputOperand2_32(1));
}
break;
case kArm64Bic:
__ Bic(i.OutputRegister(), i.InputOrZeroRegister64(0),
......@@ -1205,10 +1231,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Cmn(i.InputOrZeroRegister32(0), i.InputOperand2_32(1));
break;
case kArm64Tst:
__ Tst(i.InputRegister(0), i.InputOperand(1));
__ Tst(i.InputOrZeroRegister64(0), i.InputOperand(1));
break;
case kArm64Tst32:
__ Tst(i.InputRegister32(0), i.InputOperand32(1));
__ Tst(i.InputOrZeroRegister32(0), i.InputOperand32(1));
break;
case kArm64Float32Cmp:
if (instr->InputAt(1)->IsFPRegister()) {
......
......@@ -1921,14 +1921,126 @@ void VisitWordCompare(InstructionSelector* selector, Node* node,
}
}
// This function checks whether we can convert:
// ((a <op> b) cmp 0), b.<cond>
// to:
// (a <ops> b), b.<cond'>
// where <ops> is the flag setting version of <op>.
// We only generate conditions <cond'> that are a combination of the N
// and Z flags. This avoids the need to make this function dependent on
// the flag-setting operation.
bool CanUseFlagSettingBinop(FlagsCondition cond) {
switch (cond) {
case kEqual:
case kNotEqual:
case kSignedLessThan:
case kSignedGreaterThanOrEqual:
case kUnsignedLessThanOrEqual: // x <= 0 -> x == 0
case kUnsignedGreaterThan: // x > 0 -> x != 0
return true;
default:
return false;
}
}
// Map <cond> to <cond'> so that the following transformation is possible:
// ((a <op> b) cmp 0), b.<cond>
// to:
// (a <ops> b), b.<cond'>
// where <ops> is the flag setting version of <op>.
FlagsCondition MapForFlagSettingBinop(FlagsCondition cond) {
DCHECK(CanUseFlagSettingBinop(cond));
switch (cond) {
case kEqual:
case kNotEqual:
return cond;
case kSignedLessThan:
return kNegative;
case kSignedGreaterThanOrEqual:
return kPositiveOrZero;
case kUnsignedLessThanOrEqual: // x <= 0 -> x == 0
return kEqual;
case kUnsignedGreaterThan: // x > 0 -> x != 0
return kNotEqual;
default:
UNREACHABLE();
return cond;
}
}
// This function checks if we can perform the transformation:
// ((a <op> b) cmp 0), b.<cond>
// to:
// (a <ops> b), b.<cond'>
// where <ops> is the flag setting version of <op>, and if so,
// updates {node}, {opcode} and {cont} accordingly.
void MaybeReplaceCmpZeroWithFlagSettingBinop(InstructionSelector* selector,
Node** node, Node* binop,
ArchOpcode* opcode,
FlagsCondition cond,
FlagsContinuation* cont,
ImmediateMode* immediate_mode) {
ArchOpcode binop_opcode;
ArchOpcode no_output_opcode;
ImmediateMode binop_immediate_mode;
switch (binop->opcode()) {
case IrOpcode::kInt32Add:
binop_opcode = kArm64Add32;
no_output_opcode = kArm64Cmn32;
binop_immediate_mode = kArithmeticImm;
break;
case IrOpcode::kWord32And:
binop_opcode = kArm64And32;
no_output_opcode = kArm64Tst32;
binop_immediate_mode = kLogical32Imm;
break;
default:
UNREACHABLE();
return;
}
if (selector->CanCover(*node, binop)) {
// The comparison is the only user of the add or and, so we can generate
// a cmn or tst instead.
cont->Overwrite(MapForFlagSettingBinop(cond));
*opcode = no_output_opcode;
*node = binop;
*immediate_mode = binop_immediate_mode;
} else if (selector->IsOnlyUserOfNodeInSameBlock(*node, binop)) {
// We can also handle the case where the add and the compare are in the
// same basic block, and the compare is the only use of add in this basic
// block (the add has users in other basic blocks).
cont->Overwrite(MapForFlagSettingBinop(cond));
*opcode = binop_opcode;
*node = binop;
*immediate_mode = binop_immediate_mode;
}
}
void VisitWord32Compare(InstructionSelector* selector, Node* node,
FlagsContinuation* cont) {
Int32BinopMatcher m(node);
ArchOpcode opcode = kArm64Cmp32;
FlagsCondition cond = cont->condition();
ImmediateMode immediate_mode = kArithmeticImm;
if (m.right().Is(0) && (m.left().IsInt32Add() || m.left().IsWord32And())) {
// Emit flag setting add/and instructions for comparisons against zero.
if (CanUseFlagSettingBinop(cond)) {
Node* binop = m.left().node();
MaybeReplaceCmpZeroWithFlagSettingBinop(selector, &node, binop, &opcode,
cond, cont, &immediate_mode);
}
} else if (m.left().Is(0) &&
(m.right().IsInt32Add() || m.right().IsWord32And())) {
// Same as above, but we need to commute the condition before we
// continue with the rest of the checks.
cond = CommuteFlagsCondition(cond);
if (CanUseFlagSettingBinop(cond)) {
Node* binop = m.right().node();
MaybeReplaceCmpZeroWithFlagSettingBinop(selector, &node, binop, &opcode,
cond, cont, &immediate_mode);
}
} else if (m.right().IsInt32Sub()) {
// Select negated compare for comparisons with negated right input.
if (m.right().IsInt32Sub()) {
Node* sub = m.right().node();
Int32BinopMatcher msub(sub);
if (msub.left().Is(0)) {
......@@ -1946,7 +2058,7 @@ void VisitWord32Compare(InstructionSelector* selector, Node* node,
opcode = kArm64Cmn32;
}
}
VisitBinop<Int32BinopMatcher>(selector, node, opcode, kArithmeticImm, cont);
VisitBinop<Int32BinopMatcher>(selector, node, opcode, immediate_mode, cont);
}
......@@ -2245,20 +2357,24 @@ void InstructionSelector::VisitWord32Equal(Node* const node) {
if (CanCover(user, value)) {
switch (value->opcode()) {
case IrOpcode::kInt32Add:
return VisitWordCompare(this, value, kArm64Cmn32, &cont, true,
kArithmeticImm);
case IrOpcode::kWord32And:
return VisitWord32Compare(this, node, &cont);
case IrOpcode::kInt32Sub:
return VisitWordCompare(this, value, kArm64Cmp32, &cont, false,
kArithmeticImm);
case IrOpcode::kWord32And:
return VisitWordCompare(this, value, kArm64Tst32, &cont, true,
kLogical32Imm);
case IrOpcode::kWord32Equal: {
// Word32Equal(Word32Equal(x, y), 0) => Word32Compare(x, y, ne).
Int32BinopMatcher mequal(value);
node->ReplaceInput(0, mequal.left().node());
node->ReplaceInput(1, mequal.right().node());
cont.Negate();
// {node} still does not cover its new operands, because {mequal} is
// still using them.
// Since we won't generate any more code for {mequal}, set its
// operands to zero to make sure {node} can cover them.
// This improves pattern matching in VisitWord32Compare.
mequal.node()->ReplaceInput(0, m.right().node());
mequal.node()->ReplaceInput(1, m.right().node());
return VisitWord32Compare(this, node, &cont);
}
default:
......
......@@ -171,7 +171,9 @@ enum FlagsCondition {
kUnorderedEqual,
kUnorderedNotEqual,
kOverflow,
kNotOverflow
kNotOverflow,
kPositiveOrZero,
kNegative
};
inline FlagsCondition NegateFlagsCondition(FlagsCondition condition) {
......
......@@ -354,6 +354,8 @@ class FlagsContinuation final {
condition_ = CommuteFlagsCondition(condition_);
}
void Overwrite(FlagsCondition condition) { condition_ = condition; }
void OverwriteAndNegateIfEqual(FlagsCondition condition) {
bool negate = condition_ == kEqual;
condition_ = condition;
......
......@@ -241,6 +241,20 @@ bool InstructionSelector::CanCover(Node* user, Node* node) const {
return true;
}
bool InstructionSelector::IsOnlyUserOfNodeInSameBlock(Node* user,
Node* node) const {
BasicBlock* bb_user = schedule()->block(user);
BasicBlock* bb_node = schedule()->block(node);
if (bb_user != bb_node) return false;
for (Edge const edge : node->use_edges()) {
Node* from = edge.from();
if ((from != user) && (schedule()->block(from) == bb_user)) {
return false;
}
}
return true;
}
int InstructionSelector::GetVirtualRegister(const Node* node) {
DCHECK_NOT_NULL(node);
size_t const id = node->id();
......
......@@ -151,6 +151,31 @@ class InstructionSelector final {
// edge and the two are in the same basic block.
bool CanCover(Node* user, Node* node) const;
// Used in pattern matching during code generation.
// This function checks that {node} and {user} are in the same basic block,
// and that {user} is the only user of {node} in this basic block. This
// check guarantees that there are no users of {node} scheduled between
// {node} and {user}, and thus we can select a single instruction for both
// nodes, if such an instruction exists. This check can be used for example
// when selecting instructions for:
// n = Int32Add(a, b)
// c = Word32Compare(n, 0, cond)
// Branch(c, true_label, false_label)
// Here we can generate a flag-setting add instruction, even if the add has
// uses in other basic blocks, since the flag-setting add instruction will
// still generate the result of the addition and not just set the flags.
// However, if we had uses of the add in the same basic block, we could have:
// n = Int32Add(a, b)
// o = OtherOp(n, ...)
// c = Word32Compare(n, 0, cond)
// Branch(c, true_label, false_label)
// where we cannot select the add and the compare together. If we were to
// select a flag-setting add instruction for Word32Compare and Int32Add while
// visiting Word32Compare, we would then have to select an instruction for
// OtherOp *afterwards*, which means we would attempt to use the result of
// the add before we have defined it.
bool IsOnlyUserOfNodeInSameBlock(Node* user, Node* node) const;
// Checks if {node} was already defined, and therefore code was already
// generated for it.
bool IsDefined(Node* node) const;
......
......@@ -48,6 +48,10 @@ FlagsCondition CommuteFlagsCondition(FlagsCondition condition) {
return kFloatGreaterThanOrEqualOrUnordered;
case kFloatGreaterThan:
return kFloatLessThan;
case kPositiveOrZero:
case kNegative:
UNREACHABLE();
break;
case kEqual:
case kNotEqual:
case kOverflow:
......@@ -448,6 +452,10 @@ std::ostream& operator<<(std::ostream& os, const FlagsCondition& fc) {
return os << "overflow";
case kNotOverflow:
return os << "not overflow";
case kPositiveOrZero:
return os << "positive or zero";
case kNegative:
return os << "negative";
}
UNREACHABLE();
return os;
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment