Commit 4a601911 authored by Ross McIlroy's avatar Ross McIlroy Committed by Commit Bot

[TurboProp] Add support for deferred block spills in fast reg alloc

Adds support for avoiding spills in non-deferred blocks by instead
restricting the spill ranges to deferred blocks if the virtual
register is only spilled in deferred blocks.

It does this by tracking registers that reach the exit point of deferred
blocks and spilling them pre-emptively in the deferred block while
treating them as committed from the point of view of the non-deferred
blocks. We also now track whether virtual registers need to be spilled
at their SSA definition point (where they are output by an instruction),
or can instead be spilled at the entry to deferred blocks for use as
spill slots within those deferred blocks. In both cases, the tracking
of these deferred spills is kept as a pending operation until the
allocator confirms that adding these spills will avoid spills in the
non-deferred pathways, to avoid adding unnecessary extra spills in
deferred blocks.

BUG=v8:9684

Change-Id: Ib151e795567f0e4e7f95538415a8cc117d235b64
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2440603
Commit-Queue: Ross McIlroy <rmcilroy@chromium.org>
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Cr-Commit-Position: refs/heads/master@{#70374}
parent a19cf8e2
...@@ -11,7 +11,7 @@ namespace v8 { ...@@ -11,7 +11,7 @@ namespace v8 {
namespace internal { namespace internal {
#ifdef DEBUG #ifdef DEBUG
void BitVector::Print() { void BitVector::Print() const {
bool first = true; bool first = true;
PrintF("{"); PrintF("{");
for (int i = 0; i < length(); i++) { for (int i = 0; i < length(); i++) {
......
...@@ -277,7 +277,7 @@ class V8_EXPORT_PRIVATE BitVector : public ZoneObject { ...@@ -277,7 +277,7 @@ class V8_EXPORT_PRIVATE BitVector : public ZoneObject {
int length() const { return length_; } int length() const { return length_; }
#ifdef DEBUG #ifdef DEBUG
void Print(); void Print() const;
#endif #endif
MOVE_ONLY_NO_DEFAULT_CONSTRUCTOR(BitVector); MOVE_ONLY_NO_DEFAULT_CONSTRUCTOR(BitVector);
......
...@@ -12,6 +12,67 @@ namespace compiler { ...@@ -12,6 +12,67 @@ namespace compiler {
namespace { namespace {
// Counts the moves in |moves| that will actually be emitted. We can't just
// use the size of the collection, because eliminated and redundant (no-op)
// moves generate no code and must be discounted.
int GetMoveCount(const ParallelMove& moves) {
  int live_moves = 0;
  for (const auto* move : moves) {
    if (!move->IsEliminated() && !move->IsRedundant()) ++live_moves;
  }
  return live_moves;
}
// Returns true if |op| and |test_op| agree on location kind: a register
// test operand (fixed or arbitrary) must match a register operand, and any
// other test operand must match a stack slot.
bool AreOperandsOfSameType(
    const AllocatedOperand& op,
    const InstructionSequenceTest::TestOperand& test_op) {
  using TestOpType = InstructionSequenceTest::TestOperandType;
  const bool expects_register = test_op.type_ == TestOpType::kFixedRegister ||
                                test_op.type_ == TestOpType::kRegister;
  return expects_register ? op.IsRegister() : op.IsStackSlot();
}
// Returns true if |op| matches |test_op|: the location kinds must agree, and
// the location value matches when the test operand either leaves it
// unspecified (kNoValue) or names the same register code / slot index.
bool AllocatedOperandMatches(
    const AllocatedOperand& op,
    const InstructionSequenceTest::TestOperand& test_op) {
  if (!AreOperandsOfSameType(op, test_op)) return false;
  if (test_op.value_ == InstructionSequenceTest::kNoValue) return true;
  const int location = op.IsRegister() ? op.GetRegister().code() : op.index();
  return location == test_op.value_;
}
int GetParallelMoveCount(int instr_index, Instruction::GapPosition gap_pos,
const InstructionSequence* sequence) {
const ParallelMove* moves =
sequence->InstructionAt(instr_index)->GetParallelMove(gap_pos);
if (moves == nullptr) return 0;
return GetMoveCount(*moves);
}
// Returns true if the parallel move at |gap_pos| of the instruction at
// |instr_index| contains a live (non-eliminated, non-redundant) move whose
// source matches |src| and whose destination matches |dest|.
// Registers a gtest failure and returns false if no parallel move exists at
// that position at all.
bool IsParallelMovePresent(int instr_index, Instruction::GapPosition gap_pos,
                           const InstructionSequence* sequence,
                           const InstructionSequenceTest::TestOperand& src,
                           const InstructionSequenceTest::TestOperand& dest) {
  const ParallelMove* moves =
      sequence->InstructionAt(instr_index)->GetParallelMove(gap_pos);
  EXPECT_NE(nullptr, moves);
  // EXPECT_NE is non-fatal (execution continues after a failure), so bail
  // out explicitly rather than dereferencing a null |moves| and crashing.
  if (moves == nullptr) return false;
  for (const auto* move : *moves) {
    if (move->IsEliminated() || move->IsRedundant()) continue;
    if (AllocatedOperandMatches(AllocatedOperand::cast(move->source()), src) &&
        AllocatedOperandMatches(AllocatedOperand::cast(move->destination()),
                                dest)) {
      return true;
    }
  }
  return false;
}
class MidTierRegisterAllocatorTest : public InstructionSequenceTest { class MidTierRegisterAllocatorTest : public InstructionSequenceTest {
public: public:
void Allocate() { void Allocate() {
...@@ -608,6 +669,99 @@ TEST_F(MidTierRegisterAllocatorTest, DiamondWithCallSecondBlock) { ...@@ -608,6 +669,99 @@ TEST_F(MidTierRegisterAllocatorTest, DiamondWithCallSecondBlock) {
Allocate(); Allocate();
} }
// A register defined in B0 and only used from a stack slot inside a single
// deferred block (B2) should not be spilled at its definition point; the
// allocator should instead emit the spill inside the deferred block, keeping
// the non-deferred path free of spill moves.
TEST_F(MidTierRegisterAllocatorTest, SingleDeferredBlockSpill) {
  StartBlock();  // B0: define |var| in a register.
  auto var = EmitOI(Reg(0));
  EndBlock(Branch(Reg(var), 1, 2));
  StartBlock();  // B1: non-deferred path, no use of |var|.
  EndBlock(Jump(2));
  StartBlock(true);  // B2: deferred; call consumes |var| from a stack slot.
  EmitCall(Slot(-1), Slot(var));
  EndBlock();
  StartBlock();  // B3
  EmitNop();
  EndBlock();
  StartBlock();  // B4: |var| is still needed in a register here.
  Return(Reg(var, 0));
  EndBlock();
  Allocate();
  // Instruction indices derived from the block layout above — TODO confirm
  // against the generated sequence if the layout changes.
  const int var_def_index = 1;
  const int call_index = 3;
  // No spill move at the definition of |var| (non-deferred path stays clean).
  EXPECT_EQ(
      0, GetParallelMoveCount(var_def_index, Instruction::START, sequence()));
  // The spill (register -> slot) is emitted at the call inside the deferred
  // block instead.
  EXPECT_TRUE(IsParallelMovePresent(call_index, Instruction::START, sequence(),
                                    Reg(0), Slot(0)));
}
// |var3| is live across two deferred blocks (B1 and B4) that each contain a
// call. Its spill should be restricted to the deferred blocks: a spill
// (register -> slot) on entry to each deferred block and a move back
// (slot -> register) at its end, with no moves on the non-deferred path —
// neither at the definition point nor in the join block B6.
TEST_F(MidTierRegisterAllocatorTest, ValidMultipleDeferredBlockSpills) {
  StartBlock();  // B0: define var1..var3 in registers 0..2.
  auto var1 = EmitOI(Reg(0));
  auto var2 = EmitOI(Reg(1));
  auto var3 = EmitOI(Reg(2));
  EndBlock(Branch(Reg(var1, 0), 1, 2));
  StartBlock(true);  // B1: deferred; call clobbers registers.
  EmitCall(Slot(-2), Slot(var1));
  EndBlock(Jump(5));
  StartBlock();  // B2
  EmitNop();
  EndBlock();
  StartBlock();  // B3
  EmitNop();
  EndBlock(Branch(Reg(var2, 0), 1, 2));
  StartBlock(true);  // B4: deferred; second call.
  EmitCall(Slot(-1), Slot(var2));
  EndBlock(Jump(2));
  StartBlock();  // B5
  EmitNop();
  EndBlock();
  StartBlock();  // B6: join block; var3 is needed in a register here.
  Return(Reg(var3, 2));
  EndBlock();
  // Instruction indices derived from the block layout above — TODO confirm
  // against the generated sequence if the layout changes.
  const int def_of_v2 = 2;
  const int start_of_b1 = 4;
  const int start_of_b4 = 10;
  const int end_of_b1 = 5;
  const int end_of_b4 = 11;
  const int start_of_b6 = 14;
  Allocate();
  const int var3_reg = 2;
  const int var3_slot = 2;
  // No spill of var3 on the non-deferred path at its SSA definition area.
  EXPECT_FALSE(IsParallelMovePresent(def_of_v2, Instruction::START, sequence(),
                                     Reg(var3_reg), Slot()));
  // var3 is spilled on entry to deferred block B1...
  EXPECT_TRUE(IsParallelMovePresent(start_of_b1, Instruction::START, sequence(),
                                    Reg(var3_reg), Slot(var3_slot)));
  // ...and moved back into a register at the end of B1.
  EXPECT_TRUE(IsParallelMovePresent(end_of_b1, Instruction::END, sequence(),
                                    Slot(var3_slot), Reg()));
  // Same spill/restore pair around deferred block B4.
  EXPECT_TRUE(IsParallelMovePresent(start_of_b4, Instruction::START, sequence(),
                                    Reg(var3_reg), Slot(var3_slot)));
  EXPECT_TRUE(IsParallelMovePresent(end_of_b4, Instruction::END, sequence(),
                                    Slot(var3_slot), Reg()));
  // The non-deferred join block needs no moves at all.
  EXPECT_EQ(0,
            GetParallelMoveCount(start_of_b6, Instruction::START, sequence()));
}
namespace { namespace {
enum class ParameterType { kFixedSlot, kSlot, kRegister, kFixedRegister }; enum class ParameterType { kFixedSlot, kSlot, kRegister, kFixedRegister };
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment