// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/compiler/instruction-scheduler.h"

#include "src/base/adapters.h"
#include "src/base/utils/random-number-generator.h"

namespace v8 {
namespace internal {
namespace compiler {

// Compare the two nodes and return true if node1 is a better candidate than
// node2 (i.e. node1 should be scheduled before node2).
bool InstructionScheduler::CriticalPathFirstQueue::CompareNodes(
    ScheduleGraphNode* node1, ScheduleGraphNode* node2) const {
  return node1->total_latency() > node2->total_latency();
}


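// Pick the best candidate from the ready list: among the nodes whose operands
// are available at the current cycle, return the one with the highest total
// latency (i.e. the one on the critical path), or nullptr if no node is ready
// yet.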
InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::CriticalPathFirstQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  auto candidate = nodes_.end();
  for (auto iterator = nodes_.begin(); iterator != nodes_.end(); ++iterator) {
    // We only consider instructions that have all their operands ready and
    // we try to schedule the critical path first.
    if (cycle >= (*iterator)->start_cycle()) {
      if ((candidate == nodes_.end()) || CompareNodes(*iterator, *candidate)) {
        candidate = iterator;
      }
    }
  }

  if (candidate != nodes_.end()) {
    ScheduleGraphNode* result = *candidate;
    nodes_.erase(candidate);
    return result;
  }

  return nullptr;
}


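// Stress-mode scheduling: pop a random node from the ready list. This queue is
// used when FLAG_turbo_stress_instruction_scheduling is enabled, to exercise
// arbitrary (but still dependency-respecting) instruction orderings.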
InstructionScheduler::ScheduleGraphNode*
InstructionScheduler::StressSchedulerQueue::PopBestCandidate(int cycle) {
  DCHECK(!IsEmpty());
  // Choose a random element from the ready list.
  auto candidate = nodes_.begin();
  std::advance(candidate, isolate()->random_number_generator()->NextInt(
      static_cast<int>(nodes_.size())));
  ScheduleGraphNode* result = *candidate;
  nodes_.erase(candidate);
  return result;
}


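// A scheduling graph node wraps an instruction together with its scheduling
// state: its successors, the number of still-unscheduled predecessors, its own
// latency, the latency of its longest path to the end of the graph (total
// latency, computed later), and the earliest cycle at which it can be
// scheduled.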
InstructionScheduler::ScheduleGraphNode::ScheduleGraphNode(
    Zone* zone,
    Instruction* instr)
    : instr_(instr),
      successors_(zone),
      unscheduled_predecessors_count_(0),
      latency_(GetInstructionLatency(instr)),
      total_latency_(-1),
      start_cycle_(-1) {
}


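// Record that |node| depends on this node: |node| becomes a successor and its
// unscheduled predecessor count is incremented.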
void InstructionScheduler::ScheduleGraphNode::AddSuccessor(
    ScheduleGraphNode* node) {
  successors_.push_back(node);
  node->unscheduled_predecessors_count_++;
}


InstructionScheduler::InstructionScheduler(Zone* zone,
                                           InstructionSequence* sequence)
    : zone_(zone),
      sequence_(sequence),
      graph_(zone),
      last_side_effect_instr_(nullptr),
      pending_loads_(zone),
      last_live_in_reg_marker_(nullptr) {
}


void InstructionScheduler::StartBlock(RpoNumber rpo) {
  DCHECK(graph_.empty());
  DCHECK(last_side_effect_instr_ == nullptr);
  DCHECK(pending_loads_.empty());
  DCHECK(last_live_in_reg_marker_ == nullptr);
  sequence()->StartBlock(rpo);
}


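// Schedule the instructions gathered for the current basic block, emit them
// into the sequence, and reset the per-block state.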
void InstructionScheduler::EndBlock(RpoNumber rpo) {
  if (FLAG_turbo_stress_instruction_scheduling) {
    ScheduleBlock<StressSchedulerQueue>();
  } else {
    ScheduleBlock<CriticalPathFirstQueue>();
  }
  sequence()->EndBlock(rpo);
  graph_.clear();
  last_side_effect_instr_ = nullptr;
  pending_loads_.clear();
  last_live_in_reg_marker_ = nullptr;
}


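// Add an instruction to the dependency graph of the current block, creating
// the edges that keep block terminators last, preserve the relative order of
// side-effecting instructions and loads, and honor operand (def-use)
// dependencies.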
void InstructionScheduler::AddInstruction(Instruction* instr) {
  ScheduleGraphNode* new_node = new (zone()) ScheduleGraphNode(zone(), instr);

  if (IsBlockTerminator(instr)) {
    // Make sure that basic block terminators are not moved by adding them
    // as a successor of every instruction.
    for (ScheduleGraphNode* node : graph_) {
      node->AddSuccessor(new_node);
    }
  } else if (IsFixedRegisterParameter(instr)) {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }
    last_live_in_reg_marker_ = new_node;
  } else {
    if (last_live_in_reg_marker_ != nullptr) {
      last_live_in_reg_marker_->AddSuccessor(new_node);
    }

    // Instructions with side effects and memory operations can't be
    // reordered with respect to each other.
    if (HasSideEffect(instr)) {
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      for (ScheduleGraphNode* load : pending_loads_) {
        load->AddSuccessor(new_node);
      }
      pending_loads_.clear();
      last_side_effect_instr_ = new_node;
    } else if (IsLoadOperation(instr)) {
      // Load operations can't be reordered with respect to instructions that
      // have side effects, but independent loads can be reordered with each
      // other.
      if (last_side_effect_instr_ != nullptr) {
        last_side_effect_instr_->AddSuccessor(new_node);
      }
      pending_loads_.push_back(new_node);
    }

    // Look for operand dependencies.
    for (ScheduleGraphNode* node : graph_) {
      if (HasOperandDependency(node->instruction(), instr)) {
        node->AddSuccessor(new_node);
      }
    }
  }

  graph_.push_back(new_node);
}


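// List-schedule the dependency graph: repeatedly pop the best ready node from
// the queue and append its instruction to the sequence; a successor becomes
// ready once all of its predecessors have been scheduled.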
template <typename QueueType>
void InstructionScheduler::ScheduleBlock() {
  QueueType ready_list(this);

  // Compute total latencies so that we can schedule the critical path first.
  ComputeTotalLatencies();

  // Add nodes which don't have dependencies to the ready list.
  for (ScheduleGraphNode* node : graph_) {
    if (!node->HasUnscheduledPredecessor()) {
      ready_list.AddNode(node);
    }
  }

  // Go through the ready list and schedule the instructions.
  int cycle = 0;
  while (!ready_list.IsEmpty()) {
    ScheduleGraphNode* candidate = ready_list.PopBestCandidate(cycle);

    if (candidate != nullptr) {
      sequence()->AddInstruction(candidate->instruction());

      for (ScheduleGraphNode* successor : candidate->successors()) {
        successor->DropUnscheduledPredecessor();
        successor->set_start_cycle(
            std::max(successor->start_cycle(),
                     cycle + candidate->latency()));

        if (!successor->HasUnscheduledPredecessor()) {
          ready_list.AddNode(successor);
        }
      }
    }

    cycle++;
  }
}


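// Classify an instruction for the scheduler: whether it has side effects, is a
// load, or must terminate its basic block. Architecture-specific opcodes are
// delegated to GetTargetInstructionFlags().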
int InstructionScheduler::GetInstructionFlags(const Instruction* instr) const {
  switch (instr->arch_opcode()) {
    case kArchNop:
    case kArchFramePointer:
    case kArchParentFramePointer:
    case kArchTruncateDoubleToI:
    case kArchStackSlot:
      return kNoOpcodeFlags;

    case kArchStackPointer:
      // The ArchStackPointer instruction loads the current stack pointer
      // value and must not be reordered with respect to instructions that
      // have side effects.
      return kIsLoadOperation;

    case kArchPrepareCallCFunction:
    case kArchPrepareTailCall:
    case kArchCallCFunction:
    case kArchCallCodeObject:
    case kArchCallJSFunction:
      return kHasSideEffect;

    case kArchTailCallCodeObject:
    case kArchTailCallJSFunction:
      return kHasSideEffect | kIsBlockTerminator;

    case kArchDeoptimize:
    case kArchJmp:
    case kArchLookupSwitch:
    case kArchTableSwitch:
    case kArchRet:
    case kArchThrowTerminator:
      return kIsBlockTerminator;

    case kCheckedLoadInt8:
    case kCheckedLoadUint8:
    case kCheckedLoadInt16:
    case kCheckedLoadUint16:
    case kCheckedLoadWord32:
    case kCheckedLoadWord64:
    case kCheckedLoadFloat32:
    case kCheckedLoadFloat64:
      return kIsLoadOperation;

    case kCheckedStoreWord8:
    case kCheckedStoreWord16:
    case kCheckedStoreWord32:
    case kCheckedStoreWord64:
    case kCheckedStoreFloat32:
    case kCheckedStoreFloat64:
    case kArchStoreWithWriteBarrier:
      return kHasSideEffect;

#define CASE(Name) case k##Name:
    TARGET_ARCH_OPCODE_LIST(CASE)
#undef CASE
      return GetTargetInstructionFlags(instr);
  }

  UNREACHABLE();
  return kNoOpcodeFlags;
}


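// Return true if instr2 uses a value defined by instr1, i.e. one of instr2's
// inputs refers to a virtual register that instr1 defines, either as an
// unallocated output or as a constant.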
bool InstructionScheduler::HasOperandDependency(
    const Instruction* instr1, const Instruction* instr2) const {
  for (size_t i = 0; i < instr1->OutputCount(); ++i) {
    for (size_t j = 0; j < instr2->InputCount(); ++j) {
      const InstructionOperand* output = instr1->OutputAt(i);
      const InstructionOperand* input = instr2->InputAt(j);

      if (output->IsUnallocated() && input->IsUnallocated() &&
          (UnallocatedOperand::cast(output)->virtual_register() ==
           UnallocatedOperand::cast(input)->virtual_register())) {
        return true;
      }

      if (output->IsConstant() && input->IsUnallocated() &&
          (ConstantOperand::cast(output)->virtual_register() ==
           UnallocatedOperand::cast(input)->virtual_register())) {
        return true;
      }
    }
  }

  // TODO(bafsa): Do we need to look for anti-dependencies/output-dependencies?

  return false;
}


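// Return true if the instruction must stay at the end of its basic block,
// either because of its opcode or because it is a branch.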
bool InstructionScheduler::IsBlockTerminator(const Instruction* instr) const {
  return ((GetInstructionFlags(instr) & kIsBlockTerminator) ||
          (instr->flags_mode() == kFlags_branch));
}


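// Walk the graph backwards and set each node's total latency to its own
// latency plus the maximum total latency of its successors, i.e. the length
// of its longest path to the end of the block.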
void InstructionScheduler::ComputeTotalLatencies() {
  for (ScheduleGraphNode* node : base::Reversed(graph_)) {
    int max_latency = 0;

    for (ScheduleGraphNode* successor : node->successors()) {
      DCHECK(successor->total_latency() != -1);
      if (successor->total_latency() > max_latency) {
        max_latency = successor->total_latency();
      }
    }

    node->set_total_latency(max_latency + node->latency());
  }
}

}  // namespace compiler
}  // namespace internal
}  // namespace v8