Commit e16e8d8b authored by Toon Verwaest's avatar Toon Verwaest Committed by V8 LUCI CQ

[maglev] Various regalloc fixes

  * Move fixed temporary allocation before arbitrary input allocation,
    so that fixed temporaries don't accidentally clobber the arbitrary
    input register. Now the input allocation will pick a different
    register.
  * For the above, make temporary allocation 'block' the register with a
    sentinel value, rather than marking it free, so that the subsequent
    input allocation knows not to use those registers (including
    spilling into them).
  * Similarly, move arbitrary input allocation after phi resolution when
    allocating control nodes, since phis may have fixed requirements.
  * Allow deopts to spill their inputs if they are not in registers and
    not yet loadable. This is done during the equivalent of input
    allocation for deopts.
  * Allow there to be multiple targets for a single source during gap
    move collection / cycle detection. There can still only be a single
    source per target, therefore there can only be one cycle for each
    connected component -- this is DCHECKed.
  * Make register validation more complete -- also walk the entire
    graph, and check whether value nodes' result register states match
    the current register allocator state.
  * Add much more printing to --trace-maglev-regalloc because these bugs
    ain't easy to debug.

Bug: v8:7700
Change-Id: Id98259c2920d772ce168bf27497162e78b136f9f
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3714235
Auto-Submit: Toon Verwaest <verwaest@chromium.org>
Commit-Queue: Toon Verwaest <verwaest@chromium.org>
Reviewed-by: 's avatarIgor Sheludko <ishell@chromium.org>
Commit-Queue: Igor Sheludko <ishell@chromium.org>
Cr-Commit-Position: refs/heads/main@{#81252}
parent 6697ae18
......@@ -7,6 +7,7 @@
#include "src/base/hashmap.h"
#include "src/codegen/code-desc.h"
#include "src/codegen/register.h"
#include "src/codegen/reglist.h"
#include "src/codegen/safepoint-table.h"
#include "src/codegen/x64/register-x64.h"
#include "src/deoptimizer/translation-array.h"
......@@ -31,17 +32,7 @@ namespace maglev {
namespace {
template <typename T, size_t... Is>
std::array<T, sizeof...(Is)> repeat(T value, std::index_sequence<Is...>) {
return {((void)Is, value)...};
}
template <size_t N, typename T>
std::array<T, N> repeat(T value) {
return repeat<T>(value, std::make_index_sequence<N>());
}
using RegisterMoves = std::array<Register, Register::kNumRegisters>;
using RegisterMoves = std::array<RegList, Register::kNumRegisters>;
using RegisterReloads = std::array<ValueNode*, Register::kNumRegisters>;
class MaglevCodeGeneratingNodeProcessor {
......@@ -173,47 +164,54 @@ class MaglevCodeGeneratingNodeProcessor {
}
}
void EmitSingleParallelMove(Register source, Register target,
void EmitSingleParallelMove(Register source, RegList targets,
RegisterMoves& moves) {
DCHECK(!moves[target.code()].is_valid());
__ movq(target, source);
moves[source.code()] = Register::no_reg();
for (Register target : targets) {
DCHECK(moves[target.code()].is_empty());
__ movq(target, source);
}
moves[source.code()] = kEmptyRegList;
}
bool RecursivelyEmitParallelMoveChain(Register chain_start, Register source,
Register target, RegisterMoves& moves) {
if (target == chain_start) {
RegList targets, RegisterMoves& moves) {
if (targets.has(chain_start)) {
// The target of this move is the start of the move chain -- this
// means that there is a cycle, and we have to break it by moving
// the chain start into a temporary.
__ RecordComment("-- * Cycle");
EmitSingleParallelMove(target, kScratchRegister, moves);
EmitSingleParallelMove(source, target, moves);
EmitSingleParallelMove(chain_start, {kScratchRegister}, moves);
EmitSingleParallelMove(source, targets, moves);
return true;
}
bool is_cycle = false;
if (moves[target.code()].is_valid()) {
is_cycle = RecursivelyEmitParallelMoveChain(chain_start, target,
moves[target.code()], moves);
} else {
__ RecordComment("-- * Chain start");
bool has_cycle = false;
for (Register target : targets) {
if (!moves[target.code()].is_empty()) {
bool is_cycle = RecursivelyEmitParallelMoveChain(
chain_start, target, moves[target.code()], moves);
// There can only be one cycle in a connected graph.
DCHECK_IMPLIES(has_cycle, !is_cycle);
has_cycle |= is_cycle;
} else {
__ RecordComment("-- * Chain start");
}
}
if (is_cycle && source == chain_start) {
EmitSingleParallelMove(kScratchRegister, target, moves);
if (has_cycle && source == chain_start) {
EmitSingleParallelMove(kScratchRegister, targets, moves);
__ RecordComment("-- * end cycle");
} else {
EmitSingleParallelMove(source, target, moves);
EmitSingleParallelMove(source, targets, moves);
}
return is_cycle;
return has_cycle;
}
void EmitParallelMoveChain(Register source, RegisterMoves& moves) {
Register target = moves[source.code()];
if (!target.is_valid()) return;
RegList targets = moves[source.code()];
if (targets.is_empty()) return;
DCHECK_NE(source, target);
RecursivelyEmitParallelMoveChain(source, source, target, moves);
DCHECK(!targets.has(source));
RecursivelyEmitParallelMoveChain(source, source, targets, moves);
}
void EmitRegisterReload(ValueNode* node, Register target) {
......@@ -230,8 +228,8 @@ class MaglevCodeGeneratingNodeProcessor {
// move in the set of parallel register moves, to be resolved later.
Register source_reg = ToRegister(source);
if (target_reg != source_reg) {
DCHECK(!register_moves[source_reg.code()].is_valid());
register_moves[source_reg.code()] = target_reg;
DCHECK(!register_moves[source_reg.code()].has(target_reg));
register_moves[source_reg.code()].set(target_reg);
}
} else {
// For register loads from memory, don't emit the move yet, but instead
......@@ -276,16 +274,14 @@ class MaglevCodeGeneratingNodeProcessor {
// moves. Note that the mapping is:
//
// register_moves[source] = target.
RegisterMoves register_moves =
repeat<Register::kNumRegisters>(Register::no_reg());
RegisterMoves register_moves = {};
// Save registers restored from a memory location in an array, so that we
// can execute them after the parallel moves have read the register values.
// Note that the mapping is:
//
// register_reloads[target] = node.
ValueNode* n = nullptr;
RegisterReloads register_reloads = repeat<Register::kNumRegisters>(n);
RegisterReloads register_reloads = {};
__ RecordComment("-- Gap moves:");
......
......@@ -714,7 +714,7 @@ void StoreField::AllocateVreg(MaglevVregAllocationState* vreg_state) {
// TODO(leszeks): Add input clobbering to remove the need for this
// unconditional value scratch register.
RequireSpecificTemporary(WriteBarrierDescriptor::SlotAddressRegister());
set_temporaries_needed(2);
set_temporaries_needed(1);
}
void StoreField::GenerateCode(MaglevCodeGenState* code_gen_state,
const ProcessingState& state) {
......
......@@ -661,7 +661,7 @@ class NodeBase : public ZoneObject {
// Specify that there need to be a certain number of registers free (i.e.
// useable as scratch registers) on entry into this node.
//
// Includes any registers requested by RequireSpecificTemporary.
// Does not include any registers requested by RequireSpecificTemporary.
void set_temporaries_needed(int value) {
DCHECK_EQ(num_temporaries_needed_, 0);
num_temporaries_needed_ = value;
......@@ -895,6 +895,15 @@ class ValueNode : public Node {
return double_registers_with_result_.has(reg);
}
RegList result_registers() {
DCHECK(!use_double_register());
return registers_with_result_;
}
DoubleRegList result_double_registers() {
DCHECK(use_double_register());
return double_registers_with_result_;
}
compiler::InstructionOperand allocation() const {
if (has_register()) {
return compiler::AllocatedOperand(compiler::LocationOperand::REGISTER,
......
This diff is collapsed.
......@@ -32,15 +32,15 @@ class RegisterFrameState {
"RegisterFrameState should be used only for Register and "
"DoubleRegister.");
using RegList = RegListBase<RegisterT>;
using RegTList = RegListBase<RegisterT>;
static constexpr RegList kAllocatableRegisters =
static constexpr RegTList kAllocatableRegisters =
AllocatableRegisters<RegisterT>::kRegisters;
static constexpr RegList kEmptyRegList = {};
static constexpr RegTList kEmptyRegList = {};
RegList empty() const { return kEmptyRegList; }
RegList free() const { return free_; }
RegList used() const {
RegTList empty() const { return kEmptyRegList; }
RegTList free() const { return free_; }
RegTList used() const {
// Only allocatable registers should be free.
DCHECK_EQ(free_, free_ & kAllocatableRegisters);
return kAllocatableRegisters ^ free_;
......@@ -58,9 +58,10 @@ class RegisterFrameState {
RegisterT TakeFirstFree() { return free_.PopFirst(); }
void RemoveFromFree(RegisterT reg) { free_.clear(reg); }
void AddToFree(RegisterT reg) { free_.set(reg); }
void AddToFree(RegTList list) { free_ |= list; }
void FreeRegistersUsedBy(ValueNode* node) {
RegList list = node->ClearRegisters<RegisterT>();
RegTList list = node->ClearRegisters<RegisterT>();
DCHECK_EQ(free_ & list, kEmptyRegList);
free_ |= list;
}
......@@ -70,6 +71,10 @@ class RegisterFrameState {
values_[reg.code()] = node;
node->AddRegister(reg);
}
void SetSentinelValue(RegisterT reg, ValueNode* node) {
DCHECK(!free_.has(reg));
values_[reg.code()] = node;
}
ValueNode* GetValue(RegisterT reg) const {
DCHECK(!free_.has(reg));
ValueNode* node = values_[reg.code()];
......@@ -81,7 +86,7 @@ class RegisterFrameState {
private:
ValueNode* values_[RegisterT::kNumRegisters];
RegList free_ = kAllocatableRegisters;
RegTList free_ = kAllocatableRegisters;
};
class StraightForwardRegisterAllocator {
......@@ -111,8 +116,8 @@ class StraightForwardRegisterAllocator {
SpillSlots untagged_;
SpillSlots tagged_;
void ComputePostDominatingHoles(Graph* graph);
void AllocateRegisters(Graph* graph);
void ComputePostDominatingHoles();
void AllocateRegisters();
void PrintLiveRegs() const;
......@@ -129,8 +134,9 @@ class StraightForwardRegisterAllocator {
void AllocateNodeResult(ValueNode* node);
void AssignFixedInput(Input& input);
void AssignArbitraryRegisterInput(Input& input);
void AssignInputs(NodeBase* node);
void AssignTemporaries(NodeBase* node);
void AssignInputs(Node* node);
void AssignFixedTemporaries(NodeBase* node);
void AssignArbitraryTemporaries(NodeBase* node);
void TryAllocateToInput(Phi* phi);
void VerifyInputs(NodeBase* node);
......@@ -150,10 +156,10 @@ class StraightForwardRegisterAllocator {
void FreeRegistersUsedBy(ValueNode* node);
template <typename RegisterT>
void FreeSomeRegister(RegisterFrameState<RegisterT>& registers,
AllocationStage stage);
void FreeSomeGeneralRegister(AllocationStage stage);
void FreeSomeDoubleRegister(AllocationStage stage);
RegisterT FreeSomeRegister(RegisterFrameState<RegisterT>& registers,
AllocationStage stage);
Register FreeSomeGeneralRegister(AllocationStage stage);
DoubleRegister FreeSomeDoubleRegister(AllocationStage stage);
template <typename RegisterT>
void DropRegisterValue(RegisterFrameState<RegisterT>& registers,
......@@ -200,6 +206,7 @@ class StraightForwardRegisterAllocator {
MaglevCompilationInfo* compilation_info_;
std::unique_ptr<MaglevPrintingVisitor> printing_visitor_;
Graph* graph_;
BlockConstIterator block_it_;
NodeIterator node_it_;
// The current node, whether a Node in the body or the ControlNode.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment