Commit b3d748a2, authored by Jakob Gruber, committed by Commit Bot

[regalloc] Use an adaptive data structure for live sets

Live sets represent sets of live virtual registers at block entry and
exit points. They are usually sparsely populated; for example, a sample
taken from Octane2 shows 80% of sampled live sets with a fill ratio of
10% or less.

Prior to this CL, live sets were implemented as a statically-sized bit
vector. This is fine for low-ish virtual register counts, but becomes
wasteful at higher numbers.

This CL attempts to address this issue through an adaptive
implementation. Small live sets remain bit vectors, while larger sets
switch to a PersistentMap-based implementation. PersistentMap has very
memory-efficient add/remove/copy operations.

Of course, with adaptive data structures we enter the territory of
parameter fiddling. In this case, two parameters are used:
kMaxSmallSetSize controls when to switch implementations, and
kMaxDeletionsBeforePrune controls when pruning (= managing the # of
deleted entries in the map) sets in.

On the (degenerate) test case from the linked bug, the register
allocation zone shrinks from 1008MB to 475MB. For more realistic cases
I expect savings on the order of 10s of KB.

Bug: v8:9574
Change-Id: Id903bbe23f030b418e8d887ef4839c8d65126c52
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1891693
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Reviewed-by: Thibaud Michaud <thibaudm@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#64872}
parent 1d403ce7
......@@ -11,6 +11,7 @@
#include "src/codegen/assembler-inl.h"
#include "src/codegen/tick-counter.h"
#include "src/compiler/linkage.h"
#include "src/compiler/persistent-map.h"
#include "src/strings/string-stream.h"
#include "src/utils/vector.h"
......@@ -96,6 +97,178 @@ int GetByteWidth(MachineRepresentation rep) {
} // namespace
// Represents a set of live virtual registers.
// Implemented as an adaptive data structure to handle two extremes of usage
// patterns:
//
// 1. Low-ish virtual register counts should use a statically-sized bit vector
// for constant-time insertions/lookups and compact memory-representation.
// This is expected to be the common case.
// 2. For high virtual register counts, the set is expected to be very sparsely
// populated. In this case, a bit vector would lead to unacceptable memory
// overhead (since it reserves O(capacity) memory instead of O(size)), and
// we use a set of indices instead. The PersistentMap was chosen for its
// memory-efficient add/remove/copy operations.
//
// The maximal capacity of the set is determined at construction.
class LiveSet : public ZoneObject {
 private:
  // Switch to a set backing store when this limit is exceeded.
  // The threshold is fairly arbitrary, picked s.t. benchmarks do not regress.
  // It should be a good tradeoff between wasted space and fast/simple bit
  // operations on small bit vectors sizes.
  static constexpr int kMaxSmallSetSize = 16 * kBitsPerSystemPointer;

  // The large backing store emulates set semantics with a map: present keys
  // map to kPresent. kNotPresent is the map's default value, so keys mapped
  // to it count as absent (and are skipped by map iteration).
  using KeyT = int;
  using ValueT = bool;  // Emulates set semantics in the map.
  static constexpr ValueT kNotPresent = false;
  static constexpr ValueT kPresent = true;

  using PersistentLiveSet = PersistentMap<KeyT, ValueT>;

  // Placement-allocates a fresh map in the given zone.
  static PersistentLiveSet* NewPersistentLiveSet(Zone* zone) {
    return new (zone->New(sizeof(PersistentLiveSet)))
        PersistentLiveSet(zone, kNotPresent);
  }

 public:
  // |size| is the maximal capacity of the set and fixes the backing store for
  // the lifetime of the object: a BitVector for small sets, a PersistentMap
  // otherwise. Exactly one of vector_/map_ is non-null after construction.
  LiveSet(int size, Zone* zone)
      : vector_(size <= kMaxSmallSetSize ? new (zone) BitVector(size, zone)
                                         : nullptr),
        map_(size <= kMaxSmallSetSize ? nullptr : NewPersistentLiveSet(zone)) {}

  // Inserts virtual register |v| into the set.
  void Add(int v) {
    if (is_small()) {
      vector_->Add(v);
    } else {
      map_->Set(v, kPresent);
    }
  }

  // Returns true iff virtual register |v| is in the set.
  bool Contains(int v) const {
    if (is_small()) return vector_->Contains(v);
    return map_->Get(v) == kPresent;
  }

  // Removes virtual register |v| from the set. In the large representation a
  // removal is an insertion of the default value; see MaybePrune for how the
  // resulting dead entries are kept in check.
  void Remove(int v) {
    if (is_small()) {
      vector_->Remove(v);
    } else {
      map_->Set(v, kNotPresent);
      deletions_++;
      MaybePrune();
    }
  }

  // Adds all members of |that| to this set. Both sets must use the same
  // backing store, i.e. they must have been constructed with sizes on the
  // same side of kMaxSmallSetSize.
  void Union(const LiveSet& that) {
    if (is_small()) {
      vector_->Union(*that.vector_);
    } else {
      DCHECK(!that.is_small());
      // The other map is empty, nothing to do.
      if (that.map_->begin() == that.map_->end()) return;
      // This map is empty, copy the other map. Note that PersistentMap copies
      // have only low, constant memory cost.
      if (map_->begin() == map_->end()) {
        *map_ = *that.map_;
        // Also take over the deletion count so that pruning still accounts
        // for the dead entries dragged along by the copied map.
        deletions_ = that.deletions_;
        return;
      }
      // Both are non-empty.
      for (const auto& entry : *that.map_) {
        if (entry.second == kPresent) map_->Set(entry.first, kPresent);
      }
    }
  }

  // Returns the number of elements in the set.
  int Count() const {
    if (is_small()) return vector_->Count();
    // Slow. Use only for debugging purposes.
    int count = 0;
    for (const auto& entry : *map_) {
      if (entry.second == kPresent) count++;
    }
    return count;
  }

  // Uniform iteration over both backing stores; produces the indices of all
  // virtual registers present in the set.
  class Iterator {
   private:
    // Dummy iteration target so that the unused sub-iterator (small or large,
    // whichever does not match the target set) is still constructible.
    BitVector empty_vector_;

   public:
    explicit Iterator(LiveSet* target)
        : is_small_(target->is_small()),
          small_it_(is_small_ ? target->vector_ : &empty_vector_),
          large_it_(is_small_ ? PersistentLiveSet::iterator::end(kNotPresent)
                              : target->map_->begin()) {}
    ~Iterator() = default;

    // Returns true when iteration is exhausted.
    bool Done() const {
      return is_small_ ? small_it_.Done() : large_it_.is_end();
    }

    // Advances to the next element.
    void Advance() {
      if (is_small_) {
        small_it_.Advance();
      } else {
        ++large_it_;
      }
    }

    // Returns the current virtual register index.
    int Current() const {
      if (is_small_) return small_it_.Current();
      return (*large_it_).first;
    }

   private:
    const bool is_small_;
    BitVector::Iterator small_it_;
    PersistentLiveSet::iterator large_it_;
  };

 private:
  // True iff the small (BitVector) backing store is in use.
  bool is_small() const { return vector_ != nullptr; }

  void MaybePrune() {
    DCHECK(!is_small());
    // The PersistentMap data structure never shrinks by itself; deletions are
    // internally treated as insertions of the kNotPresent value. This becomes
    // problematic when the data structure begins to drag along more deleted
    // keys than non-deleted keys. Pruning addresses this by creating a fresh
    // deep copy of the map after a threshold is reached.
    //
    // Note: This is where the adaptive data structure starts to get hacky.
    // Ideally we'd be able to avoid arbitrary parametrization such as
    // kMaxDeletionsBeforePrune and kMaxSmallSetSize. Maybe it's just a
    // complexity cost we have to pay.

    // Fairly arbitrary constant, chosen s.t. our tests do not regress.
    static constexpr uint16_t kMaxDeletionsBeforePrune = 128;
    if (deletions_ < kMaxDeletionsBeforePrune) return;

    // Deep-copy only the live entries into a fresh map. The old map's memory
    // is not reclaimed (zone allocation never frees), but it is no longer
    // dragged along by future copies of this set.
    PersistentLiveSet* new_map = NewPersistentLiveSet(map_->zone());
    for (const auto& entry : *map_) {
      if (entry.second == kPresent) new_map->Set(entry.first, kPresent);
    }
    map_ = new_map;
    deletions_ = 0;
  }

  // The decision between backing stores is made once when the LiveSet is
  // constructed. The chosen backing store is set while the other remains
  // nullptr.
  BitVector* const vector_;
  PersistentLiveSet* map_;
  // Number of Remove() calls on map_ since the last prune (or since
  // construction / the last wholesale copy in Union).
  uint16_t deletions_ = 0;
};
class LiveRangeBound {
public:
explicit LiveRangeBound(LiveRange* range, bool skip)
......@@ -1594,7 +1767,7 @@ RegisterAllocationData::PhiMapValue* RegisterAllocationData::GetPhiMapValueFor(
bool RegisterAllocationData::ExistsUseWithoutDefinition() {
bool found = false;
BitVector::Iterator iterator(live_in_sets()[0]);
LiveSet::Iterator iterator(live_in_sets()[0]);
while (!iterator.Done()) {
found = true;
int operand_index = iterator.Current();
......@@ -2022,23 +2195,23 @@ LiveRangeBuilder::LiveRangeBuilder(RegisterAllocationData* data,
Zone* local_zone)
: data_(data), phi_hints_(local_zone) {}
BitVector* LiveRangeBuilder::ComputeLiveOut(const InstructionBlock* block,
RegisterAllocationData* data) {
LiveSet* LiveRangeBuilder::ComputeLiveOut(const InstructionBlock* block,
RegisterAllocationData* data) {
size_t block_index = block->rpo_number().ToSize();
BitVector* live_out = data->live_out_sets()[block_index];
LiveSet* live_out = data->live_out_sets()[block_index];
if (live_out == nullptr) {
// Compute live out for the given block, except not including backward
// successor edges.
Zone* zone = data->allocation_zone();
const InstructionSequence* code = data->code();
live_out = new (zone) BitVector(code->VirtualRegisterCount(), zone);
live_out = new (zone) LiveSet(code->VirtualRegisterCount(), zone);
// Process all successor blocks.
for (const RpoNumber& succ : block->successors()) {
// Add values live on entry to the successor.
if (succ <= block->rpo_number()) continue;
BitVector* live_in = data->live_in_sets()[succ.ToSize()];
LiveSet* live_in = data->live_in_sets()[succ.ToSize()];
if (live_in != nullptr) live_out->Union(*live_in);
// All phi input operands corresponding to this successor edge are live
......@@ -2056,7 +2229,7 @@ BitVector* LiveRangeBuilder::ComputeLiveOut(const InstructionBlock* block,
}
void LiveRangeBuilder::AddInitialIntervals(const InstructionBlock* block,
BitVector* live_out) {
LiveSet* live_out) {
// Add an interval that includes the entire block to the live range for
// each live_out value.
LifetimePosition start = LifetimePosition::GapFromInstructionIndex(
......@@ -2064,7 +2237,7 @@ void LiveRangeBuilder::AddInitialIntervals(const InstructionBlock* block,
LifetimePosition end = LifetimePosition::InstructionFromInstructionIndex(
block->last_instruction_index())
.NextStart();
BitVector::Iterator iterator(live_out);
LiveSet::Iterator iterator(live_out);
while (!iterator.Done()) {
int operand_index = iterator.Current();
TopLevelLiveRange* range = data()->GetOrCreateLiveRangeFor(operand_index);
......@@ -2224,7 +2397,7 @@ UsePosition* LiveRangeBuilder::Use(LifetimePosition block_start,
}
void LiveRangeBuilder::ProcessInstructions(const InstructionBlock* block,
BitVector* live) {
LiveSet* live) {
int block_start = block->first_instruction_index();
LifetimePosition block_start_position =
LifetimePosition::GapFromInstructionIndex(block_start);
......@@ -2440,7 +2613,7 @@ void LiveRangeBuilder::ProcessInstructions(const InstructionBlock* block,
}
void LiveRangeBuilder::ProcessPhis(const InstructionBlock* block,
BitVector* live) {
LiveSet* live) {
for (PhiInstruction* phi : block->phis()) {
// The live range interval already ends at the first instruction of the
// block.
......@@ -2562,11 +2735,11 @@ void LiveRangeBuilder::ProcessPhis(const InstructionBlock* block,
}
void LiveRangeBuilder::ProcessLoopHeader(const InstructionBlock* block,
BitVector* live) {
LiveSet* live) {
DCHECK(block->IsLoopHeader());
// Add a live range stretching from the first loop instruction to the last
// for each value live on entry to the header.
BitVector::Iterator iterator(live);
LiveSet::Iterator iterator(live);
LifetimePosition start = LifetimePosition::GapFromInstructionIndex(
block->first_instruction_index());
LifetimePosition end = LifetimePosition::GapFromInstructionIndex(
......@@ -2593,7 +2766,7 @@ void LiveRangeBuilder::BuildLiveRanges() {
data_->tick_counter()->DoTick();
InstructionBlock* block =
code()->InstructionBlockAt(RpoNumber::FromInt(block_id));
BitVector* live = ComputeLiveOut(block, data());
LiveSet* live = ComputeLiveOut(block, data());
// Initially consider all live_out values live for the entire block. We
// will shorten these intervals if necessary.
AddInitialIntervals(block, live);
......@@ -4949,11 +5122,11 @@ bool LiveRangeConnector::CanEagerlyResolveControlFlow(
void LiveRangeConnector::ResolveControlFlow(Zone* local_zone) {
// Lazily linearize live ranges in memory for fast lookup.
LiveRangeFinder finder(data(), local_zone);
ZoneVector<BitVector*>& live_in_sets = data()->live_in_sets();
ZoneVector<LiveSet*>& live_in_sets = data()->live_in_sets();
for (const InstructionBlock* block : code()->instruction_blocks()) {
if (CanEagerlyResolveControlFlow(block)) continue;
BitVector* live = live_in_sets[block->rpo_number().ToInt()];
BitVector::Iterator iterator(live);
LiveSet* live = live_in_sets[block->rpo_number().ToInt()];
LiveSet::Iterator iterator(live);
while (!iterator.Done()) {
data()->tick_counter()->DoTick();
int vreg = iterator.Current();
......
......@@ -21,6 +21,8 @@ class TickCounter;
namespace compiler {
class LiveSet;
static const int32_t kUnassignedRegister = RegisterConfiguration::kMaxRegisters;
enum RegisterKind { GENERAL_REGISTERS, FP_REGISTERS };
......@@ -277,8 +279,8 @@ class RegisterAllocationData final : public ZoneObject {
const ZoneVector<TopLevelLiveRange*>& fixed_simd128_live_ranges() const {
return fixed_simd128_live_ranges_;
}
ZoneVector<BitVector*>& live_in_sets() { return live_in_sets_; }
ZoneVector<BitVector*>& live_out_sets() { return live_out_sets_; }
ZoneVector<LiveSet*>& live_in_sets() { return live_in_sets_; }
ZoneVector<LiveSet*>& live_out_sets() { return live_out_sets_; }
ZoneVector<SpillRange*>& spill_ranges() { return spill_ranges_; }
DelayedReferences& delayed_references() { return delayed_references_; }
InstructionSequence* code() const { return code_; }
......@@ -352,8 +354,8 @@ class RegisterAllocationData final : public ZoneObject {
const char* const debug_name_;
const RegisterConfiguration* const config_;
PhiMap phi_map_;
ZoneVector<BitVector*> live_in_sets_;
ZoneVector<BitVector*> live_out_sets_;
ZoneVector<LiveSet*> live_in_sets_;
ZoneVector<LiveSet*> live_out_sets_;
ZoneVector<TopLevelLiveRange*> live_ranges_;
ZoneVector<TopLevelLiveRange*> fixed_live_ranges_;
ZoneVector<TopLevelLiveRange*> fixed_float_live_ranges_;
......@@ -1103,8 +1105,8 @@ class LiveRangeBuilder final : public ZoneObject {
// Phase 3: compute liveness of all virtual register.
void BuildLiveRanges();
static BitVector* ComputeLiveOut(const InstructionBlock* block,
RegisterAllocationData* data);
static LiveSet* ComputeLiveOut(const InstructionBlock* block,
RegisterAllocationData* data);
private:
using SpillMode = RegisterAllocationData::SpillMode;
......@@ -1116,9 +1118,7 @@ class LiveRangeBuilder final : public ZoneObject {
Zone* allocation_zone() const { return data()->allocation_zone(); }
Zone* code_zone() const { return code()->zone(); }
const RegisterConfiguration* config() const { return data()->config(); }
ZoneVector<BitVector*>& live_in_sets() const {
return data()->live_in_sets();
}
ZoneVector<LiveSet*>& live_in_sets() const { return data()->live_in_sets(); }
// Verification.
void Verify() const;
......@@ -1128,10 +1128,10 @@ class LiveRangeBuilder final : public ZoneObject {
bool NextIntervalStartsInDifferentBlocks(const UseInterval* interval) const;
// Liveness analysis support.
void AddInitialIntervals(const InstructionBlock* block, BitVector* live_out);
void ProcessInstructions(const InstructionBlock* block, BitVector* live);
void ProcessPhis(const InstructionBlock* block, BitVector* live);
void ProcessLoopHeader(const InstructionBlock* block, BitVector* live);
void AddInitialIntervals(const InstructionBlock* block, LiveSet* live_out);
void ProcessInstructions(const InstructionBlock* block, LiveSet* live);
void ProcessPhis(const InstructionBlock* block, LiveSet* live);
void ProcessLoopHeader(const InstructionBlock* block, LiveSet* live);
static int FixedLiveRangeID(int index) { return -index - 1; }
int FixedFPLiveRangeID(int index, MachineRepresentation rep);
......
......@@ -88,6 +88,8 @@ class PersistentMap {
return !(*this == other);
}
Zone* zone() const { return zone_; }
// The iterator produces key-value pairs in the lexicographical order of
// hash value and key. It produces exactly the key-value pairs where the value
// is not the default value.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment