Commit 14a970f3 authored by Jakob Gruber's avatar Jakob Gruber Committed by Commit Bot

[arm64] Refactor veneer pool emission

Assembler::EmitVeneers has potential quadratic behavior, which appears
as hangs on chromecrash (see the linked bug). We iterate a list of
branches (unresolved_branches_), and for each branch iterate yet another
list of branches (the label link list in
RemoveBranchFromLabelLinkChain).

Ordering decisions increase the problem, by iterating in the outer loop
in ascending pc offset order, and in the inner loop (which removes the
branch from the linked list) in descending order.

This CL mostly refactors the outer loop:

- Instead of iterating over the whole unresolved_branches_ list, iterate
only the relevant part.
- Call RemoveBranchFromLabelLinkChain in descending pc offset order.
- Keep veneer emission in ascending pc offset order.

Bug: chromium:1162080
Change-Id: I77bb3d961c1b19ef1c31e777b640b213869bc1d6
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2794435
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: 's avatarLeszek Swirski <leszeks@chromium.org>
Cr-Commit-Position: refs/heads/master@{#73762}
parent 82ca5eca
...@@ -32,6 +32,7 @@ ...@@ -32,6 +32,7 @@
#include "src/base/bits.h" #include "src/base/bits.h"
#include "src/base/cpu.h" #include "src/base/cpu.h"
#include "src/base/small-vector.h"
#include "src/codegen/arm64/assembler-arm64-inl.h" #include "src/codegen/arm64/assembler-arm64-inl.h"
#include "src/codegen/register-configuration.h" #include "src/codegen/register-configuration.h"
#include "src/codegen/safepoint-table.h" #include "src/codegen/safepoint-table.h"
...@@ -4472,12 +4473,15 @@ const size_t ConstantPool::kOpportunityDistToPool32 = 64 * KB; ...@@ -4472,12 +4473,15 @@ const size_t ConstantPool::kOpportunityDistToPool32 = 64 * KB;
const size_t ConstantPool::kOpportunityDistToPool64 = 64 * KB; const size_t ConstantPool::kOpportunityDistToPool64 = 64 * KB;
const size_t ConstantPool::kApproxMaxEntryCount = 512; const size_t ConstantPool::kApproxMaxEntryCount = 512;
bool Assembler::ShouldEmitVeneer(int max_reachable_pc, size_t margin) { intptr_t Assembler::MaxPCOffsetAfterVeneerPoolIfEmittedNow(size_t margin) {
// Account for the branch around the veneers and the guard. // Account for the branch and guard around the veneers.
int protection_offset = 2 * kInstrSize; static constexpr int kBranchSizeInBytes = kInstrSize;
return static_cast<intptr_t>(pc_offset() + margin + protection_offset + static constexpr int kGuardSizeInBytes = kInstrSize;
unresolved_branches_.size() * const size_t max_veneer_size_in_bytes =
kMaxVeneerCodeSize) >= max_reachable_pc; unresolved_branches_.size() * kVeneerCodeSize;
return static_cast<intptr_t>(pc_offset() + kBranchSizeInBytes +
kGuardSizeInBytes + max_veneer_size_in_bytes +
margin);
} }
void Assembler::RecordVeneerPool(int location_offset, int size) { void Assembler::RecordVeneerPool(int location_offset, int size) {
...@@ -4508,51 +4512,80 @@ void Assembler::EmitVeneers(bool force_emit, bool need_protection, ...@@ -4508,51 +4512,80 @@ void Assembler::EmitVeneers(bool force_emit, bool need_protection,
EmitVeneersGuard(); EmitVeneersGuard();
#ifdef DEBUG // We only emit veneers if needed (unless emission is forced), i.e. when the
Label veneer_size_check; // max-reachable-pc of the branch has been exhausted by the current codegen
#endif // state. Specifically, we emit when the max-reachable-pc of the branch <= the
// max-pc-after-veneers (over-approximated).
const intptr_t max_pc_after_veneers =
MaxPCOffsetAfterVeneerPoolIfEmittedNow(margin);
// The `unresolved_branches_` multimap is sorted by max-reachable-pc in
// ascending order. For efficiency reasons, we want to call
// RemoveBranchFromLabelLinkChain in descending order. The actual veneers are
// then generated in ascending order.
// TODO(jgruber): This is still inefficient in multiple ways, thoughts on how
// we could improve in the future:
// - Don't erase individual elements from the multimap, erase a range instead.
// - Replace the multimap by a simpler data structure (like a plain vector or
// a circular array).
// - Refactor s.t. RemoveBranchFromLabelLinkChain does not need the linear
// lookup in the link chain.
static constexpr int kStaticTasksSize = 16; // Arbitrary.
base::SmallVector<FarBranchInfo, kStaticTasksSize> tasks;
{
auto it = unresolved_branches_.begin();
while (it != unresolved_branches_.end()) {
const int max_reachable_pc = it->first;
if (!force_emit && max_reachable_pc > max_pc_after_veneers) break;
std::multimap<int, FarBranchInfo>::iterator it, it_to_delete; // Found a task. We'll emit a veneer for this.
tasks.emplace_back(it->second);
auto eraser_it = it++;
unresolved_branches_.erase(eraser_it);
}
}
it = unresolved_branches_.begin(); // Update next_veneer_pool_check_ (tightly coupled with unresolved_branches_).
while (it != unresolved_branches_.end()) { if (unresolved_branches_.empty()) {
if (force_emit || ShouldEmitVeneer(it->first, margin)) { next_veneer_pool_check_ = kMaxInt;
Instruction* branch = InstructionAt(it->second.pc_offset_); } else {
Label* label = it->second.label_; next_veneer_pool_check_ =
unresolved_branches_first_limit() - kVeneerDistanceCheckMargin;
}
// Reminder: We iterate in reverse order to avoid duplicate linked-list
// iteration in RemoveBranchFromLabelLinkChain (which starts at the target
// label, and iterates backwards through linked branch instructions).
const int tasks_size = static_cast<int>(tasks.size());
for (int i = tasks_size - 1; i >= 0; i--) {
Instruction* branch = InstructionAt(tasks[i].pc_offset_);
Instruction* veneer = reinterpret_cast<Instruction*>(
reinterpret_cast<uintptr_t>(pc_) + i * kVeneerCodeSize);
RemoveBranchFromLabelLinkChain(branch, tasks[i].label_, veneer);
}
// Now emit the actual veneer and patch up the incoming branch.
for (const FarBranchInfo& info : tasks) {
#ifdef DEBUG #ifdef DEBUG
Label veneer_size_check;
bind(&veneer_size_check); bind(&veneer_size_check);
#endif #endif
// Patch the branch to point to the current position, and emit a branch Instruction* branch = InstructionAt(info.pc_offset_);
// to the label.
Instruction* veneer = reinterpret_cast<Instruction*>(pc_); Instruction* veneer = reinterpret_cast<Instruction*>(pc_);
RemoveBranchFromLabelLinkChain(branch, label, veneer);
branch->SetImmPCOffsetTarget(options(), veneer); branch->SetImmPCOffsetTarget(options(), veneer);
b(label); b(info.label_); // This may end up pointing at yet another veneer later on.
#ifdef DEBUG DCHECK_EQ(SizeOfCodeGeneratedSince(&veneer_size_check),
DCHECK(SizeOfCodeGeneratedSince(&veneer_size_check) <= static_cast<uint64_t>(kVeneerCodeSize));
static_cast<uint64_t>(kMaxVeneerCodeSize));
veneer_size_check.Unuse();
#endif
it_to_delete = it++;
unresolved_branches_.erase(it_to_delete);
} else {
++it;
}
} }
// Record the veneer pool size. // Record the veneer pool size.
int pool_size = static_cast<int>(SizeOfCodeGeneratedSince(&size_check)); int pool_size = static_cast<int>(SizeOfCodeGeneratedSince(&size_check));
RecordVeneerPool(veneer_pool_relocinfo_loc, pool_size); RecordVeneerPool(veneer_pool_relocinfo_loc, pool_size);
if (unresolved_branches_.empty()) {
next_veneer_pool_check_ = kMaxInt;
} else {
next_veneer_pool_check_ =
unresolved_branches_first_limit() - kVeneerDistanceCheckMargin;
}
bind(&end); bind(&end);
RecordComment("]"); RecordComment("]");
......
...@@ -2389,18 +2389,23 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase { ...@@ -2389,18 +2389,23 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
constpool_.Check(Emission::kIfNeeded, Jump::kRequired, margin); constpool_.Check(Emission::kIfNeeded, Jump::kRequired, margin);
} }
// Used by veneer checks below - returns the max (= overapproximated) pc
// offset after the veneer pool, if the veneer pool were to be emitted
// immediately.
intptr_t MaxPCOffsetAfterVeneerPoolIfEmittedNow(size_t margin);
// Returns true if we should emit a veneer as soon as possible for a branch // Returns true if we should emit a veneer as soon as possible for a branch
// which can at most reach to specified pc. // which can at most reach to specified pc.
bool ShouldEmitVeneer(int max_reachable_pc, bool ShouldEmitVeneer(int max_reachable_pc, size_t margin) {
size_t margin = kVeneerDistanceMargin); return max_reachable_pc < MaxPCOffsetAfterVeneerPoolIfEmittedNow(margin);
}
bool ShouldEmitVeneers(size_t margin = kVeneerDistanceMargin) { bool ShouldEmitVeneers(size_t margin = kVeneerDistanceMargin) {
return ShouldEmitVeneer(unresolved_branches_first_limit(), margin); return ShouldEmitVeneer(unresolved_branches_first_limit(), margin);
} }
// The maximum code size generated for a veneer. Currently one branch // The code size generated for a veneer. Currently one branch
// instruction. This is for code size checking purposes, and can be extended // instruction. This is for code size checking purposes, and can be extended
// in the future for example if we decide to add nops between the veneers. // in the future for example if we decide to add nops between the veneers.
static constexpr int kMaxVeneerCodeSize = 1 * kInstrSize; static constexpr int kVeneerCodeSize = 1 * kInstrSize;
void RecordVeneerPool(int location_offset, int size); void RecordVeneerPool(int location_offset, int size);
// Emits veneers for branches that are approaching their maximum range. // Emits veneers for branches that are approaching their maximum range.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment