Commit 14a970f3 authored by Jakob Gruber's avatar Jakob Gruber Committed by Commit Bot

[arm64] Refactor veneer pool emission

Assembler::EmitVeneers has potential quadratic behavior, which appears
as hangs on chromecrash (see the linked bug). We iterate a list of
branches (unresolved_branches_), and for each branch iterate yet another
list of branches (the label link list in
RemoveBranchFromLabelLinkChain).

Ordering decisions increase the problem, by iterating in the outer loop
in ascending pc offset order, and in the inner loop (which removes the
branch from the linked list) in descending order.

This CL mostly refactors the outer loop:

- Instead of iterating over the whole unresolved_branches_ list, iterate
only the relevant part.
- Call RemoveBranchFromLabelLinkChain in descending pc offset order.
- Keep veneer emission in ascending pc offset order.

Bug: chromium:1162080
Change-Id: I77bb3d961c1b19ef1c31e777b640b213869bc1d6
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2794435
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: 's avatarLeszek Swirski <leszeks@chromium.org>
Cr-Commit-Position: refs/heads/master@{#73762}
parent 82ca5eca
......@@ -32,6 +32,7 @@
#include "src/base/bits.h"
#include "src/base/cpu.h"
#include "src/base/small-vector.h"
#include "src/codegen/arm64/assembler-arm64-inl.h"
#include "src/codegen/register-configuration.h"
#include "src/codegen/safepoint-table.h"
......@@ -4472,12 +4473,15 @@ const size_t ConstantPool::kOpportunityDistToPool32 = 64 * KB;
const size_t ConstantPool::kOpportunityDistToPool64 = 64 * KB;
const size_t ConstantPool::kApproxMaxEntryCount = 512;
bool Assembler::ShouldEmitVeneer(int max_reachable_pc, size_t margin) {
// Account for the branch around the veneers and the guard.
int protection_offset = 2 * kInstrSize;
return static_cast<intptr_t>(pc_offset() + margin + protection_offset +
unresolved_branches_.size() *
kMaxVeneerCodeSize) >= max_reachable_pc;
intptr_t Assembler::MaxPCOffsetAfterVeneerPoolIfEmittedNow(size_t margin) {
// Account for the branch and guard around the veneers.
static constexpr int kBranchSizeInBytes = kInstrSize;
static constexpr int kGuardSizeInBytes = kInstrSize;
const size_t max_veneer_size_in_bytes =
unresolved_branches_.size() * kVeneerCodeSize;
return static_cast<intptr_t>(pc_offset() + kBranchSizeInBytes +
kGuardSizeInBytes + max_veneer_size_in_bytes +
margin);
}
void Assembler::RecordVeneerPool(int location_offset, int size) {
......@@ -4508,51 +4512,80 @@ void Assembler::EmitVeneers(bool force_emit, bool need_protection,
EmitVeneersGuard();
#ifdef DEBUG
Label veneer_size_check;
#endif
// We only emit veneers if needed (unless emission is forced), i.e. when the
// max-reachable-pc of the branch has been exhausted by the current codegen
// state. Specifically, we emit when the max-reachable-pc of the branch <= the
// max-pc-after-veneers (over-approximated).
const intptr_t max_pc_after_veneers =
MaxPCOffsetAfterVeneerPoolIfEmittedNow(margin);
// The `unresolved_branches_` multimap is sorted by max-reachable-pc in
// ascending order. For efficiency reasons, we want to call
// RemoveBranchFromLabelLinkChain in descending order. The actual veneers are
// then generated in ascending order.
// TODO(jgruber): This is still inefficient in multiple ways, thoughts on how
// we could improve in the future:
// - Don't erase individual elements from the multimap, erase a range instead.
// - Replace the multimap by a simpler data structure (like a plain vector or
// a circular array).
// - Refactor s.t. RemoveBranchFromLabelLinkChain does not need the linear
// lookup in the link chain.
static constexpr int kStaticTasksSize = 16; // Arbitrary.
base::SmallVector<FarBranchInfo, kStaticTasksSize> tasks;
{
auto it = unresolved_branches_.begin();
while (it != unresolved_branches_.end()) {
const int max_reachable_pc = it->first;
if (!force_emit && max_reachable_pc > max_pc_after_veneers) break;
std::multimap<int, FarBranchInfo>::iterator it, it_to_delete;
// Found a task. We'll emit a veneer for this.
tasks.emplace_back(it->second);
auto eraser_it = it++;
unresolved_branches_.erase(eraser_it);
}
}
it = unresolved_branches_.begin();
while (it != unresolved_branches_.end()) {
if (force_emit || ShouldEmitVeneer(it->first, margin)) {
Instruction* branch = InstructionAt(it->second.pc_offset_);
Label* label = it->second.label_;
// Update next_veneer_pool_check_ (tightly coupled with unresolved_branches_).
if (unresolved_branches_.empty()) {
next_veneer_pool_check_ = kMaxInt;
} else {
next_veneer_pool_check_ =
unresolved_branches_first_limit() - kVeneerDistanceCheckMargin;
}
// Reminder: We iterate in reverse order to avoid duplicate linked-list
// iteration in RemoveBranchFromLabelLinkChain (which starts at the target
// label, and iterates backwards through linked branch instructions).
const int tasks_size = static_cast<int>(tasks.size());
for (int i = tasks_size - 1; i >= 0; i--) {
Instruction* branch = InstructionAt(tasks[i].pc_offset_);
Instruction* veneer = reinterpret_cast<Instruction*>(
reinterpret_cast<uintptr_t>(pc_) + i * kVeneerCodeSize);
RemoveBranchFromLabelLinkChain(branch, tasks[i].label_, veneer);
}
// Now emit the actual veneer and patch up the incoming branch.
for (const FarBranchInfo& info : tasks) {
#ifdef DEBUG
Label veneer_size_check;
bind(&veneer_size_check);
#endif
// Patch the branch to point to the current position, and emit a branch
// to the label.
Instruction* branch = InstructionAt(info.pc_offset_);
Instruction* veneer = reinterpret_cast<Instruction*>(pc_);
RemoveBranchFromLabelLinkChain(branch, label, veneer);
branch->SetImmPCOffsetTarget(options(), veneer);
b(label);
#ifdef DEBUG
DCHECK(SizeOfCodeGeneratedSince(&veneer_size_check) <=
static_cast<uint64_t>(kMaxVeneerCodeSize));
veneer_size_check.Unuse();
#endif
it_to_delete = it++;
unresolved_branches_.erase(it_to_delete);
} else {
++it;
}
b(info.label_); // This may end up pointing at yet another veneer later on.
DCHECK_EQ(SizeOfCodeGeneratedSince(&veneer_size_check),
static_cast<uint64_t>(kVeneerCodeSize));
}
// Record the veneer pool size.
int pool_size = static_cast<int>(SizeOfCodeGeneratedSince(&size_check));
RecordVeneerPool(veneer_pool_relocinfo_loc, pool_size);
if (unresolved_branches_.empty()) {
next_veneer_pool_check_ = kMaxInt;
} else {
next_veneer_pool_check_ =
unresolved_branches_first_limit() - kVeneerDistanceCheckMargin;
}
bind(&end);
RecordComment("]");
......
......@@ -2389,18 +2389,23 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
constpool_.Check(Emission::kIfNeeded, Jump::kRequired, margin);
}
// Used by veneer checks below - returns the max (= overapproximated) pc
// offset after the veneer pool, if the veneer pool were to be emitted
// immediately.
intptr_t MaxPCOffsetAfterVeneerPoolIfEmittedNow(size_t margin);
// Returns true if we should emit a veneer as soon as possible for a branch
// which can at most reach to specified pc.
bool ShouldEmitVeneer(int max_reachable_pc,
size_t margin = kVeneerDistanceMargin);
bool ShouldEmitVeneer(int max_reachable_pc, size_t margin) {
return max_reachable_pc < MaxPCOffsetAfterVeneerPoolIfEmittedNow(margin);
}
bool ShouldEmitVeneers(size_t margin = kVeneerDistanceMargin) {
return ShouldEmitVeneer(unresolved_branches_first_limit(), margin);
}
// The maximum code size generated for a veneer. Currently one branch
// The code size generated for a veneer. Currently one branch
// instruction. This is for code size checking purposes, and can be extended
// in the future for example if we decide to add nops between the veneers.
static constexpr int kMaxVeneerCodeSize = 1 * kInstrSize;
static constexpr int kVeneerCodeSize = 1 * kInstrSize;
void RecordVeneerPool(int location_offset, int size);
// Emits veneers for branches that are approaching their maximum range.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment