Commit 2fbb686e authored by Jakob Kummerow, committed by V8 LUCI CQ

[wasm] Tune inlining heuristics

The key idea is that we can now take call count feedback into
account consistently for all kinds of calls that support inlining.

Bug: v8:12166
Change-Id: I764b8686b6c825a9b24f0032e81f7d1217ef1371
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3574554
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Reviewed-by: Manos Koukoutos <manoskouk@chromium.org>
Commit-Queue: Jakob Kummerow <jkummerow@chromium.org>
Cr-Commit-Position: refs/heads/main@{#79861}
parent e6e4da2f
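For orientation, the decision logic this CL introduces can be summarized in a small standalone C++ sketch. It paraphrases the patch below: the 5000-node cap, the tiny-callee bonus, and the call-count threshold are taken from the diff, while ShouldInlineCandidate is a hypothetical helper name used here for illustration, not a V8 function.

// Standalone paraphrase of the tuned heuristics (not the actual V8 code).
#include <cstddef>
#include <cstdint>

// Mirrors WasmInliner::graph_size_allows_inlining() from this patch.
bool GraphSizeAllowsInlining(size_t graph_size) { return graph_size < 5000; }

// Mirrors SmallEnoughToInline() from this patch.
bool SmallEnoughToInline(size_t current_graph_size, uint32_t candidate_size) {
  if (GraphSizeAllowsInlining(current_graph_size)) return true;
  // For truly tiny functions, be a bit more generous.
  return candidate_size < 10 &&
         GraphSizeAllowsInlining(current_graph_size - 100);
}

// Hypothetical helper paraphrasing the per-candidate filter in Finalize():
// require roughly one recorded call per two wire bytes of callee size,
// then check the remaining graph budget.
bool ShouldInlineCandidate(int call_count, uint32_t wire_byte_size,
                           size_t current_graph_size) {
  int min_count_for_inlining = wire_byte_size / 2;
  if (call_count < min_count_for_inlining) return false;
  return SmallEnoughToInline(current_graph_size, wire_byte_size);
}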
@@ -1622,7 +1622,7 @@ struct WasmInliningPhase {
void Run(PipelineData* data, Zone* temp_zone, wasm::CompilationEnv* env,
uint32_t function_index, const wasm::WireBytesStorage* wire_bytes,
std::vector<compiler::WasmLoopInfo>* loop_info) {
if (WasmInliner::any_inlining_impossible(data->graph()->NodeCount())) {
if (!WasmInliner::graph_size_allows_inlining(data->graph()->NodeCount())) {
return;
}
GraphReducer graph_reducer(
@@ -1630,9 +1630,11 @@ struct WasmInliningPhase {
data->jsgraph()->Dead(), data->observe_node_manager());
DeadCodeElimination dead(&graph_reducer, data->graph(), data->common(),
temp_zone);
std::unique_ptr<char[]> debug_name = data->info()->GetDebugName();
WasmInliner inliner(&graph_reducer, env, function_index,
data->source_positions(), data->node_origins(),
data->mcgraph(), wire_bytes, loop_info);
data->mcgraph(), wire_bytes, loop_info,
debug_name.get());
AddReducer(data, &graph_reducer, &dead);
AddReducer(data, &graph_reducer, &inliner);
graph_reducer.ReduceGraph();
......
@@ -29,7 +29,47 @@ Reduction WasmInliner::Reduce(Node* node) {
}
#define TRACE(...) \
if (FLAG_trace_wasm_inlining) PrintF(__VA_ARGS__);
if (FLAG_trace_wasm_inlining) PrintF(__VA_ARGS__)
void WasmInliner::Trace(Node* call, int inlinee, const char* decision) {
TRACE("[function %d: considering node %d, call to %d: %s]\n", function_index_,
call->id(), inlinee, decision);
}
uint32_t WasmInliner::FindOriginatingFunction(Node* call) {
DCHECK_EQ(inlined_functions_.size(), first_node_id_.size());
NodeId id = call->id();
if (inlined_functions_.size() == 0 || id < first_node_id_[0]) {
return function_index_;
}
for (size_t i = 1; i < first_node_id_.size(); i++) {
if (id < first_node_id_[i]) return inlined_functions_[i - 1];
}
DCHECK_GE(id, first_node_id_.back());
return inlined_functions_.back();
}
int WasmInliner::GetCallCount(Node* call) {
if (!FLAG_wasm_speculative_inlining) return 0;
base::MutexGuard guard(&module()->type_feedback.mutex);
wasm::WasmCodePosition position =
source_positions_->GetSourcePosition(call).ScriptOffset();
uint32_t func = FindOriginatingFunction(call);
auto maybe_feedback =
module()->type_feedback.feedback_for_function.find(func);
if (maybe_feedback == module()->type_feedback.feedback_for_function.end()) {
return 0;
}
wasm::FunctionTypeFeedback feedback = maybe_feedback->second;
// It's possible that we haven't processed the feedback yet. Currently,
// this can happen for targets of call_direct that haven't gotten hot yet,
// and for functions where Liftoff bailed out.
if (feedback.feedback_vector.size() == 0) return 0;
auto index_in_vector = feedback.positions.find(position);
if (index_in_vector == feedback.positions.end()) return 0;
return feedback.feedback_vector[index_in_vector->second]
.absolute_call_frequency;
}
// TODO(12166): Save inlined frames for trap/--trace-wasm purposes. Consider
// tail calls.
@@ -55,45 +95,22 @@ Reduction WasmInliner::ReduceCall(Node* call) {
}
auto info = OpParameter<RelocatablePtrConstantInfo>(callee->op());
uint32_t inlinee_index = static_cast<uint32_t>(info.value());
TRACE("[function %d: considering node %d, call to %d... ", function_index_,
call->id(), inlinee_index)
if (info.rmode() != RelocInfo::WASM_CALL) {
TRACE("not a wasm call]\n")
Trace(call, inlinee_index, "not a wasm call");
return NoChange();
}
if (inlinee_index < module()->num_imported_functions) {
TRACE("imported function]\n")
Trace(call, inlinee_index, "imported function");
return NoChange();
}
if (inlinee_index == function_index_) {
TRACE("recursive call]\n")
Trace(call, inlinee_index, "recursive call");
return NoChange();
}
TRACE("adding to inlining candidates!]\n")
int call_count = 0;
if (FLAG_wasm_speculative_inlining) {
base::MutexGuard guard(&module()->type_feedback.mutex);
auto maybe_feedback =
module()->type_feedback.feedback_for_function.find(function_index_);
if (maybe_feedback != module()->type_feedback.feedback_for_function.end()) {
wasm::FunctionTypeFeedback feedback = maybe_feedback->second;
wasm::WasmCodePosition position =
source_positions_->GetSourcePosition(call).ScriptOffset();
DCHECK_NE(position, wasm::kNoCodePosition);
// It could be that we haven't processed the feedback yet, because e.g.:
// - Liftoff bailed out for this function
// - the call is in an inlined function that isn't hot yet
auto index_in_feedback_vector = feedback.positions.find(position);
if (index_in_feedback_vector != feedback.positions.end() &&
feedback.feedback_vector.size() > 0) {
const wasm::CallSiteFeedback& call_site_feedback =
feedback.feedback_vector[index_in_feedback_vector->second];
call_count = call_site_feedback.absolute_call_frequency;
}
}
}
Trace(call, inlinee_index, "adding to inlining candidates!");
int call_count = GetCallCount(call);
CHECK_LT(inlinee_index, module()->functions.size());
const wasm::WasmFunction* inlinee = &module()->functions[inlinee_index];
@@ -106,19 +123,45 @@ Reduction WasmInliner::ReduceCall(Node* call) {
return NoChange();
}
bool SmallEnoughToInline(size_t current_graph_size, uint32_t candidate_size) {
if (WasmInliner::graph_size_allows_inlining(current_graph_size)) {
return true;
}
// For truly tiny functions, let's be a bit more generous.
return candidate_size < 10 &&
WasmInliner::graph_size_allows_inlining(current_graph_size - 100);
}
void WasmInliner::Trace(const CandidateInfo& candidate, const char* decision) {
TRACE(
" [function %d: considering candidate {@%d, index=%d, count=%d, "
"size=%d}: %s]\n",
function_index_, candidate.node->id(), candidate.inlinee_index,
candidate.call_count, candidate.wire_byte_size, decision);
}
void WasmInliner::Finalize() {
TRACE("function %d: going though inlining candidates...\n", function_index_);
TRACE("function %d %s: going though inlining candidates...\n",
function_index_, debug_name_);
if (inlining_candidates_.empty()) return;
while (!inlining_candidates_.empty()) {
CandidateInfo candidate = inlining_candidates_.top();
inlining_candidates_.pop();
Node* call = candidate.node;
TRACE(
" [function %d: considering candidate {@%d, index=%d, count=%d, "
"size=%d}... ",
function_index_, call->id(), candidate.inlinee_index,
candidate.call_count, candidate.wire_byte_size);
if (call->IsDead()) {
TRACE("dead node]\n");
Trace(candidate, "dead node");
continue;
}
int min_count_for_inlining = candidate.wire_byte_size / 2;
if (candidate.call_count < min_count_for_inlining) {
Trace(candidate, "not called often enough");
continue;
}
// We could build the candidate's graph first and consider its node count,
// but it turns out that wire byte size and node count are quite strongly
// correlated, at about 1.16 nodes per wire byte (measured for J2Wasm).
if (!SmallEnoughToInline(current_graph_size_, candidate.wire_byte_size)) {
Trace(candidate, "not enough inlining budget");
continue;
}
const wasm::WasmFunction* inlinee =
@@ -170,7 +213,16 @@ void WasmInliner::Finalize() {
if (result.failed()) {
// This can happen if the inlinee has never been compiled before and is
// invalid. Return, as there is no point to keep optimizing.
TRACE("failed to compile]\n")
// TODO(jkummerow): This can also happen as a consequence of the
// opportunistic signature specialization we did above! When parameters
// are reassigned (as locals), the subtypes can make that invalid.
// Fix this for now by detecting when it happens and retrying the
// inlining with the original signature.
// A better long-term fix would be to port check elimination to the
// TF graph, so we won't need the signature "trick" any more.
Trace(candidate, "failed to compile");
return;
}
@@ -180,16 +232,11 @@ void WasmInliner::Finalize() {
}
size_t additional_nodes = graph()->NodeCount() - subgraph_min_node_id;
if (current_graph_size_ + additional_nodes >
size_limit(initial_graph_size_)) {
// This is not based on the accurate graph size, as it may have been
// shrunk by other optimizations. We could recompute the accurate size
// with a traversal, but it is most probably not worth the time.
TRACE("not enough inlining budget]\n");
continue;
}
TRACE("inlining!]\n");
Trace(candidate, "inlining!");
current_graph_size_ += additional_nodes;
inlined_functions_.push_back(candidate.inlinee_index);
static_assert(std::is_same_v<NodeId, uint32_t>);
first_node_id_.push_back(static_cast<uint32_t>(subgraph_min_node_id));
if (call->opcode() == IrOpcode::kCall) {
InlineCall(call, inlinee_start, inlinee_end, inlinee->sig,
......
@@ -39,7 +39,7 @@ class WasmInliner final : public AdvancedReducer {
uint32_t function_index, SourcePositionTable* source_positions,
NodeOriginTable* node_origins, MachineGraph* mcgraph,
const wasm::WireBytesStorage* wire_bytes,
std::vector<WasmLoopInfo>* loop_infos)
std::vector<WasmLoopInfo>* loop_infos, const char* debug_name)
: AdvancedReducer(editor),
env_(env),
function_index_(function_index),
@@ -48,6 +48,7 @@ class WasmInliner final : public AdvancedReducer {
mcgraph_(mcgraph),
wire_bytes_(wire_bytes),
loop_infos_(loop_infos),
debug_name_(debug_name),
initial_graph_size_(mcgraph->graph()->NodeCount()),
current_graph_size_(initial_graph_size_),
inlining_candidates_() {}
@@ -57,9 +58,8 @@ class WasmInliner final : public AdvancedReducer {
Reduction Reduce(Node* node) final;
void Finalize() final;
static bool any_inlining_impossible(size_t initial_graph_size) {
return size_limit(initial_graph_size) - initial_graph_size <
kMinimumFunctionNodeCount;
static bool graph_size_allows_inlining(size_t initial_graph_size) {
return initial_graph_size < 5000;
}
private:
@@ -79,33 +79,7 @@ class WasmInliner final : public AdvancedReducer {
}
};
// TODO(manoskouk): This has not been found to be useful, but something
// similar may be tried again in the future.
// struct AdvancedOrdering {
// // Returns if c1 should be prioritized less than c2.
// bool operator()(CandidateInfo& c1, CandidateInfo& c2) {
// if (c1.is_speculative_call_ref && c2.is_speculative_call_ref) {
// if (c1.call_count > c2.call_count) return false;
// if (c2.call_count > c1.call_count) return true;
// return c1.wire_byte_size > c2.wire_byte_size;
// }
// if (!c1.is_speculative_call_ref && !c2.is_speculative_call_ref) {
// return c1.wire_byte_size > c2.wire_byte_size;
// }
//
// constexpr int kAssumedCallCountForDirectCalls = 3;
//
// int c1_call_count = c1.is_speculative_call_ref
// ? c1.call_count
// : kAssumedCallCountForDirectCalls;
// int c2_call_count = c2.is_speculative_call_ref
// ? c2.call_count
// : kAssumedCallCountForDirectCalls;
//
// return static_cast<float>(c1_call_count) / c1.wire_byte_size <
// static_cast<float>(c2_call_count) / c2.wire_byte_size;
// }
//};
uint32_t FindOriginatingFunction(Node* call);
Zone* zone() const { return mcgraph_->zone(); }
CommonOperatorBuilder* common() const { return mcgraph_->common(); }
@@ -113,17 +87,6 @@ class WasmInliner final : public AdvancedReducer {
MachineGraph* mcgraph() const { return mcgraph_; }
const wasm::WasmModule* module() const;
// A limit to the size of the inlined graph as a function of its initial size.
static size_t size_limit(size_t initial_graph_size) {
return initial_graph_size +
std::min(FLAG_wasm_inlining_max_size,
FLAG_wasm_inlining_budget_factor / initial_graph_size);
}
// The smallest size in TF nodes any meaningful wasm function can have
// (start, return, IntConstant(0), end).
static constexpr size_t kMinimumFunctionNodeCount = 4;
Reduction ReduceCall(Node* call);
void InlineCall(Node* call, Node* callee_start, Node* callee_end,
const wasm::FunctionSig* inlinee_sig,
@@ -131,6 +94,11 @@
void InlineTailCall(Node* call, Node* callee_start, Node* callee_end);
void RewireFunctionEntry(Node* call, Node* callee_start);
int GetCallCount(Node* call);
void Trace(Node* call, int inlinee, const char* decision);
void Trace(const CandidateInfo& candidate, const char* decision);
wasm::CompilationEnv* const env_;
uint32_t function_index_;
SourcePositionTable* const source_positions_;
@@ -138,12 +106,18 @@
MachineGraph* const mcgraph_;
const wasm::WireBytesStorage* const wire_bytes_;
std::vector<WasmLoopInfo>* const loop_infos_;
const char* debug_name_;
const size_t initial_graph_size_;
size_t current_graph_size_;
std::priority_queue<CandidateInfo, std::vector<CandidateInfo>,
LexicographicOrdering>
inlining_candidates_;
std::unordered_set<Node*> seen_;
std::vector<uint32_t> inlined_functions_;
// Stores the graph size before an inlining was performed, to make it
// possible to map back from nodes to the function they came from.
// Guaranteed to have the same length as {inlined_functions_}.
std::vector<uint32_t> first_node_id_;
};
} // namespace compiler
......
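As a worked example of the {inlined_functions_} / {first_node_id_} bookkeeping declared above (hypothetical numbers; the helper mirrors FindOriginatingFunction but is a detached sketch, not the V8 code): suppose the outer function is #5, function #12 is inlined first with its nodes starting at node ID 240, and function #7 is inlined next with its nodes starting at node ID 310.

#include <cassert>
#include <cstdint>
#include <vector>

// Detached version of the lookup: a node created before the first inlining
// belongs to the outer function; otherwise it belongs to the most recent
// inlinee whose first node ID is not larger than the queried ID.
uint32_t FindOriginatingFunction(uint32_t node_id, uint32_t outer_function,
                                 const std::vector<uint32_t>& inlined_functions,
                                 const std::vector<uint32_t>& first_node_id) {
  if (inlined_functions.empty() || node_id < first_node_id[0]) {
    return outer_function;
  }
  for (size_t i = 1; i < first_node_id.size(); i++) {
    if (node_id < first_node_id[i]) return inlined_functions[i - 1];
  }
  return inlined_functions.back();
}

int main() {
  std::vector<uint32_t> inlined_functions = {12, 7};
  std::vector<uint32_t> first_node_id = {240, 310};
  assert(FindOriginatingFunction(50, 5, inlined_functions, first_node_id) == 5);
  assert(FindOriginatingFunction(250, 5, inlined_functions, first_node_id) == 12);
  assert(FindOriginatingFunction(400, 5, inlined_functions, first_node_id) == 7);
  return 0;
}

This mapping is what lets GetCallCount look up feedback for the function a call site actually came from, rather than always consulting the outer function's feedback.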
@@ -1244,15 +1244,56 @@ bool CompileLazy(Isolate* isolate, Handle<WasmInstanceObject> instance,
return true;
}
std::vector<CallSiteFeedback> ProcessTypeFeedback(
Isolate* isolate, Handle<WasmInstanceObject> instance, int func_index) {
int which_vector = declared_function_index(instance->module(), func_index);
Object maybe_feedback = instance->feedback_vectors().get(which_vector);
if (!maybe_feedback.IsFixedArray()) return {};
class TransitiveTypeFeedbackProcessor {
public:
TransitiveTypeFeedbackProcessor(const WasmModule* module,
Handle<WasmInstanceObject> instance,
int func_index)
: instance_(instance),
feedback_for_function_(module->type_feedback.feedback_for_function) {
base::MutexGuard mutex_guard(&module->type_feedback.mutex);
queue_.insert(func_index);
while (!queue_.empty()) {
auto next = queue_.cbegin();
Process(*next);
queue_.erase(next);
}
}
private:
void Process(int func_index);
void EnqueueCallees(std::vector<CallSiteFeedback> feedback) {
for (size_t i = 0; i < feedback.size(); i++) {
int func = feedback[i].function_index;
// TODO(jkummerow): Find a way to get the target function ID for
// direct calls (which currently requires decoding the function).
if (func == -1) continue;
// Don't spend time on calls that have never been executed.
if (feedback[i].absolute_call_frequency == 0) continue;
// Don't recompute feedback that has already been processed.
auto existing = feedback_for_function_.find(func);
if (existing != feedback_for_function_.end() &&
existing->second.feedback_vector.size() > 0) {
continue;
}
queue_.insert(func);
}
}
Handle<WasmInstanceObject> instance_;
std::map<uint32_t, FunctionTypeFeedback>& feedback_for_function_;
std::unordered_set<int> queue_;
};
void TransitiveTypeFeedbackProcessor::Process(int func_index) {
int which_vector = declared_function_index(instance_->module(), func_index);
Object maybe_feedback = instance_->feedback_vectors().get(which_vector);
if (!maybe_feedback.IsFixedArray()) return;
FixedArray feedback = FixedArray::cast(maybe_feedback);
std::vector<CallSiteFeedback> result(feedback.length() / 2);
int imported_functions =
static_cast<int>(instance->module()->num_imported_functions);
static_cast<int>(instance_->module()->num_imported_functions);
for (int i = 0; i < feedback.length(); i += 2) {
Object value = feedback.get(i);
if (value.IsWasmInternalFunction() &&
@@ -1263,7 +1304,7 @@ std::vector<CallSiteFeedback> ProcessTypeFeedback(
// if it's defined in the same module.
WasmExportedFunction target = WasmExportedFunction::cast(
WasmInternalFunction::cast(value).external());
if (target.instance() == *instance &&
if (target.instance() == *instance_ &&
target.function_index() >= imported_functions) {
if (FLAG_trace_wasm_speculative_inlining) {
PrintF("[Function #%d call_ref #%d inlineable (monomorphic)]\n",
@@ -1304,7 +1345,7 @@ std::vector<CallSiteFeedback> ProcessTypeFeedback(
}
WasmExportedFunction target =
WasmExportedFunction::cast(internal.external());
if (target.instance() != *instance ||
if (target.instance() != *instance_ ||
target.function_index() < imported_functions) {
continue;
}
@@ -1343,7 +1384,8 @@ std::vector<CallSiteFeedback> ProcessTypeFeedback(
}
result[i / 2] = {-1, -1};
}
return result;
EnqueueCallees(result);
feedback_for_function_[func_index].feedback_vector = std::move(result);
}
void TriggerTierUp(Isolate* isolate, NativeModule* native_module,
@@ -1372,13 +1414,10 @@ void TriggerTierUp(Isolate* isolate, NativeModule* native_module,
priority = saved_priority;
}
if (FLAG_wasm_speculative_inlining) {
auto feedback = ProcessTypeFeedback(isolate, instance, func_index);
base::MutexGuard mutex_guard(&module->type_feedback.mutex);
// TODO(jkummerow): we could have collisions here if two different instances
// of the same module schedule tier-ups of the same function at the same
// time. If that ever becomes a problem, figure out a solution.
module->type_feedback.feedback_for_function[func_index].feedback_vector =
std::move(feedback);
// TODO(jkummerow): we could have collisions here if different instances
// of the same module have collected different feedback. If that ever
// becomes a problem, figure out a solution.
TransitiveTypeFeedbackProcessor process(module, instance, func_index);
}
compilation_state->AddTopTierPriorityCompilationUnit(tiering_unit, priority);
......
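Conceptually, the new TransitiveTypeFeedbackProcessor is a worklist over function indices: starting from the function that triggered tier-up, it decodes that function's feedback vector and enqueues every callee whose feedback is still missing. The sketch below paraphrases that loop; FeedbackMap and the process_one callback are stand-ins for the module's type_feedback storage and for decoding one feedback vector, not actual V8 APIs.

#include <functional>
#include <map>
#include <unordered_set>
#include <utility>
#include <vector>

struct CallSiteFeedback {
  int function_index;           // -1 when the call target is unknown
  int absolute_call_frequency;  // how often the call site was executed
};

using FeedbackMap = std::map<int, std::vector<CallSiteFeedback>>;

void ProcessTransitively(
    int root_func, FeedbackMap& processed,
    const std::function<std::vector<CallSiteFeedback>(int)>& process_one) {
  std::unordered_set<int> queue;
  queue.insert(root_func);
  while (!queue.empty()) {
    int func = *queue.begin();
    queue.erase(queue.begin());
    std::vector<CallSiteFeedback> feedback = process_one(func);
    for (const CallSiteFeedback& site : feedback) {
      if (site.function_index == -1) continue;          // unknown target
      if (site.absolute_call_frequency == 0) continue;  // never executed
      auto it = processed.find(site.function_index);
      if (it != processed.end() && !it->second.empty()) continue;  // already processed
      queue.insert(site.function_index);
    }
    processed[func] = std::move(feedback);
  }
}

With this in place, TriggerTierUp only has to construct the processor under the feedback mutex instead of writing a single function's feedback vector itself.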