Commit 1f215c20 authored by Jakob Kummerow, committed by V8 LUCI CQ

[wasm-gc] Polymorphic inlining for call_ref

When call_ref has seen more than one call target, we now support
inlining all of them (constrained by budget/heuristics).

Bug: v8:7748,v8:12166
Change-Id: Iae16e74da1bad5e7a117f70efb6c61b3f39f832c
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3650607
Reviewed-by: Manos Koukoutos <manoskouk@chromium.org>
Commit-Queue: Jakob Kummerow <jkummerow@chromium.org>
Cr-Commit-Position: refs/heads/main@{#80602}
parent ea07528c
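As background for the diffs below, here is a minimal sketch (plain C++, not V8 internals; all names are illustrative) of the shape of code this optimization emits for a call_ref site whose feedback recorded several targets: a chain of identity checks, one per recorded target, each guarding an inlined copy of that target's body, with the unoptimized indirect call as fallback.

    #include <cstdio>

    // Illustrative stand-in for one recorded (target, count) feedback pair.
    struct FeedbackCase { int function_index; int call_count; };

    // Conceptual lowering of a polymorphic call_ref: compare the actual
    // target against each recorded one (at most kMaxPolymorphism = 4) and
    // "inline" the matching case; otherwise fall back to a generic call.
    int LoweredCallRef(int actual_target, const FeedbackCase* cases, int n) {
      for (int i = 0; i < n; i++) {
        if (actual_target == cases[i].function_index) {
          return i;  // stands in for the inlined body of target i
        }
      }
      return -1;  // stands in for the non-inlined, generic call_ref
    }

    int main() {
      FeedbackCase feedback[] = {{3, 120}, {7, 30}};
      std::printf("%d\n", LoweredCallRef(7, feedback, 2));  // prints 1
    }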
@@ -52,6 +52,8 @@ const kAnyTableType: constexpr int31
     generates 'wasm::kWasmAnyRef.raw_bit_field()';
 const kAnyNonNullTableType: constexpr int31
     generates 'wasm::kWasmAnyNonNullableRef.raw_bit_field()';
+const kMaxPolymorphism:
+    constexpr int31 generates 'wasm::kMaxPolymorphism';
 
 extern macro WasmBuiltinsAssembler::LoadInstanceFromFrame(): WasmInstanceObject;
@@ -496,7 +498,7 @@ macro GetTargetAndInstance(funcref: WasmInternalFunction): TargetAndInstance {
 // - monomorphic: (funcref, count (smi)). The second slot is a counter for how
 //   often the funcref in the first slot has been seen.
 // - polymorphic: (fixed_array, <unused>). In this case, the array
-//   contains 2..4 pairs (funcref, count (smi)) (like monomorphic data).
+//   contains 2..kMaxPolymorphism pairs (funcref, count (smi))
 // - megamorphic: ("megamorphic" sentinel, <unused>)
 //
 // TODO(rstz): The counter might overflow if it exceeds the range of a Smi.
@@ -533,7 +535,8 @@ builtin CallRefIC(
 } else if (Is<FixedArray>(value)) {
   // Polymorphic miss.
   const entries = UnsafeCast<FixedArray>(value);
-  if (entries.length == SmiConstant(8)) {  // 4 entries, 2 slots each.
+  const kMaxSlots = kMaxPolymorphism * 2;  // 2 slots per entry.
+  if (entries.length == SmiConstant(kMaxSlots)) {
     vector.objects[index] = ic::kMegamorphicSymbol;
     vector.objects[index + 1] = ic::kMegamorphicSymbol;
   } else {
......
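The slot encoding documented in the hunk above can be summarized as a small classifier. A hedged sketch in plain C++ (the Slot type is a stand-in for tagged values; the real builtin uses Is<...> type checks and a sentinel comparison):

    enum class CallRefState {
      kUninitialized, kMonomorphic, kPolymorphic, kMegamorphic
    };

    // Stand-in for the first of the two feedback-vector slots of a call site.
    struct Slot {
      bool is_funcref = false;              // monomorphic: (funcref, count)
      bool is_fixed_array = false;          // polymorphic: 2..kMaxPolymorphism pairs
      bool is_megamorphic_sentinel = false;
    };

    CallRefState Classify(const Slot& first_slot) {
      if (first_slot.is_funcref) return CallRefState::kMonomorphic;
      if (first_slot.is_fixed_array) return CallRefState::kPolymorphic;
      if (first_slot.is_megamorphic_sentinel) return CallRefState::kMegamorphic;
      return CallRefState::kUninitialized;
    }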
@@ -2962,9 +2962,11 @@ Node* WasmGraphBuilder::BuildCallRef(const wasm::FunctionSig* real_sig,
   return call;
 }
 
-void WasmGraphBuilder::CompareToInternalFunctionAtIndex(
-    Node* func_ref, uint32_t function_index, Node** success_control,
-    Node** failure_control) {
+void WasmGraphBuilder::CompareToInternalFunctionAtIndex(Node* func_ref,
+                                                        uint32_t function_index,
+                                                        Node** success_control,
+                                                        Node** failure_control,
+                                                        bool is_last_case) {
   // Since we are comparing to a function reference, it is guaranteed that
   // instance->wasm_internal_functions() has been initialized.
   Node* internal_functions = gasm_->LoadImmutable(
@@ -2974,8 +2976,9 @@ void WasmGraphBuilder::CompareToInternalFunctionAtIndex(
   Node* function_ref_at_index = gasm_->LoadFixedArrayElement(
       internal_functions, gasm_->IntPtrConstant(function_index),
       MachineType::AnyTagged());
+  BranchHint hint = is_last_case ? BranchHint::kTrue : BranchHint::kNone;
   gasm_->Branch(gasm_->TaggedEqual(function_ref_at_index, func_ref),
-                success_control, failure_control, BranchHint::kTrue);
+                success_control, failure_control, hint);
 }
 
 Node* WasmGraphBuilder::CallRef(const wasm::FunctionSig* real_sig,
......
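The new is_last_case parameter matters because only the final comparison in the chain is worth predicting as taken: by the time control reaches it, every earlier target has already failed to match, so hinting the earlier compares as kTrue would mislead the scheduler. A minimal self-contained restatement of the hint choice (illustrative only; the call sites that drive the chain live in parts of this change not shown here):

    #include <cstdio>

    enum class BranchHint { kNone, kTrue, kFalse };

    // Mirrors the hunk above: non-final compares carry no prediction,
    // the final compare is predicted taken.
    BranchHint HintForCase(int i, int num_cases) {
      return i == num_cases - 1 ? BranchHint::kTrue : BranchHint::kNone;
    }

    int main() {
      const int num_cases = 3;
      for (int i = 0; i < num_cases; i++) {
        std::printf("case %d: %s\n", i,
                    HintForCase(i, num_cases) == BranchHint::kTrue ? "kTrue"
                                                                   : "kNone");
      }
    }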
@@ -371,7 +371,8 @@ class WasmGraphBuilder {
   void CompareToInternalFunctionAtIndex(Node* func_ref, uint32_t function_index,
                                         Node** success_control,
-                                        Node** failure_control);
+                                        Node** failure_control,
+                                        bool is_last_case);
   void BrOnNull(Node* ref_object, Node** non_null_node, Node** null_node);
......
@@ -94,11 +94,12 @@ Reduction WasmInliner::ReduceCall(Node* call) {
 }
 
 bool SmallEnoughToInline(size_t current_graph_size, uint32_t candidate_size) {
-  if (WasmInliner::graph_size_allows_inlining(current_graph_size)) {
+  if (WasmInliner::graph_size_allows_inlining(current_graph_size +
+                                              candidate_size)) {
     return true;
   }
   // For truly tiny functions, let's be a bit more generous.
-  return candidate_size < 10 &&
+  return candidate_size <= 12 &&
          WasmInliner::graph_size_allows_inlining(current_graph_size - 100);
 }
......
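Worked through with the new default budget of 9000 nodes (from --wasm-inlining-budget, defined below): a 15-node candidate into an 8990-node graph fails the first check (9005 >= 9000) and is too big for the tiny-function bonus, while a 10-node candidate into the same graph is still inlined via the bonus (10 <= 12 and 8890 < 9000). A self-contained restatement with the budget inlined as a constant:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    constexpr size_t kBudget = 9000;  // default of --wasm-inlining-budget

    bool SmallEnoughToInline(size_t current_graph_size, uint32_t candidate_size) {
      // Normal rule: the combined graph after inlining must stay under budget.
      if (current_graph_size + candidate_size < kBudget) return true;
      // Tiny-function bonus: candidates of at most 12 nodes get 100 extra
      // nodes of headroom (assumes current_graph_size >= 100, as in the
      // original; otherwise the subtraction would wrap).
      return candidate_size <= 12 && current_graph_size - 100 < kBudget;
    }

    int main() {
      assert(!SmallEnoughToInline(8990, 15));  // 9005 >= 9000, and 15 > 12
      assert(SmallEnoughToInline(8990, 10));   // bonus: 10 <= 12, 8890 < 9000
    }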
@@ -58,8 +58,8 @@ class WasmInliner final : public AdvancedReducer {
   Reduction Reduce(Node* node) final;
   void Finalize() final;
 
-  static bool graph_size_allows_inlining(size_t initial_graph_size) {
-    return initial_graph_size < 5000;
+  static bool graph_size_allows_inlining(size_t graph_size) {
+    return graph_size < FLAG_wasm_inlining_budget;
   }
 
  private:
......
@@ -1108,11 +1108,8 @@ DEFINE_BOOL(wasm_math_intrinsics, true,
 DEFINE_BOOL(
     wasm_inlining, false,
     "enable inlining of wasm functions into wasm functions (experimental)")
-DEFINE_SIZE_T(
-    wasm_inlining_budget_factor, 75000,
-    "maximum allowed size to inline a function is given by {n / caller size}")
-DEFINE_SIZE_T(wasm_inlining_max_size, 1000,
-              "maximum size of a function that can be inlined, in TF nodes")
+DEFINE_SIZE_T(wasm_inlining_budget, 9000,
+              "maximum graph size (in TF nodes) that allows inlining more")
 DEFINE_BOOL(wasm_speculative_inlining, false,
             "enable speculative inlining of call_ref targets (experimental)")
 DEFINE_BOOL(trace_wasm_inlining, false, "trace wasm inlining")
......
@@ -186,6 +186,9 @@ constexpr uint32_t kGenericWrapperBudget = 1000;
 // gives up some module size for faster access to the supertypes.
 constexpr uint32_t kMinimumSupertypeArraySize = 3;
 
+// Maximum number of call targets tracked per call.
+constexpr int kMaxPolymorphism = 4;
+
 #if V8_TARGET_ARCH_X64
 constexpr int32_t kOSRTargetOffset = 5 * kSystemPointerSize;
 #endif
......
@@ -399,9 +399,79 @@ struct V8_EXPORT_PRIVATE WasmDebugSymbols {
   WireBytesRef external_url;
 };
 
-struct CallSiteFeedback {
-  int function_index;
-  int absolute_call_frequency;
+class CallSiteFeedback {
+ public:
+  struct PolymorphicCase {
+    int function_index;
+    int absolute_call_frequency;
+  };
+
+  // Regular constructors: uninitialized/unknown, monomorphic, or polymorphic.
+  CallSiteFeedback() : index_or_count_(-1), frequency_or_ool_(0) {}
+  CallSiteFeedback(int function_index, int call_count)
+      : index_or_count_(function_index), frequency_or_ool_(call_count) {}
+  CallSiteFeedback(PolymorphicCase* polymorphic_cases, int num_cases)
+      : index_or_count_(-num_cases),
+        frequency_or_ool_(reinterpret_cast<intptr_t>(polymorphic_cases)) {}
+
+  // Copying and assignment: prefer moving, as it's cheaper.
+  // The code below makes sure external polymorphic storage is copied and/or
+  // freed as appropriate.
+  CallSiteFeedback(const CallSiteFeedback& other) V8_NOEXCEPT { *this = other; }
+  CallSiteFeedback(CallSiteFeedback&& other) V8_NOEXCEPT {
+    *this = std::move(other);
+  }
+  CallSiteFeedback& operator=(const CallSiteFeedback& other) V8_NOEXCEPT {
+    index_or_count_ = other.index_or_count_;
+    if (other.is_polymorphic()) {
+      int num_cases = other.num_cases();
+      PolymorphicCase* polymorphic = new PolymorphicCase[num_cases];
+      for (int i = 0; i < num_cases; i++) {
+        polymorphic[i].function_index = other.function_index(i);
+        polymorphic[i].absolute_call_frequency = other.call_count(i);
+      }
+      frequency_or_ool_ = reinterpret_cast<intptr_t>(polymorphic);
+    } else {
+      frequency_or_ool_ = other.frequency_or_ool_;
+    }
+    return *this;
+  }
+  CallSiteFeedback& operator=(CallSiteFeedback&& other) V8_NOEXCEPT {
+    if (this != &other) {
+      index_or_count_ = other.index_or_count_;
+      frequency_or_ool_ = other.frequency_or_ool_;
+      other.frequency_or_ool_ = 0;
+    }
+    return *this;
+  }
+
+  ~CallSiteFeedback() {
+    if (is_polymorphic()) delete[] polymorphic_storage();
+  }
+
+  int num_cases() const {
+    if (is_monomorphic()) return 1;
+    if (is_invalid()) return 0;
+    return -index_or_count_;
+  }
+  int function_index(int i) const {
+    DCHECK(!is_invalid());
+    if (is_monomorphic()) return index_or_count_;
+    return polymorphic_storage()[i].function_index;
+  }
+  int call_count(int i) const {
+    if (index_or_count_ >= 0) return static_cast<int>(frequency_or_ool_);
+    return polymorphic_storage()[i].absolute_call_frequency;
+  }
+
+ private:
+  bool is_monomorphic() const { return index_or_count_ >= 0; }
+  bool is_polymorphic() const { return index_or_count_ <= -2; }
+  bool is_invalid() const { return index_or_count_ == -1; }
+  const PolymorphicCase* polymorphic_storage() const {
+    return reinterpret_cast<PolymorphicCase*>(frequency_or_ool_);
+  }
+
+  int index_or_count_;
+  intptr_t frequency_or_ool_;
 };
 
 struct FunctionTypeFeedback {
   std::vector<CallSiteFeedback> feedback_vector;
......
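Hypothetical usage of the class above (illustrative, assuming the class and a no-op DCHECK are available), showing the tagged encoding: index_or_count_ >= 0 stores a monomorphic (index, count) pair inline, -1 marks unknown feedback, and values <= -2 store the negated case count alongside an owned out-of-line array.

    #include <cassert>

    void Example() {
      // Monomorphic: both fields stored inline, no allocation.
      CallSiteFeedback mono(/*function_index=*/3, /*call_count=*/120);
      assert(mono.num_cases() == 1);
      assert(mono.function_index(0) == 3 && mono.call_count(0) == 120);

      // Polymorphic: the object takes ownership of the case array and
      // delete[]s it in its destructor.
      auto* cases = new CallSiteFeedback::PolymorphicCase[2]{
          {/*function_index=*/3, /*absolute_call_frequency=*/80},
          {/*function_index=*/7, /*absolute_call_frequency=*/40}};
      CallSiteFeedback poly(cases, 2);
      assert(poly.num_cases() == 2);
      assert(poly.function_index(1) == 7 && poly.call_count(1) == 40);
    }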