Commit 9ca0bfef authored by Jakob Kummerow, committed by V8 LUCI CQ

[wasm] Count direct calls

This adds feedback collection to count the number of executions of
call_direct instructions in Liftoff code. The purpose is to enable
better inlining decisions in Turbofan, which benefit from having
call count information for all kinds of calls.
The new feature is gated on --wasm-speculative-inlining. While
direct calls don't need to speculate about their target, the whole
feedback collection infrastructure depends on that flag.

Bug: v8:12166
Change-Id: Ie24a988fcea631f370188dc21e60a5fac923dd3d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3571807
Reviewed-by: Manos Koukoutos <manoskouk@chromium.org>
Commit-Queue: Jakob Kummerow <jkummerow@chromium.org>
Cr-Commit-Position: refs/heads/main@{#79846}
parent 0a1bf43d
......@@ -491,8 +491,8 @@ macro GetTargetAndInstance(funcref: WasmInternalFunction): TargetAndInstance {
// Vector format:
// Two slots per call_ref instruction. These slots' values can be:
// - uninitialized: (undefined, <unused>). Note: we use {undefined} as the
// sentinel as an optimization, as it's the default value for FixedArrays.
// - uninitialized: (0, <unused>). Note: we use {0} as the sentinel because
// it also works as default for vector slots used as counts.
// - monomorphic: (funcref, count (smi)). The second slot is a counter for how
// often the funcref in the first slot has been seen.
// - polymorphic: (fixed_array, <unused>). In this case, the array
......@@ -526,7 +526,8 @@ builtin CallRefIC(
// All other cases are some sort of miss and must compute the target/
// instance. They all fall through to returning the computed data.
const result = GetTargetAndInstance(funcref);
if (TaggedEqual(value, Undefined)) {
if (TaggedEqual(value, SmiConstant(0))) {
// Was uninitialized.
vector.objects[index] = funcref;
vector.objects[index + 1] = SmiConstant(1);
} else if (Is<FixedArray>(value)) {
......
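For illustration, here is a small self-contained C++ model of the slot states described in the vector-format comment above; the types and helpers are invented for this sketch and are not V8 code. Note that call_direct sites, added by this change, keep a plain Smi count in the first slot of their pair, which is why 0 doubles as the uninitialized sentinel and as a valid counter value.

#include <utility>
#include <variant>
#include <vector>

// Invented stand-ins; real feedback vectors are FixedArrays of tagged values.
struct Funcref { int function_index; };
using PolymorphicList = std::vector<std::pair<Funcref, int>>;

// One call site owns two consecutive slots; the first slot determines the state.
struct FeedbackEntry {
  std::variant<int, Funcref, PolymorphicList> first;  // int models the Smi case
  int second = 0;  // call count for the monomorphic call_ref case
};

enum class SlotState { kUninitializedOrDirectCount, kMonomorphic, kPolymorphic };

SlotState Classify(const FeedbackEntry& e) {
  if (std::holds_alternative<Funcref>(e.first)) return SlotState::kMonomorphic;
  if (std::holds_alternative<PolymorphicList>(e.first)) return SlotState::kPolymorphic;
  // A Smi: 0 means "uninitialized" for call_ref; for call_direct it is the count.
  return SlotState::kUninitializedOrDirectCount;
}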
......@@ -1041,13 +1041,13 @@ void TurboAssembler::InitializeRootRegister() {
#endif
}
void MacroAssembler::SmiTag(Register dst, Register src) {
void TurboAssembler::SmiTag(Register dst, Register src) {
DCHECK(dst.Is64Bits() && src.Is64Bits());
DCHECK(SmiValuesAre32Bits() || SmiValuesAre31Bits());
Lsl(dst, src, kSmiShift);
}
void MacroAssembler::SmiTag(Register smi) { SmiTag(smi, smi); }
void TurboAssembler::SmiTag(Register smi) { SmiTag(smi, smi); }
void TurboAssembler::SmiUntag(Register dst, Register src) {
DCHECK(dst.Is64Bits() && src.Is64Bits());
......
......@@ -557,6 +557,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
inline void SmiUntag(Register dst, const MemOperand& src);
inline void SmiUntag(Register smi);
inline void SmiTag(Register dst, Register src);
inline void SmiTag(Register smi);
inline void SmiToInt32(Register smi);
// Calls Abort(msg) if the condition cond is not satisfied.
......@@ -1839,9 +1842,6 @@ class V8_EXPORT_PRIVATE MacroAssembler : public TurboAssembler {
// ---- SMI and Number Utilities ----
inline void SmiTag(Register dst, Register src);
inline void SmiTag(Register smi);
inline void JumpIfNotSmi(Register value, Label* not_smi_label);
// Abort execution if argument is a smi, enabled via --debug-code.
......
......@@ -72,7 +72,6 @@ Reduction WasmInliner::ReduceCall(Node* call) {
TRACE("adding to inlining candidates!]\n")
bool is_speculative_call_ref = false;
int call_count = 0;
if (FLAG_wasm_speculative_inlining) {
base::MutexGuard guard(&module()->type_feedback.mutex);
......@@ -83,11 +82,15 @@ Reduction WasmInliner::ReduceCall(Node* call) {
wasm::WasmCodePosition position =
source_positions_->GetSourcePosition(call).ScriptOffset();
DCHECK_NE(position, wasm::kNoCodePosition);
// It could be that we haven't processed the feedback yet, because e.g.:
// - Liftoff bailed out for this function
// - the call is in an inlined function that isn't hot yet
auto index_in_feedback_vector = feedback.positions.find(position);
if (index_in_feedback_vector != feedback.positions.end()) {
is_speculative_call_ref = true;
call_count = feedback.feedback_vector[index_in_feedback_vector->second]
.absolute_call_frequency;
if (index_in_feedback_vector != feedback.positions.end() &&
feedback.feedback_vector.size() > 0) {
const wasm::CallSiteFeedback& call_site_feedback =
feedback.feedback_vector[index_in_feedback_vector->second];
call_count = call_site_feedback.absolute_call_frequency;
}
}
}
......@@ -96,8 +99,8 @@ Reduction WasmInliner::ReduceCall(Node* call) {
const wasm::WasmFunction* inlinee = &module()->functions[inlinee_index];
base::Vector<const byte> function_bytes = wire_bytes_->GetCode(inlinee->code);
CandidateInfo candidate{call, inlinee_index, is_speculative_call_ref,
call_count, function_bytes.length()};
CandidateInfo candidate{call, inlinee_index, call_count,
function_bytes.length()};
inlining_candidates_.push(candidate);
return NoChange();
......@@ -110,10 +113,9 @@ void WasmInliner::Finalize() {
inlining_candidates_.pop();
Node* call = candidate.node;
TRACE(
" [function %d: considering candidate {@%d, index=%d, type=%s, "
"count=%d, size=%d}... ",
" [function %d: considering candidate {@%d, index=%d, count=%d, "
"size=%d}... ",
function_index_, call->id(), candidate.inlinee_index,
candidate.is_speculative_call_ref ? "ref" : "direct",
candidate.call_count, candidate.wire_byte_size);
if (call->IsDead()) {
TRACE("dead node]\n");
......
......@@ -66,7 +66,6 @@ class WasmInliner final : public AdvancedReducer {
struct CandidateInfo {
Node* node;
uint32_t inlinee_index;
bool is_speculative_call_ref;
int call_count;
int wire_byte_size;
};
......@@ -74,12 +73,6 @@ class WasmInliner final : public AdvancedReducer {
struct LexicographicOrdering {
// Returns if c1 should be prioritized less than c2.
bool operator()(CandidateInfo& c1, CandidateInfo& c2) {
if (c1.is_speculative_call_ref && !c2.is_speculative_call_ref) {
return false;
}
if (c2.is_speculative_call_ref && !c1.is_speculative_call_ref) {
return true;
}
if (c1.call_count > c2.call_count) return false;
if (c2.call_count > c1.call_count) return true;
return c1.wire_byte_size > c2.wire_byte_size;
......
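With the is_speculative_call_ref tie-breaker gone, candidates are ordered purely by call count and then by wire byte size. A short stand-alone sketch of how this comparator behaves in the candidate priority queue (reduced to the fields that matter, not the actual WasmInliner types):

#include <cstdint>
#include <queue>
#include <vector>

// Reduced copy of the candidate data, for demonstrating the ordering only.
struct Candidate {
  uint32_t inlinee_index;
  int call_count;
  int wire_byte_size;
};

// Same priority rule as LexicographicOrdering above: more frequently called
// candidates first, smaller bodies first on ties.
struct Ordering {
  bool operator()(const Candidate& c1, const Candidate& c2) const {
    if (c1.call_count > c2.call_count) return false;
    if (c2.call_count > c1.call_count) return true;
    return c1.wire_byte_size > c2.wire_byte_size;
  }
};

int main() {
  std::priority_queue<Candidate, std::vector<Candidate>, Ordering> q;
  q.push({1, 10, 200});
  q.push({2, 50, 400});
  q.push({3, 50, 100});
  // Pops candidate 3 (count 50, smaller body), then 2, then 1.
  return static_cast<int>(q.top().inlinee_index);  // 3
}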
......@@ -143,6 +143,24 @@ Handle<FixedArray> FactoryBase<Impl>::NewFixedArrayWithFiller(
return handle(array, isolate());
}
template <typename Impl>
Handle<FixedArray> FactoryBase<Impl>::NewFixedArrayWithZeroes(
int length, AllocationType allocation) {
DCHECK_LE(0, length);
if (length == 0) return impl()->empty_fixed_array();
if (length > FixedArray::kMaxLength) {
FATAL("Invalid FixedArray size %d", length);
}
HeapObject result = AllocateRawFixedArray(length, allocation);
DisallowGarbageCollection no_gc;
result.set_map_after_allocation(read_only_roots().fixed_array_map(),
SKIP_WRITE_BARRIER);
FixedArray array = FixedArray::cast(result);
array.set_length(length);
MemsetTagged(array.data_start(), Smi::zero(), length);
return handle(array, isolate());
}
template <typename Impl>
Handle<FixedArrayBase> FactoryBase<Impl>::NewFixedDoubleArray(
int length, AllocationType allocation) {
......
......@@ -112,6 +112,10 @@ class EXPORT_TEMPLATE_DECLARE(V8_EXPORT_PRIVATE) FactoryBase
Handle<FixedArray> NewFixedArrayWithHoles(
int length, AllocationType allocation = AllocationType::kYoung);
// Allocate a new fixed array with Smi(0) entries.
Handle<FixedArray> NewFixedArrayWithZeroes(
int length, AllocationType allocation = AllocationType::kYoung);
// Allocate a new uninitialized fixed double array.
// The function returns a pre-allocated empty fixed array for length = 0,
// so the return type must be the general fixed array class.
......
......@@ -1863,6 +1863,14 @@ bool LiftoffAssembler::emit_i64_popcnt(LiftoffRegister dst,
return true;
}
void LiftoffAssembler::IncrementSmi(LiftoffRegister dst, int offset) {
UseScratchRegisterScope temps(this);
Register scratch = temps.Acquire();
ldr(scratch, MemOperand(dst.gp(), offset));
add(scratch, scratch, Operand(Smi::FromInt(1)));
str(scratch, MemOperand(dst.gp(), offset));
}
bool LiftoffAssembler::emit_f32_ceil(DoubleRegister dst, DoubleRegister src) {
if (CpuFeatures::IsSupported(ARMv8)) {
CpuFeatureScope scope(this, ARMv8);
......
......@@ -1189,6 +1189,23 @@ bool LiftoffAssembler::emit_i64_popcnt(LiftoffRegister dst,
return true;
}
void LiftoffAssembler::IncrementSmi(LiftoffRegister dst, int offset) {
UseScratchRegisterScope temps(this);
if (COMPRESS_POINTERS_BOOL) {
DCHECK(SmiValuesAre31Bits());
Register scratch = temps.AcquireW();
Ldr(scratch, MemOperand(dst.gp(), offset));
Add(scratch, scratch, Operand(Smi::FromInt(1)));
Str(scratch, MemOperand(dst.gp(), offset));
} else {
Register scratch = temps.AcquireX();
SmiUntag(scratch, MemOperand(dst.gp(), offset));
Add(scratch, scratch, Operand(1));
SmiTag(scratch);
Str(scratch, MemOperand(dst.gp(), offset));
}
}
void LiftoffAssembler::emit_i32_divs(Register dst, Register lhs, Register rhs,
Label* trap_div_by_zero,
Label* trap_div_unrepresentable) {
......
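All IncrementSmi variants rely on Smi tag arithmetic: with tagged(v) = v << shift, adding the tagged constant Smi::FromInt(1) to a tagged slot yields the tagged successor, which is what the in-place add on arm (and on arm64 with pointer compression) exploits; the uncompressed arm64 path reaches the same result by untagging, adding 1, and retagging. A minimal sketch of that identity, assuming the 31-bit-Smi configuration with a one-bit shift:

#include <cassert>
#include <cstdint>

// Toy Smi encoding for the sketch: value shifted left by one, tag bit 0.
// This is not V8's Smi class.
constexpr int32_t kShift = 1;
constexpr int32_t Tag(int32_t value) { return value << kShift; }
constexpr int32_t Untag(int32_t tagged) { return tagged >> kShift; }

int main() {
  int32_t slot = Tag(41);   // what a feedback slot holds
  slot += Tag(1);           // IncrementSmi: add a tagged 1 in place
  assert(Untag(slot) == 42);
  return 0;
}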
......@@ -1869,6 +1869,10 @@ bool LiftoffAssembler::emit_i64_popcnt(LiftoffRegister dst,
return true;
}
void LiftoffAssembler::IncrementSmi(LiftoffRegister dst, int offset) {
add(Operand(dst.gp(), offset), Immediate(Smi::FromInt(1)));
}
void LiftoffAssembler::emit_f32_add(DoubleRegister dst, DoubleRegister lhs,
DoubleRegister rhs) {
if (CpuFeatures::IsSupported(AVX)) {
......
......@@ -738,6 +738,7 @@ class LiftoffAssembler : public TurboAssembler {
emit_i32_sari(dst.gp(), dst.gp(), kSmiTagSize);
}
}
inline void IncrementSmi(LiftoffRegister dst, int offset);
inline void Load(LiftoffRegister dst, Register src_addr, Register offset_reg,
uintptr_t offset_imm, LoadType type, LiftoffRegList pinned,
uint32_t* protected_load_pc = nullptr,
......
......@@ -546,7 +546,7 @@ class LiftoffCompiler {
int GetFeedbackVectorSlots() const {
// The number of instructions is capped by max function size.
STATIC_ASSERT(kV8MaxWasmFunctionSize < std::numeric_limits<int>::max());
return static_cast<int>(num_call_ref_instructions_) * 2;
return static_cast<int>(num_call_instructions_) * 2;
}
void unsupported(FullDecoder* decoder, LiftoffBailoutReason reason,
......@@ -5922,6 +5922,17 @@ class LiftoffCompiler {
call_descriptor =
GetLoweredCallDescriptor(compilation_zone_, call_descriptor);
// One slot would be enough for call_direct, but would make index
// computations much more complicated.
uintptr_t vector_slot = num_call_instructions_ * 2;
if (FLAG_wasm_speculative_inlining) {
base::MutexGuard mutex_guard(&decoder->module_->type_feedback.mutex);
decoder->module_->type_feedback.feedback_for_function[func_index_]
.positions[decoder->position()] =
static_cast<int>(num_call_instructions_);
num_call_instructions_++;
}
if (imm.index < env_->module->num_imported_functions) {
// A direct call to an imported function.
LiftoffRegList pinned;
......@@ -5957,6 +5968,15 @@ class LiftoffCompiler {
FinishCall(decoder, &sig, call_descriptor);
}
} else {
// Inlining direct calls isn't speculative, but existence of the
// feedback vector currently depends on this flag.
if (FLAG_wasm_speculative_inlining) {
LiftoffRegister vector = __ GetUnusedRegister(kGpReg, {});
__ Fill(vector, liftoff::kFeedbackVectorOffset, kPointerKind);
__ IncrementSmi(vector,
wasm::ObjectAccess::ElementOffsetInTaggedFixedArray(
static_cast<int>(vector_slot)));
}
// A direct call within this module just gets the current instance.
__ PrepareCall(&sig, call_descriptor);
// Just encode the function index. This will be patched at instantiation.
......@@ -6145,14 +6165,14 @@ class LiftoffCompiler {
__ Fill(vector, liftoff::kFeedbackVectorOffset, kPointerKind);
LiftoffAssembler::VarState vector_var(kPointerKind, vector, 0);
LiftoffRegister index = pinned.set(__ GetUnusedRegister(kGpReg, pinned));
uintptr_t vector_slot = num_call_ref_instructions_ * 2;
uintptr_t vector_slot = num_call_instructions_ * 2;
{
base::MutexGuard mutex_guard(&decoder->module_->type_feedback.mutex);
decoder->module_->type_feedback.feedback_for_function[func_index_]
.positions[decoder->position()] =
static_cast<int>(num_call_ref_instructions_);
static_cast<int>(num_call_instructions_);
}
num_call_ref_instructions_++;
num_call_instructions_++;
__ LoadConstant(index, WasmValue::ForUintPtr(vector_slot));
LiftoffAssembler::VarState index_var(kIntPtrKind, index, 0);
......@@ -6528,9 +6548,10 @@ class LiftoffCompiler {
// Current number of exception refs on the stack.
int num_exceptions_ = 0;
// Number of {call_ref} instructions encountered. While compiling, also
// index of the next {call_ref}. Used for indexing type feedback.
uintptr_t num_call_ref_instructions_ = 0;
// Number of feedback-collecting call instructions encountered. While
// compiling, also index of the next such instruction. Used for indexing type
// feedback.
uintptr_t num_call_instructions_ = 0;
int32_t* max_steps_;
int32_t* nondeterminism_;
......
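As the vector_slot computations above show, tracked call instruction number k (in Liftoff's encounter order, covering both call_ref and call_direct) owns slots 2k and 2k+1 of the feedback vector; a call_direct site only uses the first slot of its pair, as the "one slot would be enough" comment notes. A small sketch of that indexing, with invented helper names:

#include <cstdint>

// First slot of the pair: funcref for call_ref, plain counter for call_direct.
constexpr uintptr_t FirstSlot(uintptr_t k) { return k * 2; }
// Second slot of the pair: the count for a monomorphic call_ref.
constexpr uintptr_t SecondSlot(uintptr_t k) { return k * 2 + 1; }
// Matches GetFeedbackVectorSlots(): n tracked calls need 2 * n slots.
constexpr int VectorSlots(int n) { return n * 2; }

static_assert(FirstSlot(3) == 6 && SecondSlot(3) == 7, "pairs are consecutive");
static_assert(VectorSlots(4) == 8, "two slots per tracked call");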
......@@ -1474,6 +1474,10 @@ bool LiftoffAssembler::emit_i64_popcnt(LiftoffRegister dst,
return true;
}
void LiftoffAssembler::IncrementSmi(LiftoffRegister dst, int offset) {
SmiAddConstant(Operand(dst.gp(), offset), Smi::FromInt(1));
}
void LiftoffAssembler::emit_u32_to_uintptr(Register dst, Register src) {
movl(dst, src);
}
......
......@@ -640,12 +640,20 @@ class WasmGraphBuildingInterface {
void CallDirect(FullDecoder* decoder,
const CallFunctionImmediate<validate>& imm,
const Value args[], Value returns[]) {
if (FLAG_wasm_speculative_inlining && type_feedback_.size() > 0) {
DCHECK_LT(feedback_instruction_index_, type_feedback_.size());
feedback_instruction_index_++;
}
DoCall(decoder, CallInfo::CallDirect(imm.index), imm.sig, args, returns);
}
void ReturnCall(FullDecoder* decoder,
const CallFunctionImmediate<validate>& imm,
const Value args[]) {
if (FLAG_wasm_speculative_inlining && type_feedback_.size() > 0) {
DCHECK_LT(feedback_instruction_index_, type_feedback_.size());
feedback_instruction_index_++;
}
DoReturnCall(decoder, CallInfo::CallDirect(imm.index), imm.sig, args);
}
......@@ -671,8 +679,6 @@ class WasmGraphBuildingInterface {
const FunctionSig* sig, uint32_t sig_index, const Value args[],
Value returns[]) {
int maybe_feedback = -1;
// TODO(jkummerow): The way we currently prepare type feedback means that
// we won't have any for inlined functions. Figure out how to change that.
if (FLAG_wasm_speculative_inlining && type_feedback_.size() > 0) {
DCHECK_LT(feedback_instruction_index_, type_feedback_.size());
maybe_feedback =
......
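The new increments in CallDirect and ReturnCall keep feedback_instruction_index_ advancing for every tracked call, so the graph builder stays aligned with the numbering Liftoff used when it recorded feedback; without them, a later call_ref would look up some other call site's entry. A reduced sketch of that invariant (names invented, not the WasmGraphBuildingInterface API):

#include <cstddef>
#include <vector>

struct ProcessedEntry { int value; };  // placeholder for per-call-site feedback

// The cursor advances once per tracked call instruction, whether the entry is
// consumed (call_ref) or merely skipped (call_direct), so indices stay in sync.
struct FeedbackCursor {
  const std::vector<ProcessedEntry>* feedback;
  std::size_t next = 0;

  ProcessedEntry ConsumeForCallRef() { return (*feedback)[next++]; }
  void SkipForCallDirect() { next++; }
};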
......@@ -1207,8 +1207,8 @@ bool CompileLazy(Isolate* isolate, Handle<WasmInstanceObject> instance,
// Allocate feedback vector if needed.
if (result.feedback_vector_slots > 0) {
DCHECK(FLAG_wasm_speculative_inlining);
Handle<FixedArray> vector =
isolate->factory()->NewFixedArray(result.feedback_vector_slots);
Handle<FixedArray> vector = isolate->factory()->NewFixedArrayWithZeroes(
result.feedback_vector_slots);
instance->feedback_vectors().set(
declared_function_index(module, func_index), *vector);
}
......@@ -1324,6 +1324,15 @@ std::vector<CallSiteFeedback> ProcessTypeFeedback(
PrintF("[Function #%d call_ref #%d: best frequency %f]\n", func_index,
i / 2, best_frequency);
}
} else if (value.IsSmi()) {
// Direct call, just collecting call count.
int count = Smi::cast(value).value();
if (FLAG_trace_wasm_speculative_inlining) {
PrintF("[Function #%d call_direct #%d: frequency %d]\n", func_index,
i / 2, count);
}
result[i / 2] = {-1, count};
continue;
}
// If we fall through to here, then this call isn't eligible for inlining.
// Possible reasons: uninitialized or megamorphic feedback; or monomorphic
......
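On the consumer side, a direct call appears in the processed feedback with a target index of -1 and its raw count (result[i / 2] = {-1, count} above). A hedged illustration of telling the two kinds apart; the struct is a stand-in rather than the actual wasm::CallSiteFeedback declaration, though absolute_call_frequency is the field the inliner reads:

// Stand-in mirroring the two values stored per processed call site.
struct CallSiteFeedbackSketch {
  int function_index;           // -1 marks a call_direct entry
  int absolute_call_frequency;  // execution count taken from the vector
};

inline bool IsDirectCall(const CallSiteFeedbackSketch& f) {
  return f.function_index == -1;
}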
......@@ -696,7 +696,8 @@ MaybeHandle<WasmInstanceObject> InstanceBuilder::Build() {
PrintF("[Function %d (declared %d): allocating %d feedback slots]\n",
func_index, i, slots);
}
Handle<FixedArray> feedback = isolate_->factory()->NewFixedArray(slots);
Handle<FixedArray> feedback =
isolate_->factory()->NewFixedArrayWithZeroes(slots);
vectors->set(i, *feedback);
}
}
......