Commit fa7c0ed2 authored by Frederik Gossen's avatar Frederik Gossen Committed by Commit Bot

[wasm-hints] Resolve Performance Problem

Locks for compilation state callbacks and for the native module are
again taken one after the other. As a consequence, publishing compiled
Wasm code again happens in parallel. Compile times are now comparable to
before lazy hints were enabled.

Bug: chromium:949050
Change-Id: I45c52254d046de080938bd131fd3ed8116660bef
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1552787
Commit-Queue: Frederik Gossen <frgossen@google.com>
Reviewed-by: 's avatarMichael Starzinger <mstarzinger@chromium.org>
Reviewed-by: 's avatarClemens Hammacher <clemensh@chromium.org>
Cr-Commit-Position: refs/heads/master@{#60646}
parent 7d714b61
......@@ -5948,7 +5948,7 @@ wasm::WasmCode* CompileWasmMathIntrinsic(wasm::WasmEngine* wasm_engine,
std::move(result.source_positions), wasm::WasmCode::kFunction,
wasm::ExecutionTier::kNone);
// TODO(titzer): add counters for math intrinsic code size / allocation
return native_module->PublishCode(std::move(wasm_code)).code;
return native_module->PublishCode(std::move(wasm_code));
}
wasm::WasmCode* CompileWasmImportCallWrapper(wasm::WasmEngine* wasm_engine,
......@@ -6012,7 +6012,7 @@ wasm::WasmCode* CompileWasmImportCallWrapper(wasm::WasmEngine* wasm_engine,
std::move(result.protected_instructions),
std::move(result.source_positions), wasm::WasmCode::kWasmToJsWrapper,
wasm::ExecutionTier::kNone);
return native_module->PublishCode(std::move(wasm_code)).code;
return native_module->PublishCode(std::move(wasm_code));
}
wasm::WasmCompilationResult CompileWasmInterpreterEntry(
......
......@@ -21,7 +21,6 @@ namespace wasm {
class NativeModule;
class WasmCode;
struct WasmCompilationResult;
class WasmError;
enum RuntimeExceptionSupport : bool {
......@@ -121,8 +120,8 @@ class CompilationState {
bool failed() const;
void FinishUnit(WasmCompilationResult);
void FinishUnits(Vector<WasmCompilationResult>);
void OnFinishedUnit(WasmCode*);
void OnFinishedUnits(Vector<WasmCode*>);
private:
friend class NativeModule;
......
......@@ -142,8 +142,8 @@ class CompilationStateImpl {
void AddTopTierCompilationUnit(std::unique_ptr<WasmCompilationUnit>);
std::unique_ptr<WasmCompilationUnit> GetNextCompilationUnit();
void FinishUnit(WasmCompilationResult);
void FinishUnits(Vector<WasmCompilationResult>);
void OnFinishedUnit(WasmCode*);
void OnFinishedUnits(Vector<WasmCode*>);
void ReportDetectedFeatures(const WasmFeatures& detected);
void OnBackgroundTaskStopped(const WasmFeatures& detected);
......@@ -225,6 +225,7 @@ class CompilationStateImpl {
int outstanding_baseline_functions_ = 0;
int outstanding_top_tier_functions_ = 0;
std::vector<ExecutionTier> highest_execution_tier_;
// End of fields protected by {callbacks_mutex_}.
//////////////////////////////////////////////////////////////////////////////
......@@ -276,12 +277,12 @@ void CompilationState::AddCallback(CompilationState::callback_t callback) {
bool CompilationState::failed() const { return Impl(this)->failed(); }
void CompilationState::FinishUnit(WasmCompilationResult result) {
Impl(this)->FinishUnit(std::move(result));
void CompilationState::OnFinishedUnit(WasmCode* code) {
Impl(this)->OnFinishedUnit(code);
}
void CompilationState::FinishUnits(Vector<WasmCompilationResult> results) {
Impl(this)->FinishUnits(results);
void CompilationState::OnFinishedUnits(Vector<WasmCode*> code_vector) {
Impl(this)->OnFinishedUnits(code_vector);
}
// static
......@@ -473,8 +474,7 @@ void CompileLazy(Isolate* isolate, NativeModule* native_module,
&env, compilation_state->GetWireBytesStorage(), isolate->counters(),
compilation_state->detected_features());
WasmCodeRefScope code_ref_scope;
WasmCodeUpdate update = native_module->AddCompiledCode(std::move(result));
WasmCode* code = update.code;
WasmCode* code = native_module->AddCompiledCode(std::move(result));
if (tiers.baseline_tier < tiers.top_tier) {
auto tiering_unit = base::make_unique<WasmCompilationUnit>(
......@@ -533,7 +533,9 @@ bool FetchAndExecuteCompilationUnit(CompilationEnv* env,
env, compilation_state->GetWireBytesStorage(), counters, detected);
if (result.succeeded()) {
compilation_state->FinishUnit(std::move(result));
WasmCodeRefScope code_ref_scope;
WasmCode* code = native_module->AddCompiledCode(std::move(result));
compilation_state->OnFinishedUnit(code);
} else {
compilation_state->SetError();
}
......@@ -791,8 +793,12 @@ class BackgroundCompileTask : public CancelableTask {
auto publish_results =
[&results_to_publish](BackgroundCompileScope* compile_scope) {
if (results_to_publish.empty()) return;
compile_scope->compilation_state()->FinishUnits(
VectorOf(results_to_publish));
WasmCodeRefScope code_ref_scope;
std::vector<WasmCode*> code_vector =
compile_scope->native_module()->AddCompiledCode(
VectorOf(results_to_publish));
compile_scope->compilation_state()->OnFinishedUnits(
VectorOf(code_vector));
results_to_publish.clear();
};
......@@ -1687,6 +1693,7 @@ void CompilationStateImpl::SetNumberOfFunctionsToCompile(
int num_functions_to_compile = num_functions - num_lazy_functions;
outstanding_baseline_functions_ = num_functions_to_compile;
outstanding_top_tier_functions_ = num_functions_to_compile;
highest_execution_tier_.assign(num_functions, ExecutionTier::kNone);
}
void CompilationStateImpl::AddCallback(CompilationState::callback_t callback) {
......@@ -1754,65 +1761,64 @@ CompilationStateImpl::GetNextCompilationUnit() {
return unit;
}
void CompilationStateImpl::FinishUnit(WasmCompilationResult result) {
FinishUnits({&result, 1});
void CompilationStateImpl::OnFinishedUnit(WasmCode* code) {
OnFinishedUnits({&code, 1});
}
void CompilationStateImpl::FinishUnits(
Vector<WasmCompilationResult> compilation_results) {
void CompilationStateImpl::OnFinishedUnits(Vector<WasmCode*> code_vector) {
base::MutexGuard guard(&callbacks_mutex_);
// Assume an order of execution tiers that represents the quality of their
// generated code.
static_assert(ExecutionTier::kInterpreter < ExecutionTier::kLiftoff &&
static_assert(ExecutionTier::kNone < ExecutionTier::kInterpreter &&
ExecutionTier::kInterpreter < ExecutionTier::kLiftoff &&
ExecutionTier::kLiftoff < ExecutionTier::kTurbofan,
"Assume an order on execution tiers");
auto module = native_module_->module();
auto enabled_features = native_module_->enabled_features();
WasmCodeRefScope code_ref_scope;
std::vector<WasmCodeUpdate> code_update_vector =
native_module_->AddCompiledCode(compilation_results);
for (WasmCodeUpdate& code_update : code_update_vector) {
DCHECK_NOT_NULL(code_update.code);
DCHECK(code_update.tier.has_value());
native_module_->engine()->LogCode(code_update.code);
for (WasmCode* code : code_vector) {
DCHECK_NOT_NULL(code);
DCHECK_NE(code->tier(), ExecutionTier::kNone);
native_module_->engine()->LogCode(code);
// Skip lazily compiled code as we do not consider this for the completion
// of baseline respectively top tier compilation.
int func_index = code->index();
if (IsLazyCompilation(module, native_module_, enabled_features,
func_index)) {
continue;
}
uint32_t func_index = code_update.code->index();
// Determine whether we are reaching baseline or top tier with the given
// code.
uint32_t slot_index = code->index() - module->num_imported_functions;
ExecutionTierPair requested_tiers = GetRequestedExecutionTiers(
module, compile_mode(), enabled_features, func_index);
// Reconstruct state before code update.
bool had_reached_baseline = code_update.prior_tier.has_value();
bool had_reached_top_tier =
code_update.prior_tier.has_value() &&
code_update.prior_tier.value() >= requested_tiers.top_tier;
DCHECK_IMPLIES(had_reached_baseline, code_update.prior_tier.has_value() &&
code_update.prior_tier.value() >=
requested_tiers.baseline_tier);
// Conclude whether we are reaching baseline or top tier.
DCHECK_EQ(highest_execution_tier_.size(), module->num_declared_functions);
ExecutionTier prior_tier = highest_execution_tier_[slot_index];
bool had_reached_baseline = prior_tier >= requested_tiers.baseline_tier;
bool had_reached_top_tier = prior_tier >= requested_tiers.top_tier;
DCHECK_IMPLIES(had_reached_baseline, prior_tier > ExecutionTier::kNone);
bool reaches_baseline = !had_reached_baseline;
bool reaches_top_tier =
!had_reached_top_tier &&
code_update.tier.value() >= requested_tiers.top_tier;
!had_reached_top_tier && code->tier() >= requested_tiers.top_tier;
DCHECK_IMPLIES(reaches_baseline,
code_update.tier.value() >= requested_tiers.baseline_tier);
code->tier() >= requested_tiers.baseline_tier);
DCHECK_IMPLIES(reaches_top_tier, had_reached_baseline || reaches_baseline);
// Remember state before update.
// Remember compilation state before update.
bool had_completed_baseline_compilation =
outstanding_baseline_functions_ == 0;
bool had_completed_top_tier_compilation =
outstanding_top_tier_functions_ == 0;
// Update state.
if (!IsLazyCompilation(module, native_module_, enabled_features,
func_index)) {
if (reaches_baseline) outstanding_baseline_functions_--;
if (reaches_top_tier) outstanding_top_tier_functions_--;
// Update compilation state.
if (code->tier() > prior_tier) {
highest_execution_tier_[slot_index] = code->tier();
}
if (reaches_baseline) outstanding_baseline_functions_--;
if (reaches_top_tier) outstanding_top_tier_functions_--;
DCHECK_LE(0, outstanding_baseline_functions_);
DCHECK_LE(outstanding_baseline_functions_, outstanding_top_tier_functions_);
......@@ -1827,12 +1833,14 @@ void CompilationStateImpl::FinishUnits(
// Trigger callbacks.
if (completes_baseline_compilation) {
for (auto& callback : callbacks_)
for (auto& callback : callbacks_) {
callback(CompilationEvent::kFinishedBaselineCompilation);
}
}
if (completes_top_tier_compilation) {
for (auto& callback : callbacks_)
for (auto& callback : callbacks_) {
callback(CompilationEvent::kFinishedTopTierCompilation);
}
// Clear the callbacks because no more events will be delivered.
callbacks_.clear();
}
......
......@@ -624,7 +624,7 @@ WasmCode* NativeModule::AddAndPublishAnonymousCode(Handle<Code> code,
new_code->MaybePrint(name);
new_code->Validate();
return PublishCode(std::move(new_code)).code;
return PublishCode(std::move(new_code));
}
std::unique_ptr<WasmCode> NativeModule::AddCode(
......@@ -710,7 +710,7 @@ std::unique_ptr<WasmCode> NativeModule::AddCodeWithCodeSpace(
return code;
}
WasmCodeUpdate NativeModule::PublishCode(std::unique_ptr<WasmCode> code) {
WasmCode* NativeModule::PublishCode(std::unique_ptr<WasmCode> code) {
base::MutexGuard lock(&allocation_mutex_);
return PublishCodeLocked(std::move(code));
}
......@@ -729,10 +729,9 @@ WasmCode::Kind GetCodeKindForExecutionTier(ExecutionTier tier) {
}
} // namespace
WasmCodeUpdate NativeModule::PublishCodeLocked(std::unique_ptr<WasmCode> code) {
WasmCode* NativeModule::PublishCodeLocked(std::unique_ptr<WasmCode> code) {
// The caller must hold the {allocation_mutex_}, thus we fail to lock it here.
DCHECK(!allocation_mutex_.TryLock());
WasmCodeUpdate update;
if (!code->IsAnonymous()) {
DCHECK_LT(code->index(), num_functions());
......@@ -740,20 +739,18 @@ WasmCodeUpdate NativeModule::PublishCodeLocked(std::unique_ptr<WasmCode> code) {
// Assume an order of execution tiers that represents the quality of their
// generated code.
static_assert(ExecutionTier::kInterpreter < ExecutionTier::kLiftoff &&
static_assert(ExecutionTier::kNone < ExecutionTier::kInterpreter &&
ExecutionTier::kInterpreter < ExecutionTier::kLiftoff &&
ExecutionTier::kLiftoff < ExecutionTier::kTurbofan,
"Assume an order on execution tiers");
// Update code table but avoid to fall back to less optimized code. We use
// the new code if it was compiled with a higher tier and also if we cannot
// determine the tier.
// the new code if it was compiled with a higher tier.
uint32_t slot_idx = code->index() - module_->num_imported_functions;
WasmCode* prior_code = code_table_[slot_idx];
if (prior_code) update.prior_tier = prior_code->tier();
update.tier = code->tier();
bool update_code_table = !update.prior_tier.has_value() ||
!update.tier.has_value() ||
update.prior_tier.value() < update.tier.value();
ExecutionTier prior_tier =
prior_code == nullptr ? ExecutionTier::kNone : prior_code->tier();
bool update_code_table = prior_tier < code->tier();
if (update_code_table) {
code_table_[slot_idx] = code.get();
}
......@@ -776,9 +773,9 @@ WasmCodeUpdate NativeModule::PublishCodeLocked(std::unique_ptr<WasmCode> code) {
}
}
WasmCodeRefScope::AddRef(code.get());
update.code = code.get();
WasmCode* result = code.get();
owned_code_.emplace_back(std::move(code));
return update;
return result;
}
WasmCode* NativeModule::AddDeserializedCode(
......@@ -805,7 +802,7 @@ WasmCode* NativeModule::AddDeserializedCode(
// Note: we do not flush the i-cache here, since the code needs to be
// relocated anyway. The caller is responsible for flushing the i-cache later.
return PublishCode(std::move(code)).code;
return PublishCode(std::move(code));
}
std::vector<WasmCode*> NativeModule::SnapshotCodeTable() const {
......@@ -837,7 +834,7 @@ WasmCode* NativeModule::CreateEmptyJumpTable(uint32_t jump_table_size) {
OwnedVector<const uint8_t>{}, // source_pos
WasmCode::kJumpTable, // kind
ExecutionTier::kNone}}; // tier
return PublishCode(std::move(code)).code;
return PublishCode(std::move(code));
}
Vector<byte> NativeModule::AllocateForCode(size_t size) {
......@@ -1296,11 +1293,11 @@ void NativeModule::SampleCodeSize(
histogram->AddSample(code_size_mb);
}
WasmCodeUpdate NativeModule::AddCompiledCode(WasmCompilationResult result) {
WasmCode* NativeModule::AddCompiledCode(WasmCompilationResult result) {
return AddCompiledCode({&result, 1})[0];
}
std::vector<WasmCodeUpdate> NativeModule::AddCompiledCode(
std::vector<WasmCode*> NativeModule::AddCompiledCode(
Vector<WasmCompilationResult> results) {
DCHECK(!results.empty());
// First, allocate code space for all the results.
......@@ -1331,15 +1328,15 @@ std::vector<WasmCodeUpdate> NativeModule::AddCompiledCode(
// Under the {allocation_mutex_}, publish the code. The published code is put
// into the top-most surrounding {WasmCodeRefScope} by {PublishCodeLocked}.
std::vector<WasmCodeUpdate> code_updates;
code_updates.reserve(results.size());
std::vector<WasmCode*> code_vector;
code_vector.reserve(results.size());
{
base::MutexGuard lock(&allocation_mutex_);
for (auto& result : generated_code)
code_updates.push_back(PublishCodeLocked(std::move(result)));
code_vector.push_back(PublishCodeLocked(std::move(result)));
}
return code_updates;
return code_vector;
}
void NativeModule::FreeCode(Vector<WasmCode* const> codes) {
......
......@@ -74,12 +74,6 @@ class V8_EXPORT_PRIVATE DisjointAllocationPool final {
DISALLOW_COPY_AND_ASSIGN(DisjointAllocationPool);
};
struct WasmCodeUpdate {
WasmCode* code = nullptr;
base::Optional<ExecutionTier> tier;
base::Optional<ExecutionTier> prior_tier;
};
class V8_EXPORT_PRIVATE WasmCode final {
public:
enum Kind {
......@@ -278,9 +272,9 @@ class V8_EXPORT_PRIVATE NativeModule final {
// {PublishCode} makes the code available to the system by entering it into
// the code table and patching the jump table. It returns a raw pointer to the
// given {WasmCode} object.
WasmCodeUpdate PublishCode(std::unique_ptr<WasmCode>);
WasmCode* PublishCode(std::unique_ptr<WasmCode>);
// Hold the {allocation_mutex_} when calling {PublishCodeLocked}.
WasmCodeUpdate PublishCodeLocked(std::unique_ptr<WasmCode>);
WasmCode* PublishCodeLocked(std::unique_ptr<WasmCode>);
WasmCode* AddDeserializedCode(
uint32_t index, Vector<const byte> instructions, uint32_t stack_slots,
......@@ -406,8 +400,8 @@ class V8_EXPORT_PRIVATE NativeModule final {
enum CodeSamplingTime : int8_t { kAfterBaseline, kAfterTopTier, kSampling };
void SampleCodeSize(Counters*, CodeSamplingTime) const;
WasmCodeUpdate AddCompiledCode(WasmCompilationResult);
std::vector<WasmCodeUpdate> AddCompiledCode(Vector<WasmCompilationResult>);
WasmCode* AddCompiledCode(WasmCompilationResult);
std::vector<WasmCode*> AddCompiledCode(Vector<WasmCompilationResult>);
// Free a set of functions of this module. Uncommits whole pages if possible.
// The given vector must be ordered by the instruction start address, and all
......
......@@ -13,10 +13,10 @@ namespace wasm {
// All the tiers of WASM execution.
enum class ExecutionTier : int8_t {
kNone,
kInterpreter,
kLiftoff,
kTurbofan,
kNone,
};
} // namespace wasm
......
......@@ -3723,7 +3723,7 @@ TEST(Liftoff_tier_up) {
desc.instr_size = static_cast<int>(sub_size);
std::unique_ptr<WasmCode> new_code = native_module->AddCode(
add.function_index(), desc, 0, 0, {}, OwnedVector<byte>(),
WasmCode::kFunction, ExecutionTier::kNone);
WasmCode::kFunction, ExecutionTier::kTurbofan);
native_module->PublishCode(std::move(new_code));
// Second run should now execute {sub}.
......
......@@ -501,9 +501,7 @@ void WasmFunctionCompiler::Build(const byte* start, const byte* end) {
WasmCompilationResult result = unit.ExecuteCompilation(
&env, native_module->compilation_state()->GetWireBytesStorage(),
isolate()->counters(), &unused_detected_features);
WasmCodeUpdate code_update =
native_module->AddCompiledCode(std::move(result));
WasmCode* code = code_update.code;
WasmCode* code = native_module->AddCompiledCode(std::move(result));
DCHECK_NOT_NULL(code);
if (WasmCode::ShouldBeLogged(isolate())) code->LogCode(isolate());
}
......
......@@ -177,7 +177,7 @@ class WasmCodeManagerTest : public TestWithContext,
desc.instr_size = static_cast<int>(size);
std::unique_ptr<WasmCode> code = native_module->AddCode(
index, desc, 0, 0, {}, {}, WasmCode::kFunction, ExecutionTier::kNone);
return native_module->PublishCode(std::move(code)).code;
return native_module->PublishCode(std::move(code));
}
size_t page() const { return AllocatePageSize(); }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment