Commit 6ad8193b authored by Arnaud Robin, committed by Commit Bot

[wasm] Add priorities for function compilation

In order to improve our tiering strategy, it is a good idea to start
by tiering up functions that will be used the most, as this is done in
most JavaScript engines.

To decide which function requires tiering, we use as a basic strategy
to define its compilation priority to 'func_size * number_of_calls',
this roughly approximates the time we spend in the function.

To handle prioritization, it seemed that using a concurrent
priority queue similar to BigUnitsQueue was causing concurrency issues.
I therefore decided to use a separate priority queue for each worker thread.

R=clemensb@chromium.org
CC=thibaudm@chromium.org

Bug: v8:10728
Change-Id: I6f314468549000b2a9b51d3d470f04a0cb997879
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2367859
Commit-Queue: Arnaud Robin <arobin@google.com>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#69585}
parent b59e7262
...@@ -637,21 +637,24 @@ class LiftoffCompiler { ...@@ -637,21 +637,24 @@ class LiftoffCompiler {
kInt32Size * declared_function_index(env_->module, func_index_); kInt32Size * declared_function_index(env_->module, func_index_);
// Get the number of calls and update it. // Get the number of calls and update it.
LiftoffRegister number_of_calls = LiftoffRegister old_number_of_calls =
pinned.set(__ GetUnusedRegister(kGpReg, pinned)); pinned.set(__ GetUnusedRegister(kGpReg, pinned));
__ Load(number_of_calls, array_address.gp(), no_reg, offset, LiftoffRegister new_number_of_calls =
pinned.set(__ GetUnusedRegister(kGpReg, pinned));
__ Load(old_number_of_calls, array_address.gp(), no_reg, offset,
LoadType::kI32Load, pinned); LoadType::kI32Load, pinned);
__ emit_i32_addi(number_of_calls.gp(), number_of_calls.gp(), 1); __ emit_i32_addi(new_number_of_calls.gp(), old_number_of_calls.gp(), 1);
__ Store(array_address.gp(), no_reg, offset, number_of_calls, __ Store(array_address.gp(), no_reg, offset, new_number_of_calls,
StoreType::kI32Store, pinned); StoreType::kI32Store, pinned);
// Emit the runtime call if necessary. // Emit the runtime call if necessary.
Label no_tierup; Label no_tierup;
constexpr int kTierUpLimit = 5; // Check if the number of calls is a power of 2.
__ emit_i32_addi(number_of_calls.gp(), number_of_calls.gp(), __ emit_i32_and(old_number_of_calls.gp(), old_number_of_calls.gp(),
-kTierUpLimit); new_number_of_calls.gp());
// Unary "unequal" means "different from zero". // Unary "unequal" means "different from zero".
__ emit_cond_jump(kUnequal, &no_tierup, kWasmI32, number_of_calls.gp()); __ emit_cond_jump(kUnequal, &no_tierup, kWasmI32,
old_number_of_calls.gp());
TierUpFunction(decoder); TierUpFunction(decoder);
__ bind(&no_tierup); __ bind(&no_tierup);
} }
......
...@@ -190,7 +190,8 @@ enum CompileBaselineOnly : bool { ...@@ -190,7 +190,8 @@ enum CompileBaselineOnly : bool {
// runs empty. // runs empty.
class CompilationUnitQueues { class CompilationUnitQueues {
public: public:
explicit CompilationUnitQueues(int max_tasks) : queues_(max_tasks) { explicit CompilationUnitQueues(int max_tasks, int num_declared_functions)
: queues_(max_tasks), top_tier_priority_units_queues_(max_tasks) {
DCHECK_LT(0, max_tasks); DCHECK_LT(0, max_tasks);
for (int task_id = 0; task_id < max_tasks; ++task_id) { for (int task_id = 0; task_id < max_tasks; ++task_id) {
queues_[task_id].next_steal_task_id = next_task_id(task_id); queues_[task_id].next_steal_task_id = next_task_id(task_id);
...@@ -198,6 +199,12 @@ class CompilationUnitQueues { ...@@ -198,6 +199,12 @@ class CompilationUnitQueues {
for (auto& atomic_counter : num_units_) { for (auto& atomic_counter : num_units_) {
std::atomic_init(&atomic_counter, size_t{0}); std::atomic_init(&atomic_counter, size_t{0});
} }
treated_ = std::make_unique<std::atomic<bool>[]>(num_declared_functions);
for (int i = 0; i < num_declared_functions; i++) {
std::atomic_init(&treated_.get()[i], false);
}
} }
base::Optional<WasmCompilationUnit> GetNextUnit( base::Optional<WasmCompilationUnit> GetNextUnit(
...@@ -257,6 +264,25 @@ class CompilationUnitQueues { ...@@ -257,6 +264,25 @@ class CompilationUnitQueues {
} }
} }
// Enqueue a top-tier unit with a scheduling {priority} (higher compiles
// first within a per-task queue). Thread-safe: callers may race.
void AddTopTierPriorityUnit(WasmCompilationUnit unit, size_t priority) {
// Add to the individual queues in a round-robin fashion. No special care is
// taken to balance them; they will be balanced by work stealing. We use
// the same counter for this reason.
// CAS loop: atomically claim the current queue index and advance the
// shared round-robin counter to the next task id.
int queue_to_add = next_queue_to_add.load(std::memory_order_relaxed);
while (!next_queue_to_add.compare_exchange_weak(
queue_to_add, next_task_id(queue_to_add), std::memory_order_relaxed)) {
// Retry with updated {queue_to_add}.
}
TopTierPriorityUnitsQueue* queue =
&top_tier_priority_units_queues_[queue_to_add];
// {queue->units} is only accessed under the per-queue mutex.
base::MutexGuard guard(&queue->mutex);
// Bump the counters before publishing the unit; both use relaxed order,
// consumers only use them as fast-path hints, not for synchronization.
num_priority_units_.fetch_add(1, std::memory_order_relaxed);
num_units_[kTopTier].fetch_add(1, std::memory_order_relaxed);
queue->units.emplace(priority, unit);
}
// Get the current total number of units in all queues. This is only a // Get the current total number of units in all queues. This is only a
// momentary snapshot, it's not guaranteed that {GetNextUnit} returns a unit // momentary snapshot, it's not guaranteed that {GetNextUnit} returns a unit
// if this method returns non-zero. // if this method returns non-zero.
...@@ -299,6 +325,18 @@ class CompilationUnitQueues { ...@@ -299,6 +325,18 @@ class CompilationUnitQueues {
} }
}; };
// A compilation unit paired with its scheduling priority, ordered so that
// a std::priority_queue yields the highest-priority unit first.
struct TopTierPriorityUnit {
// Fix: take the priority as {size_t} to match the member and the callers
// (AddTopTierPriorityUnit passes a size_t); the previous {int} parameter
// silently narrowed large priorities and broke ordering for values
// above INT_MAX.
TopTierPriorityUnit(size_t priority, WasmCompilationUnit unit)
: priority(priority), unit(unit) {}
size_t priority;
WasmCompilationUnit unit;
// Strict weak ordering on priority; max-heap semantics in priority_queue.
bool operator<(const TopTierPriorityUnit& other) const {
return priority < other.priority;
}
};
struct BigUnitsQueue { struct BigUnitsQueue {
BigUnitsQueue() { BigUnitsQueue() {
for (auto& atomic : has_units) std::atomic_init(&atomic, false); for (auto& atomic : has_units) std::atomic_init(&atomic, false);
...@@ -313,10 +351,23 @@ class CompilationUnitQueues { ...@@ -313,10 +351,23 @@ class CompilationUnitQueues {
std::priority_queue<BigUnit> units[kNumTiers]; std::priority_queue<BigUnit> units[kNumTiers];
}; };
// Per-worker-task queue of priority-ordered top-tier units. One instance
// exists per task id; cross-queue access happens only via work stealing.
struct TopTierPriorityUnitsQueue {
base::Mutex mutex;
// Protected by {mutex}:
std::priority_queue<TopTierPriorityUnit> units;
// Next task id to try stealing from when this queue runs empty.
int next_steal_task_id;
// End of fields protected by {mutex}.
};
std::vector<Queue> queues_; std::vector<Queue> queues_;
BigUnitsQueue big_units_queue_; BigUnitsQueue big_units_queue_;
std::vector<TopTierPriorityUnitsQueue> top_tier_priority_units_queues_;
std::atomic<size_t> num_units_[kNumTiers]; std::atomic<size_t> num_units_[kNumTiers];
std::atomic<size_t> num_priority_units_{0};
std::unique_ptr<std::atomic<bool>[]> treated_;
std::atomic<int> next_queue_to_add{0}; std::atomic<int> next_queue_to_add{0};
int next_task_id(int task_id) const { int next_task_id(int task_id) const {
...@@ -333,10 +384,19 @@ class CompilationUnitQueues { ...@@ -333,10 +384,19 @@ class CompilationUnitQueues {
base::Optional<WasmCompilationUnit> GetNextUnitOfTier(int task_id, int tier) { base::Optional<WasmCompilationUnit> GetNextUnitOfTier(int task_id, int tier) {
Queue* queue = &queues_[task_id]; Queue* queue = &queues_[task_id];
// First check whether there is a big unit of that tier. Execute that first.
// First check whether there is a priority unit. Execute that
// first.
if (tier == kTopTier) {
if (auto unit = GetTopTierPriorityUnit(task_id)) {
return unit;
}
}
// Then check whether there is a big unit of that tier.
if (auto unit = GetBigUnitOfTier(tier)) return unit; if (auto unit = GetBigUnitOfTier(tier)) return unit;
// Then check whether our own queue has a unit of the wanted tier. If // Finally check whether our own queue has a unit of the wanted tier. If
// so, return it, otherwise get the task id to steal from. // so, return it, otherwise get the task id to steal from.
int steal_task_id; int steal_task_id;
{ {
...@@ -379,6 +439,46 @@ class CompilationUnitQueues { ...@@ -379,6 +439,46 @@ class CompilationUnitQueues {
return unit; return unit;
} }
// Return the next top-tier priority unit for {task_id}: first from its own
// queue, then by stealing from other tasks' queues. Returns {nullopt} if no
// untreated priority unit exists anywhere.
base::Optional<WasmCompilationUnit> GetTopTierPriorityUnit(int task_id) {
// Fast-path without locking.
// Relaxed load is fine: a stale zero only delays pickup; a stale non-zero
// just means we take the locked path and find nothing.
if (num_priority_units_.load(std::memory_order_relaxed) == 0) {
return {};
}
TopTierPriorityUnitsQueue* queue =
&top_tier_priority_units_queues_[task_id];
int steal_task_id;
{
base::MutexGuard mutex_guard(&queue->mutex);
// Pop units until we find one whose function has not been handed out
// yet. The same function can be enqueued multiple times (once per
// tier-up trigger); {treated_} deduplicates across all queues.
while (!queue->units.empty()) {
auto unit = queue->units.top().unit;
queue->units.pop();
num_priority_units_.fetch_sub(1, std::memory_order_relaxed);
if (!treated_[unit.func_index()].exchange(true,
std::memory_order_relaxed)) {
// First time this function is dequeued: hand it out.
// NOTE(review): {num_units_[kTopTier]} is not decremented here —
// presumably the caller accounts for returned units; confirm.
return unit;
}
// Duplicate of an already-treated function: drop it and account for
// the disappearing unit.
num_units_[kTopTier].fetch_sub(1, std::memory_order_relaxed);
}
// Own queue exhausted: remember where to start stealing (read under the
// mutex, used after release).
steal_task_id = queue->next_steal_task_id;
}
// Try to steal from all other queues. If this succeeds, return one of the
// stolen units.
size_t steal_trials = queues_.size();
for (; steal_trials > 0;
--steal_trials, steal_task_id = next_task_id(steal_task_id)) {
if (steal_task_id == task_id) continue;
if (auto unit = StealTopTierPriorityUnit(task_id, steal_task_id)) {
return unit;
}
}
return {};
}
// Steal units of {wanted_tier} from {steal_from_task_id} to {task_id}. Return // Steal units of {wanted_tier} from {steal_from_task_id} to {task_id}. Return
// first stolen unit (rest put in queue of {task_id}), or {nullopt} if // first stolen unit (rest put in queue of {task_id}), or {nullopt} if
// {steal_from_task_id} had no units of {wanted_tier}. // {steal_from_task_id} had no units of {wanted_tier}.
...@@ -405,6 +505,39 @@ class CompilationUnitQueues { ...@@ -405,6 +505,39 @@ class CompilationUnitQueues {
queue->next_steal_task_id = next_task_id(steal_from_task_id); queue->next_steal_task_id = next_task_id(steal_from_task_id);
return returned_unit; return returned_unit;
} }
// Steal one priority unit from {steal_from_task_id} to {task_id}. Return
// stolen unit, or {nullopt} if {steal_from_task_id} had no priority units.
// Unlike the big-unit stealing path, only a single unit is transferred; the
// remaining units stay in the victim's queue.
base::Optional<WasmCompilationUnit> StealTopTierPriorityUnit(
int task_id, int steal_from_task_id) {
DCHECK_NE(task_id, steal_from_task_id);
base::Optional<WasmCompilationUnit> returned_unit;
{
TopTierPriorityUnitsQueue* steal_queue =
&top_tier_priority_units_queues_[steal_from_task_id];
// Scope the victim-queue lock so it is released before locking our own
// queue below — the two mutexes are never held at the same time, which
// rules out lock-order deadlocks between concurrent stealers.
base::MutexGuard guard(&steal_queue->mutex);
// Skip units whose function was already handed out elsewhere (see
// {treated_}); stop at the first fresh one.
while (true) {
if (steal_queue->units.empty()) return {};
auto unit = steal_queue->units.top().unit;
steal_queue->units.pop();
num_priority_units_.fetch_sub(1, std::memory_order_relaxed);
if (!treated_[unit.func_index()].exchange(true,
std::memory_order_relaxed)) {
returned_unit = unit;
break;
}
// Duplicate entry for an already-treated function: account for it
// and keep scanning.
num_units_[kTopTier].fetch_sub(1, std::memory_order_relaxed);
}
}
// Record the victim's successor as our next steal target so repeated
// steals rotate through the queues.
TopTierPriorityUnitsQueue* queue =
&top_tier_priority_units_queues_[task_id];
base::MutexGuard guard(&queue->mutex);
queue->next_steal_task_id = next_task_id(steal_from_task_id);
return returned_unit;
}
}; };
// {JobHandle} is not thread safe in general (at least both the // {JobHandle} is not thread safe in general (at least both the
...@@ -483,6 +616,7 @@ class CompilationStateImpl { ...@@ -483,6 +616,7 @@ class CompilationStateImpl {
Vector<std::shared_ptr<JSToWasmWrapperCompilationUnit>> Vector<std::shared_ptr<JSToWasmWrapperCompilationUnit>>
js_to_wasm_wrapper_units); js_to_wasm_wrapper_units);
void AddTopTierCompilationUnit(WasmCompilationUnit); void AddTopTierCompilationUnit(WasmCompilationUnit);
void AddTopTierPriorityCompilationUnit(WasmCompilationUnit, size_t);
base::Optional<WasmCompilationUnit> GetNextCompilationUnit( base::Optional<WasmCompilationUnit> GetNextCompilationUnit(
int task_id, CompileBaselineOnly baseline_only); int task_id, CompileBaselineOnly baseline_only);
...@@ -1054,7 +1188,14 @@ void TriggerTierUp(Isolate* isolate, NativeModule* native_module, ...@@ -1054,7 +1188,14 @@ void TriggerTierUp(Isolate* isolate, NativeModule* native_module,
Impl(native_module->compilation_state()); Impl(native_module->compilation_state());
WasmCompilationUnit tiering_unit{func_index, ExecutionTier::kTurbofan, WasmCompilationUnit tiering_unit{func_index, ExecutionTier::kTurbofan,
kNoDebugging}; kNoDebugging};
compilation_state->AddTopTierCompilationUnit(tiering_unit);
uint32_t* call_array = native_module->num_liftoff_function_calls_array();
int offset =
wasm::declared_function_index(native_module->module(), func_index);
size_t priority =
base::Relaxed_Load(reinterpret_cast<int*>(&call_array[offset]));
compilation_state->AddTopTierPriorityCompilationUnit(tiering_unit, priority);
} }
namespace { namespace {
...@@ -2604,7 +2745,8 @@ CompilationStateImpl::CompilationStateImpl( ...@@ -2604,7 +2745,8 @@ CompilationStateImpl::CompilationStateImpl(
max_compile_concurrency_(std::max(GetMaxCompileConcurrency(), 1)), max_compile_concurrency_(std::max(GetMaxCompileConcurrency(), 1)),
// Add one to the allowed number of parallel tasks, because the foreground // Add one to the allowed number of parallel tasks, because the foreground
// task sometimes also contributes. // task sometimes also contributes.
compilation_unit_queues_(max_compile_concurrency_ + 1), compilation_unit_queues_(max_compile_concurrency_ + 1,
native_module->num_functions()),
available_task_ids_(max_compile_concurrency_ + 1) { available_task_ids_(max_compile_concurrency_ + 1) {
for (int i = 0; i <= max_compile_concurrency_; ++i) { for (int i = 0; i <= max_compile_concurrency_; ++i) {
// Ids are popped on task creation, so reverse this list. This ensures that // Ids are popped on task creation, so reverse this list. This ensures that
...@@ -2798,6 +2940,12 @@ void CompilationStateImpl::AddTopTierCompilationUnit(WasmCompilationUnit unit) { ...@@ -2798,6 +2940,12 @@ void CompilationStateImpl::AddTopTierCompilationUnit(WasmCompilationUnit unit) {
AddCompilationUnits({}, {&unit, 1}, {}); AddCompilationUnits({}, {&unit, 1}, {});
} }
// Enqueue a top-tier {unit} with the given scheduling {priority} and make
// sure a compile job is running to pick it up.
void CompilationStateImpl::AddTopTierPriorityCompilationUnit(
WasmCompilationUnit unit, size_t priority) {
// The unit must be visible in the queues before the job is (re)scheduled,
// so a newly started worker cannot miss it.
compilation_unit_queues_.AddTopTierPriorityUnit(unit, priority);
ScheduleCompileJobForNewUnits(1);
}
std::shared_ptr<JSToWasmWrapperCompilationUnit> std::shared_ptr<JSToWasmWrapperCompilationUnit>
CompilationStateImpl::GetNextJSToWasmWrapperCompilationUnit() { CompilationStateImpl::GetNextJSToWasmWrapperCompilationUnit() {
int wrapper_id = int wrapper_id =
......
...@@ -811,6 +811,11 @@ NativeModule::NativeModule(WasmEngine* engine, const WasmFeatures& enabled, ...@@ -811,6 +811,11 @@ NativeModule::NativeModule(WasmEngine* engine, const WasmFeatures& enabled,
std::make_unique<WasmCode*[]>(module_->num_declared_functions); std::make_unique<WasmCode*[]>(module_->num_declared_functions);
num_liftoff_function_calls_ = num_liftoff_function_calls_ =
std::make_unique<uint32_t[]>(module_->num_declared_functions); std::make_unique<uint32_t[]>(module_->num_declared_functions);
// Start counter at 4 to avoid runtime calls for smaller numbers.
constexpr int kCounterStart = 4;
std::fill_n(num_liftoff_function_calls_.get(),
module_->num_declared_functions, kCounterStart);
} }
code_allocator_.Init(this); code_allocator_.Init(this);
} }
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
load('test/mjsunit/wasm/wasm-module-builder.js'); load('test/mjsunit/wasm/wasm-module-builder.js');
const num_iterations = 5; const num_iterations = 4;
const num_functions = 2; const num_functions = 2;
const builder = new WasmModuleBuilder(); const builder = new WasmModuleBuilder();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment