Commit f181dff3 authored by Arnaud Robin, committed by Commit Bot

[wasm] Implement dynamic tiering in wasm

On desktop systems, we use a very basic tiering strategy: Everything is
initially compiled with Liftoff, and once that is done, the module can
start being used. Concurrently to the execution, we re-compile all code
with TurboFan, and hot-swap each function once TurboFan finishes.

We should start using a more dynamic strategy where each function is
tiered-up when judged necessary. This change will then tier-up each
liftoff function once it has been called 5 times.

I then added a counter in the native module, which is updated directly
from Liftoff code; a runtime call is then made when the counter
reaches the threshold.

R=clemensb@chromium.org
CC=​thibaudm@chromium.org

Bug: v8:10728
Change-Id: I8dc2b02fdff8d97781bb1cf496886594b3d7f644
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2306803
Commit-Queue: Arnaud Robin <arobin@google.com>
Reviewed-by: Thibaud Michaud <thibaudm@chromium.org>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#68971}
parent 1cb7c707
......@@ -18,6 +18,7 @@ extern runtime WasmFunctionTableSet(
extern runtime ThrowWasmError(Context, Smi): JSAny;
extern runtime Throw(Context, Object): JSAny;
extern runtime ReThrow(Context, Object): JSAny;
extern runtime WasmTriggerTierUp(Context, WasmInstanceObject): JSAny;
extern runtime WasmStackGuard(Context): JSAny;
extern runtime ThrowWasmStackOverflow(Context): JSAny;
extern runtime WasmTraceMemory(Context, Smi): JSAny;
......@@ -200,6 +201,11 @@ builtin WasmRethrow(exception: Object): JSAny {
tail runtime::ReThrow(LoadContextFromFrame(), exception);
}
// Builtin trampoline called from Liftoff-generated code once a function's
// call counter reaches the tier-up limit. Loads the instance from the
// current wasm frame and tail-calls the runtime function that schedules
// the TurboFan recompilation.
builtin WasmTriggerTierUp(): JSAny {
  const instance: WasmInstanceObject = LoadInstanceFromFrame();
  tail runtime::WasmTriggerTierUp(LoadContextFromFrame(), instance);
}
// Builtin trampoline for stack checks: tail-calls into the runtime, which
// handles stack-overflow checks and pending interrupts.
builtin WasmStackGuard(): JSAny {
  tail runtime::WasmStackGuard(LoadContextFromFrame());
}
......
......@@ -733,6 +733,8 @@ DEFINE_UINT(wasm_max_code_space, v8::internal::kMaxWasmCodeMB,
DEFINE_BOOL(wasm_tier_up, true,
"enable tier up to the optimizing compiler (requires --liftoff to "
"have an effect)")
DEFINE_BOOL(wasm_dynamic_tiering, false,
"enable dynamic tier up to the optimizing compiler")
DEFINE_DEBUG_BOOL(trace_wasm_decoder, false, "trace decoding of wasm code")
DEFINE_DEBUG_BOOL(trace_wasm_compiler, false, "trace compiling of wasm code")
DEFINE_DEBUG_BOOL(trace_wasm_interpreter, false,
......
......@@ -208,6 +208,20 @@ RUNTIME_FUNCTION(Runtime_WasmCompileLazy) {
return Object(entrypoint);
}
// Runtime entry reached (via the WasmTriggerTierUp builtin) when a Liftoff
// function's call counter hits the tier-up limit. Identifies the calling
// wasm function from the stack and schedules its TurboFan compilation.
RUNTIME_FUNCTION(Runtime_WasmTriggerTierUp) {
  HandleScope scope(isolate);
  DCHECK_EQ(1, args.length());
  CONVERT_ARG_HANDLE_CHECKED(WasmInstanceObject, instance, 0);
  // Walk past the EXIT frame to the wasm frame that made the runtime call;
  // its function index tells us which function to tier up.
  FrameFinder<WasmFrame, StackFrame::EXIT> frame_finder(isolate);
  int func_index = frame_finder.frame()->function_index();
  auto* native_module = instance->module_object().native_module();
  wasm::TriggerTierUp(isolate, native_module, func_index);
  // The builtin tail-calls here and expects a JSAny; undefined is the
  // conventional "no result" value.
  return ReadOnlyRoots(isolate).undefined_value();
}
// Should be called from within a handle scope
Handle<JSArrayBuffer> GetArrayBuffer(Handle<WasmInstanceObject> instance,
Isolate* isolate, uint32_t address) {
......
......@@ -576,6 +576,7 @@ namespace internal {
F(WasmTableFill, 4, 1) \
F(WasmIsValidFuncRefValue, 1, 1) \
F(WasmCompileLazy, 2, 1) \
F(WasmTriggerTierUp, 1, 1) \
F(WasmDebugBreak, 0, 1) \
F(WasmAllocateRtt, 2, 1)
......
......@@ -330,7 +330,8 @@ class LiftoffCompiler {
CompilationEnv* env, Zone* compilation_zone,
std::unique_ptr<AssemblerBuffer> buffer,
DebugSideTableBuilder* debug_sidetable_builder,
ForDebugging for_debugging, Vector<int> breakpoints = {},
ForDebugging for_debugging, int func_index,
Vector<int> breakpoints = {},
Vector<int> extra_source_pos = {})
: asm_(std::move(buffer)),
descriptor_(
......@@ -338,6 +339,7 @@ class LiftoffCompiler {
env_(env),
debug_sidetable_builder_(debug_sidetable_builder),
for_debugging_(for_debugging),
func_index_(func_index),
out_of_line_code_(compilation_zone),
source_position_table_builder_(compilation_zone),
protected_instructions_(compilation_zone),
......@@ -528,6 +530,11 @@ class LiftoffCompiler {
return false;
}
// Emits a call to the WasmTriggerTierUp runtime stub, which schedules a
// TurboFan recompilation of the current function. Called from the
// function-entry code emitted when --wasm-dynamic-tiering is enabled.
void TierUpFunction(FullDecoder* decoder) {
  __ CallRuntimeStub(WasmCode::kWasmTriggerTierUp);
  // The runtime call can trigger a GC, so record a safepoint for it.
  safepoint_table_builder_.DefineSafepoint(&asm_, Safepoint::kNoLazyDeopt);
}
void TraceFunctionEntry(FullDecoder* decoder) {
DEBUG_CODE_COMMENT("trace function entry");
__ SpillAllRegisters();
......@@ -611,6 +618,42 @@ class LiftoffCompiler {
// is never a position of any instruction in the function.
StackCheck(0);
if (FLAG_wasm_dynamic_tiering) {
// TODO(arobin): Avoid spilling registers unconditionally.
__ SpillAllRegisters();
DEBUG_CODE_COMMENT("dynamic tiering");
LiftoffRegList pinned;
// Get the number of calls array address.
LiftoffRegister array_address =
pinned.set(__ GetUnusedRegister(kGpReg, pinned));
LOAD_INSTANCE_FIELD(array_address.gp(), NumLiftoffFunctionCallsArray,
kSystemPointerSize);
// Compute the correct offset in the array.
uint32_t offset =
kInt32Size * declared_function_index(env_->module, func_index_);
// Get the number of calls and update it.
LiftoffRegister number_of_calls =
pinned.set(__ GetUnusedRegister(kGpReg, pinned));
__ Load(number_of_calls, array_address.gp(), no_reg, offset,
LoadType::kI32Load, pinned);
__ emit_i32_addi(number_of_calls.gp(), number_of_calls.gp(), 1);
__ Store(array_address.gp(), no_reg, offset, number_of_calls,
StoreType::kI32Store, pinned);
// Emit the runtime call if necessary.
Label no_tierup;
constexpr int kTierUpLimit = 5;
__ emit_i32_addi(number_of_calls.gp(), number_of_calls.gp(),
-kTierUpLimit);
// Unary "unequal" means "different from zero".
__ emit_cond_jump(kUnequal, &no_tierup, kWasmI32, number_of_calls.gp());
TierUpFunction(decoder);
__ bind(&no_tierup);
}
if (FLAG_trace_wasm) TraceFunctionEntry(decoder);
// If we are generating debug code, do check the "hook on function call"
......@@ -3726,6 +3769,7 @@ class LiftoffCompiler {
DebugSideTableBuilder* const debug_sidetable_builder_;
const ForDebugging for_debugging_;
LiftoffBailoutReason bailout_reason_ = kSuccess;
const int func_index_;
ZoneVector<OutOfLineCode> out_of_line_code_;
SourcePositionTableBuilder source_position_table_builder_;
ZoneVector<trap_handler::ProtectedInstructionData> protected_instructions_;
......@@ -3803,7 +3847,7 @@ WasmCompilationResult ExecuteLiftoffCompilation(
WasmFullDecoder<Decoder::kValidate, LiftoffCompiler> decoder(
&zone, env->module, env->enabled_features, detected, func_body,
call_descriptor, env, &zone, instruction_buffer->CreateView(),
debug_sidetable_builder.get(), for_debugging, breakpoints,
debug_sidetable_builder.get(), for_debugging, func_index, breakpoints,
extra_source_pos);
decoder.Decode();
liftoff_compile_time_scope.reset();
......@@ -3849,7 +3893,7 @@ WasmCompilationResult ExecuteLiftoffCompilation(
std::unique_ptr<DebugSideTable> GenerateLiftoffDebugSideTable(
AccountingAllocator* allocator, CompilationEnv* env,
const FunctionBody& func_body) {
const FunctionBody& func_body, int func_index) {
Zone zone(allocator, "LiftoffDebugSideTableZone");
auto call_descriptor = compiler::GetWasmCallDescriptor(&zone, func_body.sig);
DebugSideTableBuilder debug_sidetable_builder;
......@@ -3858,7 +3902,7 @@ std::unique_ptr<DebugSideTable> GenerateLiftoffDebugSideTable(
&zone, env->module, env->enabled_features, &detected, func_body,
call_descriptor, env, &zone,
NewAssemblerBuffer(AssemblerBase::kDefaultBufferSize),
&debug_sidetable_builder, kForDebugging);
&debug_sidetable_builder, kForDebugging, func_index);
decoder.Decode();
DCHECK(decoder.ok());
DCHECK(!decoder.interface().did_bailout());
......
......@@ -60,7 +60,7 @@ V8_EXPORT_PRIVATE WasmCompilationResult ExecuteLiftoffCompilation(
Vector<int> extra_source_pos = {});
V8_EXPORT_PRIVATE std::unique_ptr<DebugSideTable> GenerateLiftoffDebugSideTable(
AccountingAllocator*, CompilationEnv*, const FunctionBody&);
AccountingAllocator*, CompilationEnv*, const FunctionBody&, int func_index);
} // namespace wasm
} // namespace internal
......
......@@ -1013,6 +1013,15 @@ bool CompileLazy(Isolate* isolate, NativeModule* native_module,
return true;
}
// Schedules an asynchronous top-tier (TurboFan) compilation unit for
// {func_index}; the compiled code is hot-swapped in once it is ready.
// NOTE(review): {isolate} appears unused in this body — presumably kept for
// interface symmetry with CompileLazy; confirm before removing.
void TriggerTierUp(Isolate* isolate, NativeModule* native_module,
                   int func_index) {
  CompilationStateImpl* compilation_state =
      Impl(native_module->compilation_state());
  WasmCompilationUnit tiering_unit{func_index, ExecutionTier::kTurbofan,
                                   kNoDebugging};
  compilation_state->AddTopTierCompilationUnit(tiering_unit);
}
namespace {
void RecordStats(const Code code, Counters* counters) {
......
......@@ -65,6 +65,8 @@ WasmCode* CompileImportWrapper(
// also lazy.
bool CompileLazy(Isolate*, NativeModule*, int func_index);
void TriggerTierUp(Isolate*, NativeModule*, int func_index);
int GetMaxBackgroundTasks();
template <typename Key, typename Hash>
......
......@@ -799,6 +799,8 @@ NativeModule::NativeModule(WasmEngine* engine, const WasmFeatures& enabled,
if (module_->num_declared_functions > 0) {
code_table_ =
std::make_unique<WasmCode*[]>(module_->num_declared_functions);
num_liftoff_function_calls_ =
std::make_unique<uint32_t[]>(module_->num_declared_functions);
}
code_allocator_.Init(this);
}
......
......@@ -49,6 +49,7 @@ struct WasmModule;
#define WASM_RUNTIME_STUB_LIST(V, VTRAP) \
FOREACH_WASM_TRAPREASON(VTRAP) \
V(WasmCompileLazy) \
V(WasmTriggerTierUp) \
V(WasmDebugBreak) \
V(WasmInt32ToHeapNumber) \
V(WasmTaggedNonSmiToInt32) \
......@@ -641,6 +642,10 @@ class V8_EXPORT_PRIVATE NativeModule final {
// Get or create the debug info for this NativeModule.
DebugInfo* GetDebugInfo();
uint32_t* num_liftoff_function_calls_array() {
return num_liftoff_function_calls_.get();
}
private:
friend class WasmCode;
friend class WasmCodeAllocator;
......@@ -725,6 +730,9 @@ class V8_EXPORT_PRIVATE NativeModule final {
// A cache of the import wrappers, keyed on the kind and signature.
std::unique_ptr<WasmImportWrapperCache> import_wrapper_cache_;
// Array to handle number of function calls.
std::unique_ptr<uint32_t[]> num_liftoff_function_calls_;
// This mutex protects concurrent calls to {AddCode} and friends.
mutable base::Mutex allocation_mutex_;
......
......@@ -697,7 +697,8 @@ class DebugInfoImpl {
FunctionBody func_body{function->sig, 0, function_bytes.begin(),
function_bytes.end()};
std::unique_ptr<DebugSideTable> debug_side_table =
GenerateLiftoffDebugSideTable(allocator, &env, func_body);
GenerateLiftoffDebugSideTable(allocator, &env, func_body,
code->index());
DebugSideTable* ret = debug_side_table.get();
// Check cache again, maybe another thread concurrently generated a debug
......
......@@ -232,6 +232,8 @@ PRIMITIVE_ACCESSORS(WasmInstanceObject, dropped_elem_segments, byte*,
kDroppedElemSegmentsOffset)
PRIMITIVE_ACCESSORS(WasmInstanceObject, hook_on_function_call_address, Address,
kHookOnFunctionCallAddressOffset)
PRIMITIVE_ACCESSORS(WasmInstanceObject, num_liftoff_function_calls_array,
uint32_t*, kNumLiftoffFunctionCallsArrayOffset)
ACCESSORS(WasmInstanceObject, module_object, WasmModuleObject,
kModuleObjectOffset)
......
......@@ -1220,6 +1220,8 @@ Handle<WasmInstanceObject> WasmInstanceObject::New(
instance->set_hook_on_function_call_address(
isolate->debug()->hook_on_function_call_address());
instance->set_managed_object_maps(*isolate->factory()->empty_fixed_array());
instance->set_num_liftoff_function_calls_array(
module_object->native_module()->num_liftoff_function_calls_array());
// Insert the new instance into the scripts weak list of instances. This list
// is used for breakpoints affecting all instances belonging to the script.
......
......@@ -401,6 +401,7 @@ class V8_EXPORT_PRIVATE WasmInstanceObject : public JSObject {
DECL_PRIMITIVE_ACCESSORS(data_segment_sizes, uint32_t*)
DECL_PRIMITIVE_ACCESSORS(dropped_elem_segments, byte*)
DECL_PRIMITIVE_ACCESSORS(hook_on_function_call_address, Address)
DECL_PRIMITIVE_ACCESSORS(num_liftoff_function_calls_array, uint32_t*)
// Clear uninitialized padding space. This ensures that the snapshot content
// is deterministic. Depending on the V8 build mode there could be no padding.
......@@ -448,6 +449,7 @@ class V8_EXPORT_PRIVATE WasmInstanceObject : public JSObject {
V(kDataSegmentSizesOffset, kSystemPointerSize) \
V(kDroppedElemSegmentsOffset, kSystemPointerSize) \
V(kHookOnFunctionCallAddressOffset, kSystemPointerSize) \
V(kNumLiftoffFunctionCallsArrayOffset, kSystemPointerSize) \
V(kHeaderSize, 0)
DEFINE_FIELD_OFFSET_CONSTANTS(JSObject::kHeaderSize,
......
......@@ -83,7 +83,7 @@ class LiftoffCompileEnvironment {
if (breakpoints.empty()) {
std::unique_ptr<DebugSideTable> debug_side_table =
GenerateLiftoffDebugSideTable(CcTest::i_isolate()->allocator(), &env,
test_func.body);
test_func.body, 0);
CheckTableEquals(*debug_side_table, *debug_side_table_via_compilation);
}
......
......@@ -847,6 +847,11 @@
# multiple isolates (https://crbug.com/v8/10099).
'wasm/tier-down-to-liftoff': [SKIP],
# Tiering Wasm functions down/up is non-deterministic with
# multiple isolates, as dynamic tiering relies on an array shared
# in the module, which can be modified by all instances.
'wasm/wasm-dynamic-tiering': [SKIP],
# waitAsync tests modify the global state (across Isolates)
'harmony/atomics-waitasync': [SKIP],
'harmony/atomics-waitasync-1thread-2timeout': [SKIP],
......
// Copyright 2019 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --allow-natives-syntax --wasm-dynamic-tiering --liftoff
// Flags: --no-wasm-tier-up --no-stress-opt
load('test/mjsunit/wasm/wasm-module-builder.js');
const num_iterations = 5;
const num_functions = 2;
const builder = new WasmModuleBuilder();
for (let i = 0; i < num_functions; ++i) {
let kFunction = builder.addFunction('f' + i, kSig_i_v)
.addBody(wasmI32Const(i))
.exportAs('f' + i)
}
let instance = builder.instantiate();
for (let i = 0; i < num_iterations - 1; ++i) {
instance.exports.f0();
instance.exports.f1();
}
assertTrue(%IsLiftoffFunction(instance.exports.f0));
assertTrue(%IsLiftoffFunction(instance.exports.f1));
instance.exports.f1();
// Busy waiting until the function is tiered up.
while (true) {
if (!%IsLiftoffFunction(instance.exports.f1)) {
break;
}
}
assertTrue(%IsLiftoffFunction(instance.exports.f0));
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment