Commit af1b9a93 authored by Andreas Haas, committed by V8 LUCI CQ

[wasm] Introduce a caching threshold

With dynamic tiering, typically not all functions of a WebAssembly
module get compiled with TurboFan, and therefore the code caching would
never get triggered. With this CL code caching is triggered whenever
{FLAG_wasm_caching_threshold} bytes of TurboFan code are generated.

This new caching event is only triggered when --wasm-dynamic-tiering is
enabled.

R=clemensb@chromium.org

Bug: v8:12281
Change-Id: I939325aea7e4310aa76c936636799661c05d4079
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3202593
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Commit-Queue: Andreas Haas <ahaas@chromium.org>
Cr-Commit-Position: refs/heads/main@{#77251}
parent bab8254c
......@@ -955,6 +955,9 @@ DEFINE_BOOL(wasm_tier_up, true,
"have an effect)")
DEFINE_BOOL(wasm_dynamic_tiering, false,
"enable dynamic tier up to the optimizing compiler")
DEFINE_INT(
wasm_caching_threshold, 1000000,
"the amount of wasm top tier code that triggers the next caching event")
DEFINE_DEBUG_BOOL(trace_wasm_decoder, false, "trace decoding of wasm code")
DEFINE_DEBUG_BOOL(trace_wasm_compiler, false, "trace compiling of wasm code")
DEFINE_DEBUG_BOOL(trace_wasm_interpreter, false,
......
......@@ -105,6 +105,7 @@ class WireBytesStorage {
enum class CompilationEvent : uint8_t {
kFinishedBaselineCompilation,
kFinishedExportWrappers,
kFinishedCompilationChunk,
kFinishedTopTierCompilation,
kFailedCompilation,
kFinishedRecompilation
......
......@@ -746,6 +746,9 @@ class CompilationStateImpl {
int outstanding_baseline_units_ = 0;
int outstanding_export_wrappers_ = 0;
int outstanding_top_tier_functions_ = 0;
// The amount of generated top tier code since the last
// {kFinishedCompilationChunk} event.
size_t bytes_since_last_chunk = 0;
std::vector<uint8_t> compilation_progress_;
int outstanding_recompilation_functions_ = 0;
......@@ -2095,8 +2098,12 @@ class AsyncCompileJob::CompilationStateCallback {
: nullptr);
}
break;
case CompilationEvent::kFinishedCompilationChunk:
DCHECK(CompilationEvent::kFinishedBaselineCompilation == last_event_ ||
CompilationEvent::kFinishedCompilationChunk == last_event_);
break;
case CompilationEvent::kFinishedTopTierCompilation:
DCHECK_EQ(CompilationEvent::kFinishedBaselineCompilation, last_event_);
DCHECK(CompilationEvent::kFinishedBaselineCompilation == last_event_);
// At this point, the job will already be gone, thus do not access it
// here.
break;
......@@ -3191,6 +3198,10 @@ void CompilationStateImpl::CommitTopTierCompilationUnit(
void CompilationStateImpl::AddTopTierPriorityCompilationUnit(
WasmCompilationUnit unit, size_t priority) {
compilation_unit_queues_.AddTopTierPriorityUnit(unit, priority);
{
base::MutexGuard guard(&callbacks_mutex_);
outstanding_top_tier_functions_++;
}
compile_job_->NotifyConcurrencyIncrease();
}
......@@ -3303,6 +3314,9 @@ void CompilationStateImpl::OnFinishedUnits(
DCHECK_GT(outstanding_baseline_units_, 0);
outstanding_baseline_units_--;
}
if (code->tier() == ExecutionTier::kTurbofan) {
bytes_since_last_chunk += code->instructions().size();
}
if (reached_tier < required_top_tier &&
required_top_tier <= code->tier()) {
DCHECK_GT(outstanding_top_tier_functions_, 0);
......@@ -3356,12 +3370,17 @@ void CompilationStateImpl::TriggerCallbacks(
triggered_events.Add(CompilationEvent::kFinishedExportWrappers);
if (outstanding_baseline_units_ == 0) {
triggered_events.Add(CompilationEvent::kFinishedBaselineCompilation);
if (outstanding_top_tier_functions_ == 0) {
if (!FLAG_wasm_dynamic_tiering && outstanding_top_tier_functions_ == 0) {
triggered_events.Add(CompilationEvent::kFinishedTopTierCompilation);
}
}
}
if (static_cast<size_t>(FLAG_wasm_caching_threshold) <
bytes_since_last_chunk) {
triggered_events.Add(CompilationEvent::kFinishedCompilationChunk);
bytes_since_last_chunk = 0;
}
if (compile_failed_.load(std::memory_order_relaxed)) {
// *Only* trigger the "failed" event.
triggered_events =
......@@ -3372,9 +3391,11 @@ void CompilationStateImpl::TriggerCallbacks(
// Don't trigger past events again.
triggered_events -= finished_events_;
// Recompilation can happen multiple times, thus do not store this.
finished_events_ |=
triggered_events - CompilationEvent::kFinishedRecompilation;
// Recompilation can happen multiple times, thus do not store this. There can
// also be multiple compilation chunks.
finished_events_ |= triggered_events -
CompilationEvent::kFinishedRecompilation -
CompilationEvent::kFinishedCompilationChunk;
for (auto event :
{std::make_pair(CompilationEvent::kFailedCompilation,
......@@ -3385,6 +3406,8 @@ void CompilationStateImpl::TriggerCallbacks(
"wasm.BaselineFinished"),
std::make_pair(CompilationEvent::kFinishedTopTierCompilation,
"wasm.TopTierFinished"),
std::make_pair(CompilationEvent::kFinishedCompilationChunk,
"wasm.CompilationChunkFinished"),
std::make_pair(CompilationEvent::kFinishedRecompilation,
"wasm.RecompilationFinished")}) {
if (!triggered_events.contains(event.first)) continue;
......@@ -3395,7 +3418,11 @@ void CompilationStateImpl::TriggerCallbacks(
}
}
if (outstanding_baseline_units_ == 0 && outstanding_export_wrappers_ == 0 &&
// With dynamic tiering, we don't know if we can ever delete the callback.
// TODO(https://crbug.com/v8/12289): Release some callbacks also when dynamic
// tiering is enabled.
if (!FLAG_wasm_dynamic_tiering && outstanding_baseline_units_ == 0 &&
outstanding_export_wrappers_ == 0 &&
outstanding_top_tier_functions_ == 0 &&
outstanding_recompilation_functions_ == 0) {
// Clear the callbacks because no more events will be delivered.
......
......@@ -312,33 +312,29 @@ void AsyncStreamingDecoder::Abort() {
namespace {
class TopTierCompiledCallback {
class CompilationChunkFinishedCallback {
public:
TopTierCompiledCallback(
CompilationChunkFinishedCallback(
std::weak_ptr<NativeModule> native_module,
AsyncStreamingDecoder::ModuleCompiledCallback callback)
: native_module_(std::move(native_module)),
callback_(std::move(callback)) {}
void operator()(CompilationEvent event) const {
if (event != CompilationEvent::kFinishedTopTierCompilation) return;
if (event != CompilationEvent::kFinishedCompilationChunk &&
event != CompilationEvent::kFinishedTopTierCompilation) {
return;
}
// If the native module is still alive, get back a shared ptr and call the
// callback.
if (std::shared_ptr<NativeModule> native_module = native_module_.lock()) {
callback_(native_module);
}
#ifdef DEBUG
DCHECK(!called_);
called_ = true;
#endif
}
private:
const std::weak_ptr<NativeModule> native_module_;
const AsyncStreamingDecoder::ModuleCompiledCallback callback_;
#ifdef DEBUG
mutable bool called_ = false;
#endif
};
} // namespace
......@@ -347,7 +343,7 @@ void AsyncStreamingDecoder::NotifyNativeModuleCreated(
const std::shared_ptr<NativeModule>& native_module) {
if (!module_compiled_callback_) return;
auto* comp_state = native_module->compilation_state();
comp_state->AddCallback(TopTierCompiledCallback{
comp_state->AddCallback(CompilationChunkFinishedCallback{
std::move(native_module), std::move(module_compiled_callback_)});
module_compiled_callback_ = {};
}
......
......@@ -1240,4 +1240,13 @@
'test-calls-with-arraylike-or-spread/*': [SKIP],
}],
################################################################################
['variant == stress', {
# The 'stress' variant sets the '--stress-opt' d8 flag, which executes 2 runs
# in debug mode and 5 runs in release mode. Hence the module will be cached
# between runs, and the correct caching behavior cannot be observed anymore in
# the later runs.
'test-streaming-compilation/AsyncTestIncrementalCaching': [SKIP],
'test-streaming-compilation/SingleThreadedTestIncrementalCaching': [SKIP],
}],
]
......@@ -20,6 +20,7 @@
#include "test/common/wasm/flag-utils.h"
#include "test/common/wasm/test-signatures.h"
#include "test/common/wasm/wasm-macro-gen.h"
#include "test/common/wasm/wasm-module-runner.h"
namespace v8 {
namespace internal {
......@@ -1108,6 +1109,105 @@ STREAM_TEST(TestModuleWithImportedFunction) {
CHECK(tester.IsPromiseFulfilled());
}
// Checks that, with dynamic tiering enabled, the module-compiled (caching)
// callback fires once per tiered-up function when the caching threshold is
// small, and that the serialized module grows as more functions get optimized
// code.
STREAM_TEST(TestIncrementalCaching) {
  FLAG_VALUE_SCOPE(wasm_dynamic_tiering, true);
  FLAG_VALUE_SCOPE(wasm_tier_up, false);
  // A tiny threshold, so that the code generated for a single function below
  // is expected to exceed it.
  constexpr int threshold = 10;
  FlagScope<int> caching_threshold(&FLAG_wasm_caching_threshold, threshold);
  StreamTester tester(isolate);

  // Counts how often the module-compiled callback is invoked.
  int call_cache_counter = 0;
  tester.stream()->SetModuleCompiledCallback(
      [&call_cache_counter](
          const std::shared_ptr<i::wasm::NativeModule>& native_module) {
        call_cache_counter++;
      });

  // Build a module with {kNumFunctions} exported functions "f0".."f2", each
  // consisting of {threshold} identical memory stores.
  constexpr int kNumFunctions = 3;
  ZoneBuffer buffer(tester.zone());
  TestSignatures sigs;
  WasmModuleBuilder builder(tester.zone());
  builder.SetMinMemorySize(1);

  base::Vector<const char> function_names[] = {
      base::CStrVector("f0"), base::CStrVector("f1"), base::CStrVector("f2")};
  CHECK_EQ(static_cast<size_t>(kNumFunctions), arraysize(function_names));
  for (int i = 0; i < kNumFunctions; ++i) {
    WasmFunctionBuilder* f = builder.AddFunction(sigs.v_v());

    constexpr int64_t val = 0x123456789abc;
    constexpr int index = 0x1234;
    uint8_t store_mem[] = {
        WASM_STORE_MEM(MachineType::Int64(), WASM_I32V(index), WASM_I64V(val))};
    // The copy loop below relies on the encoded store having exactly this size.
    constexpr uint32_t kStoreLength = 20;
    CHECK_EQ(kStoreLength, arraysize(store_mem));

    // Produce a store {threshold} many times to reach the caching threshold.
    // The extra byte holds the terminating {kExprEnd} opcode.
    constexpr uint32_t kCodeLength = kStoreLength * threshold + 1;
    uint8_t code[kCodeLength];
    for (int j = 0; j < threshold; ++j) {
      memcpy(code + (j * kStoreLength), store_mem, kStoreLength);
    }
    code[kCodeLength - 1] = WasmOpcode::kExprEnd;
    f->EmitCode(code, kCodeLength);
    builder.AddExport(function_names[i], f);
  }
  builder.WriteTo(&buffer);

  // Stream the whole module and let compilation finish.
  tester.OnBytesReceived(buffer.begin(), buffer.end() - buffer.begin());
  tester.FinishStream();
  tester.RunCompilerTasks();
  CHECK(tester.IsPromiseFulfilled());

  // Create a module object for the streamed native module and instantiate it,
  // so that the exported functions can be called below.
  constexpr base::Vector<const char> kNoSourceUrl{"", 0};
  Isolate* i_isolate = reinterpret_cast<i::Isolate*>(isolate);
  Handle<Script> script = GetWasmEngine()->GetOrCreateScript(
      i_isolate, tester.native_module(), kNoSourceUrl);
  Handle<FixedArray> export_wrappers =
      i_isolate->factory()->NewFixedArray(kNumFunctions);
  Handle<WasmModuleObject> module_object = WasmModuleObject::New(
      i_isolate, tester.native_module(), script, export_wrappers);
  ErrorThrower thrower(i_isolate, "Instantiation");
  // Instantiation of the freshly compiled module is expected to succeed.
  Handle<WasmInstanceObject> instance =
      GetWasmEngine()
          ->SyncInstantiate(i_isolate, &thrower, module_object, {}, {})
          .ToHandleChecked();
  CHECK(!thrower.error());

  WasmCodeRefScope code_scope;
  CHECK(tester.native_module()->GetCode(0)->is_liftoff());
  CHECK(tester.native_module()->GetCode(1)->is_liftoff());
  CHECK(tester.native_module()->GetCode(2)->is_liftoff());
  // No TurboFan compilation happened yet, and therefore no call to the cache.
  CHECK_EQ(0, call_cache_counter);

  // NOTE(review): {exception} is only passed as an out-parameter and is never
  // inspected afterwards -- confirm whether it should be checked.
  bool exception = false;
  // The tier-up threshold is hard-coded right now.
  constexpr int tier_up_threshold = 4;

  // Tier up "f0": only function 0 leaves Liftoff, and the callback fires once.
  for (int i = 0; i < tier_up_threshold; ++i) {
    testing::CallWasmFunctionForTesting(i_isolate, instance, "f0", 0, nullptr,
                                        &exception);
  }
  tester.RunCompilerTasks();
  CHECK(!tester.native_module()->GetCode(0)->is_liftoff());
  CHECK(tester.native_module()->GetCode(1)->is_liftoff());
  CHECK(tester.native_module()->GetCode(2)->is_liftoff());
  CHECK_EQ(1, call_cache_counter);

  // Remember the serialized size with one tiered-up function.
  size_t serialized_size;
  {
    i::wasm::WasmSerializer serializer(tester.native_module().get());
    serialized_size = serializer.GetSerializedNativeModuleSize();
  }

  // Tier up "f1" as well: the callback fires a second time.
  for (int i = 0; i < tier_up_threshold; ++i) {
    testing::CallWasmFunctionForTesting(i_isolate, instance, "f1", 0, nullptr,
                                        &exception);
  }
  tester.RunCompilerTasks();
  CHECK(!tester.native_module()->GetCode(0)->is_liftoff());
  CHECK(!tester.native_module()->GetCode(1)->is_liftoff());
  CHECK(tester.native_module()->GetCode(2)->is_liftoff());
  CHECK_EQ(2, call_cache_counter);

  // With a second tiered-up function the serialized module must have grown.
  {
    i::wasm::WasmSerializer serializer(tester.native_module().get());
    CHECK_LT(serialized_size, serializer.GetSerializedNativeModuleSize());
  }
}
STREAM_TEST(TestModuleWithErrorAfterDataSection) {
StreamTester tester(isolate);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment