Commit 4e983705 authored by Clemens Backes, committed by V8 LUCI CQ

[wasm] Tweak constants for estimating code space size

It turned out that on arm and arm64 we over-estimated the code size of a
Wasm module quite a bit. This CL adds some more output for the
--trace-wasm-compilation-times flag, and adds a script to compute the
factors we use for code size estimates from that output.
I ran the script on a few benchmarks (an older Epic module, the current
Photoshop module, and the benchmark from the linked bug), and adjusted
the constants accordingly.

Also, simplify the API of {ReservationSize} to only return a single
number, and fail internally if we need to allocate more than the engine
supports (which would only fail for artificially large modules).

R=jkummerow@chromium.org

Bug: chromium:1302310
Change-Id: I5b2c27ff3e360fb6738cf5dd697bcee09e106b6d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3522067
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Reviewed-by: Maya Lekova <mslekova@chromium.org>
Commit-Queue: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/main@{#79482}
parent 7ff96832
...@@ -3217,8 +3217,10 @@ void Pipeline::GenerateCodeForWasmFunction( ...@@ -3217,8 +3217,10 @@ void Pipeline::GenerateCodeForWasmFunction(
<< time.InMilliseconds() << " ms and " << time.InMilliseconds() << " ms and "
<< zone_stats.GetMaxAllocatedBytes() << " / " << zone_stats.GetMaxAllocatedBytes() << " / "
<< zone_stats.GetTotalAllocatedBytes() << zone_stats.GetTotalAllocatedBytes()
<< " max/total bytes, codesize " << codesize << " name " << " max/total bytes; bodysize "
<< data.info()->GetDebugName().get() << std::endl; << function_body.end - function_body.start << " codesize "
<< codesize << " name " << data.info()->GetDebugName().get()
<< std::endl;
} }
DCHECK(result->succeeded()); DCHECK(result->succeeded());
......
...@@ -8064,6 +8064,11 @@ wasm::WasmCompilationResult CompileWasmImportCallWrapper( ...@@ -8064,6 +8064,11 @@ wasm::WasmCompilationResult CompileWasmImportCallWrapper(
TRACE_EVENT0(TRACE_DISABLED_BY_DEFAULT("v8.wasm.detailed"), TRACE_EVENT0(TRACE_DISABLED_BY_DEFAULT("v8.wasm.detailed"),
"wasm.CompileWasmImportCallWrapper"); "wasm.CompileWasmImportCallWrapper");
base::TimeTicks start_time;
if (V8_UNLIKELY(FLAG_trace_wasm_compilation_times)) {
start_time = base::TimeTicks::Now();
}
//---------------------------------------------------------------------------- //----------------------------------------------------------------------------
// Create the Graph // Create the Graph
//---------------------------------------------------------------------------- //----------------------------------------------------------------------------
...@@ -8099,9 +8104,19 @@ wasm::WasmCompilationResult CompileWasmImportCallWrapper( ...@@ -8099,9 +8104,19 @@ wasm::WasmCompilationResult CompileWasmImportCallWrapper(
if (machine->Is32()) { if (machine->Is32()) {
incoming = GetI32WasmCallDescriptor(&zone, incoming); incoming = GetI32WasmCallDescriptor(&zone, incoming);
} }
return Pipeline::GenerateCodeForWasmNativeStub( wasm::WasmCompilationResult result = Pipeline::GenerateCodeForWasmNativeStub(
incoming, mcgraph, CodeKind::WASM_TO_JS_FUNCTION, func_name, incoming, mcgraph, CodeKind::WASM_TO_JS_FUNCTION, func_name,
WasmStubAssemblerOptions(), source_position_table); WasmStubAssemblerOptions(), source_position_table);
if (V8_UNLIKELY(FLAG_trace_wasm_compilation_times)) {
base::TimeDelta time = base::TimeTicks::Now() - start_time;
int codesize = result.code_desc.body_size();
StdoutStream{} << "Compiled WasmToJS wrapper " << func_name << ", took "
<< time.InMilliseconds() << " ms; codesize " << codesize
<< std::endl;
}
return result;
} }
wasm::WasmCode* CompileWasmCapiCallWrapper(wasm::NativeModule* native_module, wasm::WasmCode* CompileWasmCapiCallWrapper(wasm::NativeModule* native_module,
......
...@@ -599,28 +599,39 @@ size_t OverheadPerCodeSpace(uint32_t num_declared_functions) { ...@@ -599,28 +599,39 @@ size_t OverheadPerCodeSpace(uint32_t num_declared_functions) {
return overhead; return overhead;
} }
// Returns both the minimum size to reserve, and an estimate how much should be // Returns an estimate how much code space should be reserved.
// reserved. size_t ReservationSize(size_t code_size_estimate, int num_declared_functions,
std::pair<size_t, size_t> ReservationSize(size_t code_size_estimate, size_t total_reserved) {
int num_declared_functions,
size_t total_reserved) {
size_t overhead = OverheadPerCodeSpace(num_declared_functions); size_t overhead = OverheadPerCodeSpace(num_declared_functions);
// Reserve a power of two at least as big as any of // Reserve the maximum of
// a) needed size + overhead (this is the minimum needed) // a) needed size + overhead (this is the minimum needed)
// b) 2 * overhead (to not waste too much space by overhead) // b) 2 * overhead (to not waste too much space by overhead)
// c) 1/4 of current total reservation size (to grow exponentially) // c) 1/4 of current total reservation size (to grow exponentially)
size_t minimum_size = 2 * overhead; size_t minimum_size = 2 * overhead;
size_t suggested_size = base::bits::RoundUpToPowerOfTwo( size_t suggested_size =
std::max(std::max(RoundUp<kCodeAlignment>(code_size_estimate) + overhead, std::max(std::max(RoundUp<kCodeAlignment>(code_size_estimate) + overhead,
minimum_size), minimum_size),
total_reserved / 4)); total_reserved / 4);
if (V8_UNLIKELY(minimum_size > WasmCodeAllocator::kMaxCodeSpaceSize)) {
constexpr auto format = base::StaticCharVector(
"wasm code reservation: required minimum (%zu) is bigger than "
"supported maximum (%zu)");
constexpr int kMaxMessageLength =
format.size() - 6 + 2 * std::numeric_limits<size_t>::digits10;
base::EmbeddedVector<char, kMaxMessageLength + 1> message;
SNPrintF(message, format.begin(), minimum_size,
WasmCodeAllocator::kMaxCodeSpaceSize);
V8::FatalProcessOutOfMemory(nullptr, message.begin());
UNREACHABLE();
}
// Limit by the maximum supported code space size. // Limit by the maximum supported code space size.
size_t reserve_size = size_t reserve_size =
std::min(WasmCodeAllocator::kMaxCodeSpaceSize, suggested_size); std::min(WasmCodeAllocator::kMaxCodeSpaceSize, suggested_size);
return {minimum_size, reserve_size}; return reserve_size;
} }
#ifdef DEBUG #ifdef DEBUG
...@@ -709,14 +720,18 @@ base::Vector<byte> WasmCodeAllocator::AllocateForCodeInRegion( ...@@ -709,14 +720,18 @@ base::Vector<byte> WasmCodeAllocator::AllocateForCodeInRegion(
size_t total_reserved = 0; size_t total_reserved = 0;
for (auto& vmem : owned_code_space_) total_reserved += vmem.size(); for (auto& vmem : owned_code_space_) total_reserved += vmem.size();
size_t min_reservation; size_t reserve_size = ReservationSize(
size_t reserve_size;
std::tie(min_reservation, reserve_size) = ReservationSize(
size, native_module->module()->num_declared_functions, total_reserved); size, native_module->module()->num_declared_functions, total_reserved);
VirtualMemory new_mem = VirtualMemory new_mem =
code_manager->TryAllocate(reserve_size, reinterpret_cast<void*>(hint)); code_manager->TryAllocate(reserve_size, reinterpret_cast<void*>(hint));
if (!new_mem.IsReserved() || new_mem.size() < min_reservation) { if (!new_mem.IsReserved()) {
V8::FatalProcessOutOfMemory(nullptr, "wasm code reservation"); constexpr auto format = base::StaticCharVector(
"Cannot allocate more code space (%zu bytes, currently %zu)");
constexpr int kMaxMessageLength =
format.size() - 6 + 2 * std::numeric_limits<size_t>::digits10;
base::EmbeddedVector<char, kMaxMessageLength + 1> message;
SNPrintF(message, format.begin(), total_reserved, reserve_size);
V8::FatalProcessOutOfMemory(nullptr, message.begin());
UNREACHABLE(); UNREACHABLE();
} }
...@@ -2000,47 +2015,43 @@ namespace { ...@@ -2000,47 +2015,43 @@ namespace {
// separate code spaces being allocated (compile time and runtime overhead), // separate code spaces being allocated (compile time and runtime overhead),
// choosing them too large results in over-reservation (virtual address space // choosing them too large results in over-reservation (virtual address space
// only). // only).
// The current numbers have been determined on 2019-11-11 by clemensb@, based // In doubt, choose the numbers slightly too large, because over-reservation is
// on one small and one large module compiled from C++ by Emscripten. If in // less critical than multiple separate code spaces (especially on 64-bit).
// doubt, they where chosen slightly larger than required, as over-reservation // Numbers can be determined by running benchmarks with
// is not a big issue currently. // --trace-wasm-compilation-times, and piping the output through
// Numbers will change when Liftoff or TurboFan evolve, other toolchains are // tools/wasm/code-size-factors.py.
// used to produce the wasm code, or characteristics of wasm modules on the
// web change. They might require occasional tuning.
// This patch might help to find reasonable numbers for any future adaptation:
// https://crrev.com/c/1910945
#if V8_TARGET_ARCH_X64 #if V8_TARGET_ARCH_X64
constexpr size_t kTurbofanFunctionOverhead = 20; constexpr size_t kTurbofanFunctionOverhead = 24;
constexpr size_t kTurbofanCodeSizeMultiplier = 3; constexpr size_t kTurbofanCodeSizeMultiplier = 3;
constexpr size_t kLiftoffFunctionOverhead = 60; constexpr size_t kLiftoffFunctionOverhead = 56;
constexpr size_t kLiftoffCodeSizeMultiplier = 4; constexpr size_t kLiftoffCodeSizeMultiplier = 4;
constexpr size_t kImportSize = 350; constexpr size_t kImportSize = 640;
#elif V8_TARGET_ARCH_IA32 #elif V8_TARGET_ARCH_IA32
constexpr size_t kTurbofanFunctionOverhead = 20; constexpr size_t kTurbofanFunctionOverhead = 20;
constexpr size_t kTurbofanCodeSizeMultiplier = 4; constexpr size_t kTurbofanCodeSizeMultiplier = 4;
constexpr size_t kLiftoffFunctionOverhead = 60; constexpr size_t kLiftoffFunctionOverhead = 48;
constexpr size_t kLiftoffCodeSizeMultiplier = 5; constexpr size_t kLiftoffCodeSizeMultiplier = 5;
constexpr size_t kImportSize = 480; constexpr size_t kImportSize = 320;
#elif V8_TARGET_ARCH_ARM #elif V8_TARGET_ARCH_ARM
constexpr size_t kTurbofanFunctionOverhead = 40; constexpr size_t kTurbofanFunctionOverhead = 44;
constexpr size_t kTurbofanCodeSizeMultiplier = 4; constexpr size_t kTurbofanCodeSizeMultiplier = 4;
constexpr size_t kLiftoffFunctionOverhead = 108; constexpr size_t kLiftoffFunctionOverhead = 96;
constexpr size_t kLiftoffCodeSizeMultiplier = 7; constexpr size_t kLiftoffCodeSizeMultiplier = 5;
constexpr size_t kImportSize = 750; constexpr size_t kImportSize = 550;
#elif V8_TARGET_ARCH_ARM64 #elif V8_TARGET_ARCH_ARM64
constexpr size_t kTurbofanFunctionOverhead = 60; constexpr size_t kTurbofanFunctionOverhead = 40;
constexpr size_t kTurbofanCodeSizeMultiplier = 4;
constexpr size_t kLiftoffFunctionOverhead = 80;
constexpr size_t kLiftoffCodeSizeMultiplier = 7;
constexpr size_t kImportSize = 750;
#else
// Other platforms should add their own estimates if needed. Numbers below are
// the minimum of other architectures.
constexpr size_t kTurbofanFunctionOverhead = 20;
constexpr size_t kTurbofanCodeSizeMultiplier = 3; constexpr size_t kTurbofanCodeSizeMultiplier = 3;
constexpr size_t kLiftoffFunctionOverhead = 60; constexpr size_t kLiftoffFunctionOverhead = 68;
constexpr size_t kLiftoffCodeSizeMultiplier = 4; constexpr size_t kLiftoffCodeSizeMultiplier = 4;
constexpr size_t kImportSize = 350; constexpr size_t kImportSize = 750;
#else
// Other platforms should add their own estimates for best performance. Numbers
// below are the maximum of other architectures.
constexpr size_t kTurbofanFunctionOverhead = 44;
constexpr size_t kTurbofanCodeSizeMultiplier = 4;
constexpr size_t kLiftoffFunctionOverhead = 96;
constexpr size_t kLiftoffCodeSizeMultiplier = 5;
constexpr size_t kImportSize = 750;
#endif #endif
} // namespace } // namespace
...@@ -2179,9 +2190,7 @@ std::shared_ptr<NativeModule> WasmCodeManager::NewNativeModule( ...@@ -2179,9 +2190,7 @@ std::shared_ptr<NativeModule> WasmCodeManager::NewNativeModule(
committed + (max_committed_code_space_ - committed) / 2); committed + (max_committed_code_space_ - committed) / 2);
} }
size_t min_code_size; size_t code_vmem_size =
size_t code_vmem_size;
std::tie(min_code_size, code_vmem_size) =
ReservationSize(code_size_estimate, module->num_declared_functions, 0); ReservationSize(code_size_estimate, module->num_declared_functions, 0);
// The '--wasm-max-initial-code-space-reservation' testing flag can be used to // The '--wasm-max-initial-code-space-reservation' testing flag can be used to
...@@ -2192,18 +2201,6 @@ std::shared_ptr<NativeModule> WasmCodeManager::NewNativeModule( ...@@ -2192,18 +2201,6 @@ std::shared_ptr<NativeModule> WasmCodeManager::NewNativeModule(
if (flag_max_bytes < code_vmem_size) code_vmem_size = flag_max_bytes; if (flag_max_bytes < code_vmem_size) code_vmem_size = flag_max_bytes;
} }
// If we cannot allocate enough code space, fail with an OOM message.
if (code_vmem_size < min_code_size) {
constexpr auto format = base::StaticCharVector(
"NewNativeModule cannot allocate required minimum (%zu)");
constexpr int kMaxMessageLength =
format.size() - 3 + std::numeric_limits<size_t>::digits10;
base::EmbeddedVector<char, kMaxMessageLength + 1> message;
SNPrintF(message, format.begin(), min_code_size);
V8::FatalProcessOutOfMemory(isolate, message.begin());
UNREACHABLE();
}
// Try up to two times; getting rid of dead JSArrayBuffer allocations might // Try up to two times; getting rid of dead JSArrayBuffer allocations might
// require two GCs because the first GC maybe incremental and may have // require two GCs because the first GC maybe incremental and may have
// floating garbage. // floating garbage.
......
...@@ -70,13 +70,13 @@ class Function: ...@@ -70,13 +70,13 @@ class Function:
self.has_tf = True self.has_tf = True
# 0 1 2 3 4 5 6 7 8 9 10 11 # 0 1 2 3 4 5 6 7 8 9 10 11
# Compiled function #6 using TurboFan, took 0 ms and 14440 / 44656 # Compiled function #6 using TurboFan, took 0 ms and 14440 / 44656
# 12 13 14 15 16 17 # 12 13 14 15 16 17 18 19
# max/total bytes, codesize 24 name wasm-function#6 # max/total bytes; bodysize 12 codesize 24 name wasm-function#6
self.time_tf = int(words[6]) self.time_tf = int(words[6])
self.mem_tf_max = int(words[9]) self.mem_tf_max = int(words[9])
self.mem_tf_total = int(words[11]) self.mem_tf_total = int(words[11])
self.size_tf = int(words[15]) self.size_tf = int(words[17])
self.name = words[17] self.name = words[19]
def AddLiftoffLine(self, words): def AddLiftoffLine(self, words):
assert self.index == words[2], "wrong function" assert self.index == words[2], "wrong function"
...@@ -109,7 +109,8 @@ if len(sys.argv) < 2 or sys.argv[1] in ("-h", "--help", "help"): ...@@ -109,7 +109,8 @@ if len(sys.argv) < 2 or sys.argv[1] in ("-h", "--help", "help"):
with open(sys.argv[1], "r") as f: with open(sys.argv[1], "r") as f:
for line in f.readlines(): for line in f.readlines():
words = line.strip().split(" ") words = line.strip().split(" ")
if words[0] != "Compiled": continue if words[0] != "Compiled" or words[1] != "function":
continue
name = words[2] name = words[2]
RegisterName(name) RegisterName(name)
if name in funcs_dict: if name in funcs_dict:
......
#!/usr/bin/env python3
# vim:fenc=utf-8:ts=2:sw=2:softtabstop=2:expandtab:
# Copyright 2022 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
import sys
import re
# Matches a "Compiled function ... using Liftoff, ..." trace line that ends
# with "bodysize <N> codesize <N>"; the two groups capture those numbers.
liftoff_regex = re.compile('^Compiled function .* using Liftoff, '
                           '.*bodysize ([0-9]+) codesize ([0-9]+)$')
# Same as above for TurboFan lines. Here the codesize value must be followed
# by a space, i.e. more text comes after it on the line.
turbofan_regex = re.compile('^Compiled function .* using TurboFan, '
                            '.*bodysize ([0-9]+) codesize ([0-9]+) ')
# Matches a "Compiled WasmToJS wrapper ..." trace line that ends with
# "codesize <N>"; the single group captures that number.
wasm2js_regex = re.compile('^Compiled WasmToJS wrapper .* '
                           'codesize ([0-9]+)$')
def main():
  """Collects size samples from stdin and prints the derived estimates.

  Every line read from stdin is offered to the Liftoff, TurboFan and WasmToJS
  wrapper matchers; once stdin is exhausted, one evaluation is printed per
  category.
  """
  print('Reading --trace-wasm-compilation-times lines from stdin...')
  liftoff_samples, turbofan_samples, wrapper_sizes = [], [], []
  for trace_line in sys.stdin:
    match(trace_line, liftoff_regex, liftoff_samples)
    match(trace_line, turbofan_regex, turbofan_samples)
    match_wasm2js(trace_line, wrapper_sizes)

  evaluate('Liftoff', liftoff_samples)
  evaluate('TurboFan', turbofan_samples)
  evaluate_wasm2js(wrapper_sizes)
def match(line, regex, array):
  """Appends the two integers captured by `regex` to `array` on a match.

  Args:
    line: The text to test against `regex` (anchored at the start).
    regex: A compiled pattern with at least two numeric capture groups.
    array: List that receives a `[group1, group2]` pair of ints on a match.
  """
  found = regex.match(line)
  if found is None:
    return
  array.append([int(text) for text in found.group(1, 2)])
def match_wasm2js(line, array):
  """Appends the code size captured by `wasm2js_regex` to `array` on a match.

  Args:
    line: The text to test against the module-level `wasm2js_regex`.
    array: List that receives the captured code size as an int.
  """
  found = wasm2js_regex.match(line)
  if found is None:
    return
  array.append(int(found.group(1)))
def evaluate(name, values):
  """Prints two linear code-size models fitted to (bodysize, codesize) pairs.

  The "trend line" is an ordinary least-squares fit
  `codesize ~= base + factor * bodysize`. The "simple analysis" uses the
  smallest observed code size as the base and picks the factor so that the
  model reproduces the total code size over all samples.

  Args:
    name: Label used in the printed output.
    values: List of `[bodysize, codesize]` pairs.
  """
  n = len(values)
  if n == 0:
    print(f'No values for {name}')
    return
  print(f'Computing base and factor for {name} based on {n} values')
  sum_xy = sum(x * y for [x, y] in values)
  sum_x = sum(x for [x, y] in values)
  sum_y = sum(y for [x, y] in values)
  sum_xx = sum(x * x for [x, y] in values)

  # The least-squares denominator is n^2 times the variance of the body
  # sizes. It is zero when all body sizes are equal (e.g. a single sample),
  # in which case no slope can be determined.
  denominator = n * sum_xx - sum_x * sum_x
  if denominator == 0:
    print(f'--> [{name}] Trend line: undefined (all body sizes are equal)')
  else:
    factor = (n * sum_xy - sum_x * sum_y) / denominator
    base = (sum_y - factor * sum_x) / n
    print(f'--> [{name}] Trend line: base: {base:.2f}, factor {factor:.2f}')

  min_y = min(y for [x, y] in values)
  # Guard against a total body size of zero, which would divide by zero.
  if sum_x == 0:
    print(f'--> [{name}] Simple analysis: Min {min_y}, '
          f'factor undefined (total body size is zero)')
  else:
    simple_factor = (sum_y - n * min_y) / sum_x
    print(f'--> [{name}] Simple analysis: Min {min_y}, '
          f'factor {simple_factor:.2f}')
def evaluate_wasm2js(values):
  """Prints count, minimum, maximum and average of the wrapper code sizes.

  Args:
    values: List of WasmToJS wrapper code sizes (ints); may be empty.
  """
  n = len(values)
  if n > 0:
    print(f'--> [Wasm2js wrappers] {n} compiled, size min {min(values)}, '
          f'max {max(values)}, avg {(sum(values) / n):.2f}')
  else:
    print('No wasm2js wrappers')
# Only run when executed as a script, so importing this module (e.g. to reuse
# the helpers) does not block on stdin.
if __name__ == '__main__':
  main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment