Commit 49a1a9a4 authored by Stephan Herhut's avatar Stephan Herhut Committed by Commit Bot

[wasm] Parse function names on demand

Function names are optional in wasm and might not be present for most
functions. Instead of storing an empty name with each function, this
change loads names, if present, on first access of the name.

This also fixes an inconsistency with streaming compilation. Under
streaming compilation, functions are compiled before parsing the name
section. Hence, they always received an empty name. With this change,
assignment of names is typically deferred until the whole module was
parsed.

Bug: chromium:820291
Change-Id: I86d76aa40b7c45897d152725547795c8b6b9b9ba
Reviewed-on: https://chromium-review.googlesource.com/955647
Commit-Queue: Stephan Herhut <herhut@chromium.org>
Reviewed-by: 's avatarClemens Hammacher <clemensh@chromium.org>
Cr-Commit-Position: refs/heads/master@{#51909}
parent cabf5631
......@@ -5398,7 +5398,7 @@ WasmCodeWrapper WasmCompilationUnit::CompileWasmFunction(
wire_bytes.start() + function->code.end_offset()};
WasmCompilationUnit unit(isolate, env, native_module, function_body,
wire_bytes.GetNameOrNull(function),
wire_bytes.GetNameOrNull(function, env->module),
function->func_index,
CEntryStub(isolate, 1).GetCode(), mode);
unit.ExecuteCompilation();
......
......@@ -1029,7 +1029,7 @@ size_t ModuleCompiler::InitializeCompilationUnits(
uint32_t buffer_offset = func->code.offset();
Vector<const uint8_t> bytes(wire_bytes.start() + func->code.offset(),
func->code.end_offset() - func->code.offset());
WasmName name = wire_bytes.GetName(func);
WasmName name = wire_bytes.GetName(func, module_env->module);
DCHECK_NOT_NULL(native_module_);
builder.AddUnit(module_env, native_module_, func, buffer_offset, bytes,
name);
......@@ -1162,7 +1162,7 @@ void ModuleCompiler::CompileSequentially(const ModuleWireBytes& wire_bytes,
WasmCodeWrapper code = compiler::WasmCompilationUnit::CompileWasmFunction(
native_module_, thrower, isolate_, wire_bytes, module_env, &func);
if (code.is_null()) {
TruncatedUserString<> name(wire_bytes.GetName(&func));
TruncatedUserString<> name(wire_bytes.GetName(&func, module));
thrower->CompileError("Compilation of #%d:%.*s failed.", i, name.length(),
name.start());
break;
......@@ -1189,7 +1189,7 @@ void ModuleCompiler::ValidateSequentially(const ModuleWireBytes& wire_bytes,
DecodeResult result = VerifyWasmCodeWithStats(
isolate_->allocator(), module, body, module->is_wasm(), counters());
if (result.failed()) {
TruncatedUserString<> name(wire_bytes.GetName(&func));
TruncatedUserString<> name(wire_bytes.GetName(&func, module));
thrower->CompileError("Compiling function #%d:%.*s failed: %s @+%u", i,
name.length(), name.start(),
result.error_msg().c_str(), result.error_offset());
......@@ -2498,10 +2498,13 @@ void InstanceBuilder::ProcessExports(
MaybeHandle<String> func_name;
if (module_->is_asm_js()) {
// For modules arising from asm.js, honor the names section.
WireBytesRef func_name_ref =
module_->LookupName(compiled_module_->shared()->module_bytes(),
function.func_index);
func_name =
WasmSharedModuleData::ExtractUtf8StringFromModuleBytes(
isolate_, handle(compiled_module_->shared(), isolate_),
function.name)
func_name_ref)
.ToHandleChecked();
}
js_function = WasmExportedFunction::New(
......@@ -2735,10 +2738,12 @@ void InstanceBuilder::LoadTableSegments(Handle<WasmInstanceObject> instance) {
MaybeHandle<String> func_name;
if (module_->is_asm_js()) {
// For modules arising from asm.js, honor the names section.
WireBytesRef func_name_ref = module_->LookupName(
compiled_module_->shared()->module_bytes(), func_index);
func_name =
WasmSharedModuleData::ExtractUtf8StringFromModuleBytes(
isolate_, handle(compiled_module_->shared(), isolate_),
function->name)
func_name_ref)
.ToHandleChecked();
}
Handle<WasmExportedFunction> js_function =
......
......@@ -91,6 +91,12 @@ const char* SectionName(SectionCode code) {
namespace {
bool validate_utf8(Decoder* decoder, WireBytesRef string) {
return unibrow::Utf8::ValidateEncoding(
decoder->start() + decoder->GetBufferRelativeOffset(string.offset()),
string.length());
}
ValueType TypeOf(const WasmModule* module, const WasmInitExpr& expr) {
switch (expr.kind) {
case WasmInitExpr::kNone:
......@@ -468,7 +474,6 @@ class ModuleDecoderImpl : public Decoder {
module_->functions.push_back({nullptr, // sig
import->index, // func_index
0, // sig_index
{0, 0}, // name_offset
{0, 0}, // code
true, // imported
false}); // exported
......@@ -535,7 +540,6 @@ class ModuleDecoderImpl : public Decoder {
module_->functions.push_back({nullptr, // sig
func_index, // func_index
0, // sig_index
{0, 0}, // name
{0, 0}, // code
false, // imported
false}); // exported
......@@ -794,35 +798,13 @@ class ModuleDecoderImpl : public Decoder {
uint32_t name_payload_len = inner.consume_u32v("name payload length");
if (!inner.checkAvailable(name_payload_len)) break;
// Decode function names, ignore the rest.
// Local names will be decoded when needed.
switch (name_type) {
case NameSectionKindCode::kModule: {
WireBytesRef name = wasm::consume_string(inner, false, "module name");
if (inner.ok() && validate_utf8(&inner, name)) module_->name = name;
break;
}
case NameSectionKindCode::kFunction: {
uint32_t functions_count = inner.consume_u32v("functions count");
for (; inner.ok() && functions_count > 0; --functions_count) {
uint32_t function_index = inner.consume_u32v("function index");
WireBytesRef name =
wasm::consume_string(inner, false, "function name");
// Be lenient with errors in the name section: Ignore illegal
// or out-of-order indexes and non-UTF8 names. You can even assign
// to the same function multiple times (last valid one wins).
if (inner.ok() && function_index < module_->functions.size() &&
validate_utf8(&inner, name)) {
module_->functions[function_index].name = name;
}
}
break;
}
default:
inner.consume_bytes(name_payload_len, "name subsection payload");
break;
// Decode module name, ignore the rest.
// Function and local names will be decoded when needed.
if (name_type == NameSectionKindCode::kModule) {
WireBytesRef name = wasm::consume_string(inner, false, "module name");
if (inner.ok() && validate_utf8(&inner, name)) module_->name = name;
} else {
inner.consume_bytes(name_payload_len, "name subsection payload");
}
}
// Skip the whole names section in the outer decoder.
......@@ -895,7 +877,6 @@ class ModuleDecoderImpl : public Decoder {
std::unique_ptr<WasmFunction> function) {
pc_ = start_;
function->sig = consume_sig(zone);
function->name = {0, 0};
function->code = {off(pc_), static_cast<uint32_t>(end_ - pc_)};
if (ok())
......@@ -1039,7 +1020,8 @@ class ModuleDecoderImpl : public Decoder {
void VerifyFunctionBody(AccountingAllocator* allocator, uint32_t func_num,
const ModuleWireBytes& wire_bytes,
const WasmModule* module, WasmFunction* function) {
WasmFunctionName func_name(function, wire_bytes.GetNameOrNull(function));
WasmFunctionName func_name(function,
wire_bytes.GetNameOrNull(function, module));
if (FLAG_trace_wasm_decoder || FLAG_trace_wasm_decode_time) {
OFStream os(stdout);
os << "Verifying wasm function " << func_name << std::endl;
......@@ -1067,12 +1049,6 @@ class ModuleDecoderImpl : public Decoder {
return wasm::consume_string(*this, validate_utf8, name);
}
bool validate_utf8(Decoder* decoder, WireBytesRef string) {
return unibrow::Utf8::ValidateEncoding(
decoder->start() + decoder->GetBufferRelativeOffset(string.offset()),
string.length());
}
uint32_t consume_sig_index(WasmModule* module, FunctionSig** sig) {
const byte* pos = pc_;
uint32_t sig_index = consume_u32v("signature index");
......@@ -1574,13 +1550,10 @@ std::vector<CustomSectionOffset> DecodeCustomSections(const byte* start,
return result;
}
void DecodeLocalNames(const byte* module_start, const byte* module_end,
LocalNames* result) {
DCHECK_NOT_NULL(result);
DCHECK(result->names.empty());
namespace {
bool FindSection(Decoder& decoder, SectionCode section_code) {
static constexpr int kModuleHeaderSize = 8;
Decoder decoder(module_start, module_end);
decoder.consume_bytes(kModuleHeaderSize, "module header");
WasmSectionIterator section_iter(decoder);
......@@ -1589,10 +1562,57 @@ void DecodeLocalNames(const byte* module_start, const byte* module_end,
section_iter.section_code() != kNameSectionCode) {
section_iter.advance(true);
}
if (!section_iter.more()) return;
if (!section_iter.more()) return false;
// Reset the decoder to not read beyond the name section end.
decoder.Reset(section_iter.payload(), decoder.pc_offset());
return true;
}
} // namespace
void DecodeFunctionNames(const byte* module_start, const byte* module_end,
std::unordered_map<uint32_t, WireBytesRef>* names) {
DCHECK_NOT_NULL(names);
DCHECK(names->empty());
Decoder decoder(module_start, module_end);
if (!FindSection(decoder, kNameSectionCode)) return;
while (decoder.ok() && decoder.more()) {
uint8_t name_type = decoder.consume_u8("name type");
if (name_type & 0x80) break; // no varuint7
uint32_t name_payload_len = decoder.consume_u32v("name payload length");
if (!decoder.checkAvailable(name_payload_len)) break;
if (name_type != NameSectionKindCode::kFunction) {
decoder.consume_bytes(name_payload_len, "name subsection payload");
continue;
}
uint32_t functions_count = decoder.consume_u32v("functions count");
for (; decoder.ok() && functions_count > 0; --functions_count) {
uint32_t function_index = decoder.consume_u32v("function index");
WireBytesRef name = wasm::consume_string(decoder, false, "function name");
// Be lenient with errors in the name section: Ignore non-UTF8 names. You
// can even assign to the same function multiple times (last valid one
// wins).
if (decoder.ok() && validate_utf8(&decoder, name)) {
names->insert(std::make_pair(function_index, name));
}
}
}
}
void DecodeLocalNames(const byte* module_start, const byte* module_end,
LocalNames* result) {
DCHECK_NOT_NULL(result);
DCHECK(result->names.empty());
Decoder decoder(module_start, module_end);
if (!FindSection(decoder, kNameSectionCode)) return;
while (decoder.ok() && decoder.more()) {
uint8_t name_type = decoder.consume_u8("name type");
......
......@@ -107,6 +107,12 @@ V8_EXPORT_PRIVATE std::vector<CustomSectionOffset> DecodeCustomSections(
AsmJsOffsetsResult DecodeAsmJsOffsets(const byte* module_start,
const byte* module_end);
// Decode the function names from the name section.
// Returns the result as an unordered map. Only names with valid utf8 encoding
// are stored and conflicts are resolved by choosing the last name read.
void DecodeFunctionNames(const byte* module_start, const byte* module_end,
std::unordered_map<uint32_t, WireBytesRef>* names);
// Decode the local names assignment from the name section.
// Stores the result in the given {LocalNames} structure. The result will be
// empty if no name section is present. On encountering an error in the name
......
......@@ -2935,7 +2935,7 @@ ControlTransferMap WasmInterpreter::ComputeControlTransfersForTesting(
// Create some dummy structures, to avoid special-casing the implementation
// just for testing.
FunctionSig sig(0, 0, nullptr);
WasmFunction function{&sig, 0, 0, {0, 0}, {0, 0}, false, false};
WasmFunction function{&sig, 0, 0, {0, 0}, false, false};
InterpreterCode code{
&function, BodyLocalDecls(zone), start, end, nullptr, nullptr, nullptr};
......
......@@ -70,9 +70,64 @@ void UnpackAndRegisterProtectedInstructions(
}
}
WireBytesRef WasmModule::LookupName(const ModuleWireBytes* wire_bytes,
uint32_t function_index) const {
if (!names_) {
names_.reset(new std::unordered_map<uint32_t, WireBytesRef>());
wasm::DecodeFunctionNames(wire_bytes->start(), wire_bytes->end(),
names_.get());
}
auto it = names_->find(function_index);
if (it == names_->end()) return WireBytesRef();
return it->second;
}
WireBytesRef WasmModule::LookupName(SeqOneByteString* wire_bytes,
uint32_t function_index) const {
DisallowHeapAllocation no_gc;
uint8_t* chars = wire_bytes->GetChars();
ModuleWireBytes module_wire_bytes(chars, chars + wire_bytes->length());
return LookupName(&module_wire_bytes, function_index);
}
void WasmModule::AddNameForTesting(int function_index, WireBytesRef name) {
if (!names_) {
names_.reset(new std::unordered_map<uint32_t, WireBytesRef>());
}
names_->insert(std::make_pair(function_index, name));
}
// Get a string stored in the module bytes representing a name.
WasmName ModuleWireBytes::GetName(WireBytesRef ref) const {
if (ref.is_empty()) return {"<?>", 3}; // no name.
CHECK(BoundsCheck(ref.offset(), ref.length()));
return Vector<const char>::cast(
module_bytes_.SubVector(ref.offset(), ref.end_offset()));
}
// Get a string stored in the module bytes representing a function name.
WasmName ModuleWireBytes::GetName(const WasmFunction* function,
const WasmModule* module) const {
return GetName(module->LookupName(this, function->func_index));
}
// Get a string stored in the module bytes representing a name.
WasmName ModuleWireBytes::GetNameOrNull(WireBytesRef ref) const {
if (!ref.is_set()) return {nullptr, 0}; // no name.
CHECK(BoundsCheck(ref.offset(), ref.length()));
return Vector<const char>::cast(
module_bytes_.SubVector(ref.offset(), ref.end_offset()));
}
// Get a string stored in the module bytes representing a function name.
WasmName ModuleWireBytes::GetNameOrNull(const WasmFunction* function,
const WasmModule* module) const {
return GetNameOrNull(module->LookupName(this, function->func_index));
}
std::ostream& operator<<(std::ostream& os, const WasmFunctionName& name) {
os << "#" << name.function_->func_index;
if (name.function_->name.is_set()) {
if (!name.name_.is_empty()) {
if (name.name_.start()) {
os << ":";
os.write(name.name_.start(), name.name_.length());
......
......@@ -35,13 +35,13 @@ class CallDescriptor;
namespace wasm {
class ErrorThrower;
class NativeModule;
class TestingModuleBuilder;
// Static representation of a wasm function.
struct WasmFunction {
FunctionSig* sig; // signature of the function.
uint32_t func_index; // index into the function table.
uint32_t sig_index; // index into the signature table.
WireBytesRef name; // function name, if any.
WireBytesRef code; // code of this function.
bool imported;
bool exported;
......@@ -165,9 +165,16 @@ struct V8_EXPORT_PRIVATE WasmModule {
bool is_wasm() const { return origin_ == kWasmOrigin; }
bool is_asm_js() const { return origin_ == kAsmJsOrigin; }
WireBytesRef LookupName(const ModuleWireBytes* wire_bytes,
uint32_t function_index) const;
WireBytesRef LookupName(SeqOneByteString* wire_bytes,
uint32_t function_index) const;
void AddNameForTesting(int function_index, WireBytesRef name);
private:
// TODO(kschimpf) - Encapsulate more fields.
ModuleOrigin origin_ = kWasmOrigin; // origin of the module
mutable std::unique_ptr<std::unordered_map<uint32_t, WireBytesRef>> names_;
};
typedef Managed<WasmModule> WasmModuleWrapper;
......@@ -185,30 +192,18 @@ struct V8_EXPORT_PRIVATE ModuleWireBytes {
}
// Get a string stored in the module bytes representing a name.
WasmName GetName(WireBytesRef ref) const {
if (ref.is_empty()) return {"<?>", 3}; // no name.
CHECK(BoundsCheck(ref.offset(), ref.length()));
return Vector<const char>::cast(
module_bytes_.SubVector(ref.offset(), ref.end_offset()));
}
WasmName GetName(WireBytesRef ref) const;
// Get a string stored in the module bytes representing a function name.
WasmName GetName(const WasmFunction* function) const {
return GetName(function->name);
}
WasmName GetName(const WasmFunction* function,
const WasmModule* module) const;
// Get a string stored in the module bytes representing a name.
WasmName GetNameOrNull(WireBytesRef ref) const {
if (!ref.is_set()) return {nullptr, 0}; // no name.
CHECK(BoundsCheck(ref.offset(), ref.length()));
return Vector<const char>::cast(
module_bytes_.SubVector(ref.offset(), ref.end_offset()));
}
WasmName GetNameOrNull(WireBytesRef ref) const;
// Get a string stored in the module bytes representing a function name.
WasmName GetNameOrNull(const WasmFunction* function) const {
return GetNameOrNull(function->name);
}
WasmName GetNameOrNull(const WasmFunction* function,
const WasmModule* module) const;
// Checks the given offset range is contained within the module bytes.
bool BoundsCheck(uint32_t offset, uint32_t length) const {
......
......@@ -1431,9 +1431,10 @@ MaybeHandle<String> WasmSharedModuleData::GetFunctionNameOrNull(
Isolate* isolate, Handle<WasmSharedModuleData> shared,
uint32_t func_index) {
DCHECK_LT(func_index, shared->module()->functions.size());
WasmFunction& function = shared->module()->functions[func_index];
if (!function.name.is_set()) return {};
return ExtractUtf8StringFromModuleBytes(isolate, shared, function.name);
wasm::WireBytesRef name =
shared->module()->LookupName(shared->module_bytes(), func_index);
if (!name.is_set()) return {};
return ExtractUtf8StringFromModuleBytes(isolate, shared, name);
}
Handle<String> WasmSharedModuleData::GetFunctionName(
......@@ -1447,12 +1448,11 @@ Handle<String> WasmSharedModuleData::GetFunctionName(
Vector<const uint8_t> WasmSharedModuleData::GetRawFunctionName(
uint32_t func_index) {
DCHECK_GT(module()->functions.size(), func_index);
WasmFunction& function = module()->functions[func_index];
SeqOneByteString* bytes = module_bytes();
DCHECK_GE(bytes->length(), function.name.end_offset());
return Vector<const uint8_t>(
bytes->GetCharsAddress() + function.name.offset(),
function.name.length());
wasm::WireBytesRef name = module()->LookupName(bytes, func_index);
DCHECK_GE(bytes->length(), name.end_offset());
return Vector<const uint8_t>(bytes->GetCharsAddress() + name.offset(),
name.length());
}
int WasmSharedModuleData::GetFunctionOffset(uint32_t func_index) {
......
......@@ -46,7 +46,7 @@ void PrintWasmText(const WasmModule* module, const ModuleWireBytes& wire_bytes,
// Print the function signature.
os << "func";
WasmName fun_name = wire_bytes.GetNameOrNull(fun);
WasmName fun_name = wire_bytes.GetNameOrNull(fun, module);
if (IsValidFunctionName(fun_name)) {
os << " $";
os.write(fun_name.start(), fun_name.length());
......
......@@ -73,12 +73,11 @@ uint32_t TestingModuleBuilder::AddFunction(FunctionSig* sig, const char* name) {
}
uint32_t index = static_cast<uint32_t>(test_module_.functions.size());
native_module_->ResizeCodeTableForTest(index);
test_module_.functions.push_back(
{sig, index, 0, {0, 0}, {0, 0}, false, false});
test_module_.functions.push_back({sig, index, 0, {0, 0}, false, false});
if (name) {
Vector<const byte> name_vec = Vector<const byte>::cast(CStrVector(name));
test_module_.functions.back().name = {
AddBytes(name_vec), static_cast<uint32_t>(name_vec.length())};
test_module_.AddNameForTesting(
index, {AddBytes(name_vec), static_cast<uint32_t>(name_vec.length())});
}
if (interpreter_) {
interpreter_->AddFunctionForTesting(&test_module_.functions.back());
......@@ -436,9 +435,11 @@ void WasmFunctionCompiler::Build(const byte* start, const byte* end) {
memcpy(func_wire_bytes.start(),
wire_bytes->GetChars() + function_->code.offset(),
func_wire_bytes.length());
ScopedVector<char> func_name(function_->name.length());
memcpy(func_name.start(), wire_bytes->GetChars() + function_->name.offset(),
func_name.length());
WireBytesRef func_name_ref =
module_env.module->LookupName(*wire_bytes, function_->func_index);
ScopedVector<char> func_name(func_name_ref.length());
memcpy(func_name.start(), wire_bytes->GetChars() + func_name_ref.offset(),
func_name_ref.length());
FunctionBody func_body{function_->sig, function_->code.offset(),
func_wire_bytes.start(), func_wire_bytes.end()};
......
......@@ -220,7 +220,6 @@ class TestModuleBuilder {
mod.functions.push_back({sig, // sig
0, // func_index
0, // sig_index
{0, 0}, // name
{0, 0}, // code
false, // import
false}); // export
......
......@@ -1075,7 +1075,6 @@ TEST_F(WasmFunctionVerifyTest, Ok_v_v_empty) {
WasmFunction* function = result.val.get();
EXPECT_EQ(0u, function->sig->parameter_count());
EXPECT_EQ(0u, function->sig->return_count());
EXPECT_EQ(0u, function->name.offset());
EXPECT_EQ(static_cast<uint32_t>(SIZEOF_SIG_ENTRY_v_v),
function->code.offset());
EXPECT_EQ(sizeof(data), function->code.end_offset());
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment