Commit d43969ea authored by Jakob Kummerow, committed by V8 LUCI CQ

[tools][wasm] wami: Support hexdump for invalid modules

When trying to understand why a given module fails to validate, it
can be helpful to disassemble it as far as possible until reaching
the erroneous byte(s).

Change-Id: I0056ba1a81b85a486c0446d15bbf54ccb2e8332e
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3827866
Reviewed-by: Andreas Haas <ahaas@chromium.org>
Commit-Queue: Jakob Kummerow <jkummerow@chromium.org>
Cr-Commit-Position: refs/heads/main@{#82433}
parent a19316d9
...@@ -246,7 +246,6 @@ HeapType read_heap_type(Decoder* decoder, const byte* pc, ...@@ -246,7 +246,6 @@ HeapType read_heap_type(Decoder* decoder, const byte* pc,
decoder, pc, decoder, pc,
"invalid heap type '%s', enable with --experimental-wasm-gc", "invalid heap type '%s', enable with --experimental-wasm-gc",
HeapType::from_code(code).name().c_str()); HeapType::from_code(code).name().c_str());
return HeapType(HeapType::kBottom);
} }
V8_FALLTHROUGH; V8_FALLTHROUGH;
case kExternRefCode: case kExternRefCode:
...@@ -261,7 +260,6 @@ HeapType read_heap_type(Decoder* decoder, const byte* pc, ...@@ -261,7 +260,6 @@ HeapType read_heap_type(Decoder* decoder, const byte* pc,
"invalid heap type '%s', enable with " "invalid heap type '%s', enable with "
"--experimental-wasm-stringref", "--experimental-wasm-stringref",
HeapType::from_code(code).name().c_str()); HeapType::from_code(code).name().c_str());
return HeapType(HeapType::kBottom);
} }
return HeapType::from_code(code); return HeapType::from_code(code);
default: default:
...@@ -274,7 +272,6 @@ HeapType read_heap_type(Decoder* decoder, const byte* pc, ...@@ -274,7 +272,6 @@ HeapType read_heap_type(Decoder* decoder, const byte* pc,
DecodeError<validate>(decoder, pc, DecodeError<validate>(decoder, pc,
"Invalid indexed heap type, enable with " "Invalid indexed heap type, enable with "
"--experimental-wasm-typed-funcref"); "--experimental-wasm-typed-funcref");
return HeapType(HeapType::kBottom);
} }
uint32_t type_index = static_cast<uint32_t>(heap_index); uint32_t type_index = static_cast<uint32_t>(heap_index);
if (!VALIDATE(type_index < kV8MaxWasmTypes)) { if (!VALIDATE(type_index < kV8MaxWasmTypes)) {
...@@ -289,7 +286,6 @@ HeapType read_heap_type(Decoder* decoder, const byte* pc, ...@@ -289,7 +286,6 @@ HeapType read_heap_type(Decoder* decoder, const byte* pc,
if (!VALIDATE(module == nullptr || type_index < module->types.capacity())) { if (!VALIDATE(module == nullptr || type_index < module->types.capacity())) {
DecodeError<validate>(decoder, pc, "Type index %u is out of bounds", DecodeError<validate>(decoder, pc, "Type index %u is out of bounds",
type_index); type_index);
return HeapType(HeapType::kBottom);
} }
return HeapType(type_index); return HeapType(type_index);
} }
......
...@@ -33,6 +33,7 @@ class NoTracer { ...@@ -33,6 +33,7 @@ class NoTracer {
// Hooks for extracting byte offsets of things. // Hooks for extracting byte offsets of things.
void TypeOffset(uint32_t offset) {} void TypeOffset(uint32_t offset) {}
void ImportOffset(uint32_t offset) {} void ImportOffset(uint32_t offset) {}
void ImportsDone() {}
void TableOffset(uint32_t offset) {} void TableOffset(uint32_t offset) {}
void MemoryOffset(uint32_t offset) {} void MemoryOffset(uint32_t offset) {}
void TagOffset(uint32_t offset) {} void TagOffset(uint32_t offset) {}
...@@ -866,6 +867,7 @@ class ModuleDecoderTemplate : public Decoder { ...@@ -866,6 +867,7 @@ class ModuleDecoderTemplate : public Decoder {
break; break;
} }
} }
tracer_.ImportsDone();
} }
void DecodeFunctionSection() { void DecodeFunctionSection() {
...@@ -1676,7 +1678,7 @@ class ModuleDecoderTemplate : public Decoder { ...@@ -1676,7 +1678,7 @@ class ModuleDecoderTemplate : public Decoder {
} }
// Shift the offset by the remaining section payload // Shift the offset by the remaining section payload
offset += section_iter.payload_length(); offset += section_iter.payload_length();
if (!section_iter.more()) break; if (!section_iter.more() || !ok()) break;
section_iter.advance(true); section_iter.advance(true);
} }
...@@ -2203,6 +2205,7 @@ class ModuleDecoderTemplate : public Decoder { ...@@ -2203,6 +2205,7 @@ class ModuleDecoderTemplate : public Decoder {
tracer_.NextLineIfFull(); tracer_.NextLineIfFull();
} }
tracer_.NextLineIfNonEmpty(); tracer_.NextLineIfNonEmpty();
if (failed()) return nullptr;
// Parse return types. // Parse return types.
std::vector<ValueType> returns; std::vector<ValueType> returns;
......
...@@ -608,6 +608,7 @@ class OffsetsProvider { ...@@ -608,6 +608,7 @@ class OffsetsProvider {
void DataOffset(uint32_t offset) { data_offsets_.push_back(offset); } void DataOffset(uint32_t offset) { data_offsets_.push_back(offset); }
// Unused by this tracer: // Unused by this tracer:
void ImportsDone() {}
void Bytes(const byte* start, uint32_t count) {} void Bytes(const byte* start, uint32_t count) {}
void Description(const char* desc) {} void Description(const char* desc) {}
void Description(const char* desc, size_t length) {} void Description(const char* desc, size_t length) {}
......
...@@ -358,10 +358,24 @@ class ExtendedFunctionDis : public FunctionBodyDisassembler { ...@@ -358,10 +358,24 @@ class ExtendedFunctionDis : public FunctionBodyDisassembler {
// A variant of ModuleDisassembler that produces "annotated hex dump" format, // A variant of ModuleDisassembler that produces "annotated hex dump" format,
// e.g.: // e.g.:
// 0x01, 0x70, 0x00, // table count 1: funcref no maximum // 0x01, 0x70, 0x00, // table count 1: funcref no maximum
class HexDumpModuleDis;

// Module decoder that reports to a HexDumpModuleDis tracer. It decodes the
// given wire bytes with all Wasm features enabled and customizes what happens
// on the first decoding error, so that invalid modules can still be dumped as
// far as possible.
class DumpingModuleDecoder : public ModuleDecoderTemplate<HexDumpModuleDis> {
  using Base = ModuleDecoderTemplate<HexDumpModuleDis>;

 public:
  // {wire_bytes} supplies the byte range to decode; {module_dis} is the
  // tracer that receives the decoder's callbacks (passed on by reference).
  DumpingModuleDecoder(const ModuleWireBytes wire_bytes,
                       HexDumpModuleDis* module_dis)
      : Base(WasmFeatures::All(), wire_bytes.start(), wire_bytes.end(),
             kWasmOrigin, *module_dis) {}

  void onFirstError() override {
    // Clamp the end of the readable range to the current position, i.e.
    // behave as if the section ended right here. Unlike the superclass
    // implementation, {pc_} itself is left untouched, so the bytes that
    // triggered the failure can still be dumped correctly.
    end_ = pc_;
  }
};
class HexDumpModuleDis { class HexDumpModuleDis {
public: public:
using DumpingModuleDecoder = ModuleDecoderTemplate<HexDumpModuleDis>;
HexDumpModuleDis(MultiLineStringBuilder& out, const WasmModule* module, HexDumpModuleDis(MultiLineStringBuilder& out, const WasmModule* module,
NamesProvider* names, const ModuleWireBytes wire_bytes, NamesProvider* names, const ModuleWireBytes wire_bytes,
AccountingAllocator* allocator) AccountingAllocator* allocator)
...@@ -370,35 +384,39 @@ class HexDumpModuleDis { ...@@ -370,35 +384,39 @@ class HexDumpModuleDis {
names_(names), names_(names),
wire_bytes_(wire_bytes), wire_bytes_(wire_bytes),
allocator_(allocator), allocator_(allocator),
zone_(allocator, "disassembler") { zone_(allocator, "disassembler") {}
for (const WasmImport& import : module->import_table) {
switch (import.kind) {
// clang-format off
case kExternalFunction: break;
case kExternalTable: next_table_index_++; break;
case kExternalMemory: break;
case kExternalGlobal: next_global_index_++; break;
case kExternalTag: next_tag_index_++; break;
// clang-format on
}
}
}
// Public entrypoint. // Public entrypoint.
void PrintModule() { void PrintModule() {
constexpr bool verify_functions = false; DumpingModuleDecoder decoder(wire_bytes_, this);
DumpingModuleDecoder decoder(WasmFeatures::All(), wire_bytes_.start(),
wire_bytes_.end(), kWasmOrigin, *this);
decoder_ = &decoder; decoder_ = &decoder;
// If the module failed validation, create fakes to allow us to print
// what we can.
std::unique_ptr<WasmModule> fake_module;
std::unique_ptr<NamesProvider> names_provider;
if (!names_) {
fake_module.reset(
new WasmModule(std::make_unique<Zone>(allocator_, "fake module")));
names_provider.reset(
new NamesProvider(fake_module.get(), wire_bytes_.module_bytes()));
names_ = names_provider.get();
}
out_ << "["; out_ << "[";
out_.NextLine(0); out_.NextLine(0);
constexpr bool verify_functions = false;
decoder.DecodeModule(nullptr, allocator_, verify_functions); decoder.DecodeModule(nullptr, allocator_, verify_functions);
out_ << "]"; out_ << "]";
if (total_bytes_ != wire_bytes_.length()) { if (total_bytes_ != wire_bytes_.length()) {
std::cerr << "WARNING: OUTPUT INCOMPLETE. Disassembled " << total_bytes_ std::cerr << "WARNING: OUTPUT INCOMPLETE. Disassembled " << total_bytes_
<< " out of " << wire_bytes_.length() << " bytes.\n"; << " out of " << wire_bytes_.length() << " bytes.\n";
// TODO(jkummerow): Would it be helpful to DCHECK here? }
// For cleanliness, reset {names_} if it's pointing at a fake.
if (names_ == names_provider.get()) {
names_ = nullptr;
} }
} }
...@@ -504,7 +522,7 @@ class HexDumpModuleDis { ...@@ -504,7 +522,7 @@ class HexDumpModuleDis {
// We don't care about offsets, but we can use these hooks to provide // We don't care about offsets, but we can use these hooks to provide
// helpful indexing comments in long lists. // helpful indexing comments in long lists.
void TypeOffset(uint32_t offset) { void TypeOffset(uint32_t offset) {
if (module_->types.size() > 3) { if (!module_ || module_->types.size() > 3) {
description_ << "type #" << next_type_index_ << " "; description_ << "type #" << next_type_index_ << " ";
names_->PrintTypeName(description_, next_type_index_); names_->PrintTypeName(description_, next_type_index_);
next_type_index_++; next_type_index_++;
...@@ -514,14 +532,20 @@ class HexDumpModuleDis { ...@@ -514,14 +532,20 @@ class HexDumpModuleDis {
description_ << "import #" << next_import_index_++; description_ << "import #" << next_import_index_++;
NextLine(); NextLine();
} }
void ImportsDone() {
const WasmModule* module = decoder_->shared_module().get();
next_table_index_ = static_cast<uint32_t>(module->tables.size());
next_global_index_ = static_cast<uint32_t>(module->globals.size());
next_tag_index_ = static_cast<uint32_t>(module->tags.size());
}
void TableOffset(uint32_t offset) { void TableOffset(uint32_t offset) {
if (module_->tables.size() > 3) { if (!module_ || module_->tables.size() > 3) {
description_ << "table #" << next_table_index_++; description_ << "table #" << next_table_index_++;
} }
} }
void MemoryOffset(uint32_t offset) {} void MemoryOffset(uint32_t offset) {}
void TagOffset(uint32_t offset) { void TagOffset(uint32_t offset) {
if (module_->tags.size() > 3) { if (!module_ || module_->tags.size() > 3) {
description_ << "tag #" << next_tag_index_++ << ":"; description_ << "tag #" << next_tag_index_++ << ":";
} }
} }
...@@ -530,13 +554,13 @@ class HexDumpModuleDis { ...@@ -530,13 +554,13 @@ class HexDumpModuleDis {
} }
void StartOffset(uint32_t offset) {} void StartOffset(uint32_t offset) {}
void ElementOffset(uint32_t offset) { void ElementOffset(uint32_t offset) {
if (module_->elem_segments.size() > 3) { if (!module_ || module_->elem_segments.size() > 3) {
description_ << "segment #" << next_segment_index_++; description_ << "segment #" << next_segment_index_++;
NextLine(); NextLine();
} }
} }
void DataOffset(uint32_t offset) { void DataOffset(uint32_t offset) {
if (module_->data_segments.size() > 3) { if (!module_ || module_->data_segments.size() > 3) {
description_ << "data segment #" << next_data_segment_index_++; description_ << "data segment #" << next_data_segment_index_++;
NextLine(); NextLine();
} }
...@@ -549,7 +573,9 @@ class HexDumpModuleDis { ...@@ -549,7 +573,9 @@ class HexDumpModuleDis {
WasmFeatures detected; WasmFeatures detected;
auto sig = FixedSizeSignature<ValueType>::Returns(expected_type); auto sig = FixedSizeSignature<ValueType>::Returns(expected_type);
uint32_t offset = decoder_->pc_offset(); uint32_t offset = decoder_->pc_offset();
ExtendedFunctionDis d(&zone_, module_, 0, &detected, &sig, start, end, const WasmModule* module = module_;
if (!module) module = decoder_->shared_module().get();
ExtendedFunctionDis d(&zone_, module, 0, &detected, &sig, start, end,
offset, names_); offset, names_);
d.HexdumpConstantExpression(out_); d.HexdumpConstantExpression(out_);
total_bytes_ += static_cast<size_t>(end - start); total_bytes_ += static_cast<size_t>(end - start);
...@@ -559,7 +585,9 @@ class HexDumpModuleDis { ...@@ -559,7 +585,9 @@ class HexDumpModuleDis {
const byte* end = start + func->code.length(); const byte* end = start + func->code.length();
WasmFeatures detected; WasmFeatures detected;
uint32_t offset = static_cast<uint32_t>(start - decoder_->start()); uint32_t offset = static_cast<uint32_t>(start - decoder_->start());
ExtendedFunctionDis d(&zone_, module_, func->func_index, &detected, const WasmModule* module = module_;
if (!module) module = decoder_->shared_module().get();
ExtendedFunctionDis d(&zone_, module, func->func_index, &detected,
func->sig, start, end, offset, names_); func->sig, start, end, offset, names_);
d.HexDump(out_, FunctionBodyDisassembler::kSkipHeader); d.HexDump(out_, FunctionBodyDisassembler::kSkipHeader);
total_bytes_ += func->code.length(); total_bytes_ += func->code.length();
...@@ -606,9 +634,6 @@ class HexDumpModuleDis { ...@@ -606,9 +634,6 @@ class HexDumpModuleDis {
} }
} }
// TODO(jkummerow): Consider using an OnFirstError() override to offer
// help when decoding fails.
private: private:
static constexpr uint32_t kDontCareAboutOffsets = 0; static constexpr uint32_t kDontCareAboutOffsets = 0;
static constexpr uint32_t kMaxBytesPerLine = 8; static constexpr uint32_t kMaxBytesPerLine = 8;
...@@ -701,31 +726,33 @@ class HexDumpModuleDis { ...@@ -701,31 +726,33 @@ class HexDumpModuleDis {
class FormatConverter { class FormatConverter {
public: public:
enum Status { kNotReady, kIoInitialized, kModuleReady };
explicit FormatConverter(const char* input, const char* output) explicit FormatConverter(const char* input, const char* output)
: output_(output), out_(output_.get()) { : output_(output), out_(output_.get()) {
if (!output_.ok()) return; if (!output_.ok()) return;
if (!LoadFile(input)) return; if (!LoadFile(input)) return;
base::Vector<const byte> wire_bytes(raw_bytes_.data(), raw_bytes_.size()); base::Vector<const byte> wire_bytes(raw_bytes_.data(), raw_bytes_.size());
wire_bytes_ = ModuleWireBytes({raw_bytes_.data(), raw_bytes_.size()}); wire_bytes_ = ModuleWireBytes({raw_bytes_.data(), raw_bytes_.size()});
status_ = kIoInitialized;
ModuleResult result = ModuleResult result =
DecodeWasmModuleForDisassembler(start(), end(), &allocator_); DecodeWasmModuleForDisassembler(start(), end(), &allocator_);
if (result.failed()) { if (result.failed()) {
WasmError error = result.error(); WasmError error = result.error();
std::cerr << "Decoding error: " << error.message() << " at offset " std::cerr << "Decoding error: " << error.message() << " at offset "
<< error.offset() << "\n"; << error.offset() << "\n";
// TODO(jkummerow): Show some disassembly.
return; return;
} }
ok_ = true; status_ = kModuleReady;
module_ = result.value(); module_ = result.value();
names_provider_ = names_provider_ =
std::make_unique<NamesProvider>(module_.get(), wire_bytes); std::make_unique<NamesProvider>(module_.get(), wire_bytes);
} }
bool ok() const { return ok_; } Status status() const { return status_; }
void ListFunctions() { void ListFunctions() {
DCHECK(ok_); DCHECK_EQ(status_, kModuleReady);
const WasmModule* m = module(); const WasmModule* m = module();
uint32_t num_functions = static_cast<uint32_t>(m->functions.size()); uint32_t num_functions = static_cast<uint32_t>(m->functions.size());
out_ << "There are " << num_functions << " functions (" out_ << "There are " << num_functions << " functions ("
...@@ -742,7 +769,7 @@ class FormatConverter { ...@@ -742,7 +769,7 @@ class FormatConverter {
} }
void SectionStats() { void SectionStats() {
DCHECK(ok_); DCHECK_EQ(status_, kModuleReady);
Decoder decoder(start(), end()); Decoder decoder(start(), end());
decoder.consume_bytes(kModuleHeaderSize, "module header"); decoder.consume_bytes(kModuleHeaderSize, "module header");
...@@ -771,7 +798,7 @@ class FormatConverter { ...@@ -771,7 +798,7 @@ class FormatConverter {
} }
void Strip() { void Strip() {
DCHECK(ok_); DCHECK_EQ(status_, kModuleReady);
Decoder decoder(start(), end()); Decoder decoder(start(), end());
out_.write(reinterpret_cast<const char*>(decoder.pc()), kModuleHeaderSize); out_.write(reinterpret_cast<const char*>(decoder.pc()), kModuleHeaderSize);
decoder.consume_bytes(kModuleHeaderSize); decoder.consume_bytes(kModuleHeaderSize);
...@@ -785,7 +812,7 @@ class FormatConverter { ...@@ -785,7 +812,7 @@ class FormatConverter {
} }
void InstructionStats() { void InstructionStats() {
DCHECK(ok_); DCHECK_EQ(status_, kModuleReady);
Zone zone(&allocator_, "disassembler"); Zone zone(&allocator_, "disassembler");
InstructionStatistics stats; InstructionStatistics stats;
for (uint32_t i = module()->num_imported_functions; for (uint32_t i = module()->num_imported_functions;
...@@ -803,7 +830,7 @@ class FormatConverter { ...@@ -803,7 +830,7 @@ class FormatConverter {
} }
void DisassembleFunction(uint32_t func_index, OutputMode mode) { void DisassembleFunction(uint32_t func_index, OutputMode mode) {
DCHECK(ok_); DCHECK_EQ(status_, kModuleReady);
MultiLineStringBuilder sb; MultiLineStringBuilder sb;
if (func_index >= module()->functions.size()) { if (func_index >= module()->functions.size()) {
sb << "Invalid function index!\n"; sb << "Invalid function index!\n";
...@@ -838,7 +865,7 @@ class FormatConverter { ...@@ -838,7 +865,7 @@ class FormatConverter {
} }
void WatForModule() { void WatForModule() {
DCHECK(ok_); DCHECK_EQ(status_, kModuleReady);
MultiLineStringBuilder sb; MultiLineStringBuilder sb;
ModuleDisassembler md(sb, module(), names(), wire_bytes_, &allocator_); ModuleDisassembler md(sb, module(), names(), wire_bytes_, &allocator_);
md.PrintModule({0, 2}); md.PrintModule({0, 2});
...@@ -846,7 +873,9 @@ class FormatConverter { ...@@ -846,7 +873,9 @@ class FormatConverter {
} }
void HexdumpForModule() { void HexdumpForModule() {
DCHECK(ok_); DCHECK_NE(status_, kNotReady);
DCHECK_IMPLIES(status_ == kIoInitialized,
module() == nullptr && names() == nullptr);
MultiLineStringBuilder sb; MultiLineStringBuilder sb;
HexDumpModuleDis md(sb, module(), names(), wire_bytes_, &allocator_); HexDumpModuleDis md(sb, module(), names(), wire_bytes_, &allocator_);
md.PrintModule(); md.PrintModule();
...@@ -1021,7 +1050,7 @@ class FormatConverter { ...@@ -1021,7 +1050,7 @@ class FormatConverter {
AccountingAllocator allocator_; AccountingAllocator allocator_;
Output output_; Output output_;
std::ostream& out_; std::ostream& out_;
bool ok_{false}; Status status_{kNotReady};
std::vector<byte> raw_bytes_; std::vector<byte> raw_bytes_;
ModuleWireBytes wire_bytes_{{}}; ModuleWireBytes wire_bytes_{{}};
std::shared_ptr<WasmModule> module_; std::shared_ptr<WasmModule> module_;
...@@ -1160,7 +1189,13 @@ int main(int argc, char** argv) { ...@@ -1160,7 +1189,13 @@ int main(int argc, char** argv) {
v8::V8::Initialize(); v8::V8::Initialize();
FormatConverter fc(options.input, options.output); FormatConverter fc(options.input, options.output);
if (!fc.ok()) return 1; if (fc.status() == FormatConverter::kNotReady) return 1;
// Allow hex dumping invalid modules.
if (fc.status() != FormatConverter::kModuleReady &&
options.action != Action::kFullHexdump) {
std::cerr << "Consider using --full-hexdump to learn more.\n";
return 1;
}
switch (options.action) { switch (options.action) {
case Action::kListFunctions: case Action::kListFunctions:
fc.ListFunctions(); fc.ListFunctions();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment