Commit d43969ea authored by Jakob Kummerow's avatar Jakob Kummerow Committed by V8 LUCI CQ

[tools][wasm] wami: Support hexdump for invalid modules

When trying to understand why a given module fails to validate, it
can be helpful to disassemble it as far as possible until reaching
the erroneous byte(s).

Change-Id: I0056ba1a81b85a486c0446d15bbf54ccb2e8332e
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3827866Reviewed-by: 's avatarAndreas Haas <ahaas@chromium.org>
Commit-Queue: Jakob Kummerow <jkummerow@chromium.org>
Cr-Commit-Position: refs/heads/main@{#82433}
parent a19316d9
......@@ -246,7 +246,6 @@ HeapType read_heap_type(Decoder* decoder, const byte* pc,
decoder, pc,
"invalid heap type '%s', enable with --experimental-wasm-gc",
HeapType::from_code(code).name().c_str());
return HeapType(HeapType::kBottom);
}
V8_FALLTHROUGH;
case kExternRefCode:
......@@ -261,7 +260,6 @@ HeapType read_heap_type(Decoder* decoder, const byte* pc,
"invalid heap type '%s', enable with "
"--experimental-wasm-stringref",
HeapType::from_code(code).name().c_str());
return HeapType(HeapType::kBottom);
}
return HeapType::from_code(code);
default:
......@@ -274,7 +272,6 @@ HeapType read_heap_type(Decoder* decoder, const byte* pc,
DecodeError<validate>(decoder, pc,
"Invalid indexed heap type, enable with "
"--experimental-wasm-typed-funcref");
return HeapType(HeapType::kBottom);
}
uint32_t type_index = static_cast<uint32_t>(heap_index);
if (!VALIDATE(type_index < kV8MaxWasmTypes)) {
......@@ -289,7 +286,6 @@ HeapType read_heap_type(Decoder* decoder, const byte* pc,
if (!VALIDATE(module == nullptr || type_index < module->types.capacity())) {
DecodeError<validate>(decoder, pc, "Type index %u is out of bounds",
type_index);
return HeapType(HeapType::kBottom);
}
return HeapType(type_index);
}
......
......@@ -33,6 +33,7 @@ class NoTracer {
// Hooks for extracting byte offsets of things.
void TypeOffset(uint32_t offset) {}
void ImportOffset(uint32_t offset) {}
void ImportsDone() {}
void TableOffset(uint32_t offset) {}
void MemoryOffset(uint32_t offset) {}
void TagOffset(uint32_t offset) {}
......@@ -866,6 +867,7 @@ class ModuleDecoderTemplate : public Decoder {
break;
}
}
tracer_.ImportsDone();
}
void DecodeFunctionSection() {
......@@ -1676,7 +1678,7 @@ class ModuleDecoderTemplate : public Decoder {
}
// Shift the offset by the remaining section payload
offset += section_iter.payload_length();
if (!section_iter.more()) break;
if (!section_iter.more() || !ok()) break;
section_iter.advance(true);
}
......@@ -2203,6 +2205,7 @@ class ModuleDecoderTemplate : public Decoder {
tracer_.NextLineIfFull();
}
tracer_.NextLineIfNonEmpty();
if (failed()) return nullptr;
// Parse return types.
std::vector<ValueType> returns;
......
......@@ -608,6 +608,7 @@ class OffsetsProvider {
void DataOffset(uint32_t offset) { data_offsets_.push_back(offset); }
// Unused by this tracer:
void ImportsDone() {}
void Bytes(const byte* start, uint32_t count) {}
void Description(const char* desc) {}
void Description(const char* desc, size_t length) {}
......
......@@ -358,10 +358,24 @@ class ExtendedFunctionDis : public FunctionBodyDisassembler {
// A variant of ModuleDisassembler that produces "annotated hex dump" format,
// e.g.:
// 0x01, 0x70, 0x00, // table count 1: funcref no maximum
class HexDumpModuleDis;
class DumpingModuleDecoder : public ModuleDecoderTemplate<HexDumpModuleDis> {
public:
DumpingModuleDecoder(const ModuleWireBytes wire_bytes,
HexDumpModuleDis* module_dis)
: ModuleDecoderTemplate<HexDumpModuleDis>(
WasmFeatures::All(), wire_bytes.start(), wire_bytes.end(),
kWasmOrigin, *module_dis) {}
void onFirstError() override {
// Pretend we've reached the end of the section, but contrary to the
// superclass implementation do so without moving {pc_}, so whatever
// bytes caused the failure can still be dumped correctly.
end_ = pc_;
}
};
class HexDumpModuleDis {
public:
using DumpingModuleDecoder = ModuleDecoderTemplate<HexDumpModuleDis>;
HexDumpModuleDis(MultiLineStringBuilder& out, const WasmModule* module,
NamesProvider* names, const ModuleWireBytes wire_bytes,
AccountingAllocator* allocator)
......@@ -370,35 +384,39 @@ class HexDumpModuleDis {
names_(names),
wire_bytes_(wire_bytes),
allocator_(allocator),
zone_(allocator, "disassembler") {
for (const WasmImport& import : module->import_table) {
switch (import.kind) {
// clang-format off
case kExternalFunction: break;
case kExternalTable: next_table_index_++; break;
case kExternalMemory: break;
case kExternalGlobal: next_global_index_++; break;
case kExternalTag: next_tag_index_++; break;
// clang-format on
}
}
}
zone_(allocator, "disassembler") {}
// Public entrypoint.
void PrintModule() {
constexpr bool verify_functions = false;
DumpingModuleDecoder decoder(WasmFeatures::All(), wire_bytes_.start(),
wire_bytes_.end(), kWasmOrigin, *this);
DumpingModuleDecoder decoder(wire_bytes_, this);
decoder_ = &decoder;
// If the module failed validation, create fakes to allow us to print
// what we can.
std::unique_ptr<WasmModule> fake_module;
std::unique_ptr<NamesProvider> names_provider;
if (!names_) {
fake_module.reset(
new WasmModule(std::make_unique<Zone>(allocator_, "fake module")));
names_provider.reset(
new NamesProvider(fake_module.get(), wire_bytes_.module_bytes()));
names_ = names_provider.get();
}
out_ << "[";
out_.NextLine(0);
constexpr bool verify_functions = false;
decoder.DecodeModule(nullptr, allocator_, verify_functions);
out_ << "]";
if (total_bytes_ != wire_bytes_.length()) {
std::cerr << "WARNING: OUTPUT INCOMPLETE. Disassembled " << total_bytes_
<< " out of " << wire_bytes_.length() << " bytes.\n";
// TODO(jkummerow): Would it be helpful to DCHECK here?
}
// For cleanliness, reset {names_} if it's pointing at a fake.
if (names_ == names_provider.get()) {
names_ = nullptr;
}
}
......@@ -504,7 +522,7 @@ class HexDumpModuleDis {
// We don't care about offsets, but we can use these hooks to provide
// helpful indexing comments in long lists.
void TypeOffset(uint32_t offset) {
if (module_->types.size() > 3) {
if (!module_ || module_->types.size() > 3) {
description_ << "type #" << next_type_index_ << " ";
names_->PrintTypeName(description_, next_type_index_);
next_type_index_++;
......@@ -514,14 +532,20 @@ class HexDumpModuleDis {
description_ << "import #" << next_import_index_++;
NextLine();
}
void ImportsDone() {
const WasmModule* module = decoder_->shared_module().get();
next_table_index_ = static_cast<uint32_t>(module->tables.size());
next_global_index_ = static_cast<uint32_t>(module->globals.size());
next_tag_index_ = static_cast<uint32_t>(module->tags.size());
}
void TableOffset(uint32_t offset) {
if (module_->tables.size() > 3) {
if (!module_ || module_->tables.size() > 3) {
description_ << "table #" << next_table_index_++;
}
}
void MemoryOffset(uint32_t offset) {}
void TagOffset(uint32_t offset) {
if (module_->tags.size() > 3) {
if (!module_ || module_->tags.size() > 3) {
description_ << "tag #" << next_tag_index_++ << ":";
}
}
......@@ -530,13 +554,13 @@ class HexDumpModuleDis {
}
void StartOffset(uint32_t offset) {}
void ElementOffset(uint32_t offset) {
if (module_->elem_segments.size() > 3) {
if (!module_ || module_->elem_segments.size() > 3) {
description_ << "segment #" << next_segment_index_++;
NextLine();
}
}
void DataOffset(uint32_t offset) {
if (module_->data_segments.size() > 3) {
if (!module_ || module_->data_segments.size() > 3) {
description_ << "data segment #" << next_data_segment_index_++;
NextLine();
}
......@@ -549,7 +573,9 @@ class HexDumpModuleDis {
WasmFeatures detected;
auto sig = FixedSizeSignature<ValueType>::Returns(expected_type);
uint32_t offset = decoder_->pc_offset();
ExtendedFunctionDis d(&zone_, module_, 0, &detected, &sig, start, end,
const WasmModule* module = module_;
if (!module) module = decoder_->shared_module().get();
ExtendedFunctionDis d(&zone_, module, 0, &detected, &sig, start, end,
offset, names_);
d.HexdumpConstantExpression(out_);
total_bytes_ += static_cast<size_t>(end - start);
......@@ -559,7 +585,9 @@ class HexDumpModuleDis {
const byte* end = start + func->code.length();
WasmFeatures detected;
uint32_t offset = static_cast<uint32_t>(start - decoder_->start());
ExtendedFunctionDis d(&zone_, module_, func->func_index, &detected,
const WasmModule* module = module_;
if (!module) module = decoder_->shared_module().get();
ExtendedFunctionDis d(&zone_, module, func->func_index, &detected,
func->sig, start, end, offset, names_);
d.HexDump(out_, FunctionBodyDisassembler::kSkipHeader);
total_bytes_ += func->code.length();
......@@ -606,9 +634,6 @@ class HexDumpModuleDis {
}
}
// TODO(jkummerow): Consider using an OnFirstError() override to offer
// help when decoding fails.
private:
static constexpr uint32_t kDontCareAboutOffsets = 0;
static constexpr uint32_t kMaxBytesPerLine = 8;
......@@ -701,31 +726,33 @@ class HexDumpModuleDis {
class FormatConverter {
public:
enum Status { kNotReady, kIoInitialized, kModuleReady };
explicit FormatConverter(const char* input, const char* output)
: output_(output), out_(output_.get()) {
if (!output_.ok()) return;
if (!LoadFile(input)) return;
base::Vector<const byte> wire_bytes(raw_bytes_.data(), raw_bytes_.size());
wire_bytes_ = ModuleWireBytes({raw_bytes_.data(), raw_bytes_.size()});
status_ = kIoInitialized;
ModuleResult result =
DecodeWasmModuleForDisassembler(start(), end(), &allocator_);
if (result.failed()) {
WasmError error = result.error();
std::cerr << "Decoding error: " << error.message() << " at offset "
<< error.offset() << "\n";
// TODO(jkummerow): Show some disassembly.
return;
}
ok_ = true;
status_ = kModuleReady;
module_ = result.value();
names_provider_ =
std::make_unique<NamesProvider>(module_.get(), wire_bytes);
}
bool ok() const { return ok_; }
Status status() const { return status_; }
void ListFunctions() {
DCHECK(ok_);
DCHECK_EQ(status_, kModuleReady);
const WasmModule* m = module();
uint32_t num_functions = static_cast<uint32_t>(m->functions.size());
out_ << "There are " << num_functions << " functions ("
......@@ -742,7 +769,7 @@ class FormatConverter {
}
void SectionStats() {
DCHECK(ok_);
DCHECK_EQ(status_, kModuleReady);
Decoder decoder(start(), end());
decoder.consume_bytes(kModuleHeaderSize, "module header");
......@@ -771,7 +798,7 @@ class FormatConverter {
}
void Strip() {
DCHECK(ok_);
DCHECK_EQ(status_, kModuleReady);
Decoder decoder(start(), end());
out_.write(reinterpret_cast<const char*>(decoder.pc()), kModuleHeaderSize);
decoder.consume_bytes(kModuleHeaderSize);
......@@ -785,7 +812,7 @@ class FormatConverter {
}
void InstructionStats() {
DCHECK(ok_);
DCHECK_EQ(status_, kModuleReady);
Zone zone(&allocator_, "disassembler");
InstructionStatistics stats;
for (uint32_t i = module()->num_imported_functions;
......@@ -803,7 +830,7 @@ class FormatConverter {
}
void DisassembleFunction(uint32_t func_index, OutputMode mode) {
DCHECK(ok_);
DCHECK_EQ(status_, kModuleReady);
MultiLineStringBuilder sb;
if (func_index >= module()->functions.size()) {
sb << "Invalid function index!\n";
......@@ -838,7 +865,7 @@ class FormatConverter {
}
void WatForModule() {
DCHECK(ok_);
DCHECK_EQ(status_, kModuleReady);
MultiLineStringBuilder sb;
ModuleDisassembler md(sb, module(), names(), wire_bytes_, &allocator_);
md.PrintModule({0, 2});
......@@ -846,7 +873,9 @@ class FormatConverter {
}
void HexdumpForModule() {
DCHECK(ok_);
DCHECK_NE(status_, kNotReady);
DCHECK_IMPLIES(status_ == kIoInitialized,
module() == nullptr && names() == nullptr);
MultiLineStringBuilder sb;
HexDumpModuleDis md(sb, module(), names(), wire_bytes_, &allocator_);
md.PrintModule();
......@@ -1021,7 +1050,7 @@ class FormatConverter {
AccountingAllocator allocator_;
Output output_;
std::ostream& out_;
bool ok_{false};
Status status_{kNotReady};
std::vector<byte> raw_bytes_;
ModuleWireBytes wire_bytes_{{}};
std::shared_ptr<WasmModule> module_;
......@@ -1160,7 +1189,13 @@ int main(int argc, char** argv) {
v8::V8::Initialize();
FormatConverter fc(options.input, options.output);
if (!fc.ok()) return 1;
if (fc.status() == FormatConverter::kNotReady) return 1;
// Allow hex dumping invalid modules.
if (fc.status() != FormatConverter::kModuleReady &&
options.action != Action::kFullHexdump) {
std::cerr << "Consider using --full-hexdump to learn more.\n";
return 1;
}
switch (options.action) {
case Action::kListFunctions:
fc.ListFunctions();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment