[wasm] Check for UTF-8 validity of section names

According to the spec, section names must be valid UTF-8. This CL adds a check for that. Imported and exported names were already checked before. In order to use the {consume_string} function from the WasmSectionIterator, this CL moves it out of the ModuleDecoder into the anonymous namespace. The function now also takes a name for the string to be parsed, for better error messages. R=rossberg@chromium.org Change-Id: I20b1ddb0bd1c7ada237d8303951073310fe1c714 Reviewed-on: https://chromium-review.googlesource.com/470207 Commit-Queue: Clemens Hammacher <clemensh@chromium.org> Reviewed-by: Andreas Rossberg <rossberg@chromium.org> Cr-Commit-Position: refs/heads/master@{#44550}

[wasm] Check for UTF-8 validity of section names
According to the spec, section names must be valid UTF-8. This CL adds a check for that. Imported and exported names were already checked before. In order to use the {consume_string} function from the WasmSectionIterator, this CL moves it out of the ModuleDecoder into the anonymous namespace. The function now also takes a name for the string to be parsed, for better error messages. R=rossberg@chromium.org Change-Id: I20b1ddb0bd1c7ada237d8303951073310fe1c714 Reviewed-on: https://chromium-review.googlesource.com/470207 Commit-Queue: Clemens Hammacher <clemensh@chromium.org> Reviewed-by: Andreas Rossberg <rossberg@chromium.org> Cr-Commit-Position: refs/heads/master@{#44550}
81c7135c · Clemens Hammacher · Commit Bot · 26e5d012 · 81c7135c · 81c7135c
Commit 81c7135c authored Apr 11, 2017 by Clemens Hammacher Committed by Commit Bot Apr 11, 2017
Hide whitespace changes
Inline Side-by-side

Showing with 36 additions and 26 deletions

module-decoder.cc src/wasm/module-decoder.cc +31 -26

module-decoder-unittest.cc test/unittests/wasm/module-decoder-unittest.cc +5 -0

No files found.
--- a/src/wasm/module-decoder.cc
+++ b/src/wasm/module-decoder.cc
@@ -90,6 +90,24 @@ ValueType TypeOf(const WasmModule* module, const WasmInitExpr& expr) {
  }
 }

+// Reads a length-prefixed string, checking that it is within bounds. Returns
+// the offset of the string, and the length as an out parameter.
+uint32_t consume_string(Decoder& decoder, uint32_t* length, bool validate_utf8,
+                        const char* name) {
+  *length = decoder.consume_u32v("string length");
+  uint32_t offset = decoder.pc_offset();
+  const byte* string_start = decoder.pc();
+  // Consume bytes before validation to guarantee that the string is not oob.
+  if (*length > 0) {
+    decoder.consume_bytes(*length, name);
+    if (decoder.ok() && validate_utf8 &&
+        !unibrow::Utf8::Validate(string_start, *length)) {
+      decoder.errorf(string_start, "%s: no valid UTF-8 string", name);
+    }
+  }
+  return offset;
+}
+
 // An iterator over the sections in a WASM binary module.
 // Automatically skips all unknown sections.
 class WasmSectionIterator {
@@ -166,14 +184,14 @@ class WasmSectionIterator {

      if (section_code == kUnknownSectionCode) {
        // Check for the known "name" section.
-        uint32_t string_length = decoder_.consume_u32v("section name length");
-        const byte* section_name_start = decoder_.pc();
-        decoder_.consume_bytes(string_length, "section name");
+        uint32_t string_length;
+        uint32_t string_offset = wasm::consume_string(decoder_, &string_length,
+                                                      true, "section name");
        if (decoder_.failed() || decoder_.pc() > section_end_) {
-          TRACE("Section name of length %u couldn't be read\n", string_length);
          section_code_ = kUnknownSectionCode;
          return;
        }
+        const byte* section_name_start = decoder_.start() + string_offset;
        payload_start_ = decoder_.pc();

        TRACE("  +%d  section name        : \"%.*s\"\n",
@@ -316,9 +334,9 @@ class ModuleDecoder : public Decoder {
        WasmImport* import = &module->import_table.back();
        const byte* pos = pc_;
        import->module_name_offset =
-            consume_string(&import->module_name_length, true);
+            consume_string(&import->module_name_length, true, "module name");
        import->field_name_offset =
-            consume_string(&import->field_name_length, true);
+            consume_string(&import->field_name_length, true, "field name");

        import->kind = static_cast<WasmExternalKind>(consume_u8("import kind"));
        switch (import->kind) {
@@ -475,7 +493,8 @@ class ModuleDecoder : public Decoder {
        });
        WasmExport* exp = &module->export_table.back();

-        exp->name_offset = consume_string(&exp->name_length, true);
+        exp->name_offset =
+            consume_string(&exp->name_length, true, "field name");
        const byte* pos = pc();
        exp->kind = static_cast<WasmExternalKind>(consume_u8("export kind"));
        switch (exp->kind) {
@@ -667,7 +686,8 @@ class ModuleDecoder : public Decoder {
          for (; inner.ok() && functions_count > 0; --functions_count) {
            uint32_t function_index = inner.consume_u32v("function index");
            uint32_t name_length = 0;
-            uint32_t name_offset = consume_string(inner, &name_length, false);
+            uint32_t name_offset = wasm::consume_string(inner, &name_length,
+                                                        false, "function name");
            // Be lenient with errors in the name section: Ignore illegal
            // or out-of-order indexes and non-UTF8 names. You can even assign
            // to the same function multiple times (last valid one wins).
@@ -865,24 +885,9 @@ class ModuleDecoder : public Decoder {
    }
  }

-  uint32_t consume_string(uint32_t* length, bool validate_utf8) {
-    return consume_string(*this, length, validate_utf8);
-  }
-
-  // Reads a length-prefixed string, checking that it is within bounds. Returns
-  // the offset of the string, and the length as an out parameter.
-  uint32_t consume_string(Decoder& decoder, uint32_t* length,
-                          bool validate_utf8) {
-    *length = decoder.consume_u32v("string length");
-    uint32_t offset = decoder.pc_offset();
-    const byte* string_start = decoder.pc();
-    // Consume bytes before validation to guarantee that the string is not oob.
-    if (*length > 0) decoder.consume_bytes(*length, "string");
-    if (decoder.ok() && validate_utf8 &&
-        !unibrow::Utf8::Validate(string_start, *length)) {
-      decoder.error(string_start, "no valid UTF-8 string");
-    }
-    return offset;
+  uint32_t consume_string(uint32_t* length, bool validate_utf8,
+                          const char* name) {
+    return wasm::consume_string(*this, length, validate_utf8, name);
  }

  uint32_t consume_sig_index(WasmModule* module, FunctionSig** sig) {

--- a/test/unittests/wasm/module-decoder-unittest.cc
+++ b/test/unittests/wasm/module-decoder-unittest.cc
@@ -1556,6 +1556,11 @@ TEST_F(WasmModuleVerifyTest, Multiple_Named_Sections) {
  EXPECT_VERIFIES(data);
 }

+TEST_F(WasmModuleVerifyTest, Section_Name_No_UTF8) {
+  static const byte data[] = {SECTION(Unknown, 4), 1, 0xff, 17, 18};
+  EXPECT_FAILURE(data);
+}
+
 class WasmModuleCustomSectionTest : public TestWithIsolateAndZone {
 public:
  void CheckSections(const byte* module_start, const byte* module_end,