// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_WASM_DECODER_H_
#define V8_WASM_DECODER_H_

#include <cinttypes>
#include <cstdarg>
#include <memory>

#include "src/base/compiler-specific.h"
#include "src/base/memory.h"
#include "src/codegen/signature.h"
#include "src/flags/flags.h"
#include "src/utils/utils.h"
#include "src/utils/vector.h"
#include "src/wasm/wasm-opcodes.h"
#include "src/wasm/wasm-result.h"
#include "src/zone/zone-containers.h"

namespace v8 {
namespace internal {
namespace wasm {

// Forwards its printf-style arguments to PrintF, but only if {cond} holds and
// {FLAG_trace_wasm_decoder} is set. {cond} is not evaluated when the flag is
// off. The do/while wrapper makes the macro usable like a single statement.
#define TRACE_IF(cond, ...)                                     \
  do {                                                          \
    if (FLAG_trace_wasm_decoder && (cond)) PrintF(__VA_ARGS__); \
  } while (false)

// Unconditional variant of TRACE_IF: prints whenever
// {FLAG_trace_wasm_decoder} is set.
#define TRACE(...) TRACE_IF(true, __VA_ARGS__)
34

35 36
// A {DecodeResult} only stores the failure / success status, but no data. It
// is an alias of {VoidResult}.
using DecodeResult = VoidResult;
37

38 39 40 41
// A helper utility to decode bytes, integers, fields, varints, etc, from
// a buffer of bytes.
class Decoder {
 public:
42 43 44 45 46 47
  // Template flag of the read_* methods. With {kValidate}, reads are bounds
  // checked and failures are reported as decoder errors; with {kNoValidate}
  // the same conditions are only DCHECKed.
  enum ValidateFlag : bool { kValidate = true, kNoValidate = false };

  // Template flag of {read_leb}. With {kAdvancePc}, {pc_} is moved past the
  // decoded value; with {kNoAdvancePc}, {pc_} is left untouched.
  enum AdvancePCFlag : bool { kAdvancePc = true, kNoAdvancePc = false };

  // Template flag of {read_leb}. With {kTrace}, the consumed bytes and the
  // decoded value are passed to the tracing macros.
  enum TraceFlag : bool { kTrace = true, kNoTrace = false };

48
  Decoder(const byte* start, const byte* end, uint32_t buffer_offset = 0)
49
      : Decoder(start, start, end, buffer_offset) {}
50
  explicit Decoder(const Vector<const byte> bytes, uint32_t buffer_offset = 0)
51
      : Decoder(bytes.begin(), bytes.begin() + bytes.length(), buffer_offset) {}
52 53
  Decoder(const byte* start, const byte* pc, const byte* end,
          uint32_t buffer_offset = 0)
54 55 56 57 58
      : start_(start), pc_(pc), end_(end), buffer_offset_(buffer_offset) {
    DCHECK_LE(start, pc);
    DCHECK_LE(pc, end);
    DCHECK_EQ(static_cast<uint32_t>(end - start), end - start);
  }
59

60
  // Virtual because the class is meant to be subclassed (see the virtual
  // {onFirstError} hook).
  virtual ~Decoder() = default;
61

62
  inline bool validate_size(const byte* pc, uint32_t length, const char* msg) {
63
    DCHECK_LE(start_, pc);
64
    if (V8_UNLIKELY(pc > end_ || length > static_cast<uint32_t>(end_ - pc))) {
65
      error(pc, msg);
66 67 68 69 70
      return false;
    }
    return true;
  }

71
  // Reads an 8-bit unsigned integer.
72
  template <ValidateFlag validate>
73
  inline uint8_t read_u8(const byte* pc, const char* msg = "expected 1 byte") {
74
    return read_little_endian<uint8_t, validate>(pc, msg);
75 76
  }

77
  // Reads a 16-bit unsigned integer (little endian).
78
  template <ValidateFlag validate>
79 80
  inline uint16_t read_u16(const byte* pc,
                           const char* msg = "expected 2 bytes") {
81
    return read_little_endian<uint16_t, validate>(pc, msg);
82 83
  }

84
  // Reads a 32-bit unsigned integer (little endian).
85
  template <ValidateFlag validate>
86 87
  inline uint32_t read_u32(const byte* pc,
                           const char* msg = "expected 4 bytes") {
88
    return read_little_endian<uint32_t, validate>(pc, msg);
89 90
  }

91
  // Reads a 64-bit unsigned integer (little endian).
92
  template <ValidateFlag validate>
93 94
  inline uint64_t read_u64(const byte* pc,
                           const char* msg = "expected 8 bytes") {
95
    return read_little_endian<uint64_t, validate>(pc, msg);
96 97
  }

98
  // Reads a variable-length unsigned integer (little endian).
99
  template <ValidateFlag validate>
100
  uint32_t read_u32v(const byte* pc, uint32_t* length,
101
                     const char* name = "LEB32") {
102 103
    return read_leb<uint32_t, validate, kNoAdvancePc, kNoTrace>(pc, length,
                                                                name);
104 105
  }

106
  // Reads a variable-length signed integer (little endian).
107
  template <ValidateFlag validate>
108
  int32_t read_i32v(const byte* pc, uint32_t* length,
109
                    const char* name = "signed LEB32") {
110 111
    return read_leb<int32_t, validate, kNoAdvancePc, kNoTrace>(pc, length,
                                                               name);
112 113
  }

114
  // Reads a variable-length unsigned integer (little endian).
115
  template <ValidateFlag validate>
116
  uint64_t read_u64v(const byte* pc, uint32_t* length,
117
                     const char* name = "LEB64") {
118 119
    return read_leb<uint64_t, validate, kNoAdvancePc, kNoTrace>(pc, length,
                                                                name);
120 121 122
  }

  // Reads a variable-length signed integer (little endian).
123
  template <ValidateFlag validate>
124
  int64_t read_i64v(const byte* pc, uint32_t* length,
125
                    const char* name = "signed LEB64") {
126 127
    return read_leb<int64_t, validate, kNoAdvancePc, kNoTrace>(pc, length,
                                                               name);
128 129
  }

130 131 132 133 134 135 136 137
  // Decodes a signed LEB128-encoded 33-bit integer starting at {pc} and
  // returns it sign-extended to 64 bits. {pc_} is not modified and nothing is
  // traced. The number of bytes read is stored in {*length}; {name} is used in
  // error messages.
  template <ValidateFlag validate>
  int64_t read_i33v(const byte* pc, uint32_t* length,
                    const char* name = "signed LEB33") {
    constexpr size_t kBits = 33;
    const int64_t value =
        read_leb<int64_t, validate, kNoAdvancePc, kNoTrace, kBits>(pc, length,
                                                                   name);
    return value;
  }

138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
  // Reads a prefixed opcode: the prefix byte at {pc} followed by an index.
  // For the SIMD prefix the index is LEB128-encoded and can span several
  // bytes; for every other prefix it is the single byte after the prefix.
  // {length}, if non-null, is set to the number of bytes the index is encoded
  // with (1 for non-SIMD prefixes), or to 0 if the index was rejected.
  // {name} is currently unused.
  // Returns {prefix << 8 | index}. If validation fails, an error is recorded
  // and the index part of the returned opcode is 0.
  // Note that the prefix byte at {pc} itself is read without a bounds check.
  template <ValidateFlag validate>
  WasmOpcode read_prefixed_opcode(const byte* pc, uint32_t* length = nullptr,
                                  const char* name = "prefixed opcode") {
    uint32_t unused_length;
    if (length == nullptr) {
      length = &unused_length;
    }
    uint32_t index;
    if (*pc == WasmOpcode::kSimdPrefix) {
      // SIMD opcodes can be multiple bytes (when LEB128 encoded).
      index = read_u32v<validate>(pc + 1, length, "prefixed opcode index");
      // Only support SIMD opcodes that go up to 0xFF (when decoded). Anything
      // bigger will need 1 more byte, and the '<< 8' below will be wrong.
      if (validate && V8_UNLIKELY(index > 0xff)) {
        errorf(pc, "Invalid SIMD opcode %u", index);
        // Do not let the oversized index bleed into the prefix bits of the
        // returned opcode; report the same failure state as the size check
        // in the non-SIMD branch below.
        index = 0;
        *length = 0;
      }
    } else {
      if (!validate || validate_size(pc, 2, "expected 2 bytes")) {
        DCHECK(validate_size(pc, 2, "expected 2 bytes"));
        index = *(pc + 1);
        *length = 1;
      } else {
        // If kValidate and size validation fails.
        index = 0;
        *length = 0;
      }
    }
    return static_cast<WasmOpcode>((*pc) << 8 | index);
  }

171
  // Reads a 8-bit unsigned integer (byte) and advances {pc_}.
172 173
  uint8_t consume_u8(const char* name = "uint8_t") {
    return consume_little_endian<uint8_t>(name);
174 175 176
  }

  // Reads a 16-bit unsigned integer (little endian) and advances {pc_}.
177 178
  uint16_t consume_u16(const char* name = "uint16_t") {
    return consume_little_endian<uint16_t>(name);
179 180 181
  }

  // Reads a single 32-bit unsigned integer (little endian) and advances {pc_}.
182 183
  uint32_t consume_u32(const char* name = "uint32_t") {
    return consume_little_endian<uint32_t>(name);
184 185
  }

186
  // Reads a LEB128 variable-length unsigned 32-bit integer and advances {pc_}.
187
  uint32_t consume_u32v(const char* name = nullptr) {
188
    uint32_t length = 0;
189 190
    return read_leb<uint32_t, kValidate, kAdvancePc, kTrace>(pc_, &length,
                                                             name);
191 192
  }

193
  // Reads a LEB128 variable-length signed 32-bit integer and advances {pc_}.
194
  int32_t consume_i32v(const char* name = nullptr) {
195
    uint32_t length = 0;
196
    return read_leb<int32_t, kValidate, kAdvancePc, kTrace>(pc_, &length, name);
197 198 199 200
  }

  // Consume {size} bytes and send them to the bit bucket, advancing {pc_}.
  void consume_bytes(uint32_t size, const char* name = "skip") {
201
    // Only trace if the name is not null.
202
    TRACE_IF(name, "  +%u  %-20s: %u bytes\n", pc_offset(), name, size);
203 204 205
    if (checkAvailable(size)) {
      pc_ += size;
    } else {
206
      pc_ = end_;
207 208 209
    }
  }

210
  // Check that at least {size} bytes exist between {pc_} and {end_}.
211
  bool checkAvailable(uint32_t size) {
212
    DCHECK_LE(pc_, end_);
213
    if (V8_UNLIKELY(size > static_cast<uint32_t>(end_ - pc_))) {
214
      errorf(pc_, "expected %u bytes, fell off end", size);
215 216
      return false;
    }
217
    return true;
218 219
  }

220 221 222 223
  // Do not inline error methods. This has measurable impact on validation time,
  // see https://crbug.com/910432.
  void V8_NOINLINE error(const char* msg) { errorf(pc_offset(), "%s", msg); }
  void V8_NOINLINE error(const uint8_t* pc, const char* msg) {
224 225
    errorf(pc_offset(pc), "%s", msg);
  }
226 227 228
  void V8_NOINLINE error(uint32_t offset, const char* msg) {
    errorf(offset, "%s", msg);
  }
229

230 231
  // Reports a printf-style formatted error at the given {offset}. Only the
  // first reported error is kept (see {verrorf}).
  void V8_NOINLINE PRINTF_FORMAT(3, 4)
      errorf(uint32_t offset, const char* format, ...) {
    va_list arguments;
    va_start(arguments, format);
    verrorf(offset, format, arguments);
    va_end(arguments);
  }
237

238 239
  // Reports a printf-style formatted error at the position {pc}, which is
  // converted to an offset via {pc_offset}. Only the first reported error is
  // kept (see {verrorf}).
  void V8_NOINLINE PRINTF_FORMAT(3, 4)
      errorf(const uint8_t* pc, const char* format, ...) {
    const uint32_t offset = pc_offset(pc);
    va_list arguments;
    va_start(arguments, format);
    verrorf(offset, format, arguments);
    va_end(arguments);
  }

  // Behavior triggered on first error, overridden in subclasses. Called by
  // {verrorf} right after the error has been stored in {error_}; does nothing
  // by default.
  virtual void onFirstError() {}

249 250 251 252 253 254
  // Debugging helper to print a bytes range as hex bytes.
  void traceByteRange(const byte* start, const byte* end) {
    DCHECK_LE(start, end);
    for (const byte* p = start; p < end; ++p) TRACE("%02x ", *p);
  }

255
  // Debugging helper to print bytes up to the end.
256 257
  void traceOffEnd() {
    traceByteRange(pc_, end_);
258 259 260 261
    TRACE("<end>\n");
  }

  // Converts the given value to a {Result}, copying the error if necessary.
262 263
  template <typename T, typename U = typename std::remove_reference<T>::type>
  Result<U> toResult(T&& val) {
264
    if (failed()) {
265 266
      TRACE("Result error: %s\n", error_.message().c_str());
      return Result<U>{error_};
267
    }
268
    return Result<U>{std::forward<T>(val)};
269 270 271
  }

  // Resets the boundaries of this decoder.
272
  void Reset(const byte* start, const byte* end, uint32_t buffer_offset = 0) {
273 274
    DCHECK_LE(start, end);
    DCHECK_EQ(static_cast<uint32_t>(end - start), end - start);
275 276
    start_ = start;
    pc_ = start;
277
    end_ = end;
278
    buffer_offset_ = buffer_offset;
279
    error_ = {};
280 281
  }

282 283 284 285
  // Convenience overload: resets this decoder to cover all of {bytes}.
  void Reset(Vector<const uint8_t> bytes, uint32_t buffer_offset = 0) {
    Reset(bytes.begin(), bytes.end(), buffer_offset);
  }

286
  bool ok() const { return error_.empty(); }
287
  bool failed() const { return !ok(); }
288
  bool more() const { return pc_ < end_; }
289
  const WasmError& error() const { return error_; }
290

291 292
  const byte* start() const { return start_; }
  const byte* pc() const { return pc_; }
293 294 295 296 297
  uint32_t V8_INLINE position() const {
    return static_cast<uint32_t>(pc_ - start_);
  }
  // This needs to be inlined for performance (see https://crbug.com/910432).
  uint32_t V8_INLINE pc_offset(const uint8_t* pc) const {
298 299 300
    DCHECK_LE(start_, pc);
    DCHECK_GE(kMaxUInt32 - buffer_offset_, pc - start_);
    return static_cast<uint32_t>(pc - start_) + buffer_offset_;
301
  }
302
  uint32_t pc_offset() const { return pc_offset(pc_); }
303 304 305 306 307 308 309
  uint32_t buffer_offset() const { return buffer_offset_; }
  // Takes an offset relative to the module start and returns an offset relative
  // to the current buffer of the decoder.
  uint32_t GetBufferRelativeOffset(uint32_t offset) const {
    DCHECK_LE(buffer_offset_, offset);
    return offset - buffer_offset_;
  }
310
  const byte* end() const { return end_; }
311
  void set_end(const byte* end) { end_ = end; }
312

313 314 315 316 317 318
  // Returns true if the byte {offset} bytes after {pc_} lies within the buffer
  // and equals {expected}. Neither {pc_} nor the error state is modified.
  bool lookahead(int offset, byte expected) {
    DCHECK_LE(pc_, end_);
    // Bail out before touching memory if fewer than {offset + 1} bytes remain.
    if (end_ - pc_ <= offset) return false;
    return pc_[offset] == expected;
  }

319 320 321
 protected:
  const byte* start_;
  const byte* pc_;
322
  const byte* end_;
323 324
  // The offset of the current buffer in the module. Needed for streaming.
  uint32_t buffer_offset_;
325
  WasmError error_;
326 327

 private:
328 329 330 331 332 333 334
  void verrorf(uint32_t offset, const char* format, va_list args) {
    // Only report the first error.
    if (!ok()) return;
    constexpr int kMaxErrorMsg = 256;
    EmbeddedVector<char, kMaxErrorMsg> buffer;
    int len = VSNPrintF(buffer, format, args);
    CHECK_LT(0, len);
335
    error_ = {offset, {buffer.begin(), static_cast<size_t>(len)}};
336 337 338
    onFirstError();
  }

339
  template <typename IntType, bool validate>
340
  inline IntType read_little_endian(const byte* pc, const char* msg) {
341 342 343
    if (!validate) {
      DCHECK(validate_size(pc, sizeof(IntType), msg));
    } else if (!validate_size(pc, sizeof(IntType), msg)) {
344 345
      return IntType{0};
    }
346
    return base::ReadLittleEndianValue<IntType>(reinterpret_cast<Address>(pc));
347 348 349 350
  }

  template <typename IntType>
  inline IntType consume_little_endian(const char* name) {
351
    TRACE("  +%u  %-20s: ", pc_offset(), name);
352 353 354 355 356 357 358 359 360 361 362 363
    if (!checkAvailable(sizeof(IntType))) {
      traceOffEnd();
      pc_ = end_;
      return IntType{0};
    }
    IntType val = read_little_endian<IntType, false>(pc_, name);
    traceByteRange(pc_, pc_ + sizeof(IntType));
    TRACE("= %d\n", val);
    pc_ += sizeof(IntType);
    return val;
  }

364
  template <typename IntType, ValidateFlag validate, AdvancePCFlag advance_pc,
365
            TraceFlag trace, size_t size_in_bits = 8 * sizeof(IntType)>
366
  inline IntType read_leb(const byte* pc, uint32_t* length,
367
                          const char* name = "varint") {
368
    DCHECK_IMPLIES(advance_pc, pc == pc_);
369 370
    static_assert(size_in_bits <= 8 * sizeof(IntType),
                  "leb does not fit in type");
371
    TRACE_IF(trace, "  +%u  %-20s: ", pc_offset(), name);
372 373
    return read_leb_tail<IntType, validate, advance_pc, trace, size_in_bits, 0>(
        pc, length, name, 0);
374 375
  }

376
  template <typename IntType, ValidateFlag validate, AdvancePCFlag advance_pc,
377
            TraceFlag trace, size_t size_in_bits, int byte_index>
378 379 380
  IntType read_leb_tail(const byte* pc, uint32_t* length, const char* name,
                        IntType result) {
    constexpr bool is_signed = std::is_signed<IntType>::value;
381
    constexpr int kMaxLength = (size_in_bits + 6) / 7;
382 383 384
    static_assert(byte_index < kMaxLength, "invalid template instantiation");
    constexpr int shift = byte_index * 7;
    constexpr bool is_last_byte = byte_index == kMaxLength - 1;
385
    const bool at_end = validate && pc >= end_;
386
    byte b = 0;
387
    if (V8_LIKELY(!at_end)) {
Clemens Hammacher's avatar
Clemens Hammacher committed
388
      DCHECK_LT(pc, end_);
389
      b = *pc;
390
      TRACE_IF(trace, "%02x ", b);
391
      using Unsigned = typename std::make_unsigned<IntType>::type;
392 393
      result = result |
               (static_cast<Unsigned>(static_cast<IntType>(b) & 0x7f) << shift);
394 395 396 397 398 399
    }
    if (!is_last_byte && (b & 0x80)) {
      // Make sure that we only instantiate the template for valid byte indexes.
      // Compilers are not smart enough to figure out statically that the
      // following call is unreachable if is_last_byte is false.
      constexpr int next_byte_index = byte_index + (is_last_byte ? 0 : 1);
400
      return read_leb_tail<IntType, validate, advance_pc, trace, size_in_bits,
401 402 403 404
                           next_byte_index>(pc + 1, length, name, result);
    }
    if (advance_pc) pc_ = pc + (at_end ? 0 : 1);
    *length = byte_index + (at_end ? 0 : 1);
405
    if (validate && V8_UNLIKELY(at_end || (b & 0x80))) {
406
      TRACE_IF(trace, at_end ? "<end> " : "<length overflow> ");
407
      errorf(pc, "expected %s", name);
408 409 410
      result = 0;
    }
    if (is_last_byte) {
411 412 413 414 415 416
      // A signed-LEB128 must sign-extend the final byte, excluding its
      // most-significant bit; e.g. for a 32-bit LEB128:
      //   kExtraBits = 4  (== 32 - (5-1) * 7)
      // For unsigned values, the extra bits must be all zero.
      // For signed values, the extra bits *plus* the most significant bit must
      // either be 0, or all ones.
417
      constexpr int kExtraBits = size_in_bits - ((kMaxLength - 1) * 7);
418 419 420
      constexpr int kSignExtBits = kExtraBits - (is_signed ? 1 : 0);
      const byte checked_bits = b & (0xFF << kSignExtBits);
      constexpr byte kSignExtendedExtraBits = 0x7f & (0xFF << kSignExtBits);
421
      const bool valid_extra_bits =
422 423
          checked_bits == 0 ||
          (is_signed && checked_bits == kSignExtendedExtraBits);
424
      if (!validate) {
425
        DCHECK(valid_extra_bits);
426
      } else if (V8_UNLIKELY(!valid_extra_bits)) {
427
        error(pc, "extra bits in varint");
428
        result = 0;
429 430
      }
    }
431
    constexpr int sign_ext_shift =
432
        is_signed ? Max(0, int{8 * sizeof(IntType)} - shift - 7) : 0;
433 434
    // Perform sign extension.
    result = (result << sign_ext_shift) >> sign_ext_shift;
435 436 437 438
    if (trace && is_signed) {
      TRACE("= %" PRIi64 "\n", static_cast<int64_t>(result));
    } else if (trace) {
      TRACE("= %" PRIu64 "\n", static_cast<uint64_t>(result));
439
    }
440
    return result;
441
  }
442 443 444 445 446 447 448 449
};

#undef TRACE
}  // namespace wasm
}  // namespace internal
}  // namespace v8

#endif  // V8_WASM_DECODER_H_