// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_WASM_DECODER_H_
#define V8_WASM_DECODER_H_

#include <memory>

#include "src/base/compiler-specific.h"
#include "src/flags.h"
#include "src/signature.h"
#include "src/utils.h"
#include "src/wasm/wasm-result.h"
#include "src/zone-containers.h"

namespace v8 {
namespace internal {
namespace wasm {

// Decoder tracing helper.
// In DEBUG builds, TRACE(...) forwards its arguments to PrintF, but only when
// FLAG_trace_wasm_decoder is set. The do/while(false) wrapper makes the
// expansion behave as a single statement. In all other builds the macro
// expands to nothing, so its arguments are not evaluated.
#if DEBUG
#define TRACE(...)                                    \
  do {                                                \
    if (FLAG_trace_wasm_decoder) PrintF(__VA_ARGS__); \
  } while (false)
#else
#define TRACE(...)
#endif

// A helper utility to decode bytes, integers, fields, varints, etc, from
// a buffer of bytes.
class Decoder {
 public:
  Decoder(const byte* start, const byte* end)
      : start_(start),
        pc_(start),
        limit_(end),
38
        end_(end),
39 40 41 42 43
        error_pc_(nullptr),
        error_pt_(nullptr) {}

  virtual ~Decoder() {}

44 45
  inline bool check(const byte* base, unsigned offset, unsigned length,
                    const char* msg) {
46 47
    DCHECK_GE(base, start_);
    if ((base + offset + length) > limit_) {
jfb's avatar
jfb committed
48
      error(base, base + offset, "%s", msg);
49 50 51 52 53 54
      return false;
    }
    return true;
  }

  // Reads a single 8-bit byte, reporting an error if out of bounds.
55
  inline uint8_t checked_read_u8(const byte* base, unsigned offset,
56 57 58 59 60
                                 const char* msg = "expected 1 byte") {
    return check(base, offset, 1, msg) ? base[offset] : 0;
  }

  // Reads 16-bit word, reporting an error if out of bounds.
61
  inline uint16_t checked_read_u16(const byte* base, unsigned offset,
62 63 64 65 66
                                   const char* msg = "expected 2 bytes") {
    return check(base, offset, 2, msg) ? read_u16(base + offset) : 0;
  }

  // Reads 32-bit word, reporting an error if out of bounds.
67
  inline uint32_t checked_read_u32(const byte* base, unsigned offset,
68 69 70 71 72
                                   const char* msg = "expected 4 bytes") {
    return check(base, offset, 4, msg) ? read_u32(base + offset) : 0;
  }

  // Reads 64-bit word, reporting an error if out of bounds.
73
  inline uint64_t checked_read_u64(const byte* base, unsigned offset,
74 75 76 77
                                   const char* msg = "expected 8 bytes") {
    return check(base, offset, 8, msg) ? read_u64(base + offset) : 0;
  }

78
  // Reads a variable-length unsigned integer (little endian).
79 80
  uint32_t checked_read_u32v(const byte* base, unsigned offset,
                             unsigned* length,
81
                             const char* msg = "expected LEB32") {
82
    return checked_read_leb<uint32_t, false>(base, offset, length, msg);
83 84
  }

85
  // Reads a variable-length signed integer (little endian).
86
  int32_t checked_read_i32v(const byte* base, unsigned offset, unsigned* length,
87
                            const char* msg = "expected SLEB32") {
88 89
    uint32_t result =
        checked_read_leb<uint32_t, true>(base, offset, length, msg);
90 91 92
    if (*length == 5) return bit_cast<int32_t>(result);
    if (*length > 0) {
      int shift = 32 - 7 * *length;
93
      // Perform sign extension.
94 95 96 97 98
      return bit_cast<int32_t>(result << shift) >> shift;
    }
    return 0;
  }

99
  // Reads a variable-length unsigned integer (little endian).
100 101
  uint64_t checked_read_u64v(const byte* base, unsigned offset,
                             unsigned* length,
102
                             const char* msg = "expected LEB64") {
103
    return checked_read_leb<uint64_t, false>(base, offset, length, msg);
104 105 106
  }

  // Reads a variable-length signed integer (little endian).
107
  int64_t checked_read_i64v(const byte* base, unsigned offset, unsigned* length,
108
                            const char* msg = "expected SLEB64") {
109 110
    uint64_t result =
        checked_read_leb<uint64_t, true>(base, offset, length, msg);
111 112 113 114 115 116 117 118 119
    if (*length == 10) return bit_cast<int64_t>(result);
    if (*length > 0) {
      int shift = 64 - 7 * *length;
      // Perform sign extension.
      return bit_cast<int64_t>(result << shift) >> shift;
    }
    return 0;
  }

120 121 122
  // Reads a single 16-bit unsigned integer (little endian).
  inline uint16_t read_u16(const byte* ptr) {
    DCHECK(ptr >= start_ && (ptr + 2) <= end_);
123
    return ReadLittleEndianValue<uint16_t>(ptr);
124 125 126 127 128
  }

  // Reads a single 32-bit unsigned integer (little endian).
  inline uint32_t read_u32(const byte* ptr) {
    DCHECK(ptr >= start_ && (ptr + 4) <= end_);
129
    return ReadLittleEndianValue<uint32_t>(ptr);
130 131 132 133 134
  }

  // Reads a single 64-bit unsigned integer (little endian).
  inline uint64_t read_u64(const byte* ptr) {
    DCHECK(ptr >= start_ && (ptr + 8) <= end_);
135
    return ReadLittleEndianValue<uint64_t>(ptr);
136 137
  }

138
  // Reads a 8-bit unsigned integer (byte) and advances {pc_}.
139
  uint8_t consume_u8(const char* name = nullptr) {
140 141 142 143 144 145 146
    TRACE("  +%d  %-20s: ", static_cast<int>(pc_ - start_),
          name ? name : "uint8_t");
    if (checkAvailable(1)) {
      byte val = *(pc_++);
      TRACE("%02x = %d\n", val, val);
      return val;
    }
147
    return traceOffEnd<uint8_t>();
148 149 150
  }

  // Reads a 16-bit unsigned integer (little endian) and advances {pc_}.
151
  uint16_t consume_u16(const char* name = nullptr) {
152 153 154
    TRACE("  +%d  %-20s: ", static_cast<int>(pc_ - start_),
          name ? name : "uint16_t");
    if (checkAvailable(2)) {
155
      uint16_t val = read_u16(pc_);
156 157 158 159
      TRACE("%02x %02x = %d\n", pc_[0], pc_[1], val);
      pc_ += 2;
      return val;
    }
160
    return traceOffEnd<uint16_t>();
161 162 163
  }

  // Reads a single 32-bit unsigned integer (little endian) and advances {pc_}.
164
  uint32_t consume_u32(const char* name = nullptr) {
165 166 167
    TRACE("  +%d  %-20s: ", static_cast<int>(pc_ - start_),
          name ? name : "uint32_t");
    if (checkAvailable(4)) {
168
      uint32_t val = read_u32(pc_);
169 170 171 172
      TRACE("%02x %02x %02x %02x = %u\n", pc_[0], pc_[1], pc_[2], pc_[3], val);
      pc_ += 4;
      return val;
    }
173
    return traceOffEnd<uint32_t>();
174 175 176
  }

  // Reads a LEB128 variable-length 32-bit integer and advances {pc_}.
177
  uint32_t consume_u32v(const char* name = nullptr) {
178 179
    TRACE("  +%d  %-20s: ", static_cast<int>(pc_ - start_),
          name ? name : "varint");
180 181 182 183
    if (checkAvailable(1)) {
      const byte* pos = pc_;
      const byte* end = pc_ + 5;
      if (end > limit_) end = limit_;
184

185 186 187 188 189 190 191 192 193 194
      uint32_t result = 0;
      int shift = 0;
      byte b = 0;
      while (pc_ < end) {
        b = *pc_++;
        TRACE("%02x ", b);
        result = result | ((b & 0x7F) << shift);
        if ((b & 0x80) == 0) break;
        shift += 7;
      }
195

196
      int length = static_cast<int>(pc_ - pos);
197 198
      if (pc_ == end && (b & 0x80)) {
        error(pc_ - 1, "varint too large");
199
      } else if (length == 0) {
200
        error(pc_, "varint of length 0");
201 202 203 204
      } else {
        TRACE("= %u\n", result);
      }
      return result;
205
    }
206
    return traceOffEnd<uint32_t>();
207 208
  }

209 210 211 212 213 214 215 216 217
  // Consume {size} bytes and send them to the bit bucket, advancing {pc_}.
  void consume_bytes(int size) {
    if (checkAvailable(size)) {
      pc_ += size;
    } else {
      pc_ = limit_;
    }
  }

218 219
  // Check that at least {size} bytes exist between {pc_} and {limit_}.
  bool checkAvailable(int size) {
220 221 222 223 224
    intptr_t pc_overflow_value = std::numeric_limits<intptr_t>::max() - size;
    if (size < 0 || (intptr_t)pc_ > pc_overflow_value) {
      error(pc_, nullptr, "reading %d bytes would underflow/overflow", size);
      return false;
    } else if (pc_ < start_ || limit_ < (pc_ + size)) {
225 226 227 228 229 230 231
      error(pc_, nullptr, "expected %d bytes, fell off end", size);
      return false;
    } else {
      return true;
    }
  }

jfb's avatar
jfb committed
232
  void error(const char* msg) { error(pc_, nullptr, "%s", msg); }
233

jfb's avatar
jfb committed
234
  void error(const byte* pc, const char* msg) { error(pc, nullptr, "%s", msg); }
235 236

  // Sets internal error state.
jfb's avatar
jfb committed
237 238
  void PRINTF_FORMAT(4, 5)
      error(const byte* pc, const byte* pt, const char* format, ...) {
239 240 241 242 243 244 245 246 247 248 249 250
    if (ok()) {
#if DEBUG
      if (FLAG_wasm_break_on_decoder_error) {
        base::OS::DebugBreak();
      }
#endif
      const int kMaxErrorMsg = 256;
      char* buffer = new char[kMaxErrorMsg];
      va_list arguments;
      va_start(arguments, format);
      base::OS::VSNPrintF(buffer, kMaxErrorMsg - 1, format, arguments);
      va_end(arguments);
251
      error_msg_.reset(buffer);
252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277
      error_pc_ = pc;
      error_pt_ = pt;
      onFirstError();
    }
  }

  // Behavior triggered on first error, overridden in subclasses.
  virtual void onFirstError() {}

  // Debugging helper to print bytes up to the end.
  template <typename T>
  T traceOffEnd() {
    T t = 0;
    for (const byte* ptr = pc_; ptr < limit_; ptr++) {
      TRACE("%02x ", *ptr);
    }
    TRACE("<end>\n");
    pc_ = limit_;
    return t;
  }

  // Converts the given value to a {Result}, copying the error if necessary.
  template <typename T>
  Result<T> toResult(T val) {
    Result<T> result;
    if (error_pc_) {
278
      TRACE("Result error: %s\n", error_msg_.get());
279 280 281 282
      result.error_code = kError;
      result.start = start_;
      result.error_pc = error_pc_;
      result.error_pt = error_pt_;
283
      // transfer ownership of the error to the result.
284
      result.error_msg.reset(error_msg_.release());
285 286 287
    } else {
      result.error_code = kSuccess;
    }
288
    result.val = std::move(val);
289 290 291 292 293 294 295 296
    return result;
  }

  // Resets the boundaries of this decoder.
  void Reset(const byte* start, const byte* end) {
    start_ = start;
    pc_ = start;
    limit_ = end;
297
    end_ = end;
298 299
    error_pc_ = nullptr;
    error_pt_ = nullptr;
300
    error_msg_.reset();
301 302 303
  }

  bool ok() const { return error_pc_ == nullptr; }
304
  bool failed() const { return !!error_msg_; }
305
  bool more() const { return pc_ < limit_; }
306

307 308
  const byte* start() { return start_; }
  const byte* pc() { return pc_; }
309
  uint32_t pc_offset() { return static_cast<uint32_t>(pc_ - start_); }
310

311 312 313 314
 protected:
  const byte* start_;
  const byte* pc_;
  const byte* limit_;
315
  const byte* end_;
316 317
  const byte* error_pc_;
  const byte* error_pt_;
318
  std::unique_ptr<char[]> error_msg_;
319 320

 private:
321
  template <typename IntType, bool is_signed>
322
  IntType checked_read_leb(const byte* base, unsigned offset, unsigned* length,
323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342
                           const char* msg) {
    if (!check(base, offset, 1, msg)) {
      *length = 0;
      return 0;
    }

    const int kMaxLength = (sizeof(IntType) * 8 + 6) / 7;
    const byte* ptr = base + offset;
    const byte* end = ptr + kMaxLength;
    if (end > limit_) end = limit_;
    int shift = 0;
    byte b = 0;
    IntType result = 0;
    while (ptr < end) {
      b = *ptr++;
      result = result | (static_cast<IntType>(b & 0x7F) << shift);
      if ((b & 0x80) == 0) break;
      shift += 7;
    }
    DCHECK_LE(ptr - (base + offset), kMaxLength);
343
    *length = static_cast<unsigned>(ptr - (base + offset));
344 345 346
    if (ptr == end) {
      // Check there are no bits set beyond the bitwidth of {IntType}.
      const int kExtraBits = (1 + kMaxLength * 7) - (sizeof(IntType) * 8);
347 348
      const byte kExtraBitsMask =
          static_cast<byte>((0xFF << (8 - kExtraBits)) & 0xFF);
349 350 351 352 353 354 355 356 357 358 359 360 361 362
      int extra_bits_value;
      if (is_signed) {
        // A signed-LEB128 must sign-extend the final byte, excluding its
        // most-signifcant bit. e.g. for a 32-bit LEB128:
        //   kExtraBits = 4
        //   kExtraBitsMask = 0xf0
        // If b is 0x0f, the value is negative, so extra_bits_value is 0x70.
        // If b is 0x03, the value is positive, so extra_bits_value is 0x00.
        extra_bits_value = (static_cast<int8_t>(b << kExtraBits) >> 8) &
                           kExtraBitsMask & ~0x80;
      } else {
        extra_bits_value = 0;
      }
      if (*length == kMaxLength && (b & kExtraBitsMask) != extra_bits_value) {
363 364 365 366
        error(base, ptr, "extra bits in varint");
        return 0;
      }
      if ((b & 0x80) != 0) {
jfb's avatar
jfb committed
367
        error(base, ptr, "%s", msg);
368 369 370 371 372
        return 0;
      }
    }
    return result;
  }
373 374 375 376 377 378 379 380
};

#undef TRACE
}  // namespace wasm
}  // namespace internal
}  // namespace v8

#endif  // V8_WASM_DECODER_H_