json-parser.h 11.3 KB
Newer Older
1
// Copyright 2011 the V8 project authors. All rights reserved.
2 3
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4

Yang Guo's avatar
Yang Guo committed
5 6
#ifndef V8_JSON_JSON_PARSER_H_
#define V8_JSON_JSON_PARSER_H_
7

8
#include "include/v8-callbacks.h"
9
#include "src/base/small-vector.h"
10
#include "src/base/strings.h"
11
#include "src/common/high-allocation-throughput-scope.h"
12
#include "src/execution/isolate.h"
13
#include "src/heap/factory.h"
14
#include "src/objects/objects.h"
15
#include "src/zone/zone-containers.h"
16 17 18 19

namespace v8 {
namespace internal {

20
// Two-valued status: an element was found, or it was not.
enum ParseElementResult {
  kElementFound,
  kElementNotFound
};
21

22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82
// Small immutable descriptor that operates in one of two modes, selected by
// is_index():
//   * index mode:  only index() is meaningful; it stores a uint32_t.
//   * string mode: start(), length() and the three boolean flags are
//                  meaningful.
// Every accessor DCHECKs that it is called in the matching mode. The start
// offset and the index share storage through an anonymous union.
class JsonString final {
 public:
  // Creates an empty string-mode descriptor: start 0, length 0, all flags
  // cleared.
  JsonString()
      : start_(0),
        length_(0),
        needs_conversion_(false),
        internalize_(false),
        has_escape_(false),
        is_index_(false) {}

  // Creates an index-mode descriptor holding |index|.
  explicit JsonString(uint32_t index)
      : index_(index),
        length_(0),
        needs_conversion_(false),
        internalize_(false),
        has_escape_(false),
        is_index_(true) {}

  // Creates a string-mode descriptor for |length| units starting at |start|.
  // The internalize flag is set when the caller requests it, and also for any
  // string whose length does not exceed kMaxInternalizedStringValueLength.
  JsonString(int start, int length, bool needs_conversion,
             bool needs_internalization, bool has_escape)
      : start_(start),
        length_(length),
        needs_conversion_(needs_conversion),
        // Read the constructor argument rather than the member so that this
        // initializer does not depend on member declaration order.
        internalize_(needs_internalization ||
                     length <= kMaxInternalizedStringValueLength),
        has_escape_(has_escape),
        is_index_(false) {}

  // String-mode accessors.
  bool internalize() const {
    DCHECK(!is_index_);
    return internalize_;
  }

  bool needs_conversion() const {
    DCHECK(!is_index_);
    return needs_conversion_;
  }

  bool has_escape() const {
    DCHECK(!is_index_);
    return has_escape_;
  }

  int start() const {
    DCHECK(!is_index_);
    return start_;
  }

  int length() const {
    DCHECK(!is_index_);
    return length_;
  }

  // Index-mode accessor.
  uint32_t index() const {
    DCHECK(is_index_);
    return index_;
  }

  // Mode discriminator; valid in either mode.
  bool is_index() const { return is_index_; }

 private:
  // Strings of at most this length always get the internalize flag.
  static constexpr int kMaxInternalizedStringValueLength = 10;

  // Exactly one of these is active, as recorded by is_index_.
  union {
    const int start_;
    const uint32_t index_;
  };
  const int length_;
  const bool needs_conversion_ : 1;
  const bool internalize_ : 1;
  const bool has_escape_ : 1;
  const bool is_index_ : 1;
};

// Pairs a JsonString with an object handle.
struct JsonProperty {
  // Default construction is not expected to happen: it hits UNREACHABLE().
  JsonProperty() { UNREACHABLE(); }

  // Stores a copy of |str|; |value| is left default-constructed.
  explicit JsonProperty(const JsonString& str) : string(str) {}

  JsonString string;
  Handle<Object> value;
};

104
// Post-processing step applied to a parsed value when a reviver is supplied.
// JsonParser::Parse calls Internalize() with the parse result whenever the
// reviver object is callable.
// NOTE(review): the member names mirror the InternalizeJSONProperty operation
// of ECMA-262 JSON.parse; the definitions are not in this header - confirm
// against the implementation file.
class JsonParseInternalizer {
 public:
  // Entry point. Takes the parsed |object| and the |reviver| and returns the
  // resulting object, or an empty MaybeHandle.
  static MaybeHandle<Object> Internalize(Isolate* isolate,
                                         Handle<Object> object,
                                         Handle<Object> reviver);

 private:
  // Instances are only created internally; |reviver| is stored as a
  // JSReceiver handle.
  JsonParseInternalizer(Isolate* isolate, Handle<JSReceiver> reviver)
      : isolate_(isolate), reviver_(reviver) {}

  // Processes the property |key| of |holder|.
  MaybeHandle<Object> InternalizeJsonProperty(Handle<JSReceiver> holder,
                                              Handle<String> key);

  // Helper operating on property |name| of |holder|.
  // NOTE(review): the bool result presumably signals success - confirm.
  bool RecurseAndApply(Handle<JSReceiver> holder, Handle<String> name);

  Isolate* isolate_;
  Handle<JSReceiver> reviver_;
};
122

123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139
// Token categories used by the JSON parser, stored in a single byte.
// The enumerator order defines the numeric values (NUMBER == 0 ... EOS == 13)
// and must not be changed.
enum class JsonToken : uint8_t {
  // Value-starting tokens.
  NUMBER,
  STRING,
  // Structural brackets.
  LBRACE,
  RBRACE,
  LBRACK,
  RBRACK,
  // Keyword literals.
  TRUE_LITERAL,
  FALSE_LITERAL,
  NULL_LITERAL,
  // Separators and filler.
  WHITESPACE,
  COLON,
  COMMA,
  // Anything that is not a valid token, and the end-of-input marker.
  ILLEGAL,
  EOS
};

140
// A simple json parser.
141 142
template <typename Char>
class JsonParser final {
143
 public:
144 145 146
  using SeqString = typename CharTraits<Char>::String;
  using SeqExternalString = typename CharTraits<Char>::ExternalString;

147 148
  V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Parse(
      Isolate* isolate, Handle<String> source, Handle<Object> reviver) {
149 150
    HighAllocationThroughputScope high_throughput_scope(
        V8::GetCurrentPlatform());
151 152 153 154 155 156 157
    Handle<Object> result;
    ASSIGN_RETURN_ON_EXCEPTION(isolate, result,
                               JsonParser(isolate, source).ParseJson(), Object);
    if (reviver->IsCallable()) {
      return JsonParseInternalizer::Internalize(isolate, result, reviver);
    }
    return result;
158 159
  }

160 161 162
  static constexpr base::uc32 kEndOfString = static_cast<base::uc32>(-1);
  static constexpr base::uc32 kInvalidUnicodeCharacter =
      static_cast<base::uc32>(-1);
163 164

 private:
165 166
  template <typename T>
  using SmallVector = base::SmallVector<T, 16>;
167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185
  struct JsonContinuation {
    enum Type : uint8_t { kReturn, kObjectProperty, kArrayElement };
    JsonContinuation(Isolate* isolate, Type type, size_t index)
        : scope(isolate),
          type_(type),
          index(static_cast<uint32_t>(index)),
          max_index(0),
          elements(0) {}

    Type type() const { return static_cast<Type>(type_); }
    void set_type(Type type) { type_ = static_cast<uint8_t>(type); }

    HandleScope scope;
    // Unfortunately GCC doesn't like packing Type in two bits.
    uint32_t type_ : 2;
    uint32_t index : 30;
    uint32_t max_index;
    uint32_t elements;
  };
186

187
  JsonParser(Isolate* isolate, Handle<String> source);
188
  ~JsonParser();
189

190
  // Parse a string containing a single JSON value.
191
  MaybeHandle<Object> ParseJson();
192

193 194
  void advance() { ++cursor_; }

195
  base::uc32 CurrentCharacter() {
196 197 198 199
    if (V8_UNLIKELY(is_at_end())) return kEndOfString;
    return *cursor_;
  }

200
  base::uc32 NextCharacter() {
201 202 203 204 205 206 207 208 209 210 211 212 213
    advance();
    return CurrentCharacter();
  }

  void AdvanceToNonDecimal();

  V8_INLINE JsonToken peek() const { return next_; }

  void Consume(JsonToken token) {
    DCHECK_EQ(peek(), token);
    advance();
  }

214 215
  void Expect(JsonToken token,
              base::Optional<MessageTemplate> errorMessage = base::nullopt) {
216 217 218
    if (V8_LIKELY(peek() == token)) {
      advance();
    } else {
219 220
      errorMessage ? ReportUnexpectedToken(peek(), errorMessage.value())
                   : ReportUnexpectedToken(peek());
221 222 223
    }
  }

224 225 226
  void ExpectNext(
      JsonToken token,
      base::Optional<MessageTemplate> errorMessage = base::nullopt) {
227
    SkipWhitespace();
228
    errorMessage ? Expect(token, errorMessage.value()) : Expect(token);
229 230 231 232 233 234 235 236 237 238 239 240 241 242 243
  }

  bool Check(JsonToken token) {
    SkipWhitespace();
    if (next_ != token) return false;
    advance();
    return true;
  }

  template <size_t N>
  void ScanLiteral(const char (&s)[N]) {
    DCHECK(!is_at_end());
    // There's at least 1 character, we always consume a character and compare
    // the next character. The first character was compared before we jumped
    // to ScanLiteral.
244
    static_assert(N > 2);
245 246
    size_t remaining = static_cast<size_t>(end_ - cursor_);
    if (V8_LIKELY(remaining >= N - 1 &&
247
                  CompareCharsEqual(s + 1, cursor_ + 1, N - 2))) {
248 249 250 251 252
      cursor_ += N - 1;
      return;
    }

    cursor_++;
253
    for (size_t i = 0; i < std::min(N - 2, remaining - 1); i++) {
254 255 256 257 258 259 260 261 262 263
      if (*(s + 1 + i) != *cursor_) {
        ReportUnexpectedCharacter(*cursor_);
        return;
      }
      cursor_++;
    }

    DCHECK(is_at_end());
    ReportUnexpectedToken(JsonToken::EOS);
  }
264

265 266 267
  // The JSON lexical grammar is specified in the ECMAScript 5 standard,
  // section 15.12.1.1. The only allowed whitespace characters between tokens
  // are tab, carriage-return, newline and space.
268
  void SkipWhitespace();
269 270 271 272 273

  // A JSON string (production JSONString) is subset of valid JavaScript string
  // literals. The string must only be double-quoted (not single-quoted), and
  // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and
  // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid.
274 275
  JsonString ScanJsonString(bool needs_internalization);
  JsonString ScanJsonPropertyKey(JsonContinuation* cont);
276
  base::uc32 ScanUnicodeCharacter();
277 278
  Handle<String> MakeString(const JsonString& string,
                            Handle<String> hint = Handle<String>());
279

280 281 282
  template <typename SinkChar>
  void DecodeString(SinkChar* sink, int start, int length);

283 284 285 286
  template <typename SinkSeqString>
  Handle<String> DecodeString(const JsonString& string,
                              Handle<SinkSeqString> intermediate,
                              Handle<String> hint);
287 288 289 290 291 292 293

  // A JSON number (production JSONNumber) is a subset of the valid JavaScript
  // decimal number literals.
  // It includes an optional minus sign, must have at least one
  // digit before and after a decimal point, may not have prefixed zeros (unless
  // the integer part is zero), and may include an exponent part (e.g., "e-10").
  // Hexadecimal and octal numbers are not allowed.
294
  Handle<Object> ParseJsonNumber();
295 296 297 298

  // Parse a single JSON value from input (grammar production JSONValue).
  // A JSON value is either a (double-quoted) string literal, a number literal,
  // one of "true", "false", or "null", or an object or array literal.
299 300 301 302
  MaybeHandle<Object> ParseJsonValue();

  Handle<Object> BuildJsonObject(
      const JsonContinuation& cont,
303
      const SmallVector<JsonProperty>& property_stack, Handle<Map> feedback);
304 305
  Handle<Object> BuildJsonArray(
      const JsonContinuation& cont,
306
      const SmallVector<Handle<Object>>& element_stack);
307

308 309 310 311
  static const int kMaxContextCharacters = 10;
  static const int kMinOriginalSourceLengthForContext =
      (kMaxContextCharacters * 2) + 1;

312
  // Mark that a parsing error has happened at the current character.
313
  void ReportUnexpectedCharacter(base::uc32 c);
314 315 316 317 318 319
  bool IsSpecialString();
  MessageTemplate GetErrorMessageWithEllipses(Handle<Object>& arg,
                                              Handle<Object>& arg2, int pos);
  MessageTemplate LookUpErrorMessageForJsonToken(JsonToken token,
                                                 Handle<Object>& arg,
                                                 Handle<Object>& arg2, int pos);
320
  // Mark that a parsing error has happened at the current token.
321 322 323
  void ReportUnexpectedToken(
      JsonToken token,
      base::Optional<MessageTemplate> errorMessage = base::nullopt);
324 325

  inline Isolate* isolate() { return isolate_; }
326
  inline Factory* factory() { return isolate_->factory(); }
327
  inline Handle<JSFunction> object_constructor() { return object_constructor_; }
328

329
  static const int kInitialSpecialStringLength = 32;
330

331
  static void UpdatePointersCallback(void* parser) {
332 333 334 335
    reinterpret_cast<JsonParser<Char>*>(parser)->UpdatePointers();
  }

  void UpdatePointers() {
336
    DisallowGarbageCollection no_gc;
337 338
    const Char* chars = Handle<SeqString>::cast(source_)->GetChars(no_gc);
    if (chars_ != chars) {
339 340
      size_t position = cursor_ - chars_;
      size_t length = end_ - chars_;
341
      chars_ = chars;
342 343
      cursor_ = chars_ + position;
      end_ = chars_ + length;
344 345 346
    }
  }

347
 private:
348
  static const bool kIsOneByte = sizeof(Char) == 1;
349 350 351 352 353 354 355

  bool is_at_end() const {
    DCHECK_LE(cursor_, end_);
    return cursor_ == end_;
  }

  int position() const { return static_cast<int>(cursor_ - chars_); }
356

357
  Isolate* isolate_;
358
  const uint64_t hash_seed_;
359 360 361
  JsonToken next_;
  // Indicates whether the bytes underneath source_ can relocate during GC.
  bool chars_may_relocate_;
362
  Handle<JSFunction> object_constructor_;
363
  const Handle<String> original_source_;
364 365 366
  Handle<String> source_;

  // Cached pointer to the raw chars in source. In case source is on-heap, we
367 368
  // register an UpdatePointers callback. For this reason, chars_, cursor_ and
  // end_ should never be locally cached across a possible allocation. The scope
369
  // in which we cache chars has to be guarded by a DisallowGarbageCollection
370 371 372
  // scope.
  const Char* cursor_;
  const Char* end_;
373
  const Char* chars_;
374 375
};

376
// Explicit instantiation declarations.
377 378
// These declarations suppress implicit instantiation of the class template in
// every file that includes this header, for the 8-bit and 16-bit character
// types. Matching explicit instantiation definitions must exist in exactly one
// translation unit (NOTE(review): presumably the parser's .cc file - confirm).
extern template class JsonParser<uint8_t>;
extern template class JsonParser<uint16_t>;
379

380 381
}  // namespace internal
}  // namespace v8
382

Yang Guo's avatar
Yang Guo committed
383
#endif  // V8_JSON_JSON_PARSER_H_