scanner.h 24.5 KB
Newer Older
1
// Copyright 2011 the V8 project authors. All rights reserved.
2 3
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4

5 6
// Features shared by parsing and pre-parsing scanners.

7 8
#ifndef V8_PARSING_SCANNER_H_
#define V8_PARSING_SCANNER_H_
9

10 11
#include <algorithm>

12
#include "src/allocation.h"
13
#include "src/base/logging.h"
14
#include "src/execution/message-template.h"
15
#include "src/globals.h"
16
#include "src/parsing/literal-buffer.h"
17
#include "src/parsing/token.h"
18
#include "src/pointer-with-payload.h"
19 20
#include "src/strings/char-predicates.h"
#include "src/strings/unicode.h"
21

22 23
namespace v8 {
namespace internal {
24

25 26
class AstRawString;
class AstValueFactory;
27 28
class ExternalOneByteString;
class ExternalTwoByteString;
29
class ParserRecorder;
30
class RuntimeCallStats;
31
class Zone;
32

33 34 35 36 37 38 39 40
// ---------------------------------------------------------------------
// Buffered stream of UTF-16 code units, using an internal UTF-16 buffer.
// A code unit is a 16 bit value representing either a 16 bit code point
// or one part of a surrogate pair that make a single 21 bit code point.
class Utf16CharacterStream {
 public:
  static const uc32 kEndOfInput = -1;

41
  virtual ~Utf16CharacterStream() = default;
42

43
  // Puts the stream into the parser-error state: the error flag is raised and
  // the cursor is moved to the end of the current buffer, so no buffered code
  // unit is left to read.
  V8_INLINE void set_parser_error() {
    has_parser_error_ = true;
    buffer_cursor_ = buffer_end_;
  }
47 48
  // Clears the parser-error flag only; the buffer cursor is left where it is.
  V8_INLINE void reset_parser_error_flag() { has_parser_error_ = false; }
  // Returns the current value of the parser-error flag.
  V8_INLINE bool has_parser_error() const { return has_parser_error_; }
49

50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
  // Returns the next UTF-16 code unit without consuming it. When the current
  // buffer is exhausted a new block is requested; if that yields no data the
  // result is kEndOfInput.
  inline uc32 Peek() {
    const bool has_unit =
        V8_LIKELY(buffer_cursor_ < buffer_end_) || ReadBlockChecked();
    if (!has_unit) return kEndOfInput;
    return static_cast<uc32>(*buffer_cursor_);
  }

  // Consumes the next UTF-16 code unit and returns it, or returns kEndOfInput
  // when no more code units are available. The cursor is moved forward in
  // either case.
  inline uc32 Advance() {
    const uc32 unit = Peek();
    ++buffer_cursor_;
    return unit;
  }

  // Skips forward to the first UTF-16 code unit for which |check| returns
  // true, consumes that code unit and returns it. New blocks are requested as
  // the current buffer runs out; if the input ends before any code unit
  // satisfies |check|, kEndOfInput is returned.
  template <typename FunctionType>
  V8_INLINE uc32 AdvanceUntil(FunctionType check) {
    for (;;) {
      const uint16_t* match =
          std::find_if(buffer_cursor_, buffer_end_, [&check](uint16_t unit) {
            uc32 widened = static_cast<uc32>(unit);
            return check(widened);
          });

      if (match != buffer_end_) {
        // Consume the matching code unit and hand it back.
        buffer_cursor_ = match + 1;
        return static_cast<uc32>(*match);
      }

      // Nothing matched in this buffer: mark it as fully consumed and try to
      // load the next block.
      buffer_cursor_ = buffer_end_;
      if (!ReadBlockChecked()) {
        // Step past the end, just as Advance() does at the end of the input.
        ++buffer_cursor_;
        return kEndOfInput;
      }
    }
  }

  // Goes back by one code unit in the input stream.
  // This undoes the most recent Advance().
  inline void Back() {
    // Common case: the previous code unit is still inside the current buffer,
    // so it is handled locally by moving the cursor.
    if (V8_LIKELY(buffer_cursor_ > buffer_start_)) {
      --buffer_cursor_;
      return;
    }
    // The cursor is at the start of the buffer: request the block that
    // contains the preceding position.
    ReadBlockAt(pos() - 1);
  }

  // Returns the current stream position: the stream position of the buffer
  // start (buffer_pos_) plus the cursor's offset into the buffer, measured in
  // 16-bit code units.
  inline size_t pos() const {
    return buffer_pos_ + (buffer_cursor_ - buffer_start_);
  }

  // Moves the stream to the absolute position |pos|. A target inside the
  // current buffer only moves the cursor; any other target requests the block
  // at that position.
  inline void Seek(size_t pos) {
    const size_t buffer_limit =
        buffer_pos_ + static_cast<size_t>(buffer_end_ - buffer_start_);
    if (V8_LIKELY(pos >= buffer_pos_ && pos < buffer_limit)) {
      buffer_cursor_ = buffer_start_ + (pos - buffer_pos_);
      return;
    }
    ReadBlockAt(pos);
  }

119 120 121 122 123
  // Returns true if the stream can be cloned (can_be_cloned()) and could not
  // access the V8 heap after construction (!can_access_heap()).
  bool can_be_cloned_for_parallel_access() const {
    return can_be_cloned() && !can_access_heap();
  }

124 125 126 127 128 129 130 131
  // Returns true if the stream can be cloned with Clone.
  // TODO(rmcilroy): Remove this once ChunkedStreams can be cloned.
  virtual bool can_be_cloned() const = 0;

  // Clones the character stream to enable another independent scanner to access
  // the same underlying stream.
  virtual std::unique_ptr<Utf16CharacterStream> Clone() const = 0;

132
  // Returns true if the stream could access the V8 heap after construction.
133
  virtual bool can_access_heap() const = 0;
134

135 136 137 138 139
  // Accessors for the RuntimeCallStats pointer stored with this stream. The
  // getter returns whatever was last passed to the setter.
  RuntimeCallStats* runtime_call_stats() const { return runtime_call_stats_; }
  void set_runtime_call_stats(RuntimeCallStats* runtime_call_stats) {
    runtime_call_stats_ = runtime_call_stats;
  }

140 141 142 143 144 145 146 147 148 149 150 151 152
 protected:
  // Creates a stream over the buffer [buffer_start, buffer_end). The buffer
  // start corresponds to stream position |buffer_pos| and reading begins at
  // |buffer_cursor|.
  Utf16CharacterStream(const uint16_t* buffer_start,
                       const uint16_t* buffer_cursor,
                       const uint16_t* buffer_end, size_t buffer_pos)
      : buffer_start_(buffer_start),
        buffer_cursor_(buffer_cursor),
        buffer_end_(buffer_end),
        buffer_pos_(buffer_pos),
        // Start out without stats so that runtime_call_stats() never returns
        // an indeterminate pointer before set_runtime_call_stats() is called.
        runtime_call_stats_(nullptr) {}
  Utf16CharacterStream() : Utf16CharacterStream(nullptr, nullptr, nullptr, 0) {}

  bool ReadBlockChecked() {
    size_t position = pos();
    USE(position);
153
    bool success = !has_parser_error() && ReadBlock();
154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199

    // Post-conditions: 1, We should always be at the right position.
    //                  2, Cursor should be inside the buffer.
    //                  3, We should have more characters available iff success.
    DCHECK_EQ(pos(), position);
    DCHECK_LE(buffer_cursor_, buffer_end_);
    DCHECK_LE(buffer_start_, buffer_cursor_);
    DCHECK_EQ(success, buffer_cursor_ < buffer_end_);
    return success;
  }

  // Repositions the stream at |new_pos| and requests the block for that
  // position.
  void ReadBlockAt(size_t new_pos) {
    // Callers (Back/Seek) deal with positions inside the current buffer
    // themselves, so this should only be reached when new data is required.
    // (This is an efficiency check rather than a correctness invariant.)
    DCHECK(new_pos < buffer_pos_ ||
           new_pos >= buffer_pos_ + (buffer_end_ - buffer_start_));

    // With the cursor at the buffer start, pos() equals buffer_pos_, so
    // these two assignments make pos() report new_pos.
    buffer_cursor_ = buffer_start_;
    buffer_pos_ = new_pos;
    DCHECK_EQ(pos(), new_pos);
    ReadBlockChecked();
  }

  // Read more data, and update buffer_*_ to point to it.
  // Returns true if more data was available.
  //
  // ReadBlock() may modify any of the buffer_*_ members, but must ensure that
  // the result of pos() remains unaffected.
  //
  // Examples:
  // - a stream could fill a separate buffer. Then buffer_start_ and
  //   buffer_cursor_ would point to the beginning of the buffer, and
  //   buffer_pos_ would be the old pos().
  // - a stream with existing buffer chunks would set buffer_start_ and
  //   buffer_end_ to cover the full chunk, and then buffer_cursor_ would
  //   point into the middle of the buffer, while buffer_pos_ would describe
  //   the start of the buffer.
  virtual bool ReadBlock() = 0;

  const uint16_t* buffer_start_;
  const uint16_t* buffer_cursor_;
  const uint16_t* buffer_end_;
  size_t buffer_pos_;
200
  RuntimeCallStats* runtime_call_stats_;
201
  bool has_parser_error_ = false;
202 203
};

204
// ----------------------------------------------------------------------------
205
// JavaScript Scanner.
206

207
class V8_EXPORT_PRIVATE Scanner {
208
 public:
209
  // Scoped helper for a re-settable bookmark.
210
  class V8_EXPORT_PRIVATE BookmarkScope {
211
   public:
212
    explicit BookmarkScope(Scanner* scanner)
213 214
        : scanner_(scanner),
          bookmark_(kNoBookmark),
215
          had_parser_error_(scanner->has_parser_error()) {
216 217
      DCHECK_NOT_NULL(scanner_);
    }
218
    ~BookmarkScope() = default;
219

220
    void Set(size_t bookmark);
221
    void Apply();
222 223
    bool HasBeenSet() const;
    bool HasBeenApplied() const;
224 225

   private:
226 227 228
    static const size_t kNoBookmark;
    static const size_t kBookmarkWasApplied;

229
    Scanner* scanner_;
230
    size_t bookmark_;
231
    bool had_parser_error_;
232 233 234 235

    DISALLOW_COPY_AND_ASSIGN(BookmarkScope);
  };

236 237
  // Sets the Scanner into an error state to stop further scanning and terminate
  // the parsing by only returning ILLEGAL tokens after that.
238
  V8_INLINE void set_parser_error() {
239
    if (!has_parser_error()) {
240 241
      c0_ = kEndOfInput;
      source_->set_parser_error();
242
      for (TokenDesc& desc : token_storage_) desc.token = Token::ILLEGAL;
243 244
    }
  }
245 246 247
  V8_INLINE void reset_parser_error_flag() {
    source_->reset_parser_error_flag();
  }
248
  V8_INLINE bool has_parser_error() const {
249 250
    return source_->has_parser_error();
  }
251

252
  // Representation of an interval of source positions.
  struct Location {
    Location(int b, int e) : beg_pos(b), end_pos(e) {}
    Location() : Location(0, 0) {}

    // Distance between the end and the beginning of the interval.
    int length() const { return end_pos - beg_pos; }

    // An interval is valid when 0 <= beg_pos <= end_pos. The bounds are
    // compared directly so that the answer is well defined for every pair of
    // values, including a negative end_pos.
    bool IsValid() const { return 0 <= beg_pos && beg_pos <= end_pos; }

    // Returns an interval for which IsValid() is false.
    static Location invalid() { return Location(-1, 0); }

    int beg_pos;
    int end_pos;
  };

266 267
  // -1 is outside of the range of any real source code.
  static const int kNoOctalLocation = -1;
268
  static const uc32 kEndOfInput = Utf16CharacterStream::kEndOfInput;
269

270
  explicit Scanner(Utf16CharacterStream* source, bool is_module);
271

272
  void Initialize();
273 274

  // Returns the next token and advances input.
275
  Token::Value Next();
littledan's avatar
littledan committed
276
  // Returns the token following peek()
277
  Token::Value PeekAhead();
278
  // Returns the current token again.
279
  Token::Value current_token() const { return current().token; }
280

281
  // Returns the location information for the current token
282
  // (the token last returned by Next()).
283
  const Location& location() const { return current().location; }
284

285
  // This error is specifically an invalid hex or unicode escape sequence.
286
  bool has_error() const { return scanner_error_ != MessageTemplate::kNone; }
287
  MessageTemplate error() const { return scanner_error_; }
288
  const Location& error_location() const { return scanner_error_location_; }
289

290
  bool has_invalid_template_escape() const {
291
    return current().invalid_template_escape_message != MessageTemplate::kNone;
292
  }
293
  MessageTemplate invalid_template_escape_message() const {
294
    DCHECK(has_invalid_template_escape());
295
    return current().invalid_template_escape_message;
296
  }
297 298 299 300 301 302

  // Resets the invalid-template-escape message of the current token to
  // MessageTemplate::kNone. May only be called while such a message is set
  // (checked in debug builds).
  void clear_invalid_template_escape_message() {
    DCHECK(has_invalid_template_escape());
    current_->invalid_template_escape_message = MessageTemplate::kNone;
  }

303 304
  Location invalid_template_escape_location() const {
    DCHECK(has_invalid_template_escape());
305
    return current().invalid_template_escape_location;
306 307
  }

308 309 310
  // Similar functions for the upcoming token.

  // One token look-ahead (past the token returned by Next()).
311
  Token::Value peek() const { return next().token; }
312

313
  const Location& peek_location() const { return next().location; }
314 315

  bool literal_contains_escapes() const {
316
    return LiteralContainsEscapes(current());
317
  }
318

319 320 321 322
  bool next_literal_contains_escapes() const {
    return LiteralContainsEscapes(next());
  }

323
  const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory) const;
324

325 326 327
  const AstRawString* NextSymbol(AstValueFactory* ast_value_factory) const;
  const AstRawString* CurrentRawSymbol(
      AstValueFactory* ast_value_factory) const;
328 329

  double DoubleValue();
330

331 332
  const char* CurrentLiteralAsCString(Zone* zone) const;

333 334
  inline bool CurrentMatches(Token::Value token) const {
    DCHECK(Token::IsKeyword(token));
335
    return current().token == token;
336
  }
337

338
  template <size_t N>
339 340
  bool NextLiteralExactlyEquals(const char (&s)[N]) {
    DCHECK(next().CanAccessLiteral());
341 342 343 344 345 346 347
    // The length of the token is used to make sure the literal equals without
    // taking escape sequences (e.g., "use \x73trict") or line continuations
    // (e.g., "use \(newline) strict") into account.
    if (!is_next_literal_one_byte()) return false;
    if (peek_location().length() != N + 1) return false;

    Vector<const uint8_t> next = next_literal_one_byte_string();
348
    const char* chars = reinterpret_cast<const char*>(next.begin());
349 350 351
    return next.length() == N - 1 && strncmp(s, chars, N - 1) == 0;
  }

352 353 354 355 356 357
  template <size_t N>
  bool CurrentLiteralEquals(const char (&s)[N]) {
    DCHECK(current().CanAccessLiteral());
    if (!is_literal_one_byte()) return false;

    Vector<const uint8_t> current = literal_one_byte_string();
358
    const char* chars = reinterpret_cast<const char*>(current.begin());
359 360 361
    return current.length() == N - 1 && strncmp(s, chars, N - 1) == 0;
  }

362 363
  // Returns the location of the last seen octal literal.
  Location octal_position() const { return octal_pos_; }
364 365 366 367
  // Forgets the last seen octal literal: its location is reset to
  // Location::invalid() and its message to MessageTemplate::kNone.
  void clear_octal_position() {
    octal_pos_ = Location::invalid();
    octal_message_ = MessageTemplate::kNone;
  }
368
  MessageTemplate octal_message() const { return octal_message_; }
369

verwaest's avatar
verwaest committed
370
  // Returns the value of the last smi that was scanned.
371
  uint32_t smi_value() const { return current().smi_value_; }
verwaest's avatar
verwaest committed
372

373 374 375 376 377 378 379 380
  // Seek forward to the given position.  This operation does not
  // work in general, for instance when there are pushed back
  // characters, but works for seeking forward until simple delimiter
  // tokens, which is what it is used for.
  void SeekForward(int pos);

  // Returns true if there was a line terminator before the peek'ed token,
  // possibly inside a multi-line comment.
381 382
  bool HasLineTerminatorBeforeNext() const {
    return next().after_line_terminator;
383 384
  }

385
  bool HasLineTerminatorAfterNext() {
386 387
    Token::Value ensure_next_next = PeekAhead();
    USE(ensure_next_next);
388
    return next_next().after_line_terminator;
389 390
  }

391 392
  // Scans the input as a regular expression pattern, next token must be /(=).
  // Returns true if a pattern is scanned.
393
  bool ScanRegExpPattern();
394
  // Scans the input as regular expression flags. Returns the flags on success.
395
  Maybe<RegExp::Flags> ScanRegExpFlags();
396

397
  // Scans the input as a template literal
398
  Token::Value ScanTemplateContinuation() {
399
    DCHECK_EQ(next().token, Token::RBRACE);
400
    DCHECK_EQ(source_pos() - 1, next().location.beg_pos);
401
    return ScanTemplateSpan();
402
  }
403

404 405
  Handle<String> SourceUrl(Isolate* isolate) const;
  Handle<String> SourceMappingUrl(Isolate* isolate) const;
406

407 408
  bool FoundHtmlComment() const { return found_html_comment_; }

409 410 411 412 413 414
  bool allow_harmony_numeric_separator() const {
    return allow_harmony_numeric_separator_;
  }
  void set_allow_harmony_numeric_separator(bool allow) {
    allow_harmony_numeric_separator_ = allow;
  }
415

416 417
  const Utf16CharacterStream* stream() const { return source_; }

418 419 420
  // If the next characters in the stream are "#!", the line is skipped.
  void SkipHashBang();

421
 private:
422 423 424 425 426
  // Scoped helper for saving & restoring scanner error state.
  // This is used for tagged template literals, in which normally forbidden
  // escape sequences are allowed.
  class ErrorState;

427 428
  // The current and look-ahead token.
  struct TokenDesc {
429 430 431 432
    Location location = {0, 0};
    LiteralBuffer literal_chars;
    LiteralBuffer raw_literal_chars;
    Token::Value token = Token::UNINITIALIZED;
433
    MessageTemplate invalid_template_escape_message = MessageTemplate::kNone;
434
    Location invalid_template_escape_location;
435 436
    uint32_t smi_value_ = 0;
    bool after_line_terminator = false;
437 438 439 440 441 442

#ifdef DEBUG
    bool CanAccessLiteral() const {
      return token == Token::PRIVATE_NAME || token == Token::ILLEGAL ||
             token == Token::UNINITIALIZED || token == Token::REGEXP_LITERAL ||
             IsInRange(token, Token::NUMBER, Token::STRING) ||
443
             Token::IsAnyIdentifier(token) || Token::IsKeyword(token) ||
444 445 446 447 448 449 450
             IsInRange(token, Token::TEMPLATE_SPAN, Token::TEMPLATE_TAIL);
    }
    bool CanAccessRawLiteral() const {
      return token == Token::ILLEGAL || token == Token::UNINITIALIZED ||
             IsInRange(token, Token::TEMPLATE_SPAN, Token::TEMPLATE_TAIL);
    }
#endif  // DEBUG
451 452
  };

453
  enum NumberKind {
454
    IMPLICIT_OCTAL,
455 456 457 458 459 460 461
    BINARY,
    OCTAL,
    HEX,
    DECIMAL,
    DECIMAL_WITH_LEADING_ZERO
  };

462 463 464 465 466 467 468 469
  // Returns true if |kind| lies between BINARY and DECIMAL in NumberKind.
  // Assuming IsInRange is inclusive on both ends, that is BINARY, OCTAL, HEX
  // and DECIMAL; IMPLICIT_OCTAL and DECIMAL_WITH_LEADING_ZERO are excluded.
  inline bool IsValidBigIntKind(NumberKind kind) {
    return IsInRange(kind, BINARY, DECIMAL);
  }

  // Returns true if |kind| lies between DECIMAL and DECIMAL_WITH_LEADING_ZERO
  // in NumberKind, which are adjacent enumerators.
  inline bool IsDecimalNumberKind(NumberKind kind) {
    return IsInRange(kind, DECIMAL, DECIMAL_WITH_LEADING_ZERO);
  }

470
  static const int kCharacterLookaheadBufferSize = 1;
471
  static const int kMaxAscii = 127;
472 473

  // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.
474
  template <bool capture_raw>
475
  uc32 ScanOctalEscape(uc32 c, int length);
476

477 478 479 480 481
  // Call this after setting source_ to the input.
  void Init() {
    // Set c0_ (one character ahead)
    STATIC_ASSERT(kCharacterLookaheadBufferSize == 1);
    Advance();
482

483 484 485 486
    current_ = &token_storage_[0];
    next_ = &token_storage_[1];
    next_next_ = &token_storage_[2];

487 488 489
    found_html_comment_ = false;
    scanner_error_ = MessageTemplate::kNone;
  }
490

491
  void ReportScannerError(const Location& location, MessageTemplate error) {
492 493 494 495 496
    if (has_error()) return;
    scanner_error_ = error;
    scanner_error_location_ = location;
  }

497
  void ReportScannerError(int pos, MessageTemplate error) {
498 499 500 501 502
    if (has_error()) return;
    scanner_error_ = error;
    scanner_error_location_ = Location(pos, pos + 1);
  }

503 504 505
  // Seek to the next_ token at the given position.
  void SeekNext(size_t position);

506 507 508
  V8_INLINE void AddLiteralChar(uc32 c) { next().literal_chars.AddChar(c); }

  V8_INLINE void AddLiteralChar(char c) { next().literal_chars.AddChar(c); }
509

510
  V8_INLINE void AddRawLiteralChar(uc32 c) {
511
    next().raw_literal_chars.AddChar(c);
512 513
  }

514
  V8_INLINE void AddLiteralCharAdvance() {
515
    AddLiteralChar(c0_);
516
    Advance();
517 518 519
  }

  // Low-level scanning support.
520
  template <bool capture_raw = false>
521
  void Advance() {
522 523 524 525
    if (capture_raw) {
      AddRawLiteralChar(c0_);
    }
    c0_ = source_->Advance();
526 527
  }

528
  template <typename FunctionType>
529
  V8_INLINE void AdvanceUntil(FunctionType check) {
530
    c0_ = source_->AdvanceUntil(check);
531 532
  }

533
  bool CombineSurrogatePair() {
534
    DCHECK(!unibrow::Utf16::IsLeadSurrogate(kEndOfInput));
535
    if (unibrow::Utf16::IsLeadSurrogate(c0_)) {
536
      uc32 c1 = source_->Advance();
537
      DCHECK(!unibrow::Utf16::IsTrailSurrogate(kEndOfInput));
538
      if (unibrow::Utf16::IsTrailSurrogate(c1)) {
539
        c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1);
540
        return true;
541
      }
542
      source_->Back();
543
    }
544
    return false;
545 546
  }

547
  void PushBack(uc32 ch) {
548 549
    DCHECK_LE(c0_, static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode));
    source_->Back();
550 551 552
    c0_ = ch;
  }

553
  uc32 Peek() const { return source_->Peek(); }
554

555
  inline Token::Value Select(Token::Value tok) {
556
    Advance();
557 558 559 560
    return tok;
  }

  // Consumes the current character. If the new look-ahead character equals
  // |next|, consumes it as well and returns |then|; otherwise returns |else_|
  // without consuming anything further.
  inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) {
    Advance();
    if (c0_ != next) return else_;
    Advance();
    return then;
  }
569 570
  // Returns the literal string, if any, for the current token (the
  // token last returned by Next()). The string is 0-terminated.
571 572 573
  // Literal strings are collected for identifiers, strings, numbers as well
  // as for template literals. For template literals we also collect the raw
  // form.
574 575
  // These functions only give the correct result if the literal was scanned
  // when a LiteralScope object is alive.
576 577 578 579 580 581
  //
  // Current usage of these functions is unfortunately a little undisciplined,
  // and is_literal_one_byte() + literal_one_byte_string() is also
  // requested for tokens that do not have a literal. Hence, we treat any
  // token as a one-byte literal. E.g. Token::FUNCTION pretends to have a
  // literal "function".
582
  Vector<const uint8_t> literal_one_byte_string() const {
583 584
    DCHECK(current().CanAccessLiteral() || Token::IsKeyword(current().token));
    return current().literal_chars.one_byte_literal();
585
  }
586
  Vector<const uint16_t> literal_two_byte_string() const {
587
    DCHECK(current().CanAccessLiteral() || Token::IsKeyword(current().token));
588
    return current().literal_chars.two_byte_literal();
589
  }
590
  bool is_literal_one_byte() const {
591 592
    DCHECK(current().CanAccessLiteral() || Token::IsKeyword(current().token));
    return current().literal_chars.is_one_byte();
593 594 595
  }
  // Returns the literal string for the next token (the token that
  // would be returned if Next() were called).
596
  Vector<const uint8_t> next_literal_one_byte_string() const {
597
    DCHECK(next().CanAccessLiteral());
598
    return next().literal_chars.one_byte_literal();
599
  }
600
  Vector<const uint16_t> next_literal_two_byte_string() const {
601
    DCHECK(next().CanAccessLiteral());
602
    return next().literal_chars.two_byte_literal();
603
  }
604
  bool is_next_literal_one_byte() const {
605
    DCHECK(next().CanAccessLiteral());
606
    return next().literal_chars.is_one_byte();
607
  }
608
  Vector<const uint8_t> raw_literal_one_byte_string() const {
609
    DCHECK(current().CanAccessRawLiteral());
610
    return current().raw_literal_chars.one_byte_literal();
611
  }
612
  Vector<const uint16_t> raw_literal_two_byte_string() const {
613
    DCHECK(current().CanAccessRawLiteral());
614
    return current().raw_literal_chars.two_byte_literal();
615
  }
616
  bool is_raw_literal_one_byte() const {
617
    DCHECK(current().CanAccessRawLiteral());
618
    return current().raw_literal_chars.is_one_byte();
619 620
  }

621
  template <bool capture_raw, bool unicode = false>
622
  uc32 ScanHexNumber(int expected_length);
marja's avatar
marja committed
623 624 625
  // Scan a number of any length but not bigger than max_value. For example, the
  // number can be 000000001, so it's very long in characters but its value is
  // small.
626
  template <bool capture_raw>
627
  uc32 ScanUnlimitedLengthHexNumber(int max_value, int beg_pos);
628

629
  // Scans a single JavaScript token.
630
  V8_INLINE Token::Value ScanSingleToken();
631
  V8_INLINE void Scan();
632 633 634 635 636
  // Performance hack: pass through a pre-calculated "next()" value to avoid
  // having to re-calculate it in Scan. You'd think the compiler would be able
  // to hoist the next() calculation out of the inlined Scan method, but it
  // seems that pointer aliasing analysis fails to show that this is safe.
  V8_INLINE void Scan(TokenDesc* next_desc);
637

638
  V8_INLINE Token::Value SkipWhiteSpace();
639
  Token::Value SkipSingleHTMLComment();
640
  Token::Value SkipSingleLineComment();
641 642
  Token::Value SkipSourceURLComment();
  void TryToParseSourceURLComment();
643
  Token::Value SkipMultiLineComment();
644 645
  // Scans a possible HTML comment -- begins with '<!'.
  Token::Value ScanHtmlComment();
646

647 648
  bool ScanDigitsWithNumericSeparators(bool (*predicate)(uc32 ch),
                                       bool is_check_first_digit);
649
  bool ScanDecimalDigits();
650
  // Optimized function to scan decimal number as Smi.
651 652 653 654 655 656
  bool ScanDecimalAsSmi(uint64_t* value);
  bool ScanDecimalAsSmiWithNumericSeparators(uint64_t* value);
  bool ScanHexDigits();
  bool ScanBinaryDigits();
  bool ScanSignedInteger();
  bool ScanOctalDigits();
657
  bool ScanImplicitOctalDigits(int start_pos, NumberKind* kind);
658

659
  Token::Value ScanNumber(bool seen_period);
660
  V8_INLINE Token::Value ScanIdentifierOrKeyword();
661
  V8_INLINE Token::Value ScanIdentifierOrKeywordInner();
662 663
  Token::Value ScanIdentifierOrKeywordInnerSlow(bool escaped,
                                                bool can_be_keyword);
664 665

  Token::Value ScanString();
666
  Token::Value ScanPrivateName();
667

668 669 670
  // Scans an escape-sequence which is part of a string and adds the
  // decoded character to the current literal. Returns true if a pattern
  // is scanned.
671
  template <bool capture_raw>
672
  bool ScanEscape();
673

674
  // Decodes a Unicode escape-sequence which is part of an identifier.
675 676
  // If the escape sequence cannot be decoded the result is kBadChar.
  uc32 ScanIdentifierUnicodeEscape();
marja's avatar
marja committed
677
  // Helper for the above functions.
678
  template <bool capture_raw>
marja's avatar
marja committed
679
  uc32 ScanUnicodeEscape();
680

681 682
  Token::Value ScanTemplateSpan();

683
  // Return the current source position.
684 685
  int source_pos() {
    return static_cast<int>(source_->pos()) - kCharacterLookaheadBufferSize;
686 687
  }

688 689 690 691 692 693 694
  static bool LiteralContainsEscapes(const TokenDesc& token) {
    Location location = token.location;
    int source_length = (location.end_pos - location.beg_pos);
    if (token.token == Token::STRING) {
      // Subtract delimiters.
      source_length -= 2;
    }
695
    return token.literal_chars.length() != source_length;
696 697
  }

698 699 700 701
#ifdef DEBUG
  void SanityCheckTokenDesc(const TokenDesc&) const;
#endif

702
  TokenDesc& next() { return *next_; }
703

704 705 706
  const TokenDesc& current() const { return *current_; }
  const TokenDesc& next() const { return *next_; }
  const TokenDesc& next_next() const { return *next_next_; }
707

708 709 710
  TokenDesc* current_;    // desc for current token (as returned by Next())
  TokenDesc* next_;       // desc for next token (one token look-ahead)
  TokenDesc* next_next_;  // desc for the token after next (after PeekAhead())
711

712
  // Input stream. Must be initialized to an Utf16CharacterStream.
713
  Utf16CharacterStream* const source_;
714 715 716 717

  // One Unicode character look-ahead; c0_ < 0 at the end of the input.
  uc32 c0_;

718 719
  TokenDesc token_storage_[3];

720 721
  // Whether this scanner encountered an HTML comment.
  bool found_html_comment_;
722

723
  // Harmony flags to allow ESNext features.
724
  bool allow_harmony_numeric_separator_;
725

726 727
  const bool is_module_;

728 729 730 731 732 733
  // Values parsed from magic comments.
  LiteralBuffer source_url_;
  LiteralBuffer source_mapping_url_;

  // Last-seen positions of potentially problematic tokens.
  Location octal_pos_;
734
  MessageTemplate octal_message_;
735

736
  MessageTemplate scanner_error_;
737
  Location scanner_error_location_;
738 739
};

740 741
}  // namespace internal
}  // namespace v8
742

743
#endif  // V8_PARSING_SCANNER_H_