scanner.cc 57.2 KB
Newer Older
1
// Copyright 2011 the V8 project authors. All rights reserved.
2 3
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4

5
// Features shared by parsing and pre-parsing scanners.
6

7
#include "src/parsing/scanner.h"
8

9 10
#include <stdint.h>

11 12
#include <cmath>

13
#include "src/ast/ast-value-factory.h"
14 15
#include "src/char-predicates-inl.h"
#include "src/conversions-inl.h"
16
#include "src/objects/bigint.h"
17
#include "src/parsing/duplicate-finder.h"  // For Scanner::FindSymbol
18
#include "src/unicode-cache-inl.h"
19

20 21
namespace v8 {
namespace internal {
22

23
// Scoped guard around one scanner error slot (a message plus the location it
// was reported at). On construction the current contents of both slots are
// remembered and the slots are cleared; on destruction the remembered contents
// are written back. While the guard is alive, MoveErrorTo() can hand an error
// recorded in the slots over to a token descriptor.
class Scanner::ErrorState {
 public:
  ErrorState(MessageTemplate::Template* message_stack,
             Scanner::Location* location_stack)
      : message_stack_(message_stack),
        old_message_(*message_stack),
        location_stack_(location_stack),
        old_location_(*location_stack) {
    *message_stack_ = MessageTemplate::kNone;
    *location_stack_ = Location::invalid();
  }

  // A copy would write the saved message/location back a second time when it
  // is destroyed, so the guard must not be copied.
  ErrorState(const ErrorState&) = delete;
  ErrorState& operator=(const ErrorState&) = delete;

  ~ErrorState() {
    *message_stack_ = old_message_;
    *location_stack_ = old_location_;
  }

  // If an error has been recorded in the guarded slots, transfers it to
  // |dest| as its invalid-template-escape message/location -- unless |dest|
  // already carries such a message, in which case the existing one is kept.
  // Either way the guarded slots are cleared afterwards. Does nothing when no
  // error is recorded.
  void MoveErrorTo(TokenDesc* dest) {
    if (*message_stack_ == MessageTemplate::kNone) {
      return;
    }
    if (dest->invalid_template_escape_message == MessageTemplate::kNone) {
      dest->invalid_template_escape_message = *message_stack_;
      dest->invalid_template_escape_location = *location_stack_;
    }
    *message_stack_ = MessageTemplate::kNone;
    *location_stack_ = Location::invalid();
  }

 private:
  MessageTemplate::Template* const message_stack_;
  MessageTemplate::Template const old_message_;
  Scanner::Location* const location_stack_;
  Scanner::Location const old_location_;
};

59 60 61
// ----------------------------------------------------------------------------
// Scanner::LiteralBuffer

62
// Internalizes the buffered literal through |isolate|'s factory, using the
// one-byte or the two-byte entry point to match the buffer's current encoding.
Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const {
  if (!is_one_byte()) {
    return isolate->factory()->InternalizeTwoByteString(two_byte_literal());
  }
  return isolate->factory()->InternalizeOneByteString(one_byte_literal());
}

69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130
// Computes the capacity to grow to. Starting from the larger of
// |min_capacity| and the current backing-store length, the result is that
// base multiplied by kGrowthFactory, but never more than kMaxGrowth above it.
int Scanner::LiteralBuffer::NewCapacity(int min_capacity) {
  const int base = Max(min_capacity, backing_store_.length());
  return Min(base * kGrowthFactory, base + kMaxGrowth);
}

void Scanner::LiteralBuffer::ExpandBuffer() {
  Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity));
  MemCopy(new_store.start(), backing_store_.start(), position_);
  backing_store_.Dispose();
  backing_store_ = new_store;
}

void Scanner::LiteralBuffer::ConvertToTwoByte() {
  DCHECK(is_one_byte_);
  Vector<byte> new_store;
  int new_content_size = position_ * kUC16Size;
  if (new_content_size >= backing_store_.length()) {
    // Ensure room for all currently read code units as UC16 as well
    // as the code unit about to be stored.
    new_store = Vector<byte>::New(NewCapacity(new_content_size));
  } else {
    new_store = backing_store_;
  }
  uint8_t* src = backing_store_.start();
  uint16_t* dst = reinterpret_cast<uint16_t*>(new_store.start());
  for (int i = position_ - 1; i >= 0; i--) {
    dst[i] = src[i];
  }
  if (new_store.start() != backing_store_.start()) {
    backing_store_.Dispose();
    backing_store_ = new_store;
  }
  position_ = new_content_size;
  is_one_byte_ = false;
}

// Appends |code_unit| to the buffer, growing the backing store when it is
// full. A one-byte buffer stays one-byte while the value fits into Latin-1 and
// is converted to two-byte otherwise. In two-byte mode, values above the
// largest non-surrogate code are stored as a lead/trail surrogate pair.
void Scanner::LiteralBuffer::AddCharSlow(uc32 code_unit) {
  if (position_ >= backing_store_.length()) ExpandBuffer();

  if (is_one_byte_) {
    const bool fits_latin1 =
        code_unit <= static_cast<uc32>(unibrow::Latin1::kMaxChar);
    if (fits_latin1) {
      backing_store_[position_] = static_cast<byte>(code_unit);
      position_ += kOneByteSize;
      return;
    }
    ConvertToTwoByte();
  }

  // Stores one 16-bit unit at the current position and steps past it.
  auto append_uc16 = [this](uint16_t unit) {
    *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = unit;
    position_ += kUC16Size;
  };

  const bool is_single_unit =
      code_unit <= static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode);
  if (is_single_unit) {
    append_uc16(static_cast<uint16_t>(code_unit));
    return;
  }

  append_uc16(unibrow::Utf16::LeadSurrogate(code_unit));
  // The lead surrogate may have filled the store; make room for the trail.
  if (position_ >= backing_store_.length()) ExpandBuffer();
  append_uc16(unibrow::Utf16::TrailSurrogate(code_unit));
}

131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163
// ----------------------------------------------------------------------------
// Scanner::BookmarkScope

// Sentinel values stored in BookmarkScope::bookmark_ in place of a real source
// position. They occupy the three largest size_t values:
// - kBookmarkAtFirstPos: the bookmark was set before the first token was
//   consumed (see Set() / Apply()).
// - kNoBookmark: no bookmark has been set.
// - kBookmarkWasApplied: the bookmark has already been applied.
const size_t Scanner::BookmarkScope::kBookmarkAtFirstPos =
    std::numeric_limits<size_t>::max() - 2;
const size_t Scanner::BookmarkScope::kNoBookmark =
    std::numeric_limits<size_t>::max() - 1;
const size_t Scanner::BookmarkScope::kBookmarkWasApplied =
    std::numeric_limits<size_t>::max();

// Records the scanner's current position as the bookmark. Must only be called
// while no bookmark is set and while the scanner holds no peeked-ahead token.
void Scanner::BookmarkScope::Set() {
  DCHECK_EQ(bookmark_, kNoBookmark);
  DCHECK_EQ(scanner_->next_next_.token, Token::UNINITIALIZED);

  // The first token is a bit special, since current_ will still be
  // uninitialized. In this case, store kBookmarkAtFirstPos and special-case it
  // when applying the bookmark.
  const bool at_first_token =
      scanner_->current_.token == Token::UNINITIALIZED;
  DCHECK_IMPLIES(
      at_first_token,
      scanner_->current_.location.beg_pos == scanner_->next_.location.beg_pos);
  if (at_first_token) {
    bookmark_ = kBookmarkAtFirstPos;
  } else {
    bookmark_ = scanner_->location().beg_pos;
  }
}

void Scanner::BookmarkScope::Apply() {
  DCHECK(HasBeenSet());  // Caller hasn't called SetBookmark.
  if (bookmark_ == kBookmarkAtFirstPos) {
    scanner_->SeekNext(0);
  } else {
    scanner_->SeekNext(bookmark_);
    scanner_->Next();
164
    DCHECK_EQ(scanner_->location().beg_pos, static_cast<int>(bookmark_));
165 166 167 168 169 170 171 172 173 174 175
  }
  bookmark_ = kBookmarkWasApplied;
}

// True while a bookmark is set and has not been applied yet.
bool Scanner::BookmarkScope::HasBeenSet() {
  const bool never_set = bookmark_ == kNoBookmark;
  const bool already_applied = bookmark_ == kBookmarkWasApplied;
  return !(never_set || already_applied);
}

// True once Apply() has consumed the bookmark.
bool Scanner::BookmarkScope::HasBeenApplied() {
  return bookmark_ == kBookmarkWasApplied;
}
176

177
// ----------------------------------------------------------------------------
178
// Scanner
179

180
// Creates a scanner that uses |unicode_cache| for character classification.
// No octal-escape error is recorded, no HTML comment has been seen, and the
// harmony BigInt / numeric-separator features start out disabled.
Scanner::Scanner(UnicodeCache* unicode_cache)
    : unicode_cache_(unicode_cache),
      octal_pos_(Location::invalid()),
      octal_message_(MessageTemplate::kNone),
      found_html_comment_(false),
      allow_harmony_bigint_(false),
      allow_harmony_numeric_separator_(false) {}
187

188
// Attaches the scanner to |source| (which must not be null), records whether
// the input is a module, and scans the first token so that it is available as
// the "next" token.
void Scanner::Initialize(Utf16CharacterStream* source, bool is_module) {
  DCHECK_NOT_NULL(source);
  is_module_ = is_module;
  source_ = source;
  Init();
  // The very first token is flagged as being preceded by a line terminator.
  has_line_terminator_before_next_ = true;
  Scan();
}

199
template <bool capture_raw, bool unicode>
200
uc32 Scanner::ScanHexNumber(int expected_length) {
201
  DCHECK_LE(expected_length, 4);  // prevent overflow
202

203
  int begin = source_pos() - 2;
204 205 206 207
  uc32 x = 0;
  for (int i = 0; i < expected_length; i++) {
    int d = HexValue(c0_);
    if (d < 0) {
208 209 210 211
      ReportScannerError(Location(begin, begin + expected_length + 2),
                         unicode
                             ? MessageTemplate::kInvalidUnicodeEscapeSequence
                             : MessageTemplate::kInvalidHexEscapeSequence);
212 213 214
      return -1;
    }
    x = x * 16 + d;
215
    Advance<capture_raw>();
216 217 218
  }

  return x;
219
}
lrn@chromium.org's avatar
lrn@chromium.org committed
220

221
template <bool capture_raw>
222
uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value, int beg_pos) {
marja's avatar
marja committed
223 224
  uc32 x = 0;
  int d = HexValue(c0_);
225 226
  if (d < 0) return -1;

marja's avatar
marja committed
227 228
  while (d >= 0) {
    x = x * 16 + d;
229 230 231 232 233
    if (x > max_value) {
      ReportScannerError(Location(beg_pos, source_pos() + 1),
                         MessageTemplate::kUndefinedUnicodeCodePoint);
      return -1;
    }
234
    Advance<capture_raw>();
marja's avatar
marja committed
235 236
    d = HexValue(c0_);
  }
237

marja's avatar
marja committed
238 239 240 241
  return x;
}


242 243 244
// Ensure that tokens can be stored in a byte.
STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);

245 246
// Table of one-character tokens, by character (0x00..0x7F only).
// clang-format off
247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291
static const byte one_char_tokens[] = {
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::LPAREN,       // 0x28
  Token::RPAREN,       // 0x29
  Token::ILLEGAL,
  Token::ILLEGAL,
292
  Token::COMMA,        // 0x2C
293 294 295 296 297 298 299 300 301 302 303 304 305
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
306 307
  Token::COLON,        // 0x3A
  Token::SEMICOLON,    // 0x3B
308 309 310
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
311
  Token::CONDITIONAL,  // 0x3F
312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
339
  Token::LBRACK,     // 0x5B
340
  Token::ILLEGAL,
341
  Token::RBRACK,     // 0x5D
342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
  Token::ILLEGAL,
371
  Token::LBRACE,       // 0x7B
372
  Token::ILLEGAL,
373 374
  Token::RBRACE,       // 0x7D
  Token::BIT_NOT,      // 0x7E
375 376
  Token::ILLEGAL
};
377
// clang-format on
378

379
// Advances by one token: the "next" token becomes the current one and a new
// "next" token is made available -- either the token already obtained by
// PeekAhead() or a freshly scanned one. Returns the new current token.
Token::Value Scanner::Next() {
  // At end of input, keep reporting the location of the last real token.
  if (next_.token == Token::EOS) {
    next_.location.beg_pos = current_.location.beg_pos;
    next_.location.end_pos = current_.location.end_pos;
  }
  current_ = next_;

  const bool has_peeked_token = next_next_.token != Token::UNINITIALIZED;
  if (V8_UNLIKELY(has_peeked_token)) {
    // Promote the peeked-ahead token instead of scanning a new one.
    next_ = next_next_;
    next_next_.token = Token::UNINITIALIZED;
    next_next_.contextual_token = Token::UNINITIALIZED;
    has_line_terminator_before_next_ = has_line_terminator_after_next_;
  } else {
    has_line_terminator_before_next_ = false;
    has_multiline_comment_before_next_ = false;
    Scan();
  }
  return current_.token;
}


littledan's avatar
littledan committed
399
// Returns the token after the "next" token without consuming anything. The
// result is cached in next_next_, so repeated calls do not rescan. Must not be
// called while the "next" token is DIV or ASSIGN_DIV.
Token::Value Scanner::PeekAhead() {
  DCHECK(next_.token != Token::DIV);
  DCHECK(next_.token != Token::ASSIGN_DIV);

  if (next_next_.token != Token::UNINITIALIZED) {
    return next_next_.token;
  }

  // Step forward by one token, then shift the token window back so that
  // current_/next_ are as before and the newly scanned token lives in
  // next_next_. The line-terminator state of next_ is preserved and the state
  // observed for the peeked token is kept separately.
  const TokenDesc saved_current = current_;
  const bool saved_line_terminator_before_next =
      has_line_terminator_before_next_ || has_multiline_comment_before_next_;
  Next();
  has_line_terminator_after_next_ =
      has_line_terminator_before_next_ || has_multiline_comment_before_next_;
  has_line_terminator_before_next_ = saved_line_terminator_before_next;

  const Token::Value peeked = next_.token;
  next_next_ = next_;
  next_ = current_;
  current_ = saved_current;
  return peeked;
}


421
// Skips whitespace, line terminators and "-->" line comments that start a
// line. Returns Token::WHITESPACE if at least one character was consumed, and
// Token::ILLEGAL if nothing was skipped or if skipping an HTML comment failed.
Token::Value Scanner::SkipWhiteSpace() {
  const int start_position = source_pos();

  for (;;) {
    // Advance as long as the character is a WhiteSpace or LineTerminator, and
    // remember if a LineTerminator was seen.
    for (;;) {
      // We won't skip behind the end of input.
      DCHECK(!unicode_cache_->IsWhiteSpace(kEndOfInput));

      const bool is_line_terminator = unibrow::IsLineTerminator(c0_);
      if (!is_line_terminator && !unicode_cache_->IsWhiteSpace(c0_)) break;
      if (is_line_terminator) has_line_terminator_before_next_ = true;
      Advance();
    }

    // If there is an HTML comment end '-->' at the beginning of a
    // line (with only whitespace in front of it), we treat the rest
    // of the line as a comment. This is in line with the way
    // SpiderMonkey handles it.
    const bool may_be_comment_end =
        c0_ == '-' && has_line_terminator_before_next_;
    if (!may_be_comment_end) break;

    Advance();
    if (c0_ != '-') {
      PushBack('-');  // undo Advance()
      break;
    }

    Advance();
    if (c0_ != '>') {
      PushBack2('-', '-');  // undo 2x Advance();
      break;
    }

    // Treat the rest of the line as a comment.
    const Token::Value comment_result = SkipSingleHTMLComment();
    if (comment_result == Token::ILLEGAL) {
      return comment_result;
    }
  }

  // Report whether or not we skipped any characters.
  const bool skipped_nothing = source_pos() == start_position;
  if (skipped_nothing) {
    return Token::ILLEGAL;
  }
  return Token::WHITESPACE;
}

// Skips an HTML-style comment as a single-line comment. In module code such
// comments are rejected: an error is reported and Token::ILLEGAL is returned.
Token::Value Scanner::SkipSingleHTMLComment() {
  if (!is_module_) {
    return SkipSingleLineComment();
  }
  ReportScannerError(source_pos(), MessageTemplate::kHtmlCommentInModule);
  return Token::ILLEGAL;
}
lrn@chromium.org's avatar
lrn@chromium.org committed
479

480
// Consumes one character and then everything up to (but not including) the
// next line terminator or the end of input. Always returns Token::WHITESPACE.
Token::Value Scanner::SkipSingleLineComment() {
  Advance();

  // The line terminator at the end of the line is not considered
  // to be part of the single-line comment; it is recognized
  // separately by the lexical grammar and becomes part of the
  // stream of input elements for the syntactic grammar (see
  // ECMA-262, section 7.4).
  while (!(c0_ == kEndOfInput || unibrow::IsLineTerminator(c0_))) {
    Advance();
  }

  return Token::WHITESPACE;
}


496 497
// Tries to interpret the comment as a sourceURL / sourceMappingURL magic
// comment, then skips whatever is left of the line (the line terminator itself
// is not consumed). Always returns Token::WHITESPACE.
Token::Value Scanner::SkipSourceURLComment() {
  TryToParseSourceURLComment();
  while (!(c0_ == kEndOfInput || unibrow::IsLineTerminator(c0_))) {
    Advance();
  }

  return Token::WHITESPACE;
}


void Scanner::TryToParseSourceURLComment() {
507
  // Magic comments are of the form: //[#@]\s<name>=\s*<value>\s*.* and this
508
  // function will just return if it cannot parse a magic comment.
509 510
  DCHECK(!unicode_cache_->IsWhiteSpaceOrLineTerminator(kEndOfInput));
  if (!unicode_cache_->IsWhiteSpace(c0_)) return;
511 512
  Advance();
  LiteralBuffer name;
513

514 515
  while (c0_ != kEndOfInput &&
         !unicode_cache_->IsWhiteSpaceOrLineTerminator(c0_) && c0_ != '=') {
516 517 518 519 520 521
    name.AddChar(c0_);
    Advance();
  }
  if (!name.is_one_byte()) return;
  Vector<const uint8_t> name_literal = name.one_byte_literal();
  LiteralBuffer* value;
522
  if (name_literal == STATIC_CHAR_VECTOR("sourceURL")) {
523
    value = &source_url_;
524
  } else if (name_literal == STATIC_CHAR_VECTOR("sourceMappingURL")) {
525 526 527 528 529 530 531 532
    value = &source_mapping_url_;
  } else {
    return;
  }
  if (c0_ != '=')
    return;
  Advance();
  value->Reset();
533
  while (unicode_cache_->IsWhiteSpace(c0_)) {
534 535
    Advance();
  }
536
  while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
537 538 539 540 541 542 543 544 545 546 547 548
    // Disallowed characters.
    if (c0_ == '"' || c0_ == '\'') {
      value->Reset();
      return;
    }
    if (unicode_cache_->IsWhiteSpace(c0_)) {
      break;
    }
    value->AddChar(c0_);
    Advance();
  }
  // Allow whitespace at the end.
549
  while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
550 551 552 553 554 555 556 557 558
    if (!unicode_cache_->IsWhiteSpace(c0_)) {
      value->Reset();
      break;
    }
    Advance();
  }
}


559
// Skips a multi-line comment; on entry the current character is the '*' that
// follows the opening '/'. Returns Token::WHITESPACE once the closing "*/" is
// found, or Token::ILLEGAL if the input ends first.
Token::Value Scanner::SkipMultiLineComment() {
  DCHECK_EQ(c0_, '*');
  Advance();

  while (c0_ != kEndOfInput) {
    const uc32 previous = c0_;
    Advance();
    DCHECK(!unibrow::IsLineTerminator(kEndOfInput));
    // Following ECMA-262, section 7.4, a comment containing
    // a newline will make the comment count as a line-terminator.
    if (unibrow::IsLineTerminator(previous)) {
      has_multiline_comment_before_next_ = true;
    }
    // If we have reached the end of the multi-line comment, we
    // consume the '/' and insert a whitespace. This way all
    // multi-line comments are treated as whitespace.
    const bool at_comment_end = previous == '*' && c0_ == '/';
    if (at_comment_end) {
      c0_ = ' ';
      return Token::WHITESPACE;
    }
  }

  // Unterminated multi-line comment.
  return Token::ILLEGAL;
}

585
// Called with the current character being the '!' that follows a '<'. If the
// input continues with "--", the rest of the line is skipped as an HTML
// comment; otherwise the consumed characters are pushed back and Token::LT is
// returned.
Token::Value Scanner::ScanHtmlComment() {
  // Check for <!-- comments.
  DCHECK_EQ(c0_, '!');
  Advance();
  if (c0_ == '-') {
    Advance();
    if (c0_ == '-') {
      found_html_comment_ = true;
      return SkipSingleHTMLComment();
    }
    PushBack2('-', '!');  // undo 2x Advance()
  } else {
    PushBack('!');  // undo Advance()
  }
  return Token::LT;
}
603

604
void Scanner::Scan() {
605 606
  next_.literal_chars = nullptr;
  next_.raw_literal_chars = nullptr;
607
  next_.invalid_template_escape_message = MessageTemplate::kNone;
608

609 610
  Token::Value token;
  do {
611 612 613 614 615 616 617 618 619 620 621 622 623
    if (static_cast<unsigned>(c0_) <= 0x7F) {
      Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]);
      if (token != Token::ILLEGAL) {
        int pos = source_pos();
        next_.token = token;
        next_.contextual_token = Token::UNINITIALIZED;
        next_.location.beg_pos = pos;
        next_.location.end_pos = pos + 1;
        Advance();
        return;
      }
    }

624 625 626 627
    // Remember the position of the next token
    next_.location.beg_pos = source_pos();

    switch (c0_) {
628 629
      case '"':
      case '\'':
630 631 632 633 634 635 636 637 638 639
        token = ScanString();
        break;

      case '<':
        // < <= << <<= <!--
        Advance();
        if (c0_ == '=') {
          token = Select(Token::LTE);
        } else if (c0_ == '<') {
          token = Select('=', Token::ASSIGN_SHL, Token::SHL);
640
        } else if (c0_ == '!') {
641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667
          token = ScanHtmlComment();
        } else {
          token = Token::LT;
        }
        break;

      case '>':
        // > >= >> >>= >>> >>>=
        Advance();
        if (c0_ == '=') {
          token = Select(Token::GTE);
        } else if (c0_ == '>') {
          // >> >>= >>> >>>=
          Advance();
          if (c0_ == '=') {
            token = Select(Token::ASSIGN_SAR);
          } else if (c0_ == '>') {
            token = Select('=', Token::ASSIGN_SHR, Token::SHR);
          } else {
            token = Token::SAR;
          }
        } else {
          token = Token::GT;
        }
        break;

      case '=':
668
        // = == === =>
669 670 671
        Advance();
        if (c0_ == '=') {
          token = Select('=', Token::EQ_STRICT, Token::EQ);
672 673
        } else if (c0_ == '>') {
          token = Select(Token::ARROW);
674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705
        } else {
          token = Token::ASSIGN;
        }
        break;

      case '!':
        // ! != !==
        Advance();
        if (c0_ == '=') {
          token = Select('=', Token::NE_STRICT, Token::NE);
        } else {
          token = Token::NOT;
        }
        break;

      case '+':
        // + ++ +=
        Advance();
        if (c0_ == '+') {
          token = Select(Token::INC);
        } else if (c0_ == '=') {
          token = Select(Token::ASSIGN_ADD);
        } else {
          token = Token::ADD;
        }
        break;

      case '-':
        // - -- --> -=
        Advance();
        if (c0_ == '-') {
          Advance();
706
          if (c0_ == '>' && HasAnyLineTerminatorBeforeNext()) {
707 708
            // For compatibility with SpiderMonkey, we skip lines that
            // start with an HTML comment end '-->'.
709
            token = SkipSingleHTMLComment();
710 711 712 713 714 715 716 717 718 719 720 721
          } else {
            token = Token::DEC;
          }
        } else if (c0_ == '=') {
          token = Select(Token::ASSIGN_SUB);
        } else {
          token = Token::SUB;
        }
        break;

      case '*':
        // * *=
722
        Advance();
723
        if (c0_ == '*') {
724 725 726 727 728 729
          token = Select('=', Token::ASSIGN_EXP, Token::EXP);
        } else if (c0_ == '=') {
          token = Select(Token::ASSIGN_MUL);
        } else {
          token = Token::MUL;
        }
730 731 732 733 734 735 736 737 738 739 740
        break;

      case '%':
        // % %=
        token = Select('=', Token::ASSIGN_MOD, Token::MOD);
        break;

      case '/':
        // /  // /* /=
        Advance();
        if (c0_ == '/') {
741
          Advance();
742
          if (c0_ == '#' || c0_ == '@') {
743 744 745 746 747 748
            Advance();
            token = SkipSourceURLComment();
          } else {
            PushBack(c0_);
            token = SkipSingleLineComment();
          }
749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793
        } else if (c0_ == '*') {
          token = SkipMultiLineComment();
        } else if (c0_ == '=') {
          token = Select(Token::ASSIGN_DIV);
        } else {
          token = Token::DIV;
        }
        break;

      case '&':
        // & && &=
        Advance();
        if (c0_ == '&') {
          token = Select(Token::AND);
        } else if (c0_ == '=') {
          token = Select(Token::ASSIGN_BIT_AND);
        } else {
          token = Token::BIT_AND;
        }
        break;

      case '|':
        // | || |=
        Advance();
        if (c0_ == '|') {
          token = Select(Token::OR);
        } else if (c0_ == '=') {
          token = Select(Token::ASSIGN_BIT_OR);
        } else {
          token = Token::BIT_OR;
        }
        break;

      case '^':
        // ^ ^=
        token = Select('=', Token::ASSIGN_BIT_XOR, Token::BIT_XOR);
        break;

      case '.':
        // . Number
        Advance();
        if (IsDecimalDigit(c0_)) {
          token = ScanNumber(true);
        } else {
          token = Token::PERIOD;
794 795 796 797 798 799 800 801 802
          if (c0_ == '.') {
            Advance();
            if (c0_ == '.') {
              Advance();
              token = Token::ELLIPSIS;
            } else {
              PushBack('.');
            }
          }
803 804 805
        }
        break;

806
      case '`':
807 808
        token = ScanTemplateStart();
        break;
809

810 811 812 813
      case '#':
        token = ScanPrivateName();
        break;

814
      default:
815 816 817
        if (unicode_cache_->IsIdentifierStart(c0_) ||
            (CombineSurrogatePair() &&
             unicode_cache_->IsIdentifierStart(c0_))) {
818 819 820
          token = ScanIdentifierOrKeyword();
        } else if (IsDecimalDigit(c0_)) {
          token = ScanNumber(false);
821 822
        } else if (c0_ == kEndOfInput) {
          token = Token::EOS;
823
        } else {
824
          token = SkipWhiteSpace();
825
          if (token == Token::ILLEGAL) Advance();
826 827 828 829 830 831 832 833 834
        }
        break;
    }

    // Continue scanning for tokens as long as we're just skipping
    // whitespace.
  } while (token == Token::WHITESPACE);

  next_.location.end_pos = source_pos();
835 836 837 838 839 840 841
  if (Token::IsContextualKeyword(token)) {
    next_.token = Token::IDENTIFIER;
    next_.contextual_token = token;
  } else {
    next_.token = token;
    next_.contextual_token = Token::UNINITIALIZED;
  }
842 843 844 845 846 847

#ifdef DEBUG
  SanityCheckTokenDesc(current_);
  SanityCheckTokenDesc(next_);
  SanityCheckTokenDesc(next_next_);
#endif
848 849
}

850 851 852 853 854 855 856 857
#ifdef DEBUG
// Debug-only consistency check of a token descriptor.
void Scanner::SanityCheckTokenDesc(const TokenDesc& token) const {
  // Most tokens should not have literal_chars or even raw_literal chars.
  // The rules are:
  // - UNINITIALIZED: we don't care.
  // - TEMPLATE_*: need both literal + raw literal chars.
  // - IDENTIFIERS, STRINGS, etc.: need a literal, but no raw literal.
  // - all others: should have neither.
  // Furthermore, only TEMPLATE_* tokens can have a
  // invalid_template_escape_message.

  switch (token.token) {
    case Token::UNINITIALIZED:
      // token.literal_chars & other members might be garbage. That's ok.
      break;

    case Token::TEMPLATE_SPAN:
    case Token::TEMPLATE_TAIL:
      DCHECK_NOT_NULL(token.literal_chars);
      DCHECK_NOT_NULL(token.raw_literal_chars);
      break;

    case Token::ESCAPED_KEYWORD:
    case Token::ESCAPED_STRICT_RESERVED_WORD:
    case Token::FUTURE_STRICT_RESERVED_WORD:
    case Token::IDENTIFIER:
    case Token::NUMBER:
    case Token::BIGINT:
    case Token::REGEXP_LITERAL:
    case Token::SMI:
    case Token::STRING:
    case Token::PRIVATE_NAME:
      DCHECK_NOT_NULL(token.literal_chars);
      DCHECK_NULL(token.raw_literal_chars);
      DCHECK_EQ(token.invalid_template_escape_message, MessageTemplate::kNone);
      break;

    default:
      DCHECK_NULL(token.literal_chars);
      DCHECK_NULL(token.raw_literal_chars);
      DCHECK_EQ(token.invalid_template_escape_message, MessageTemplate::kNone);
      break;
  }

  // A contextual keyword is always carried by an IDENTIFIER token and never
  // appears as the token value itself.
  const bool is_identifier = token.token == Token::IDENTIFIER;
  const bool has_contextual_token =
      token.contextual_token != Token::UNINITIALIZED;
  DCHECK_IMPLIES(!is_identifier, !has_contextual_token);
  DCHECK_IMPLIES(
      has_contextual_token,
      is_identifier && Token::IsContextualKeyword(token.contextual_token));
  DCHECK(!Token::IsContextualKeyword(token.token));
}
#endif  // DEBUG
899

900
void Scanner::SeekForward(int pos) {
901 902 903 904
  // After this call, we will have the token at the given position as
  // the "next" token. The "current" token will be invalid.
  if (pos == next_.location.beg_pos) return;
  int current_pos = source_pos();
905
  DCHECK_EQ(next_.location.end_pos, current_pos);
906
  // Positions inside the lookahead token aren't supported.
907
  DCHECK(pos >= current_pos);
908
  if (pos != current_pos) {
909
    source_->Seek(pos);
910 911 912 913 914 915 916 917
    Advance();
    // This function is only called to seek to the location
    // of the end of a function (at the "}" token). It doesn't matter
    // whether there was a line terminator in the part we skip.
    has_line_terminator_before_next_ = false;
    has_multiline_comment_before_next_ = false;
  }
  Scan();
918 919 920
}


921
template <bool capture_raw, bool in_template_literal>
922
bool Scanner::ScanEscape() {
923
  uc32 c = c0_;
924
  Advance<capture_raw>();
925 926

  // Skip escaped newlines.
927 928
  DCHECK(!unibrow::IsLineTerminator(kEndOfInput));
  if (!in_template_literal && unibrow::IsLineTerminator(c)) {
929
    // Allow escaped CR+LF newlines in multiline string literals.
930
    if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance<capture_raw>();
931
    return true;
932
  }
933

934 935 936 937 938 939 940 941 942 943
  switch (c) {
    case '\'':  // fall through
    case '"' :  // fall through
    case '\\': break;
    case 'b' : c = '\b'; break;
    case 'f' : c = '\f'; break;
    case 'n' : c = '\n'; break;
    case 'r' : c = '\r'; break;
    case 't' : c = '\t'; break;
    case 'u' : {
944
      c = ScanUnicodeEscape<capture_raw>();
945
      if (c < 0) return false;
946 947
      break;
    }
948 949 950 951
    case 'v':
      c = '\v';
      break;
    case 'x': {
952
      c = ScanHexNumber<capture_raw>(2);
953
      if (c < 0) return false;
954 955
      break;
    }
956
    case '0':  // Fall through.
957 958 959 960 961 962
    case '1':  // fall through
    case '2':  // fall through
    case '3':  // fall through
    case '4':  // fall through
    case '5':  // fall through
    case '6':  // fall through
963
    case '7':
964
      c = ScanOctalEscape<capture_raw>(c, 2, in_template_literal);
965
      break;
966
  }
967

968
  // Other escaped characters are interpreted as their non-escaped version.
969
  AddLiteralChar(c);
970
  return true;
971 972
}

973
template <bool capture_raw>
974
uc32 Scanner::ScanOctalEscape(uc32 c, int length, bool in_template_literal) {
975 976 977 978 979 980 981 982
  uc32 x = c - '0';
  int i = 0;
  for (; i < length; i++) {
    int d = c0_ - '0';
    if (d < 0 || d > 7) break;
    int nx = x * 8 + d;
    if (nx >= 256) break;
    x = nx;
983
    Advance<capture_raw>();
984 985 986 987 988 989
  }
  // Anything except '\0' is an octal escape sequence, illegal in strict mode.
  // Remember the position of octal escape sequences so that an error
  // can be reported later (in strict mode).
  // We don't report the error immediately, because the octal escape can
  // occur before the "use strict" directive.
990
  if (c != '0' || i > 0 || c0_ == '8' || c0_ == '9') {
991
    octal_pos_ = Location(source_pos() - i - 1, source_pos() - 1);
992 993 994
    octal_message_ = in_template_literal
                         ? MessageTemplate::kTemplateOctalLiteral
                         : MessageTemplate::kStrictOctalEscape;
995
  }
996
  return x;
997 998 999
}


1000
// Scans a string literal. c0_ is the opening quote on entry; the same
// character terminates the literal. Returns Token::STRING on success and
// Token::ILLEGAL on end of input, an unescaped string-literal line terminator
// or a bad escape sequence.
Token::Value Scanner::ScanString() {
  const uc32 delimiter = c0_;
  Advance();  // Skip the opening quote.

  LiteralScope literal(this);
  while (c0_ != delimiter) {
    if (c0_ == kEndOfInput || unibrow::IsStringLiteralLineTerminator(c0_)) {
      return Token::ILLEGAL;
    }
    if (c0_ != '\\') {
      AddLiteralCharAdvance();
      continue;
    }
    Advance();  // Skip the backslash.
    // TODO(verwaest): Check whether we can remove the additional check.
    if (c0_ == kEndOfInput) return Token::ILLEGAL;
    if (!ScanEscape<false, false>()) return Token::ILLEGAL;
  }

  literal.Complete();
  Advance();  // Skip the closing quote.
  return Token::STRING;
}
1025

1026 1027 1028 1029 1030 1031 1032 1033 1034 1035
// Scans a private name: '#' followed by an identifier. The '#' is kept as
// part of the literal. Reports kInvalidOrUnexpectedToken and returns
// Token::ILLEGAL when private fields are disabled or when '#' is not followed
// by an identifier start.
Token::Value Scanner::ScanPrivateName() {
  if (!allow_harmony_private_fields()) {
    ReportScannerError(source_pos(),
                       MessageTemplate::kInvalidOrUnexpectedToken);
    return Token::ILLEGAL;
  }

  LiteralScope literal(this);
  DCHECK_EQ(c0_, '#');
  AddLiteralCharAdvance();

  DCHECK(!unicode_cache_->IsIdentifierStart(kEndOfInput));
  if (unicode_cache_->IsIdentifierStart(c0_)) {
    // Whatever the identifier scanner classifies the name as, a successfully
    // scanned name is surfaced as PRIVATE_NAME.
    if (ScanIdentifierOrKeywordInner(&literal) == Token::ILLEGAL) {
      return Token::ILLEGAL;
    }
    return Token::PRIVATE_NAME;
  }

  PushBack(c0_);
  ReportScannerError(source_pos(),
                     MessageTemplate::kInvalidOrUnexpectedToken);
  return Token::ILLEGAL;
}
1047

1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061
// Scans one chunk of a template literal, producing either
//
//   TEMPLATE_SPAN ::
//       ` LiteralChars* ${
//     | } LiteralChars* ${
//
//   TEMPLATE_TAIL ::
//       ` LiteralChars* `
//     | } LiteralChars* `
//
// A TEMPLATE_SPAN is always followed by an Expression; a TEMPLATE_TAIL ends
// the TemplateLiteral. Both the cooked and the raw literal are recorded.
Token::Value Scanner::ScanTemplateSpan() {
  // Invalid escape sequences inside templates are handled by the parser, so
  // the scanner's own error state is saved here, cleared for the duration of
  // the scan, and restored when these helpers go out of scope.
  ErrorState scanner_error_state(&scanner_error_, &scanner_error_location_);
  ErrorState octal_error_state(&octal_message_, &octal_pos_);

  Token::Value result = Token::TEMPLATE_SPAN;
  LiteralScope literal(this);
  StartRawLiteral();
  const bool capture_raw = true;
  const bool in_template_literal = true;

  for (;;) {
    uc32 ch = c0_;
    Advance<capture_raw>();

    if (ch == '`') {
      // Closing backtick: drop it from the raw literal.
      result = Token::TEMPLATE_TAIL;
      ReduceRawLiteralLength(1);
      break;
    }
    if (ch == '$' && c0_ == '{') {
      Advance<capture_raw>();  // Consume '{'
      ReduceRawLiteralLength(2);  // Drop "${" from the raw literal.
      break;
    }
    if (ch < 0) {
      // Unterminated template literal.
      PushBack(ch);
      break;
    }

    if (ch == '\\') {
      DCHECK(!unibrow::IsLineTerminator(kEndOfInput));
      if (unibrow::IsLineTerminator(c0_)) {
        // The TV of LineContinuation :: \ LineTerminatorSequence is the empty
        // code unit sequence, so nothing is added to the cooked literal. In
        // the raw literal a CR or CR+LF is recorded as a single \n.
        const uc32 terminator = c0_;
        Advance<capture_raw>();
        if (terminator == '\r') {
          ReduceRawLiteralLength(1);  // Remove \r
          if (c0_ == '\n') {
            Advance<capture_raw>();  // Adds \n
          } else {
            AddRawLiteralChar('\n');
          }
        }
      } else {
        bool success = ScanEscape<capture_raw, in_template_literal>();
        USE(success);
        DCHECK_EQ(!success, has_error());
        // Hand any escape error over to the token so the parser can decide
        // what to do with it.
        scanner_error_state.MoveErrorTo(&next_);
        octal_error_state.MoveErrorTo(&next_);
      }
      continue;
    }

    // The TRV of LineTerminatorSequence :: <CR> is the CV 0x000A.
    // The TRV of LineTerminatorSequence :: <CR><LF> is the sequence
    // consisting of the CV 0x000A.
    if (ch == '\r') {
      ReduceRawLiteralLength(1);  // Remove \r
      if (c0_ == '\n') {
        Advance<capture_raw>();  // Adds \n
      } else {
        AddRawLiteralChar('\n');
      }
      ch = '\n';
    }
    AddLiteralChar(ch);
  }

  literal.Complete();
  next_.location.end_pos = source_pos();
  next_.token = result;
  next_.contextual_token = Token::UNINITIALIZED;

  return result;
}


1137
// Begins scanning a template literal. c0_ must be the opening backtick; the
// token's start position is recorded before the backtick is consumed, and the
// first span is then scanned.
Token::Value Scanner::ScanTemplateStart() {
  DCHECK_EQ(next_next_.token, Token::UNINITIALIZED);
  DCHECK_EQ(c0_, '`');
  const int backtick_pos = source_pos();
  next_.location.beg_pos = backtick_pos;
  Advance();  // Step over the backtick.
  return ScanTemplateSpan();
}

1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156
// Returns the internalized contents of source_url_, or a default-constructed
// handle when source_url_ is empty.
Handle<String> Scanner::SourceUrl(Isolate* isolate) const {
  if (source_url_.length() <= 0) return Handle<String>();
  return source_url_.Internalize(isolate);
}

// Returns the internalized contents of source_mapping_url_, or a
// default-constructed handle when source_mapping_url_ is empty.
Handle<String> Scanner::SourceMappingUrl(Isolate* isolate) const {
  if (source_mapping_url_.length() <= 0) return Handle<String>();
  return source_mapping_url_.Internalize(isolate);
}
1157

1158 1159 1160 1161 1162 1163 1164 1165
// Scans a run of digits accepted by |predicate|, allowing single '_'
// separators between them. Digits are appended to the literal; separators are
// skipped. When |is_check_first_digit| is set, the run must start with a
// digit. Reports an error and returns false for two consecutive separators or
// a trailing separator.
bool Scanner::ScanDigitsWithNumericSeparators(bool (*predicate)(uc32 ch),
                                              bool is_check_first_digit) {
  // we must have at least one digit after 'x'/'b'/'o'
  if (is_check_first_digit && !predicate(c0_)) return false;

  bool last_was_separator = false;
  for (;;) {
    if (c0_ == '_') {
      Advance();
      if (c0_ == '_') {
        ReportScannerError(Location(source_pos(), source_pos() + 1),
                           MessageTemplate::kContinuousNumericSeparator);
        return false;
      }
      last_was_separator = true;
    } else if (predicate(c0_)) {
      last_was_separator = false;
      AddLiteralCharAdvance();
    } else {
      break;
    }
  }

  if (last_was_separator) {
    ReportScannerError(Location(source_pos(), source_pos() + 1),
                       MessageTemplate::kTrailingNumericSeparator);
    return false;
  }
  return true;
}

1188
bool Scanner::ScanDecimalDigits() {
1189
  if (allow_harmony_numeric_separator()) {
1190
    return ScanDigitsWithNumericSeparators(&IsDecimalDigit, false);
1191 1192 1193 1194 1195 1196 1197
  }
  while (IsDecimalDigit(c0_)) {
    AddLiteralCharAdvance();
  }
  return true;
}

1198
// Separator-aware variant of ScanDecimalAsSmi: scans decimal digits with
// optional single '_' separators, appending each digit to the literal and
// folding it into |*value|. Reports an error and returns false for two
// consecutive separators or a trailing separator.
bool Scanner::ScanDecimalAsSmiWithNumericSeparators(uint64_t* value) {
  bool last_was_separator = false;
  for (;;) {
    if (c0_ == '_') {
      Advance();
      if (c0_ == '_') {
        ReportScannerError(Location(source_pos(), source_pos() + 1),
                           MessageTemplate::kContinuousNumericSeparator);
        return false;
      }
      last_was_separator = true;
    } else if (IsDecimalDigit(c0_)) {
      last_was_separator = false;
      const uc32 digit = c0_;
      *value = 10 * *value + (digit - '0');
      Advance();
      AddLiteralChar(digit);
    } else {
      break;
    }
  }

  if (last_was_separator) {
    ReportScannerError(Location(source_pos(), source_pos() + 1),
                       MessageTemplate::kTrailingNumericSeparator);
    return false;
  }
  return true;
}

1227
// Scans a run of decimal digits, appending each one to the literal while
// accumulating its numeric value into |*value|. Delegates to the
// separator-aware variant when numeric separators are enabled.
bool Scanner::ScanDecimalAsSmi(uint64_t* value) {
  if (allow_harmony_numeric_separator()) {
    return ScanDecimalAsSmiWithNumericSeparators(value);
  }

  for (; IsDecimalDigit(c0_);) {
    const uc32 digit = c0_;
    *value = 10 * *value + (digit - '0');
    Advance();
    AddLiteralChar(digit);
  }
  return true;
}

1241
bool Scanner::ScanBinaryDigits() {
1242
  if (allow_harmony_numeric_separator()) {
1243
    return ScanDigitsWithNumericSeparators(&IsBinaryDigit, true);
1244 1245
  }

1246
  // we must have at least one binary digit after 'b'/'B'
1247 1248 1249 1250
  if (!IsBinaryDigit(c0_)) {
    return false;
  }

1251 1252 1253 1254 1255 1256
  while (IsBinaryDigit(c0_)) {
    AddLiteralCharAdvance();
  }
  return true;
}

1257
bool Scanner::ScanOctalDigits() {
1258
  if (allow_harmony_numeric_separator()) {
1259
    return ScanDigitsWithNumericSeparators(&IsOctalDigit, true);
1260 1261
  }

1262
  // we must have at least one octal digit after 'o'/'O'
1263 1264 1265 1266
  if (!IsOctalDigit(c0_)) {
    return false;
  }

1267 1268 1269
  while (IsOctalDigit(c0_)) {
    AddLiteralCharAdvance();
  }
1270
  return true;
1271 1272
}

1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292
// Scans the digits of a legacy octal literal (leading zero, no prefix).
// If an '8' or '9' is encountered, |*kind| becomes DECIMAL_WITH_LEADING_ZERO
// and scanning stops with that digit still unconsumed. Otherwise |*kind| is
// IMPLICIT_OCTAL and the literal's span is recorded so a strict-mode error can
// be reported later. Always returns true.
bool Scanner::ScanImplicitOctalDigits(int start_pos,
                                      Scanner::NumberKind* kind) {
  *kind = IMPLICIT_OCTAL;

  while ('0' <= c0_ && c0_ <= '7') {
    AddLiteralCharAdvance();
  }

  if (c0_ == '8' || c0_ == '9') {
    // Not an octal number after all.
    *kind = DECIMAL_WITH_LEADING_ZERO;
  } else {
    // Octal literal finished.
    octal_pos_ = Location(start_pos, source_pos());
    octal_message_ = MessageTemplate::kStrictOctalLiteral;
  }
  return true;
}

1293
bool Scanner::ScanHexDigits() {
1294
  if (allow_harmony_numeric_separator()) {
1295
    return ScanDigitsWithNumericSeparators(&IsHexDigit, true);
1296 1297
  }

1298
  // we must have at least one hex digit after 'x'/'X'
1299 1300 1301 1302
  if (!IsHexDigit(c0_)) {
    return false;
  }

1303 1304 1305 1306 1307 1308
  while (IsHexDigit(c0_)) {
    AddLiteralCharAdvance();
  }
  return true;
}

1309
bool Scanner::ScanSignedInteger() {
1310 1311 1312
  if (c0_ == '+' || c0_ == '-') AddLiteralCharAdvance();
  // we must have at least one decimal digit after 'e'/'E'
  if (!IsDecimalDigit(c0_)) return false;
1313
  return ScanDecimalDigits();
1314
}
1315

1316
// Scans a numeric literal. On entry c0_ is the first digit of the number, or
// the first digit of the fraction when |seen_period| is true (the '.' has
// then already been seen and is added to the literal here).
//
// Returns Token::SMI for short decimal integers that fit the Smi fast path,
// Token::BIGINT for literals with an 'n' suffix (when enabled), Token::NUMBER
// for everything else that is well-formed, and Token::ILLEGAL otherwise.
Token::Value Scanner::ScanNumber(bool seen_period) {
  DCHECK(IsDecimalDigit(c0_));

  NumberKind kind = DECIMAL;

  LiteralScope literal(this);
  // The Smi fast path is only attempted for a literal scanned from its very
  // first digit.
  bool try_smi = !seen_period;
  const int start_pos = source_pos();  // For reporting octal positions.

  if (seen_period) {
    // The decimal point has been seen already; the fraction follows.
    AddLiteralChar('.');
    if (allow_harmony_numeric_separator() && c0_ == '_') {
      return Token::ILLEGAL;
    }
    // At least one digit is known to be present.
    if (!ScanDecimalDigits()) return Token::ILLEGAL;
  } else {
    // A leading '0' may introduce 0, 0exxx, 0Exxx, 0.xxx, a hex number, a
    // binary number or an octal number.
    if (c0_ == '0') {
      AddLiteralCharAdvance();

      switch (c0_) {
        case 'x':
        case 'X':
          AddLiteralCharAdvance();
          kind = HEX;
          if (!ScanHexDigits()) return Token::ILLEGAL;
          break;
        case 'o':
        case 'O':
          AddLiteralCharAdvance();
          kind = OCTAL;
          if (!ScanOctalDigits()) return Token::ILLEGAL;
          break;
        case 'b':
        case 'B':
          AddLiteralCharAdvance();
          kind = BINARY;
          if (!ScanBinaryDigits()) return Token::ILLEGAL;
          break;
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
          kind = IMPLICIT_OCTAL;
          if (!ScanImplicitOctalDigits(start_pos, &kind)) {
            return Token::ILLEGAL;
          }
          // Digits were already consumed, so the Smi fast path no longer
          // applies.
          if (kind == DECIMAL_WITH_LEADING_ZERO) {
            try_smi = false;
          }
          break;
        case '8':
        case '9':
          kind = DECIMAL_WITH_LEADING_ZERO;
          break;
        case '_':
          if (allow_harmony_numeric_separator()) {
            ReportScannerError(Location(source_pos(), source_pos() + 1),
                               MessageTemplate::kZeroDigitNumericSeparator);
            return Token::ILLEGAL;
          }
          break;
        default:
          break;
      }
    }

    // Decimal digits, optionally followed by a fractional part.
    if (kind == DECIMAL || kind == DECIMAL_WITH_LEADING_ZERO) {
      if (try_smi) {
        // Optimization: accumulate the value while scanning so that small
        // integers can be returned as Smis directly.
        uint64_t smi_candidate = 0;
        if (!ScanDecimalAsSmi(&smi_candidate)) {
          return Token::ILLEGAL;
        }

        if (next_.literal_chars->one_byte_literal().length() <= 10 &&
            smi_candidate <= Smi::kMaxValue && c0_ != '.' &&
            !unicode_cache_->IsIdentifierStart(c0_)) {
          next_.smi_value_ = static_cast<uint32_t>(smi_candidate);
          literal.Complete();

          if (kind == DECIMAL_WITH_LEADING_ZERO) {
            octal_pos_ = Location(start_pos, source_pos());
            octal_message_ = MessageTemplate::kStrictDecimalWithLeadingZero;
          }
          return Token::SMI;
        }
      }

      if (!ScanDecimalDigits()) return Token::ILLEGAL;
      if (c0_ == '.') {
        seen_period = true;
        AddLiteralCharAdvance();
        if (allow_harmony_numeric_separator() && c0_ == '_') {
          return Token::ILLEGAL;
        }
        if (!ScanDecimalDigits()) return Token::ILLEGAL;
      }
    }
  }

  bool is_bigint = false;
  if (allow_harmony_bigint() && c0_ == 'n' && !seen_period &&
      (kind == DECIMAL || kind == HEX || kind == OCTAL || kind == BINARY)) {
    // Check that the literal is within our limits for BigInt length.
    // For simplicity, use 4 bits per character to calculate the maximum
    // allowed literal length.
    static const int kMaxBigIntCharacters = BigInt::kMaxLengthBits / 4;
    // Non-decimal literals carry a two-character prefix that does not count.
    const int prefix_length = (kind != DECIMAL) ? 2 : 0;
    const int length = source_pos() - start_pos - prefix_length;
    if (length > kMaxBigIntCharacters) {
      ReportScannerError(Location(start_pos, source_pos()),
                         MessageTemplate::kBigIntTooBig);
      return Token::ILLEGAL;
    }

    is_bigint = true;
    Advance();
  } else if (c0_ == 'e' || c0_ == 'E') {
    // Exponent part.
    DCHECK(kind != HEX);  // 'e'/'E' must be scanned as part of the hex number

    if (!(kind == DECIMAL || kind == DECIMAL_WITH_LEADING_ZERO)) {
      return Token::ILLEGAL;
    }

    AddLiteralCharAdvance();

    if (!ScanSignedInteger()) return Token::ILLEGAL;
  }

  // The source character immediately following a numeric literal must
  // not be an identifier start or a decimal digit; see ECMA-262
  // section 7.8.3, page 17 (note that we read only one decimal digit
  // if the value is 0).
  if (IsDecimalDigit(c0_) || unicode_cache_->IsIdentifierStart(c0_)) {
    return Token::ILLEGAL;
  }

  literal.Complete();

  if (kind == DECIMAL_WITH_LEADING_ZERO) {
    octal_pos_ = Location(start_pos, source_pos());
    octal_message_ = MessageTemplate::kStrictDecimalWithLeadingZero;
  }

  return is_bigint ? Token::BIGINT : Token::NUMBER;
}


1452
// Scans a unicode escape inside an identifier. Advances past the current
// character (callers invoke this with c0_ == '\\'), requires a 'u', and then
// decodes the escape without capturing raw text. Returns -1 if 'u' is
// missing; otherwise returns whatever ScanUnicodeEscape yields.
uc32 Scanner::ScanIdentifierUnicodeEscape() {
  Advance();
  if (c0_ == 'u') {
    Advance();
    return ScanUnicodeEscape<false>();
  }
  return -1;
}


1460
template <bool capture_raw>
marja's avatar
marja committed
1461
uc32 Scanner::ScanUnicodeEscape() {
adamk's avatar
adamk committed
1462 1463 1464
  // Accept both \uxxxx and \u{xxxxxx}. In the latter case, the number of
  // hex digits between { } is arbitrary. \ and u have already been read.
  if (c0_ == '{') {
1465
    int begin = source_pos() - 2;
1466
    Advance<capture_raw>();
1467
    uc32 cp = ScanUnlimitedLengthHexNumber<capture_raw>(0x10FFFF, begin);
1468 1469 1470
    if (cp < 0 || c0_ != '}') {
      ReportScannerError(source_pos(),
                         MessageTemplate::kInvalidUnicodeEscapeSequence);
marja's avatar
marja committed
1471 1472
      return -1;
    }
1473
    Advance<capture_raw>();
marja's avatar
marja committed
1474 1475
    return cp;
  }
1476 1477
  const bool unicode = true;
  return ScanHexNumber<capture_raw, unicode>(4);
1478 1479 1480
}


1481 1482 1483
// ----------------------------------------------------------------------------
// Keyword Matcher

1484
#define KEYWORDS(KEYWORD_GROUP, KEYWORD)                    \
1485
  KEYWORD_GROUP('a')                                        \
1486 1487
  KEYWORD("arguments", Token::ARGUMENTS)                    \
  KEYWORD("as", Token::AS)                                  \
1488
  KEYWORD("async", Token::ASYNC)                            \
1489
  KEYWORD("await", Token::AWAIT)                            \
1490
  KEYWORD("anonymous", Token::ANONYMOUS)                    \
1491 1492 1493 1494 1495 1496 1497
  KEYWORD_GROUP('b')                                        \
  KEYWORD("break", Token::BREAK)                            \
  KEYWORD_GROUP('c')                                        \
  KEYWORD("case", Token::CASE)                              \
  KEYWORD("catch", Token::CATCH)                            \
  KEYWORD("class", Token::CLASS)                            \
  KEYWORD("const", Token::CONST)                            \
1498
  KEYWORD("constructor", Token::CONSTRUCTOR)                \
1499 1500 1501 1502 1503 1504 1505 1506
  KEYWORD("continue", Token::CONTINUE)                      \
  KEYWORD_GROUP('d')                                        \
  KEYWORD("debugger", Token::DEBUGGER)                      \
  KEYWORD("default", Token::DEFAULT)                        \
  KEYWORD("delete", Token::DELETE)                          \
  KEYWORD("do", Token::DO)                                  \
  KEYWORD_GROUP('e')                                        \
  KEYWORD("else", Token::ELSE)                              \
1507
  KEYWORD("enum", Token::ENUM)                              \
1508
  KEYWORD("eval", Token::EVAL)                              \
1509 1510 1511 1512 1513 1514
  KEYWORD("export", Token::EXPORT)                          \
  KEYWORD("extends", Token::EXTENDS)                        \
  KEYWORD_GROUP('f')                                        \
  KEYWORD("false", Token::FALSE_LITERAL)                    \
  KEYWORD("finally", Token::FINALLY)                        \
  KEYWORD("for", Token::FOR)                                \
1515
  KEYWORD("from", Token::FROM)                              \
1516
  KEYWORD("function", Token::FUNCTION)                      \
1517 1518
  KEYWORD_GROUP('g')                                        \
  KEYWORD("get", Token::GET)                                \
1519 1520 1521 1522 1523 1524 1525 1526 1527
  KEYWORD_GROUP('i')                                        \
  KEYWORD("if", Token::IF)                                  \
  KEYWORD("implements", Token::FUTURE_STRICT_RESERVED_WORD) \
  KEYWORD("import", Token::IMPORT)                          \
  KEYWORD("in", Token::IN)                                  \
  KEYWORD("instanceof", Token::INSTANCEOF)                  \
  KEYWORD("interface", Token::FUTURE_STRICT_RESERVED_WORD)  \
  KEYWORD_GROUP('l')                                        \
  KEYWORD("let", Token::LET)                                \
1528 1529
  KEYWORD_GROUP('m')                                        \
  KEYWORD("meta", Token::META)                              \
1530
  KEYWORD_GROUP('n')                                        \
1531
  KEYWORD("name", Token::NAME)                              \
1532 1533
  KEYWORD("new", Token::NEW)                                \
  KEYWORD("null", Token::NULL_LITERAL)                      \
1534 1535
  KEYWORD_GROUP('o')                                        \
  KEYWORD("of", Token::OF)                                  \
1536 1537 1538 1539
  KEYWORD_GROUP('p')                                        \
  KEYWORD("package", Token::FUTURE_STRICT_RESERVED_WORD)    \
  KEYWORD("private", Token::FUTURE_STRICT_RESERVED_WORD)    \
  KEYWORD("protected", Token::FUTURE_STRICT_RESERVED_WORD)  \
1540
  KEYWORD("prototype", Token::PROTOTYPE)                    \
1541 1542 1543 1544
  KEYWORD("public", Token::FUTURE_STRICT_RESERVED_WORD)     \
  KEYWORD_GROUP('r')                                        \
  KEYWORD("return", Token::RETURN)                          \
  KEYWORD_GROUP('s')                                        \
1545
  KEYWORD("set", Token::SET)                                \
1546 1547 1548 1549
  KEYWORD("static", Token::STATIC)                          \
  KEYWORD("super", Token::SUPER)                            \
  KEYWORD("switch", Token::SWITCH)                          \
  KEYWORD_GROUP('t')                                        \
1550
  KEYWORD("target", Token::TARGET)                          \
1551 1552 1553 1554 1555
  KEYWORD("this", Token::THIS)                              \
  KEYWORD("throw", Token::THROW)                            \
  KEYWORD("true", Token::TRUE_LITERAL)                      \
  KEYWORD("try", Token::TRY)                                \
  KEYWORD("typeof", Token::TYPEOF)                          \
1556 1557
  KEYWORD_GROUP('u')                                        \
  KEYWORD("undefined", Token::UNDEFINED)                    \
1558 1559 1560 1561 1562 1563 1564
  KEYWORD_GROUP('v')                                        \
  KEYWORD("var", Token::VAR)                                \
  KEYWORD("void", Token::VOID)                              \
  KEYWORD_GROUP('w')                                        \
  KEYWORD("while", Token::WHILE)                            \
  KEYWORD("with", Token::WITH)                              \
  KEYWORD_GROUP('y')                                        \
1565 1566
  KEYWORD("yield", Token::YIELD)                            \
  KEYWORD_GROUP('_')                                        \
1567 1568 1569
  KEYWORD("__proto__", Token::PROTO_UNDERSCORED)            \
  KEYWORD_GROUP('#')                                        \
  KEYWORD("#constructor", Token::PRIVATE_CONSTRUCTOR)
1570

1571
static Token::Value KeywordOrIdentifierToken(const uint8_t* input,
1572
                                             int input_length) {
1573
  DCHECK_GE(input_length, 1);
1574
  const int kMinLength = 2;
1575
  const int kMaxLength = 12;
1576 1577 1578 1579 1580 1581 1582 1583
  if (input_length < kMinLength || input_length > kMaxLength) {
    return Token::IDENTIFIER;
  }
  switch (input[0]) {
    default:
#define KEYWORD_GROUP_CASE(ch)                                \
      break;                                                  \
    case ch:
1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605
#define KEYWORD(keyword, token)                                           \
  {                                                                       \
    /* 'keyword' is a char array, so sizeof(keyword) is */                \
    /* strlen(keyword) plus 1 for the NUL char. */                        \
    const int keyword_length = sizeof(keyword) - 1;                       \
    STATIC_ASSERT(keyword_length >= kMinLength);                          \
    STATIC_ASSERT(keyword_length <= kMaxLength);                          \
    DCHECK_EQ(input[0], keyword[0]);                                      \
    DCHECK(token == Token::FUTURE_STRICT_RESERVED_WORD ||                 \
           0 == strncmp(keyword, Token::String(token), sizeof(keyword))); \
    if (input_length == keyword_length && input[1] == keyword[1] &&       \
        (keyword_length <= 2 || input[2] == keyword[2]) &&                \
        (keyword_length <= 3 || input[3] == keyword[3]) &&                \
        (keyword_length <= 4 || input[4] == keyword[4]) &&                \
        (keyword_length <= 5 || input[5] == keyword[5]) &&                \
        (keyword_length <= 6 || input[6] == keyword[6]) &&                \
        (keyword_length <= 7 || input[7] == keyword[7]) &&                \
        (keyword_length <= 8 || input[8] == keyword[8]) &&                \
        (keyword_length <= 9 || input[9] == keyword[9]) &&                \
        (keyword_length <= 10 || input[10] == keyword[10])) {             \
      return token;                                                       \
    }                                                                     \
1606
  }
1607
      KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD)
1608
  }
1609
  return Token::IDENTIFIER;
1610 1611 1612
#undef KEYWORDS
#undef KEYWORD
#undef KEYWORD_GROUP_CASE
1613 1614
}

1615
// Scans an identifier or keyword starting at c0_, using a fresh literal
// scope for its characters.
Token::Value Scanner::ScanIdentifierOrKeyword() {
  LiteralScope scope(this);
  const Token::Value token = ScanIdentifierOrKeywordInner(&scope);
  return token;
}

// Scans an identifier or keyword into the literal owned by |literal|. c0_
// must be an identifier start character.
//
// ASCII-only names are handled by fast paths first; anything containing a
// backslash escape or a non-ASCII character falls through to the general
// loop. Keywords written with unicode escapes are reported as
// ESCAPED_KEYWORD or ESCAPED_STRICT_RESERVED_WORD. Returns Token::ILLEGAL if
// an escape does not decode to a valid identifier character.
Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) {
  DCHECK(unicode_cache_->IsIdentifierStart(c0_));
  bool saw_escape = false;

  if (IsInRange(c0_, 'a', 'z') || c0_ == '_') {
    // Keywords consist only of a-z and '_', so consume that prefix first.
    do {
      AddLiteralCharAdvance();
    } while (IsInRange(c0_, 'a', 'z') || c0_ == '_');

    if (IsDecimalDigit(c0_) || IsInRange(c0_, 'A', 'Z') || c0_ == '$') {
      // A digit, uppercase letter or '$' follows: this cannot be a keyword.
      do {
        AddLiteralCharAdvance();
      } while (IsAsciiIdentifier(c0_));

      if (c0_ <= kMaxAscii && c0_ != '\\') {
        literal->Complete();
        return Token::IDENTIFIER;
      }
    } else if (c0_ <= kMaxAscii && c0_ != '\\') {
      // Only a-z and '_' were seen: either a keyword or an identifier.
      Vector<const uint8_t> text = next_.literal_chars->one_byte_literal();
      const Token::Value token =
          KeywordOrIdentifierToken(text.start(), text.length());
      if (token == Token::IDENTIFIER ||
          token == Token::FUTURE_STRICT_RESERVED_WORD ||
          Token::IsContextualKeyword(token)) {
        literal->Complete();
      }
      return token;
    }
  } else if (IsInRange(c0_, 'A', 'Z') || c0_ == '$') {
    // Starts with an uppercase letter or '$': cannot be a keyword.
    do {
      AddLiteralCharAdvance();
    } while (IsAsciiIdentifier(c0_));

    if (c0_ <= kMaxAscii && c0_ != '\\') {
      literal->Complete();
      return Token::IDENTIFIER;
    }
  } else if (c0_ == '\\') {
    // The identifier begins with a unicode escape.
    saw_escape = true;
    const uc32 decoded = ScanIdentifierUnicodeEscape();
    DCHECK(!unicode_cache_->IsIdentifierStart(-1));
    if (decoded == '\\' || !unicode_cache_->IsIdentifierStart(decoded)) {
      return Token::ILLEGAL;
    }
    AddLiteralChar(decoded);
  }

  // General path: escapes, non-ASCII characters and surrogate pairs.
  for (;;) {
    if (c0_ == '\\') {
      saw_escape = true;
      const uc32 decoded = ScanIdentifierUnicodeEscape();
      // Only allow legal identifier part characters.
      // TODO(verwaest): Make this true.
      // DCHECK(!unicode_cache_->IsIdentifierPart('\\'));
      DCHECK(!unicode_cache_->IsIdentifierPart(-1));
      if (decoded == '\\' || !unicode_cache_->IsIdentifierPart(decoded)) {
        return Token::ILLEGAL;
      }
      AddLiteralChar(decoded);
    } else if (unicode_cache_->IsIdentifierPart(c0_) ||
               (CombineSurrogatePair() &&
                unicode_cache_->IsIdentifierPart(c0_))) {
      AddLiteralCharAdvance();
    } else {
      break;
    }
  }

  // The keyword matcher is only consulted for one-byte literals.
  if (!next_.literal_chars->is_one_byte()) {
    literal->Complete();
    return Token::IDENTIFIER;
  }

  Vector<const uint8_t> text = next_.literal_chars->one_byte_literal();
  const Token::Value token =
      KeywordOrIdentifierToken(text.start(), text.length());
  /* TODO(adamk): YIELD should be handled specially. */
  if (token == Token::FUTURE_STRICT_RESERVED_WORD) {
    literal->Complete();
    return saw_escape ? Token::ESCAPED_STRICT_RESERVED_WORD : token;
  }
  if (token == Token::IDENTIFIER || Token::IsContextualKeyword(token)) {
    literal->Complete();
    return token;
  }

  // A real keyword: returned as-is (without completing the literal) unless it
  // was spelled with an escape.
  if (!saw_escape) return token;

  literal->Complete();
  if (token == Token::LET || token == Token::STATIC) {
    return Token::ESCAPED_STRICT_RESERVED_WORD;
  }
  return Token::ESCAPED_KEYWORD;
}

1717 1718 1719
bool Scanner::ScanRegExpPattern() {
  DCHECK(next_next_.token == Token::UNINITIALIZED);
  DCHECK(next_.token == Token::DIV || next_.token == Token::ASSIGN_DIV);
lrn@chromium.org's avatar
lrn@chromium.org committed
1720

1721 1722
  // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
  bool in_character_class = false;
1723
  bool seen_equal = (next_.token == Token::ASSIGN_DIV);
1724 1725 1726 1727 1728

  // Previous token is either '/' or '/=', in the second case, the
  // pattern starts at =.
  next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);
  next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);
1729

1730 1731 1732 1733 1734 1735 1736 1737 1738
  // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
  // the scanner should pass uninterpreted bodies to the RegExp
  // constructor.
  LiteralScope literal(this);
  if (seen_equal) {
    AddLiteralChar('=');
  }

  while (c0_ != '/' || in_character_class) {
1739
    if (c0_ == kEndOfInput || unibrow::IsLineTerminator(c0_)) {
1740
      return false;
1741
    }
1742 1743
    if (c0_ == '\\') {  // Escape sequence.
      AddLiteralCharAdvance();
1744
      if (c0_ == kEndOfInput || unibrow::IsLineTerminator(c0_)) {
1745
        return false;
1746
      }
1747 1748 1749 1750 1751 1752 1753 1754 1755
      AddLiteralCharAdvance();
      // If the escape allows more characters, i.e., \x??, \u????, or \c?,
      // only "safe" characters are allowed (letters, digits, underscore),
      // otherwise the escape isn't valid and the invalid character has
      // its normal meaning. I.e., we can just continue scanning without
      // worrying whether the following characters are part of the escape
      // or not, since any '/', '\\' or '[' is guaranteed to not be part
      // of the escape sequence.

1756
      // TODO(896): At some point, parse RegExps more thoroughly to capture
1757 1758 1759 1760 1761 1762 1763 1764 1765 1766
      // octal esacpes in strict mode.
    } else {  // Unescaped character.
      if (c0_ == '[') in_character_class = true;
      if (c0_ == ']') in_character_class = false;
      AddLiteralCharAdvance();
    }
  }
  Advance();  // consume '/'

  literal.Complete();
1767
  next_.token = Token::REGEXP_LITERAL;
1768
  next_.contextual_token = Token::UNINITIALIZED;
1769
  return true;
1770 1771 1772
}


1773
// Scans the flags that follow a regular expression body. Must be called
// directly after a successful ScanRegExpPattern(), i.e. while next_ is a
// REGEXP_LITERAL token.
//
// Returns Nothing if an identifier-part character is not one of the flags
// handled below, or if a flag occurs more than once. Otherwise extends the
// end position of next_ over the flags and returns the combined flag set.
Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() {
  DCHECK(next_.token == Token::REGEXP_LITERAL);

  int seen_flags = 0;
  // Each iteration handles one flag character; Advance() consumes it only
  // after it has been accepted.
  for (; unicode_cache_->IsIdentifierPart(c0_); Advance()) {
    RegExp::Flags current = RegExp::kNone;
    switch (c0_) {
      case 'g': current = RegExp::kGlobal; break;
      case 'i': current = RegExp::kIgnoreCase; break;
      case 'm': current = RegExp::kMultiline; break;
      case 's': current = RegExp::kDotAll; break;
      case 'u': current = RegExp::kUnicode; break;
      case 'y': current = RegExp::kSticky; break;
      default:
        // Not a flag character handled here.
        return Nothing<RegExp::Flags>();
    }
    // Reject a flag that has already been seen.
    if ((seen_flags & current) != 0) return Nothing<RegExp::Flags>();
    seen_flags |= current;
  }

  next_.location.end_pos = source_pos();
  return Just(RegExp::Flags(seen_flags));
}

1813 1814
// Returns the current token's literal as a string obtained from
// |ast_value_factory|, using the one-byte or two-byte lookup depending on
// how the literal is stored.
const AstRawString* Scanner::CurrentSymbol(
    AstValueFactory* ast_value_factory) const {
  if (!is_literal_one_byte()) {
    return ast_value_factory->GetTwoByteString(literal_two_byte_string());
  }
  return ast_value_factory->GetOneByteString(literal_one_byte_string());
}

1821 1822
// Returns the next token's literal as a string obtained from
// |ast_value_factory|, using the one-byte or two-byte lookup depending on
// how the literal is stored.
const AstRawString* Scanner::NextSymbol(
    AstValueFactory* ast_value_factory) const {
  if (!is_next_literal_one_byte()) {
    return ast_value_factory->GetTwoByteString(next_literal_two_byte_string());
  }
  return ast_value_factory->GetOneByteString(next_literal_one_byte_string());
}

1829
// Returns the current token's raw literal as a string obtained from
// |ast_value_factory|, using the one-byte or two-byte lookup depending on
// how the raw literal is stored.
const AstRawString* Scanner::CurrentRawSymbol(
    AstValueFactory* ast_value_factory) const {
  if (!is_raw_literal_one_byte()) {
    return ast_value_factory->GetTwoByteString(raw_literal_two_byte_string());
  }
  return ast_value_factory->GetOneByteString(raw_literal_one_byte_string());
}


1838
double Scanner::DoubleValue() {
1839
  DCHECK(is_literal_one_byte());
1840
  return StringToDouble(
1841 1842
      unicode_cache_,
      literal_one_byte_string(),
1843 1844 1845
      ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY);
}

1846 1847 1848 1849 1850 1851 1852 1853 1854 1855
// Returns a NUL-terminated copy of the current one-byte literal. The copy is
// allocated from |zone|.
const char* Scanner::CurrentLiteralAsCString(Zone* zone) const {
  DCHECK(is_literal_one_byte());
  Vector<const uint8_t> literal = literal_one_byte_string();
  const int char_count = literal.length();
  // Reserve one extra char for the terminating NUL.
  char* copy = zone->NewArray<char>(char_count + 1);
  memcpy(copy, literal.start(), char_count);
  copy[char_count] = '\0';
  return copy;
}

1856 1857 1858 1859 1860 1861
// Records the current symbol in |duplicate_finder| and reports whether it
// had already been recorded there. Neither argument may be null.
bool Scanner::IsDuplicateSymbol(DuplicateFinder* duplicate_finder,
                                AstValueFactory* ast_value_factory) const {
  DCHECK_NOT_NULL(duplicate_finder);
  DCHECK_NOT_NULL(ast_value_factory);
  const AstRawString* symbol = CurrentSymbol(ast_value_factory);
  // insert() reports through .second whether the symbol was newly added.
  const bool newly_inserted =
      duplicate_finder->known_symbols_.insert(symbol).second;
  return !newly_inserted;
}

1864 1865 1866 1867 1868 1869 1870 1871
// Repositions the scanner so that the next token is re-scanned starting at
// character |position| of the source.
//
// Use with care: This cleanly resets most, but not all scanner state.
// TODO(vogelheim): Fix this, or at least DCHECK the relevant conditions.
void Scanner::SeekNext(size_t position) {
  // Step 1: reset the look-ahead tokens. Only the token kinds are cleared;
  // the remaining fields of next_ and next_next_ will be overwritten by
  // Next().
  next_next_.token = Token::UNINITIALIZED;
  next_next_.contextual_token = Token::UNINITIALIZED;
  next_.token = Token::UNINITIALIZED;
  next_.contextual_token = Token::UNINITIALIZED;

  // current_ will remain unchanged by the re-scan, so overwrite it fully.
  current_ = {{0, 0},
              nullptr,
              nullptr,
              0,
              Token::UNINITIALIZED,
              MessageTemplate::kNone,
              {0, 0},
              Token::UNINITIALIZED};

  // Step 2: reset the source to the desired position.
  source_->Seek(position);

  // Step 3: re-scan, by scanning the look-ahead char + 1 token (next_).
  c0_ = source_->Advance();
  Next();
  DCHECK_EQ(next_.location.beg_pos, static_cast<int>(position));
}

1892 1893
}  // namespace internal
}  // namespace v8