Commit 5e5d5df4 authored by Toon Verwaest, committed by Commit Bot

[scanner] Use TokenDesc* rather than TokenDesc to keep track of the stream

This embeds the LiteralBuffers, as well as the newline tracking, directly in
the TokenDesc so that we no longer need to figure out which buffer is free.
Instead of copying TokenDescs around, we now just update a pointer to keep
track of the state. Based on this architecture we'll be able to precompute
more tokens at once.
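
For intuition, here is a minimal, self-contained sketch of the rotation scheme (not the actual V8 sources). Three TokenDesc slots live in the scanner's token_storage_, each with its own embedded LiteralBuffer and newline flag; Next() and PeekAhead() shuffle the current_/next_/next_next_ pointers over that fixed storage instead of copying descriptors or hunting for a free buffer. SketchScanner, its trivial Scan(), and the std::string-backed buffer are simplified placeholders; the real Scan() does the actual tokenizing.

// Minimal sketch with simplified stand-in types; the real scanner also
// tracks raw literals, contextual tokens, locations, etc.
#include <cassert>
#include <string>

enum class Token { UNINITIALIZED, IDENTIFIER, EOS };

struct LiteralBuffer {
  // Embedded per slot, so there is no search for a "free" buffer anymore.
  std::string chars;
  bool is_used = false;
  void Start() { assert(!is_used); is_used = true; }
  void Drop()  { is_used = false; chars.clear(); }
};

struct TokenDesc {
  Token token = Token::UNINITIALIZED;
  LiteralBuffer literal_chars;       // owned by the slot itself
  bool after_line_terminator = false;
};

class SketchScanner {
 public:
  SketchScanner() { Scan(next_); }   // prime the one-token look-ahead

  Token Next() {
    TokenDesc* previous = current_;
    current_ = next_;
    if (next_next_->token == Token::UNINITIALIZED) {
      // Nothing precomputed: reuse the old current_ slot and scan into it.
      next_ = previous;
      next_->after_line_terminator = false;
      Scan(next_);
    } else {
      // PeekAhead() already scanned one token ahead: promote that slot and
      // recycle the old current_ slot as the new, empty next_next_.
      next_ = next_next_;
      next_next_ = previous;
      previous->token = Token::UNINITIALIZED;
    }
    return current_->token;
  }

  Token PeekAhead() {
    if (next_next_->token != Token::UNINITIALIZED) return next_next_->token;
    // Scan one extra token into the spare slot, then swap the pointers back
    // so next_ still refers to the upcoming token.
    TokenDesc* temp = next_;
    next_ = next_next_;
    next_->after_line_terminator = false;
    Scan(next_);
    next_next_ = next_;
    next_ = temp;
    return next_next_->token;
  }

 private:
  void Scan(TokenDesc* desc) {
    desc->literal_chars.Drop();
    desc->literal_chars.Start();
    desc->literal_chars.chars += "tok";  // pretend we collected characters
    desc->token = Token::IDENTIFIER;     // stand-in for real tokenizing
  }

  TokenDesc token_storage_[3];
  TokenDesc* current_ = &token_storage_[0];
  TokenDesc* next_ = &token_storage_[1];
  TokenDesc* next_next_ = &token_storage_[2];
};

The real Scanner::Next()/PeekAhead() in this change follow the same pointer dance over token_storage_[3], with the extra bookkeeping shown in the diff below.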


Change-Id: Ie2e1a95f91713f7ab619fc8632f1eb644884a51f
Reviewed-on: https://chromium-review.googlesource.com/1184911
Commit-Queue: Toon Verwaest <verwaest@chromium.org>
Reviewed-by: Igor Sheludko <ishell@chromium.org>
Cr-Commit-Position: refs/heads/master@{#55385}
parent 34700869
@@ -21,7 +21,7 @@ V8_INLINE Token::Value Scanner::SkipWhiteSpace() {
// Advance as long as character is a WhiteSpace or LineTerminator.
// Remember if the latter is the case.
if (unibrow::IsLineTerminator(c0_)) {
has_line_terminator_before_next_ = true;
next().after_line_terminator = true;
} else if (!unicode_cache_->IsWhiteSpace(c0_)) {
break;
}
@@ -60,6 +60,7 @@ class Scanner::ErrorState {
// Scanner::LiteralBuffer
Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const {
DCHECK(is_used_);
if (is_one_byte()) {
return isolate->factory()->InternalizeOneByteString(one_byte_literal());
}
@@ -133,16 +134,16 @@ const size_t Scanner::BookmarkScope::kBookmarkWasApplied =
void Scanner::BookmarkScope::Set() {
DCHECK_EQ(bookmark_, kNoBookmark);
DCHECK_EQ(scanner_->next_next_.token, Token::UNINITIALIZED);
DCHECK_EQ(scanner_->next_next().token, Token::UNINITIALIZED);
// The first token is a bit special, since current_ will still be
// uninitialized. In this case, store kBookmarkAtFirstPos and special-case it
// when
// applying the bookmark.
DCHECK_IMPLIES(
scanner_->current_.token == Token::UNINITIALIZED,
scanner_->current_.location.beg_pos == scanner_->next_.location.beg_pos);
bookmark_ = (scanner_->current_.token == Token::UNINITIALIZED)
DCHECK_IMPLIES(scanner_->current().token == Token::UNINITIALIZED,
scanner_->current().location.beg_pos ==
scanner_->next().location.beg_pos);
bookmark_ = (scanner_->current().token == Token::UNINITIALIZED)
? kBookmarkAtFirstPos
: scanner_->location().beg_pos;
}
@@ -176,9 +177,6 @@ Scanner::Scanner(UnicodeCache* unicode_cache, Utf16CharacterStream* source,
source_(source),
octal_pos_(Location::invalid()),
octal_message_(MessageTemplate::kNone),
has_line_terminator_before_next_(false),
has_multiline_comment_before_next_(false),
has_line_terminator_after_next_(false),
found_html_comment_(false),
allow_harmony_bigint_(false),
allow_harmony_numeric_separator_(false),
@@ -190,7 +188,7 @@ void Scanner::Initialize() {
// Need to capture identifiers in order to recognize "get" and "set"
// in object literals.
Init();
has_line_terminator_before_next_ = true;
next().after_line_terminator = true;
Scan();
}
@@ -375,44 +373,45 @@ static const byte one_char_tokens[] = {
// clang-format on
Token::Value Scanner::Next() {
if (next_.token == Token::EOS) {
next_.location.beg_pos = current_.location.beg_pos;
next_.location.end_pos = current_.location.end_pos;
}
if (next().token == Token::EOS) next().location = current().location;
// Rotate through tokens.
TokenDesc* previous = current_;
current_ = next_;
if (V8_UNLIKELY(next_next_.token != Token::UNINITIALIZED)) {
// Either we already have the next token lined up, in which case next_next_
// simply becomes next_. In that case we use current_ as new next_next_ and
// clear its token to indicate that it wasn't scanned yet. Otherwise we use
// current_ as next_ and scan into it, leaving next_next_ uninitialized.
if (V8_LIKELY(next_next().token == Token::UNINITIALIZED)) {
next_ = previous;
next().after_line_terminator = false;
next().after_multiline_comment = false;
Scan();
} else {
next_ = next_next_;
next_next_.token = Token::UNINITIALIZED;
next_next_.contextual_token = Token::UNINITIALIZED;
has_line_terminator_before_next_ = has_line_terminator_after_next_;
return current_.token;
next_next_ = previous;
previous->token = Token::UNINITIALIZED;
previous->contextual_token = Token::UNINITIALIZED;
DCHECK_NE(Token::UNINITIALIZED, current().token);
}
has_line_terminator_before_next_ = false;
has_multiline_comment_before_next_ = false;
Scan();
return current_.token;
return current().token;
}
Token::Value Scanner::PeekAhead() {
DCHECK(next_.token != Token::DIV);
DCHECK(next_.token != Token::ASSIGN_DIV);
if (next_next_.token != Token::UNINITIALIZED) {
return next_next_.token;
}
TokenDesc prev = current_;
bool has_line_terminator_before_next =
has_line_terminator_before_next_ || has_multiline_comment_before_next_;
Next();
has_line_terminator_after_next_ =
has_line_terminator_before_next_ || has_multiline_comment_before_next_;
has_line_terminator_before_next_ = has_line_terminator_before_next;
Token::Value ret = next_.token;
DCHECK(next().token != Token::DIV);
DCHECK(next().token != Token::ASSIGN_DIV);
if (next_next().token != Token::UNINITIALIZED) {
return next_next().token;
}
TokenDesc* temp = next_;
next_ = next_next_;
next().after_line_terminator = false;
next().after_multiline_comment = false;
Scan();
next_next_ = next_;
next_ = current_;
current_ = prev;
return ret;
next_ = temp;
return next_next().token;
}
Token::Value Scanner::SkipSingleHTMLComment() {
@@ -450,6 +449,7 @@ void Scanner::TryToParseSourceURLComment() {
if (!unicode_cache_->IsWhiteSpace(c0_)) return;
Advance();
LiteralBuffer name;
name.Start();
while (c0_ != kEndOfInput &&
!unicode_cache_->IsWhiteSpaceOrLineTerminator(c0_) && c0_ != '=') {
@@ -468,15 +468,16 @@ void Scanner::TryToParseSourceURLComment() {
}
if (c0_ != '=')
return;
value->Drop();
value->Start();
Advance();
value->Reset();
while (unicode_cache_->IsWhiteSpace(c0_)) {
Advance();
}
while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
// Disallowed characters.
if (c0_ == '"' || c0_ == '\'') {
value->Reset();
value->Drop();
return;
}
if (unicode_cache_->IsWhiteSpace(c0_)) {
@@ -488,7 +489,7 @@ void Scanner::TryToParseSourceURLComment() {
// Allow whitespace at the end.
while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
if (!unicode_cache_->IsWhiteSpace(c0_)) {
value->Reset();
value->Drop();
break;
}
Advance();
@@ -501,10 +502,10 @@ Token::Value Scanner::SkipMultiLineComment() {
while (c0_ != kEndOfInput) {
DCHECK(!unibrow::IsLineTerminator(kEndOfInput));
if (!has_multiline_comment_before_next_ && unibrow::IsLineTerminator(c0_)) {
if (!next().after_multiline_comment && unibrow::IsLineTerminator(c0_)) {
// Following ECMA-262, section 7.4, a comment containing
// a newline will make the comment count as a line-terminator.
has_multiline_comment_before_next_ = true;
next().after_multiline_comment = true;
}
while (V8_UNLIKELY(c0_ == '*')) {
@@ -536,9 +537,9 @@ Token::Value Scanner::ScanHtmlComment() {
}
void Scanner::Scan() {
next_.literal_chars = nullptr;
next_.raw_literal_chars = nullptr;
next_.invalid_template_escape_message = MessageTemplate::kNone;
next().literal_chars.Drop();
next().raw_literal_chars.Drop();
next().invalid_template_escape_message = MessageTemplate::kNone;
Token::Value token;
do {
@@ -546,17 +547,17 @@ void Scanner::Scan() {
Token::Value token = static_cast<Token::Value>(one_char_tokens[c0_]);
if (token != Token::ILLEGAL) {
int pos = source_pos();
next_.token = token;
next_.contextual_token = Token::UNINITIALIZED;
next_.location.beg_pos = pos;
next_.location.end_pos = pos + 1;
next().token = token;
next().contextual_token = Token::UNINITIALIZED;
next().location.beg_pos = pos;
next().location.end_pos = pos + 1;
Advance();
return;
}
}
// Remember the position of the next token
next_.location.beg_pos = source_pos();
next().location.beg_pos = source_pos();
switch (c0_) {
case '"':
@@ -763,19 +764,19 @@ void Scanner::Scan() {
// whitespace.
} while (token == Token::WHITESPACE);
next_.location.end_pos = source_pos();
next().location.end_pos = source_pos();
if (Token::IsContextualKeyword(token)) {
next_.token = Token::IDENTIFIER;
next_.contextual_token = token;
next().token = Token::IDENTIFIER;
next().contextual_token = token;
} else {
next_.token = token;
next_.contextual_token = Token::UNINITIALIZED;
next().token = token;
next().contextual_token = Token::UNINITIALIZED;
}
#ifdef DEBUG
SanityCheckTokenDesc(current_);
SanityCheckTokenDesc(next_);
SanityCheckTokenDesc(next_next_);
SanityCheckTokenDesc(current());
SanityCheckTokenDesc(next());
SanityCheckTokenDesc(next_next());
#endif
}
@@ -796,8 +797,8 @@ void Scanner::SanityCheckTokenDesc(const TokenDesc& token) const {
break;
case Token::TEMPLATE_SPAN:
case Token::TEMPLATE_TAIL:
DCHECK_NOT_NULL(token.raw_literal_chars);
DCHECK_NOT_NULL(token.literal_chars);
DCHECK(token.raw_literal_chars.is_used());
DCHECK(token.literal_chars.is_used());
break;
case Token::ESCAPED_KEYWORD:
case Token::ESCAPED_STRICT_RESERVED_WORD:
@@ -809,13 +810,13 @@ void Scanner::SanityCheckTokenDesc(const TokenDesc& token) const {
case Token::SMI:
case Token::STRING:
case Token::PRIVATE_NAME:
DCHECK_NOT_NULL(token.literal_chars);
DCHECK_NULL(token.raw_literal_chars);
DCHECK(token.literal_chars.is_used());
DCHECK(!token.raw_literal_chars.is_used());
DCHECK_EQ(token.invalid_template_escape_message, MessageTemplate::kNone);
break;
default:
DCHECK_NULL(token.literal_chars);
DCHECK_NULL(token.raw_literal_chars);
DCHECK(!token.literal_chars.is_used());
DCHECK(!token.raw_literal_chars.is_used());
DCHECK_EQ(token.invalid_template_escape_message, MessageTemplate::kNone);
break;
}
@@ -832,9 +833,9 @@ void Scanner::SanityCheckTokenDesc(const TokenDesc& token) const {
void Scanner::SeekForward(int pos) {
// After this call, we will have the token at the given position as
// the "next" token. The "current" token will be invalid.
if (pos == next_.location.beg_pos) return;
if (pos == next().location.beg_pos) return;
int current_pos = source_pos();
DCHECK_EQ(next_.location.end_pos, current_pos);
DCHECK_EQ(next().location.end_pos, current_pos);
// Positions inside the lookahead token aren't supported.
DCHECK(pos >= current_pos);
if (pos != current_pos) {
@@ -843,8 +844,8 @@ void Scanner::SeekForward(int pos) {
// This function is only called to seek to the location
// of the end of a function (at the "}" token). It doesn't matter
// whether there was a line terminator in the part we skip.
has_line_terminator_before_next_ = false;
has_multiline_comment_before_next_ = false;
next().after_line_terminator = false;
next().after_multiline_comment = false;
}
Scan();
}
@@ -1028,8 +1029,8 @@ Token::Value Scanner::ScanTemplateSpan() {
DCHECK_EQ(!success, has_error());
// For templates, invalid escape sequence checking is handled in the
// parser.
scanner_error_state.MoveErrorTo(&next_);
octal_error_state.MoveErrorTo(&next_);
scanner_error_state.MoveErrorTo(next_);
octal_error_state.MoveErrorTo(next_);
}
} else if (c < 0) {
// Unterminated template literal
@@ -1048,31 +1049,36 @@ Token::Value Scanner::ScanTemplateSpan() {
}
}
literal.Complete();
next_.location.end_pos = source_pos();
next_.token = result;
next_.contextual_token = Token::UNINITIALIZED;
next().location.end_pos = source_pos();
next().token = result;
next().contextual_token = Token::UNINITIALIZED;
return result;
}
Token::Value Scanner::ScanTemplateStart() {
DCHECK_EQ(next_next_.token, Token::UNINITIALIZED);
DCHECK_EQ(next_next().token, Token::UNINITIALIZED);
DCHECK_EQ(c0_, '`');
next_.location.beg_pos = source_pos();
next().location.beg_pos = source_pos();
Advance(); // Consume `
return ScanTemplateSpan();
}
Handle<String> Scanner::SourceUrl(Isolate* isolate) const {
Handle<String> tmp;
if (source_url_.length() > 0) tmp = source_url_.Internalize(isolate);
if (source_url_.length() > 0) {
DCHECK(source_url_.is_used());
tmp = source_url_.Internalize(isolate);
}
return tmp;
}
Handle<String> Scanner::SourceMappingUrl(Isolate* isolate) const {
Handle<String> tmp;
if (source_mapping_url_.length() > 0)
if (source_mapping_url_.length() > 0) {
DCHECK(source_mapping_url_.is_used());
tmp = source_mapping_url_.Internalize(isolate);
}
return tmp;
}
@@ -1296,10 +1302,10 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
return Token::ILLEGAL;
}
if (next_.literal_chars->one_byte_literal().length() <= 10 &&
if (next().literal_chars.one_byte_literal().length() <= 10 &&
value <= Smi::kMaxValue && c0_ != '.' &&
!unicode_cache_->IsIdentifierStart(c0_)) {
next_.smi_value_ = static_cast<uint32_t>(value);
next().smi_value_ = static_cast<uint32_t>(value);
literal.Complete();
if (kind == DECIMAL_WITH_LEADING_ZERO) {
@@ -1558,7 +1564,7 @@ Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) {
}
} else if (c0_ <= kMaxAscii && c0_ != '\\') {
// Only a-z+ or _: could be a keyword or identifier.
Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();
Vector<const uint8_t> chars = next().literal_chars.one_byte_literal();
Token::Value token =
KeywordOrIdentifierToken(chars.start(), chars.length());
if (token == Token::IDENTIFIER ||
@@ -1608,8 +1614,8 @@ Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) {
}
}
if (next_.literal_chars->is_one_byte()) {
Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();
if (next().literal_chars.is_one_byte()) {
Vector<const uint8_t> chars = next().literal_chars.one_byte_literal();
Token::Value token =
KeywordOrIdentifierToken(chars.start(), chars.length());
/* TODO(adamk): YIELD should be handled specially. */
@@ -1637,17 +1643,17 @@ Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) {
}
bool Scanner::ScanRegExpPattern() {
DCHECK(next_next_.token == Token::UNINITIALIZED);
DCHECK(next_.token == Token::DIV || next_.token == Token::ASSIGN_DIV);
DCHECK_EQ(Token::UNINITIALIZED, next_next().token);
DCHECK(next().token == Token::DIV || next().token == Token::ASSIGN_DIV);
// Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
bool in_character_class = false;
bool seen_equal = (next_.token == Token::ASSIGN_DIV);
bool seen_equal = (next().token == Token::ASSIGN_DIV);
// Previous token is either '/' or '/=', in the second case, the
// pattern starts at =.
next_.location.beg_pos = source_pos() - (seen_equal ? 2 : 1);
next_.location.end_pos = source_pos() - (seen_equal ? 1 : 0);
next().location.beg_pos = source_pos() - (seen_equal ? 2 : 1);
next().location.end_pos = source_pos() - (seen_equal ? 1 : 0);
// Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
// the scanner should pass uninterpreted bodies to the RegExp
@@ -1686,14 +1692,14 @@ bool Scanner::ScanRegExpPattern() {
Advance(); // consume '/'
literal.Complete();
next_.token = Token::REGEXP_LITERAL;
next_.contextual_token = Token::UNINITIALIZED;
next().token = Token::REGEXP_LITERAL;
next().contextual_token = Token::UNINITIALIZED;
return true;
}
Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() {
DCHECK(next_.token == Token::REGEXP_LITERAL);
DCHECK_EQ(Token::REGEXP_LITERAL, next().token);
// Scan regular expression flags.
int flags = 0;
@@ -1728,7 +1734,7 @@ Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() {
flags |= flag;
}
next_.location.end_pos = source_pos();
next().location.end_pos = source_pos();
return Just(RegExp::Flags(flags));
}
@@ -1791,24 +1797,18 @@ void Scanner::SeekNext(size_t position) {
// 1, Reset the current_, next_ and next_next_ tokens
// (next_ + next_next_ will be overwritten by Next(),
// current_ will remain unchanged, so overwrite it fully.)
current_ = {{0, 0},
nullptr,
nullptr,
0,
Token::UNINITIALIZED,
MessageTemplate::kNone,
{0, 0},
Token::UNINITIALIZED};
next_.token = Token::UNINITIALIZED;
next_.contextual_token = Token::UNINITIALIZED;
next_next_.token = Token::UNINITIALIZED;
next_next_.contextual_token = Token::UNINITIALIZED;
for (TokenDesc& token : token_storage_) {
token.token = Token::UNINITIALIZED;
token.contextual_token = Token::UNINITIALIZED;
}
// 2, reset the source to the desired position,
source_->Seek(position);
// 3, re-scan, by scanning the look-ahead char + 1 token (next_).
c0_ = source_->Advance();
Next();
DCHECK_EQ(next_.location.beg_pos, static_cast<int>(position));
next().after_line_terminator = false;
next().after_multiline_comment = false;
Scan();
DCHECK_EQ(next().location.beg_pos, static_cast<int>(position));
}
} // namespace internal
@@ -233,14 +233,14 @@ class Scanner {
// Returns the token following peek()
Token::Value PeekAhead();
// Returns the current token again.
Token::Value current_token() { return current_.token; }
Token::Value current_token() { return current().token; }
Token::Value current_contextual_token() { return current_.contextual_token; }
Token::Value next_contextual_token() { return next_.contextual_token; }
Token::Value current_contextual_token() { return current().contextual_token; }
Token::Value next_contextual_token() { return next().contextual_token; }
// Returns the location information for the current token
// (the token last returned by Next()).
Location location() const { return current_.location; }
Location location() const { return current().location; }
// This error is specifically an invalid hex or unicode escape sequence.
bool has_error() const { return scanner_error_ != MessageTemplate::kNone; }
@@ -248,26 +248,26 @@ class Scanner {
Location error_location() const { return scanner_error_location_; }
bool has_invalid_template_escape() const {
return current_.invalid_template_escape_message != MessageTemplate::kNone;
return current().invalid_template_escape_message != MessageTemplate::kNone;
}
MessageTemplate::Template invalid_template_escape_message() const {
DCHECK(has_invalid_template_escape());
return current_.invalid_template_escape_message;
return current().invalid_template_escape_message;
}
Location invalid_template_escape_location() const {
DCHECK(has_invalid_template_escape());
return current_.invalid_template_escape_location;
return current().invalid_template_escape_location;
}
// Similar functions for the upcoming token.
// One token look-ahead (past the token returned by Next()).
Token::Value peek() const { return next_.token; }
Token::Value peek() const { return next().token; }
Location peek_location() const { return next_.location; }
Location peek_location() const { return next().location; }
bool literal_contains_escapes() const {
return LiteralContainsEscapes(current_);
return LiteralContainsEscapes(current());
}
const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory) const;
@@ -281,12 +281,12 @@ class Scanner {
inline bool CurrentMatches(Token::Value token) const {
DCHECK(Token::IsKeyword(token));
return current_.token == token;
return current().token == token;
}
inline bool CurrentMatchesContextual(Token::Value token) const {
DCHECK(Token::IsContextualKeyword(token));
return current_.contextual_token == token;
return current().contextual_token == token;
}
// Match the token against the contextual keyword or literal buffer.
@@ -295,17 +295,17 @@ class Scanner {
// Escaped keywords are not matched as tokens. So if we require escape
// and/or string processing we need to look at the literal content
// (which was escape-processed already).
// Conveniently, current_.literal_chars == nullptr for all proper keywords,
// so this second condition should exit early in common cases.
return (current_.contextual_token == token) ||
(current_.literal_chars &&
current_.literal_chars->Equals(Vector<const char>(
// Conveniently, !current().literal_chars.is_used() for all proper
// keywords, so this second condition should exit early in common cases.
return (current().contextual_token == token) ||
(current().literal_chars.is_used() &&
current().literal_chars.Equals(Vector<const char>(
Token::String(token), Token::StringLength(token))));
}
bool IsUseStrict() const {
return current_.token == Token::STRING &&
current_.literal_chars->Equals(
return current().token == Token::STRING &&
current().literal_chars.Equals(
Vector<const char>("use strict", strlen("use strict")));
}
bool IsGetOrSet(bool* is_get, bool* is_set) const {
@@ -335,7 +335,7 @@ class Scanner {
MessageTemplate::Template octal_message() const { return octal_message_; }
// Returns the value of the last smi that was scanned.
uint32_t smi_value() const { return current_.smi_value_; }
uint32_t smi_value() const { return current().smi_value_; }
// Seek forward to the given position. This operation does not
// work in general, for instance when there are pushed back
@@ -346,14 +346,13 @@ class Scanner {
// Returns true if there was a line terminator before the peek'ed token,
// possibly inside a multi-line comment.
bool HasAnyLineTerminatorBeforeNext() const {
return has_line_terminator_before_next_ ||
has_multiline_comment_before_next_;
return next().after_line_terminator || next().after_multiline_comment;
}
bool HasAnyLineTerminatorAfterNext() {
Token::Value ensure_next_next = PeekAhead();
USE(ensure_next_next);
return has_line_terminator_after_next_;
return next_next().after_line_terminator;
}
// Scans the input as a regular expression pattern, next token must be /(=).
@@ -365,8 +364,8 @@ class Scanner {
// Scans the input as a template literal
Token::Value ScanTemplateStart();
Token::Value ScanTemplateContinuation() {
DCHECK_EQ(next_.token, Token::RBRACE);
next_.location.beg_pos = source_pos() - 1; // We already consumed }
DCHECK_EQ(next().token, Token::RBRACE);
next().location.beg_pos = source_pos() - 1; // We already consumed }
return ScanTemplateSpan();
}
@@ -416,16 +415,19 @@ class Scanner {
// LiteralBuffer - Collector of chars of literals.
class LiteralBuffer {
public:
LiteralBuffer() : is_one_byte_(true), position_(0), backing_store_() {}
LiteralBuffer()
: position_(0), is_one_byte_(true), is_used_(false), backing_store_() {}
~LiteralBuffer() { backing_store_.Dispose(); }
V8_INLINE void AddChar(char code_unit) {
DCHECK(is_used_);
DCHECK(IsValidAscii(code_unit));
AddOneByteChar(static_cast<byte>(code_unit));
}
V8_INLINE void AddChar(uc32 code_unit) {
DCHECK(is_used_);
if (is_one_byte_) {
if (code_unit <= static_cast<uc32>(unibrow::Latin1::kMaxChar)) {
AddOneByteChar(static_cast<byte>(code_unit));
@@ -439,12 +441,14 @@ class Scanner {
bool is_one_byte() const { return is_one_byte_; }
bool Equals(Vector<const char> keyword) const {
DCHECK(is_used_);
return is_one_byte() && keyword.length() == position_ &&
(memcmp(keyword.start(), backing_store_.start(), position_) == 0);
}
Vector<const uint16_t> two_byte_literal() const {
DCHECK(!is_one_byte_);
DCHECK(is_used_);
DCHECK_EQ(position_ & 0x1, 0);
return Vector<const uint16_t>(
reinterpret_cast<const uint16_t*>(backing_store_.start()),
@@ -453,13 +457,23 @@ class Scanner {
Vector<const uint8_t> one_byte_literal() const {
DCHECK(is_one_byte_);
DCHECK(is_used_);
return Vector<const uint8_t>(
reinterpret_cast<const uint8_t*>(backing_store_.start()), position_);
}
int length() const { return is_one_byte_ ? position_ : (position_ >> 1); }
void Reset() {
void Start() {
DCHECK(!is_used_);
DCHECK_EQ(0, position_);
is_used_ = true;
}
bool is_used() const { return is_used_; }
void Drop() {
is_used_ = false;
position_ = 0;
is_one_byte_ = true;
}
@@ -492,8 +506,9 @@ class Scanner {
void ExpandBuffer();
void ConvertToTwoByte();
bool is_one_byte_;
int position_;
bool is_one_byte_;
bool is_used_;
Vector<byte> backing_store_;
DISALLOW_COPY_AND_ASSIGN(LiteralBuffer);
@@ -501,14 +516,17 @@ class Scanner {
// The current and look-ahead token.
struct TokenDesc {
Location location;
LiteralBuffer* literal_chars;
LiteralBuffer* raw_literal_chars;
uint32_t smi_value_;
Token::Value token;
MessageTemplate::Template invalid_template_escape_message;
Location location = {0, 0};
LiteralBuffer literal_chars;
LiteralBuffer raw_literal_chars;
Token::Value token = Token::UNINITIALIZED;
MessageTemplate::Template invalid_template_escape_message =
MessageTemplate::kNone;
Location invalid_template_escape_location;
Token::Value contextual_token;
Token::Value contextual_token = Token::UNINITIALIZED;
uint32_t smi_value_ = 0;
bool after_line_terminator = false;
bool after_multiline_comment = false;
};
enum NumberKind {
@@ -532,22 +550,11 @@ class Scanner {
// Set c0_ (one character ahead)
STATIC_ASSERT(kCharacterLookaheadBufferSize == 1);
Advance();
// Initialize current_ to not refer to a literal.
current_.token = Token::UNINITIALIZED;
current_.contextual_token = Token::UNINITIALIZED;
current_.literal_chars = nullptr;
current_.raw_literal_chars = nullptr;
current_.invalid_template_escape_message = MessageTemplate::kNone;
next_.token = Token::UNINITIALIZED;
next_.contextual_token = Token::UNINITIALIZED;
next_.literal_chars = nullptr;
next_.raw_literal_chars = nullptr;
next_.invalid_template_escape_message = MessageTemplate::kNone;
next_next_.token = Token::UNINITIALIZED;
next_next_.contextual_token = Token::UNINITIALIZED;
next_next_.literal_chars = nullptr;
next_next_.raw_literal_chars = nullptr;
next_next_.invalid_template_escape_message = MessageTemplate::kNone;
current_ = &token_storage_[0];
next_ = &token_storage_[1];
next_next_ = &token_storage_[2];
found_html_comment_ = false;
scanner_error_ = MessageTemplate::kNone;
}
@@ -569,47 +576,23 @@ class Scanner {
void SeekNext(size_t position);
// Literal buffer support
inline void StartLiteral() {
LiteralBuffer* free_buffer =
(current_.literal_chars == &literal_buffer0_)
? &literal_buffer1_
: (current_.literal_chars == &literal_buffer1_) ? &literal_buffer2_
: &literal_buffer0_;
free_buffer->Reset();
next_.literal_chars = free_buffer;
}
inline void StartLiteral() { next().literal_chars.Start(); }
inline void StartRawLiteral() {
LiteralBuffer* free_buffer =
(current_.raw_literal_chars == &raw_literal_buffer0_)
? &raw_literal_buffer1_
: (current_.raw_literal_chars == &raw_literal_buffer1_)
? &raw_literal_buffer2_
: &raw_literal_buffer0_;
free_buffer->Reset();
next_.raw_literal_chars = free_buffer;
}
inline void StartRawLiteral() { next().raw_literal_chars.Start(); }
V8_INLINE void AddLiteralChar(uc32 c) {
DCHECK_NOT_NULL(next_.literal_chars);
next_.literal_chars->AddChar(c);
}
V8_INLINE void AddLiteralChar(uc32 c) { next().literal_chars.AddChar(c); }
V8_INLINE void AddLiteralChar(char c) {
DCHECK_NOT_NULL(next_.literal_chars);
next_.literal_chars->AddChar(c);
}
V8_INLINE void AddLiteralChar(char c) { next().literal_chars.AddChar(c); }
V8_INLINE void AddRawLiteralChar(uc32 c) {
DCHECK_NOT_NULL(next_.raw_literal_chars);
next_.raw_literal_chars->AddChar(c);
next().raw_literal_chars.AddChar(c);
}
// Stops scanning of a literal and drop the collected characters,
// e.g., due to an encountered error.
inline void DropLiteral() {
next_.literal_chars = nullptr;
next_.raw_literal_chars = nullptr;
next().literal_chars.Drop();
next().raw_literal_chars.Drop();
}
inline void AddLiteralCharAdvance() {
@@ -681,45 +664,46 @@ class Scanner {
// token as a one-byte literal. E.g. Token::FUNCTION pretends to have a
// literal "function".
Vector<const uint8_t> literal_one_byte_string() const {
if (current_.literal_chars)
return current_.literal_chars->one_byte_literal();
const char* str = Token::String(current_.token);
if (current().literal_chars.is_used())
return current().literal_chars.one_byte_literal();
const char* str = Token::String(current().token);
const uint8_t* str_as_uint8 = reinterpret_cast<const uint8_t*>(str);
return Vector<const uint8_t>(str_as_uint8,
Token::StringLength(current_.token));
Token::StringLength(current().token));
}
Vector<const uint16_t> literal_two_byte_string() const {
DCHECK_NOT_NULL(current_.literal_chars);
return current_.literal_chars->two_byte_literal();
DCHECK(current().literal_chars.is_used());
return current().literal_chars.two_byte_literal();
}
bool is_literal_one_byte() const {
return !current_.literal_chars || current_.literal_chars->is_one_byte();
return !current().literal_chars.is_used() ||
current().literal_chars.is_one_byte();
}
// Returns the literal string for the next token (the token that
// would be returned if Next() were called).
Vector<const uint8_t> next_literal_one_byte_string() const {
DCHECK_NOT_NULL(next_.literal_chars);
return next_.literal_chars->one_byte_literal();
DCHECK(next().literal_chars.is_used());
return next().literal_chars.one_byte_literal();
}
Vector<const uint16_t> next_literal_two_byte_string() const {
DCHECK_NOT_NULL(next_.literal_chars);
return next_.literal_chars->two_byte_literal();
DCHECK(next().literal_chars.is_used());
return next().literal_chars.two_byte_literal();
}
bool is_next_literal_one_byte() const {
DCHECK_NOT_NULL(next_.literal_chars);
return next_.literal_chars->is_one_byte();
DCHECK(next().literal_chars.is_used());
return next().literal_chars.is_one_byte();
}
Vector<const uint8_t> raw_literal_one_byte_string() const {
DCHECK_NOT_NULL(current_.raw_literal_chars);
return current_.raw_literal_chars->one_byte_literal();
DCHECK(current().raw_literal_chars.is_used());
return current().raw_literal_chars.one_byte_literal();
}
Vector<const uint16_t> raw_literal_two_byte_string() const {
DCHECK_NOT_NULL(current_.raw_literal_chars);
return current_.raw_literal_chars->two_byte_literal();
DCHECK(current().raw_literal_chars.is_used());
return current().raw_literal_chars.two_byte_literal();
}
bool is_raw_literal_one_byte() const {
DCHECK_NOT_NULL(current_.raw_literal_chars);
return current_.raw_literal_chars->is_one_byte();
DCHECK(current().raw_literal_chars.is_used());
return current().raw_literal_chars.is_one_byte();
}
template <bool capture_raw, bool unicode = false>
@@ -788,8 +772,8 @@ class Scanner {
// Subtract delimiters.
source_length -= 2;
}
return token.literal_chars &&
(token.literal_chars->length() != source_length);
return token.literal_chars.is_used() &&
(token.literal_chars.length() != source_length);
}
#ifdef DEBUG
@@ -798,23 +782,21 @@ class Scanner {
UnicodeCache* unicode_cache_;
// Buffers collecting literal strings, numbers, etc.
LiteralBuffer literal_buffer0_;
LiteralBuffer literal_buffer1_;
LiteralBuffer literal_buffer2_;
// Values parsed from magic comments.
LiteralBuffer source_url_;
LiteralBuffer source_mapping_url_;
// Buffer to store raw string values
LiteralBuffer raw_literal_buffer0_;
LiteralBuffer raw_literal_buffer1_;
LiteralBuffer raw_literal_buffer2_;
TokenDesc token_storage_[3];
TokenDesc current_; // desc for current token (as returned by Next())
TokenDesc next_; // desc for next token (one token look-ahead)
TokenDesc next_next_; // desc for the token after next (after PeekAhead())
TokenDesc& next() { return *next_; }
const TokenDesc& current() const { return *current_; }
const TokenDesc& next() const { return *next_; }
const TokenDesc& next_next() const { return *next_next_; }
TokenDesc* current_; // desc for current token (as returned by Next())
TokenDesc* next_; // desc for next token (one token look-ahead)
TokenDesc* next_next_; // desc for the token after next (after PeekAhead())
// Input stream. Must be initialized to an Utf16CharacterStream.
Utf16CharacterStream* const source_;
@@ -826,15 +808,6 @@ class Scanner {
// One Unicode character look-ahead; c0_ < 0 at the end of the input.
uc32 c0_;
// Whether there is a line terminator whitespace character after
// the current token, and before the next. Does not count newlines
// inside multiline comments.
bool has_line_terminator_before_next_;
// Whether there is a multi-line comment that contains a
// line-terminator after the current token, and before the next.
bool has_multiline_comment_before_next_;
bool has_line_terminator_after_next_;
// Whether this scanner encountered an HTML comment.
bool found_html_comment_;