Commit c677f813 authored by vogelheim's avatar vogelheim Committed by Commit bot

Scanner::LiteralBuffer usage cleanup.

1, restrict use of LiteralBuffers to the tokens that actually need it.
  - E.g., previously the Token::FUNCTION would have a literal buffer
    containing "function", which was never actually used.
  - This eliminates copies of the string data for every call to
    PeekAhead or SetBookmark.
2, document & enforce the "secret" Scanner API contract w/ DCHECK
  - Document & check the correspondence of token value and literal buffer.
  - Document & check preconditions for calling PeekAhead, ScanRegExp*,
    ScanTemplate*.

BUG=v8:4947

Review-Url: https://codereview.chromium.org/2240513003
Cr-Commit-Position: refs/heads/master@{#38677}
parent b8b4a443
......@@ -1055,7 +1055,7 @@ class ParserBase : public Traits {
IdentifierT ParseIdentifierName(bool* ok);
ExpressionT ParseRegExpLiteral(bool seen_equal, bool* ok);
ExpressionT ParseRegExpLiteral(bool* ok);
ExpressionT ParsePrimaryExpression(ExpressionClassifier* classifier,
bool* is_async, bool* ok);
......@@ -1349,6 +1349,9 @@ void ParserBase<Traits>::GetUnexpectedTokenMessage(
*message = MessageTemplate::kInvalidOrUnexpectedToken;
}
break;
case Token::REGEXP_LITERAL:
*message = MessageTemplate::kUnexpectedTokenRegExp;
break;
default:
const char* name = Token::String(token);
DCHECK(name != NULL);
......@@ -1502,9 +1505,9 @@ ParserBase<Traits>::ParseIdentifierName(bool* ok) {
template <class Traits>
typename ParserBase<Traits>::ExpressionT ParserBase<Traits>::ParseRegExpLiteral(
bool seen_equal, bool* ok) {
bool* ok) {
int pos = peek_position();
if (!scanner()->ScanRegExpPattern(seen_equal)) {
if (!scanner()->ScanRegExpPattern()) {
Next();
ReportMessage(MessageTemplate::kUnterminatedRegExp);
*ok = false;
......@@ -1594,14 +1597,10 @@ ParserBase<Traits>::ParsePrimaryExpression(ExpressionClassifier* classifier,
}
case Token::ASSIGN_DIV:
classifier->RecordBindingPatternError(
scanner()->peek_location(), MessageTemplate::kUnexpectedTokenRegExp);
return this->ParseRegExpLiteral(true, ok);
case Token::DIV:
classifier->RecordBindingPatternError(
scanner()->peek_location(), MessageTemplate::kUnexpectedTokenRegExp);
return this->ParseRegExpLiteral(false, ok);
return this->ParseRegExpLiteral(ok);
case Token::LBRACK:
return this->ParseArrayLiteral(classifier, ok);
......
......@@ -260,6 +260,8 @@ Token::Value Scanner::Next() {
next_.token = token;
next_.location.beg_pos = pos;
next_.location.end_pos = pos + 1;
next_.literal_chars = nullptr;
next_.raw_literal_chars = nullptr;
Advance();
return current_.token;
}
......@@ -270,6 +272,9 @@ Token::Value Scanner::Next() {
Token::Value Scanner::PeekAhead() {
DCHECK(next_.token != Token::DIV);
DCHECK(next_.token != Token::ASSIGN_DIV);
if (next_next_.token != Token::UNINITIALIZED) {
return next_next_.token;
}
......@@ -731,8 +736,50 @@ void Scanner::Scan() {
next_.location.end_pos = source_pos();
next_.token = token;
#ifdef DEBUG
SanityCheckTokenDesc(current_);
SanityCheckTokenDesc(next_);
SanityCheckTokenDesc(next_next_);
#endif
}
#ifdef DEBUG
// Debug-only consistency check: verifies that a TokenDesc's literal buffers
// match what its token value requires. Called (under #ifdef DEBUG) for
// current_, next_ and next_next_ after every Scan(), enforcing the
// token-value / literal-buffer contract documented below.
void Scanner::SanityCheckTokenDesc(const TokenDesc& token) const {
// Most tokens should not have literal_chars or even raw_literal_chars.
// The rules are:
// - UNINITIALIZED: we don't care.
// - TEMPLATE_*: need both literal + raw literal chars.
// - IDENTIFIERS, STRINGS, etc.: need a literal, but no raw literal.
// - all others: should have neither.
switch (token.token) {
case Token::UNINITIALIZED:
// token.literal_chars & other members might be garbage. That's ok.
break;
case Token::TEMPLATE_SPAN:
case Token::TEMPLATE_TAIL:
// Template literals carry both the cooked and the raw representation.
DCHECK_NOT_NULL(token.raw_literal_chars);
DCHECK_NOT_NULL(token.literal_chars);
break;
case Token::ESCAPED_KEYWORD:
case Token::ESCAPED_STRICT_RESERVED_WORD:
case Token::FUTURE_STRICT_RESERVED_WORD:
case Token::IDENTIFIER:
case Token::NUMBER:
case Token::REGEXP_LITERAL:
case Token::SMI:
case Token::STRING:
// Value-bearing tokens: the cooked literal must be present, but there
// is no raw form for these.
DCHECK_NOT_NULL(token.literal_chars);
DCHECK_NULL(token.raw_literal_chars);
break;
default:
// Punctuation, keywords, etc.: the token value alone is sufficient, so
// no literal buffer may be attached (this is the cleanup this change
// enforces — e.g. Token::FUNCTION no longer buffers "function").
DCHECK_NULL(token.literal_chars);
DCHECK_NULL(token.raw_literal_chars);
break;
}
}
#endif // DEBUG
void Scanner::SeekForward(int pos) {
// After this call, we will have the token at the given position as
......@@ -954,6 +1001,7 @@ Token::Value Scanner::ScanTemplateSpan() {
Token::Value Scanner::ScanTemplateStart() {
DCHECK(next_next_.token == Token::UNINITIALIZED);
DCHECK(c0_ == '`');
next_.location.beg_pos = source_pos();
Advance(); // Consume `
......@@ -1216,7 +1264,7 @@ uc32 Scanner::ScanUnicodeEscape() {
KEYWORD("yield", Token::YIELD)
static Token::Value KeywordOrIdentifierToken(const uint8_t* input,
int input_length, bool escaped) {
int input_length) {
DCHECK(input_length >= 1);
const int kMinLength = 2;
const int kMaxLength = 10;
......@@ -1244,13 +1292,6 @@ static Token::Value KeywordOrIdentifierToken(const uint8_t* input,
(keyword_length <= 7 || input[7] == keyword[7]) && \
(keyword_length <= 8 || input[8] == keyword[8]) && \
(keyword_length <= 9 || input[9] == keyword[9])) { \
if (escaped) { \
/* TODO(adamk): YIELD should be handled specially. */ \
return (token == Token::FUTURE_STRICT_RESERVED_WORD || \
token == Token::LET || token == Token::STATIC) \
? Token::ESCAPED_STRICT_RESERVED_WORD \
: Token::ESCAPED_KEYWORD; \
} \
return token; \
} \
}
......@@ -1269,7 +1310,7 @@ bool Scanner::IdentifierIsFutureStrictReserved(
return true;
}
return Token::FUTURE_STRICT_RESERVED_WORD ==
KeywordOrIdentifierToken(string->raw_data(), string->length(), false);
KeywordOrIdentifierToken(string->raw_data(), string->length());
}
......@@ -1300,9 +1341,13 @@ Token::Value Scanner::ScanIdentifierOrKeyword() {
}
} else if (c0_ <= kMaxAscii && c0_ != '\\') {
// Only a-z+: could be a keyword or identifier.
literal.Complete();
Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();
return KeywordOrIdentifierToken(chars.start(), chars.length(), false);
Token::Value token =
KeywordOrIdentifierToken(chars.start(), chars.length());
if (token == Token::IDENTIFIER ||
token == Token::FUTURE_STRICT_RESERVED_WORD)
literal.Complete();
return token;
}
HandleLeadSurrogate();
......@@ -1348,12 +1393,14 @@ Token::Value Scanner::ScanIdentifierOrKeyword() {
return ScanIdentifierSuffix(&literal, false);
}
literal.Complete();
if (next_.literal_chars->is_one_byte()) {
Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();
return KeywordOrIdentifierToken(chars.start(), chars.length(), false);
Token::Value token =
KeywordOrIdentifierToken(chars.start(), chars.length());
if (token == Token::IDENTIFIER) literal.Complete();
return token;
}
literal.Complete();
return Token::IDENTIFIER;
}
......@@ -1381,15 +1428,28 @@ Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal,
if (escaped && next_.literal_chars->is_one_byte()) {
Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();
return KeywordOrIdentifierToken(chars.start(), chars.length(), true);
Token::Value token =
KeywordOrIdentifierToken(chars.start(), chars.length());
/* TODO(adamk): YIELD should be handled specially. */
if (token == Token::IDENTIFIER) {
return Token::IDENTIFIER;
} else if (token == Token::FUTURE_STRICT_RESERVED_WORD ||
token == Token::LET || token == Token::STATIC) {
return Token::ESCAPED_STRICT_RESERVED_WORD;
} else {
return Token::ESCAPED_KEYWORD;
}
}
return Token::IDENTIFIER;
}
bool Scanner::ScanRegExpPattern() {
DCHECK(next_next_.token == Token::UNINITIALIZED);
DCHECK(next_.token == Token::DIV || next_.token == Token::ASSIGN_DIV);
bool Scanner::ScanRegExpPattern(bool seen_equal) {
// Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
bool in_character_class = false;
bool seen_equal = (next_.token == Token::ASSIGN_DIV);
// Previous token is either '/' or '/=', in the second case, the
// pattern starts at =.
......@@ -1429,14 +1489,15 @@ bool Scanner::ScanRegExpPattern(bool seen_equal) {
Advance(); // consume '/'
literal.Complete();
next_.token = Token::REGEXP_LITERAL;
return true;
}
Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() {
DCHECK(next_.token == Token::REGEXP_LITERAL);
// Scan regular expression flags.
LiteralScope literal(this);
int flags = 0;
while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) {
RegExp::Flags flag = RegExp::kNone;
......@@ -1459,11 +1520,12 @@ Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() {
default:
return Nothing<RegExp::Flags>();
}
if (flags & flag) return Nothing<RegExp::Flags>();
AddLiteralCharAdvance();
if (flags & flag) {
return Nothing<RegExp::Flags>();
}
Advance();
flags |= flag;
}
literal.Complete();
next_.location.end_pos = source_pos();
return Just(RegExp::Flags(flags));
......
......@@ -219,10 +219,9 @@ class Scanner {
bool literal_contains_escapes() const {
return LiteralContainsEscapes(current_);
}
bool next_literal_contains_escapes() const {
return LiteralContainsEscapes(next_);
}
bool is_literal_contextual_keyword(Vector<const char> keyword) {
DCHECK(current_.token == Token::IDENTIFIER ||
current_.token == Token::ESCAPED_STRICT_RESERVED_WORD);
DCHECK_NOT_NULL(current_.literal_chars);
return current_.literal_chars->is_contextual_keyword(keyword);
}
......@@ -238,9 +237,10 @@ class Scanner {
double DoubleValue();
bool ContainsDot();
bool LiteralMatches(const char* data, int length, bool allow_escapes = true) {
if (is_literal_one_byte() &&
literal_length() == length &&
(allow_escapes || !literal_contains_escapes())) {
if (!current_.literal_chars) {
return !strncmp(Token::Name(current_.token), data, length);
} else if (is_literal_one_byte() && literal_length() == length &&
(allow_escapes || !literal_contains_escapes())) {
const char* token =
reinterpret_cast<const char*>(literal_one_byte_string().start());
return !strncmp(token, data, length);
......@@ -299,9 +299,9 @@ class Scanner {
return has_line_terminator_after_next_;
}
// Scans the input as a regular expression pattern, previous
// character(s) must be /(=). Returns true if a pattern is scanned.
bool ScanRegExpPattern(bool seen_equal);
// Scans the input as a regular expression pattern, next token must be /(=).
// Returns true if a pattern is scanned.
bool ScanRegExpPattern();
// Scans the input as regular expression flags. Returns the flags on success.
Maybe<RegExp::Flags> ScanRegExpFlags();
......@@ -515,9 +515,15 @@ class Scanner {
STATIC_ASSERT(kCharacterLookaheadBufferSize == 1);
Advance();
// Initialize current_ to not refer to a literal.
current_.token = Token::UNINITIALIZED;
current_.literal_chars = NULL;
current_.raw_literal_chars = NULL;
next_.token = Token::UNINITIALIZED;
next_.literal_chars = NULL;
next_.raw_literal_chars = NULL;
next_next_.token = Token::UNINITIALIZED;
next_next_.literal_chars = NULL;
next_next_.raw_literal_chars = NULL;
found_html_comment_ = false;
scanner_error_ = MessageTemplate::kNone;
}
......@@ -650,21 +656,30 @@ class Scanner {
// form.
// These functions only give the correct result if the literal was scanned
// when a LiteralScope object is alive.
//
// Current usage of these functions is unfortunately a little undisciplined,
// and is_literal_one_byte() + is_literal_one_byte_string() is also
// requested for tokens that do not have a literal. Hence, we treat any
// token as a one-byte literal. E.g. Token::FUNCTION pretends to have a
// literal "function".
Vector<const uint8_t> literal_one_byte_string() {
DCHECK_NOT_NULL(current_.literal_chars);
return current_.literal_chars->one_byte_literal();
if (current_.literal_chars)
return current_.literal_chars->one_byte_literal();
const char* str = Token::String(current_.token);
const uint8_t* str_as_uint8 = reinterpret_cast<const uint8_t*>(str);
return Vector<const uint8_t>(str_as_uint8,
Token::StringLength(current_.token));
}
Vector<const uint16_t> literal_two_byte_string() {
DCHECK_NOT_NULL(current_.literal_chars);
return current_.literal_chars->two_byte_literal();
}
bool is_literal_one_byte() {
DCHECK_NOT_NULL(current_.literal_chars);
return current_.literal_chars->is_one_byte();
return !current_.literal_chars || current_.literal_chars->is_one_byte();
}
int literal_length() const {
DCHECK_NOT_NULL(current_.literal_chars);
return current_.literal_chars->length();
if (current_.literal_chars) return current_.literal_chars->length();
return Token::StringLength(current_.token);
}
// Returns the literal string for the next token (the token that
// would be returned if Next() were called).
......@@ -746,9 +761,14 @@ class Scanner {
// Subtract delimiters.
source_length -= 2;
}
return token.literal_chars->length() != source_length;
return token.literal_chars &&
(token.literal_chars->length() != source_length);
}
#ifdef DEBUG
void SanityCheckTokenDesc(const TokenDesc&) const;
#endif
UnicodeCache* unicode_cache_;
// Buffers collecting literal strings, numbers, etc.
......
......@@ -22,6 +22,16 @@ const char* const Token::string_[NUM_TOKENS] = {
};
#undef T
#if !V8_CC_MSVC
// TODO(vogelheim): Remove #if once MSVC supports constexpr on functions.
constexpr
#endif
// Length of a token's string representation; NULL strings count as length 0
// (used to build the Token::string_length_ table at static-init time).
uint8_t length(const char* str) {
  if (str == nullptr) return 0;
  return static_cast<uint8_t>(strlen(str));
}
#define T(name, string, precedence) length(string),
const uint8_t Token::string_length_[NUM_TOKENS] = {TOKEN_LIST(T, T)};
#undef T
#define T(name, string, precedence) precedence,
const int8_t Token::precedence_[NUM_TOKENS] = {
......
......@@ -171,6 +171,7 @@ namespace internal {
/* Scanner-internal use only. */ \
T(WHITESPACE, NULL, 0) \
T(UNINITIALIZED, NULL, 0) \
T(REGEXP_LITERAL, NULL, 0) \
\
/* ES6 Template Literals */ \
T(TEMPLATE_SPAN, NULL, 0) \
......@@ -325,6 +326,11 @@ class Token {
return string_[tok];
}
// Returns the length of the token's fixed source-text string (as returned
// by String(tok)), from the precomputed string_length_ table. Tokens whose
// string entry is NULL (e.g. scanner-internal tokens) have length 0.
static uint8_t StringLength(Value tok) {
DCHECK(tok < NUM_TOKENS);
return string_length_[tok];
}
// Returns the precedence > 0 for binary and compare
// operators; returns 0 otherwise.
static int Precedence(Value tok) {
......@@ -335,6 +341,7 @@ class Token {
private:
static const char* const name_[NUM_TOKENS];
static const char* const string_[NUM_TOKENS];
static const uint8_t string_length_[NUM_TOKENS];
static const int8_t precedence_[NUM_TOKENS];
static const char token_type[NUM_TOKENS];
};
......
......@@ -885,7 +885,7 @@ void TestScanRegExp(const char* re_source, const char* expected) {
i::Token::Value start = scanner.peek();
CHECK(start == i::Token::DIV || start == i::Token::ASSIGN_DIV);
CHECK(scanner.ScanRegExpPattern(start == i::Token::ASSIGN_DIV));
CHECK(scanner.ScanRegExpPattern());
scanner.Next(); // Current token is now the regexp literal.
i::Zone zone(CcTest::i_isolate()->allocator());
i::AstValueFactory ast_value_factory(&zone,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment