Commit 5bbd1921 authored by Florian Sattler's avatar Florian Sattler Committed by Commit Bot

[scanner] Inlining ScanIdentifier and Token check

This improves parse time for code-load and other benchmarks.

Bug: v8:7926
Change-Id: I9c8017f2e7f73dd952c025db5abe8e4062e6ef9b
Reviewed-on: https://chromium-review.googlesource.com/1196506
Commit-Queue: Florian Sattler <sattlerf@google.com>
Reviewed-by: 's avatarToon Verwaest <verwaest@chromium.org>
Cr-Commit-Position: refs/heads/master@{#55521}
parent 1c48d52b
......@@ -149,6 +149,200 @@ static const Token::Value one_char_tokens[] = {
};
// clang-format on
// ----------------------------------------------------------------------------
// Keyword Matcher
#define KEYWORDS(KEYWORD_GROUP, KEYWORD) \
KEYWORD_GROUP('a') \
KEYWORD("arguments", Token::ARGUMENTS) \
KEYWORD("as", Token::AS) \
KEYWORD("async", Token::ASYNC) \
KEYWORD("await", Token::AWAIT) \
KEYWORD("anonymous", Token::ANONYMOUS) \
KEYWORD_GROUP('b') \
KEYWORD("break", Token::BREAK) \
KEYWORD_GROUP('c') \
KEYWORD("case", Token::CASE) \
KEYWORD("catch", Token::CATCH) \
KEYWORD("class", Token::CLASS) \
KEYWORD("const", Token::CONST) \
KEYWORD("constructor", Token::CONSTRUCTOR) \
KEYWORD("continue", Token::CONTINUE) \
KEYWORD_GROUP('d') \
KEYWORD("debugger", Token::DEBUGGER) \
KEYWORD("default", Token::DEFAULT) \
KEYWORD("delete", Token::DELETE) \
KEYWORD("do", Token::DO) \
KEYWORD_GROUP('e') \
KEYWORD("else", Token::ELSE) \
KEYWORD("enum", Token::ENUM) \
KEYWORD("eval", Token::EVAL) \
KEYWORD("export", Token::EXPORT) \
KEYWORD("extends", Token::EXTENDS) \
KEYWORD_GROUP('f') \
KEYWORD("false", Token::FALSE_LITERAL) \
KEYWORD("finally", Token::FINALLY) \
KEYWORD("for", Token::FOR) \
KEYWORD("from", Token::FROM) \
KEYWORD("function", Token::FUNCTION) \
KEYWORD_GROUP('g') \
KEYWORD("get", Token::GET) \
KEYWORD_GROUP('i') \
KEYWORD("if", Token::IF) \
KEYWORD("implements", Token::FUTURE_STRICT_RESERVED_WORD) \
KEYWORD("import", Token::IMPORT) \
KEYWORD("in", Token::IN) \
KEYWORD("instanceof", Token::INSTANCEOF) \
KEYWORD("interface", Token::FUTURE_STRICT_RESERVED_WORD) \
KEYWORD_GROUP('l') \
KEYWORD("let", Token::LET) \
KEYWORD_GROUP('m') \
KEYWORD("meta", Token::META) \
KEYWORD_GROUP('n') \
KEYWORD("name", Token::NAME) \
KEYWORD("new", Token::NEW) \
KEYWORD("null", Token::NULL_LITERAL) \
KEYWORD_GROUP('o') \
KEYWORD("of", Token::OF) \
KEYWORD_GROUP('p') \
KEYWORD("package", Token::FUTURE_STRICT_RESERVED_WORD) \
KEYWORD("private", Token::FUTURE_STRICT_RESERVED_WORD) \
KEYWORD("protected", Token::FUTURE_STRICT_RESERVED_WORD) \
KEYWORD("prototype", Token::PROTOTYPE) \
KEYWORD("public", Token::FUTURE_STRICT_RESERVED_WORD) \
KEYWORD_GROUP('r') \
KEYWORD("return", Token::RETURN) \
KEYWORD_GROUP('s') \
KEYWORD("set", Token::SET) \
KEYWORD("static", Token::STATIC) \
KEYWORD("super", Token::SUPER) \
KEYWORD("switch", Token::SWITCH) \
KEYWORD_GROUP('t') \
KEYWORD("target", Token::TARGET) \
KEYWORD("this", Token::THIS) \
KEYWORD("throw", Token::THROW) \
KEYWORD("true", Token::TRUE_LITERAL) \
KEYWORD("try", Token::TRY) \
KEYWORD("typeof", Token::TYPEOF) \
KEYWORD_GROUP('u') \
KEYWORD("undefined", Token::UNDEFINED) \
KEYWORD_GROUP('v') \
KEYWORD("var", Token::VAR) \
KEYWORD("void", Token::VOID) \
KEYWORD_GROUP('w') \
KEYWORD("while", Token::WHILE) \
KEYWORD("with", Token::WITH) \
KEYWORD_GROUP('y') \
KEYWORD("yield", Token::YIELD) \
KEYWORD_GROUP('_') \
KEYWORD("__proto__", Token::PROTO_UNDERSCORED) \
KEYWORD_GROUP('#') \
KEYWORD("#constructor", Token::PRIVATE_CONSTRUCTOR)
V8_INLINE Token::Value KeywordOrIdentifierToken(const uint8_t* input,
int input_length) {
DCHECK_GE(input_length, 1);
const int kMinLength = 2;
const int kMaxLength = 12;
if (input_length < kMinLength || input_length > kMaxLength) {
return Token::IDENTIFIER;
}
switch (input[0]) {
default:
#define KEYWORD_GROUP_CASE(ch) \
break; \
case ch:
#define KEYWORD(keyword, token) \
{ \
/* 'keyword' is a char array, so sizeof(keyword) is */ \
/* strlen(keyword) plus 1 for the NUL char. */ \
const int keyword_length = sizeof(keyword) - 1; \
STATIC_ASSERT(keyword_length >= kMinLength); \
STATIC_ASSERT(keyword_length <= kMaxLength); \
DCHECK_EQ(input[0], keyword[0]); \
DCHECK(token == Token::FUTURE_STRICT_RESERVED_WORD || \
0 == strncmp(keyword, Token::String(token), sizeof(keyword))); \
if (input_length == keyword_length && input[1] == keyword[1] && \
(keyword_length <= 2 || input[2] == keyword[2]) && \
(keyword_length <= 3 || input[3] == keyword[3]) && \
(keyword_length <= 4 || input[4] == keyword[4]) && \
(keyword_length <= 5 || input[5] == keyword[5]) && \
(keyword_length <= 6 || input[6] == keyword[6]) && \
(keyword_length <= 7 || input[7] == keyword[7]) && \
(keyword_length <= 8 || input[8] == keyword[8]) && \
(keyword_length <= 9 || input[9] == keyword[9]) && \
(keyword_length <= 10 || input[10] == keyword[10])) { \
return token; \
} \
}
KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD)
}
return Token::IDENTIFIER;
#undef KEYWORDS
#undef KEYWORD
#undef KEYWORD_GROUP_CASE
}
V8_INLINE Token::Value Scanner::ScanIdentifierOrKeyword() {
LiteralScope literal(this);
return ScanIdentifierOrKeywordInner(&literal);
}
V8_INLINE Token::Value Scanner::ScanIdentifierOrKeywordInner(
LiteralScope* literal) {
DCHECK(unicode_cache_->IsIdentifierStart(c0_));
bool escaped = false;
if (IsInRange(c0_, 'a', 'z') || c0_ == '_') {
do {
AddLiteralChar(static_cast<char>(c0_));
Advance();
} while (IsInRange(c0_, 'a', 'z') || c0_ == '_');
if (IsDecimalDigit(c0_) || IsInRange(c0_, 'A', 'Z') || c0_ == '$') {
// Identifier starting with lowercase or _.
do {
AddLiteralChar(static_cast<char>(c0_));
Advance();
} while (IsAsciiIdentifier(c0_));
if (c0_ <= kMaxAscii && c0_ != '\\') {
literal->Complete();
return Token::IDENTIFIER;
}
} else if (c0_ <= kMaxAscii && c0_ != '\\') {
// Only a-z+ or _: could be a keyword or identifier.
Vector<const uint8_t> chars = next().literal_chars.one_byte_literal();
Token::Value token =
KeywordOrIdentifierToken(chars.start(), chars.length());
if (token == Token::IDENTIFIER ||
token == Token::FUTURE_STRICT_RESERVED_WORD ||
Token::IsContextualKeyword(token))
literal->Complete();
return token;
}
} else if (IsInRange(c0_, 'A', 'Z') || c0_ == '$') {
do {
AddLiteralChar(static_cast<char>(c0_));
Advance();
} while (IsAsciiIdentifier(c0_));
if (c0_ <= kMaxAscii && c0_ != '\\') {
literal->Complete();
return Token::IDENTIFIER;
}
} else if (c0_ == '\\') {
escaped = true;
uc32 c = ScanIdentifierUnicodeEscape();
DCHECK(!unicode_cache_->IsIdentifierStart(-1));
if (c == '\\' || !unicode_cache_->IsIdentifierStart(c)) {
return Token::ILLEGAL;
}
AddLiteralChar(c);
}
return ScanIdentifierOrKeywordInnerSlow(literal, escaped);
}
V8_INLINE Token::Value Scanner::SkipWhiteSpace() {
int start_position = source_pos();
......
......@@ -1005,197 +1005,8 @@ uc32 Scanner::ScanUnicodeEscape() {
return ScanHexNumber<capture_raw, unicode>(4);
}
// ----------------------------------------------------------------------------
// Keyword Matcher
#define KEYWORDS(KEYWORD_GROUP, KEYWORD) \
KEYWORD_GROUP('a') \
KEYWORD("arguments", Token::ARGUMENTS) \
KEYWORD("as", Token::AS) \
KEYWORD("async", Token::ASYNC) \
KEYWORD("await", Token::AWAIT) \
KEYWORD("anonymous", Token::ANONYMOUS) \
KEYWORD_GROUP('b') \
KEYWORD("break", Token::BREAK) \
KEYWORD_GROUP('c') \
KEYWORD("case", Token::CASE) \
KEYWORD("catch", Token::CATCH) \
KEYWORD("class", Token::CLASS) \
KEYWORD("const", Token::CONST) \
KEYWORD("constructor", Token::CONSTRUCTOR) \
KEYWORD("continue", Token::CONTINUE) \
KEYWORD_GROUP('d') \
KEYWORD("debugger", Token::DEBUGGER) \
KEYWORD("default", Token::DEFAULT) \
KEYWORD("delete", Token::DELETE) \
KEYWORD("do", Token::DO) \
KEYWORD_GROUP('e') \
KEYWORD("else", Token::ELSE) \
KEYWORD("enum", Token::ENUM) \
KEYWORD("eval", Token::EVAL) \
KEYWORD("export", Token::EXPORT) \
KEYWORD("extends", Token::EXTENDS) \
KEYWORD_GROUP('f') \
KEYWORD("false", Token::FALSE_LITERAL) \
KEYWORD("finally", Token::FINALLY) \
KEYWORD("for", Token::FOR) \
KEYWORD("from", Token::FROM) \
KEYWORD("function", Token::FUNCTION) \
KEYWORD_GROUP('g') \
KEYWORD("get", Token::GET) \
KEYWORD_GROUP('i') \
KEYWORD("if", Token::IF) \
KEYWORD("implements", Token::FUTURE_STRICT_RESERVED_WORD) \
KEYWORD("import", Token::IMPORT) \
KEYWORD("in", Token::IN) \
KEYWORD("instanceof", Token::INSTANCEOF) \
KEYWORD("interface", Token::FUTURE_STRICT_RESERVED_WORD) \
KEYWORD_GROUP('l') \
KEYWORD("let", Token::LET) \
KEYWORD_GROUP('m') \
KEYWORD("meta", Token::META) \
KEYWORD_GROUP('n') \
KEYWORD("name", Token::NAME) \
KEYWORD("new", Token::NEW) \
KEYWORD("null", Token::NULL_LITERAL) \
KEYWORD_GROUP('o') \
KEYWORD("of", Token::OF) \
KEYWORD_GROUP('p') \
KEYWORD("package", Token::FUTURE_STRICT_RESERVED_WORD) \
KEYWORD("private", Token::FUTURE_STRICT_RESERVED_WORD) \
KEYWORD("protected", Token::FUTURE_STRICT_RESERVED_WORD) \
KEYWORD("prototype", Token::PROTOTYPE) \
KEYWORD("public", Token::FUTURE_STRICT_RESERVED_WORD) \
KEYWORD_GROUP('r') \
KEYWORD("return", Token::RETURN) \
KEYWORD_GROUP('s') \
KEYWORD("set", Token::SET) \
KEYWORD("static", Token::STATIC) \
KEYWORD("super", Token::SUPER) \
KEYWORD("switch", Token::SWITCH) \
KEYWORD_GROUP('t') \
KEYWORD("target", Token::TARGET) \
KEYWORD("this", Token::THIS) \
KEYWORD("throw", Token::THROW) \
KEYWORD("true", Token::TRUE_LITERAL) \
KEYWORD("try", Token::TRY) \
KEYWORD("typeof", Token::TYPEOF) \
KEYWORD_GROUP('u') \
KEYWORD("undefined", Token::UNDEFINED) \
KEYWORD_GROUP('v') \
KEYWORD("var", Token::VAR) \
KEYWORD("void", Token::VOID) \
KEYWORD_GROUP('w') \
KEYWORD("while", Token::WHILE) \
KEYWORD("with", Token::WITH) \
KEYWORD_GROUP('y') \
KEYWORD("yield", Token::YIELD) \
KEYWORD_GROUP('_') \
KEYWORD("__proto__", Token::PROTO_UNDERSCORED) \
KEYWORD_GROUP('#') \
KEYWORD("#constructor", Token::PRIVATE_CONSTRUCTOR)
static Token::Value KeywordOrIdentifierToken(const uint8_t* input,
int input_length) {
DCHECK_GE(input_length, 1);
const int kMinLength = 2;
const int kMaxLength = 12;
if (input_length < kMinLength || input_length > kMaxLength) {
return Token::IDENTIFIER;
}
switch (input[0]) {
default:
#define KEYWORD_GROUP_CASE(ch) \
break; \
case ch:
#define KEYWORD(keyword, token) \
{ \
/* 'keyword' is a char array, so sizeof(keyword) is */ \
/* strlen(keyword) plus 1 for the NUL char. */ \
const int keyword_length = sizeof(keyword) - 1; \
STATIC_ASSERT(keyword_length >= kMinLength); \
STATIC_ASSERT(keyword_length <= kMaxLength); \
DCHECK_EQ(input[0], keyword[0]); \
DCHECK(token == Token::FUTURE_STRICT_RESERVED_WORD || \
0 == strncmp(keyword, Token::String(token), sizeof(keyword))); \
if (input_length == keyword_length && input[1] == keyword[1] && \
(keyword_length <= 2 || input[2] == keyword[2]) && \
(keyword_length <= 3 || input[3] == keyword[3]) && \
(keyword_length <= 4 || input[4] == keyword[4]) && \
(keyword_length <= 5 || input[5] == keyword[5]) && \
(keyword_length <= 6 || input[6] == keyword[6]) && \
(keyword_length <= 7 || input[7] == keyword[7]) && \
(keyword_length <= 8 || input[8] == keyword[8]) && \
(keyword_length <= 9 || input[9] == keyword[9]) && \
(keyword_length <= 10 || input[10] == keyword[10])) { \
return token; \
} \
}
KEYWORDS(KEYWORD_GROUP_CASE, KEYWORD)
}
return Token::IDENTIFIER;
#undef KEYWORDS
#undef KEYWORD
#undef KEYWORD_GROUP_CASE
}
Token::Value Scanner::ScanIdentifierOrKeyword() {
LiteralScope literal(this);
return ScanIdentifierOrKeywordInner(&literal);
}
Token::Value Scanner::ScanIdentifierOrKeywordInner(LiteralScope* literal) {
DCHECK(unicode_cache_->IsIdentifierStart(c0_));
bool escaped = false;
if (IsInRange(c0_, 'a', 'z') || c0_ == '_') {
do {
AddLiteralChar(static_cast<char>(c0_));
Advance();
} while (IsInRange(c0_, 'a', 'z') || c0_ == '_');
if (IsDecimalDigit(c0_) || IsInRange(c0_, 'A', 'Z') || c0_ == '$') {
// Identifier starting with lowercase or _.
do {
AddLiteralChar(static_cast<char>(c0_));
Advance();
} while (IsAsciiIdentifier(c0_));
if (c0_ <= kMaxAscii && c0_ != '\\') {
literal->Complete();
return Token::IDENTIFIER;
}
} else if (c0_ <= kMaxAscii && c0_ != '\\') {
// Only a-z+ or _: could be a keyword or identifier.
Vector<const uint8_t> chars = next().literal_chars.one_byte_literal();
Token::Value token =
KeywordOrIdentifierToken(chars.start(), chars.length());
if (token == Token::IDENTIFIER ||
token == Token::FUTURE_STRICT_RESERVED_WORD ||
Token::IsContextualKeyword(token))
literal->Complete();
return token;
}
} else if (IsInRange(c0_, 'A', 'Z') || c0_ == '$') {
do {
AddLiteralChar(static_cast<char>(c0_));
Advance();
} while (IsAsciiIdentifier(c0_));
if (c0_ <= kMaxAscii && c0_ != '\\') {
literal->Complete();
return Token::IDENTIFIER;
}
} else if (c0_ == '\\') {
escaped = true;
uc32 c = ScanIdentifierUnicodeEscape();
DCHECK(!unicode_cache_->IsIdentifierStart(-1));
if (c == '\\' || !unicode_cache_->IsIdentifierStart(c)) {
return Token::ILLEGAL;
}
AddLiteralChar(c);
}
Token::Value Scanner::ScanIdentifierOrKeywordInnerSlow(LiteralScope* literal,
bool escaped) {
while (true) {
if (c0_ == '\\') {
escaped = true;
......
......@@ -738,8 +738,10 @@ class Scanner {
bool ScanImplicitOctalDigits(int start_pos, NumberKind* kind);
Token::Value ScanNumber(bool seen_period);
Token::Value ScanIdentifierOrKeyword();
Token::Value ScanIdentifierOrKeywordInner(LiteralScope* literal);
V8_INLINE Token::Value ScanIdentifierOrKeyword();
V8_INLINE Token::Value ScanIdentifierOrKeywordInner(LiteralScope* literal);
Token::Value ScanIdentifierOrKeywordInnerSlow(LiteralScope* literal,
bool escaped);
Token::Value ScanString();
Token::Value ScanPrivateName();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment