Commit a8b538dd, authored by verwaest, committed by Commit bot

Speed up identifier, keyword and smi parsing

BUG=

Review URL: https://codereview.chromium.org/969353003

Cr-Commit-Position: refs/heads/master@{#26962}
parent 01e2ba6e
...@@ -35,6 +35,13 @@ inline bool IsInRange(int value, int lower_limit, int higher_limit) { ...@@ -35,6 +35,13 @@ inline bool IsInRange(int value, int lower_limit, int higher_limit) {
static_cast<unsigned int>(higher_limit - lower_limit); static_cast<unsigned int>(higher_limit - lower_limit);
} }
// Returns true when |c| is '$', '_', or any character accepted by
// IsAlphaNumeric().
inline bool IsAsciiIdentifier(uc32 c) {
  if (IsAlphaNumeric(c)) return true;
  return c == '$' || c == '_';
}
// Returns true when AsciiAlphaToLower(|c|) lies in ['a', 'z'] or when |c| is a
// decimal digit.
inline bool IsAlphaNumeric(uc32 c) {
  // Running the value through AsciiAlphaToLower() first means a single range
  // check is enough for the letter test.
  if (IsInRange(AsciiAlphaToLower(c), 'a', 'z')) return true;
  return IsDecimalDigit(c);
}
inline bool IsDecimalDigit(uc32 c) { inline bool IsDecimalDigit(uc32 c) {
// ECMA-262, 3rd, 7.8.3 (p 16) // ECMA-262, 3rd, 7.8.3 (p 16)
......
...@@ -15,6 +15,8 @@ namespace internal { ...@@ -15,6 +15,8 @@ namespace internal {
inline bool IsCarriageReturn(uc32 c); inline bool IsCarriageReturn(uc32 c);
inline bool IsLineFeed(uc32 c); inline bool IsLineFeed(uc32 c);
inline bool IsAsciiIdentifier(uc32 c);
inline bool IsAlphaNumeric(uc32 c);
inline bool IsDecimalDigit(uc32 c); inline bool IsDecimalDigit(uc32 c);
inline bool IsHexDigit(uc32 c); inline bool IsHexDigit(uc32 c);
inline bool IsOctalDigit(uc32 c); inline bool IsOctalDigit(uc32 c);
......
...@@ -983,15 +983,21 @@ Token::Value Scanner::ScanNumber(bool seen_period) { ...@@ -983,15 +983,21 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
int value = 0; int value = 0;
while (IsDecimalDigit(c0_)) { while (IsDecimalDigit(c0_)) {
value = 10 * value + (c0_ - '0'); value = 10 * value + (c0_ - '0');
AddLiteralCharAdvance();
uc32 first_char = c0_;
Advance<false, false>();
AddLiteralChar(first_char);
} }
if (next_.literal_chars->one_byte_literal().length() < 10 && if (next_.literal_chars->one_byte_literal().length() < 10 &&
c0_ != '.' && c0_ != 'e' && c0_ != 'E') { c0_ != '.' && c0_ != 'e' && c0_ != 'E') {
smi_value_ = value; smi_value_ = value;
literal.Complete(); literal.Complete();
HandleLeadSurrugate();
return Token::SMI; return Token::SMI;
} }
HandleLeadSurrugate();
} }
ScanDecimalDigits(); // optional ScanDecimalDigits(); // optional
...@@ -1193,8 +1199,53 @@ bool Scanner::IdentifierIsFutureStrictReserved( ...@@ -1193,8 +1199,53 @@ bool Scanner::IdentifierIsFutureStrictReserved(
Token::Value Scanner::ScanIdentifierOrKeyword() { Token::Value Scanner::ScanIdentifierOrKeyword() {
DCHECK(unicode_cache_->IsIdentifierStart(c0_)); DCHECK(unicode_cache_->IsIdentifierStart(c0_));
LiteralScope literal(this); LiteralScope literal(this);
// Scan identifier start character. if (IsInRange(c0_, 'a', 'z')) {
if (c0_ == '\\') { do {
uc32 first_char = c0_;
Advance<false, false>();
AddLiteralChar(first_char);
} while (IsInRange(c0_, 'a', 'z'));
if (IsDecimalDigit(c0_) || IsInRange(c0_, 'A', 'Z') || c0_ == '_' ||
c0_ == '$') {
// Identifier starting with lowercase.
uc32 first_char = c0_;
Advance<false, false>();
AddLiteralChar(first_char);
while (IsAsciiIdentifier(c0_)) {
uc32 first_char = c0_;
Advance<false, false>();
AddLiteralChar(first_char);
}
if (c0_ <= 127 && c0_ != '\\') {
literal.Complete();
return Token::IDENTIFIER;
}
} else if (c0_ <= 127 && c0_ != '\\') {
// Only a-z+: could be a keyword or identifier.
literal.Complete();
Vector<const uint8_t> chars = next_.literal_chars->one_byte_literal();
return KeywordOrIdentifierToken(chars.start(), chars.length(),
harmony_scoping_, harmony_modules_,
harmony_classes_);
}
HandleLeadSurrugate();
} else if (IsInRange(c0_, 'A', 'Z') || c0_ == '_' || c0_ == '$') {
do {
uc32 first_char = c0_;
Advance<false, false>();
AddLiteralChar(first_char);
} while (IsAsciiIdentifier(c0_));
if (c0_ <= 127 && c0_ != '\\') {
literal.Complete();
return Token::IDENTIFIER;
}
HandleLeadSurrugate();
} else if (c0_ == '\\') {
// Scan identifier start character.
uc32 c = ScanIdentifierUnicodeEscape(); uc32 c = ScanIdentifierUnicodeEscape();
// Only allow legal identifier start characters. // Only allow legal identifier start characters.
if (c < 0 || if (c < 0 ||
...@@ -1204,12 +1255,12 @@ Token::Value Scanner::ScanIdentifierOrKeyword() { ...@@ -1204,12 +1255,12 @@ Token::Value Scanner::ScanIdentifierOrKeyword() {
} }
AddLiteralChar(c); AddLiteralChar(c);
return ScanIdentifierSuffix(&literal); return ScanIdentifierSuffix(&literal);
} else {
uc32 first_char = c0_;
Advance();
AddLiteralChar(first_char);
} }
uc32 first_char = c0_;
Advance();
AddLiteralChar(first_char);
// Scan the rest of the identifier characters. // Scan the rest of the identifier characters.
while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) { while (c0_ >= 0 && unicode_cache_->IsIdentifierPart(c0_)) {
if (c0_ != '\\') { if (c0_ != '\\') {
...@@ -1232,7 +1283,6 @@ Token::Value Scanner::ScanIdentifierOrKeyword() { ...@@ -1232,7 +1283,6 @@ Token::Value Scanner::ScanIdentifierOrKeyword() {
harmony_modules_, harmony_modules_,
harmony_classes_); harmony_classes_);
} }
return Token::IDENTIFIER; return Token::IDENTIFIER;
} }
......
...@@ -565,12 +565,16 @@ class Scanner { ...@@ -565,12 +565,16 @@ class Scanner {
} }
// Low-level scanning support. // Low-level scanning support.
template <bool capture_raw = false> template <bool capture_raw = false, bool check_surrogate = true>
void Advance() { void Advance() {
if (capture_raw) { if (capture_raw) {
AddRawLiteralChar(c0_); AddRawLiteralChar(c0_);
} }
c0_ = source_->Advance(); c0_ = source_->Advance();
if (check_surrogate) HandleLeadSurrugate();
}
void HandleLeadSurrugate() {
if (unibrow::Utf16::IsLeadSurrogate(c0_)) { if (unibrow::Utf16::IsLeadSurrogate(c0_)) {
uc32 c1 = source_->Advance(); uc32 c1 = source_->Advance();
if (!unibrow::Utf16::IsTrailSurrogate(c1)) { if (!unibrow::Utf16::IsTrailSurrogate(c1)) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment