Commit 2b6780dc authored by Marja Hölttä, committed by Commit Bot

[scanner] Don't use UnicodeCache for IsLineTerminator.

For such a simple predicate, calling an (inline) function that checks the
character against the line terminator values is faster (*) than maintaining the cache.

(*) When scanning a file that contains only comments, we're basically calling
IsLineTerminator in a loop. Parsing such files is now 7-18% faster in local
experiments.

BUG=v8:6092

Change-Id: I6a8f2aba9669a76152292f4e6c7853638d15aae3
Reviewed-on: https://chromium-review.googlesource.com/645633
Commit-Queue: Marja Hölttä <marja@chromium.org>
Reviewed-by: Adam Klein <adamk@chromium.org>
Cr-Commit-Position: refs/heads/master@{#47810}
parent 6d72ccf7
...@@ -80,7 +80,7 @@ struct WhiteSpace { ...@@ -80,7 +80,7 @@ struct WhiteSpace {
// as well as \u0009 - \u000d and \ufeff. // as well as \u0009 - \u000d and \ufeff.
struct WhiteSpaceOrLineTerminator { struct WhiteSpaceOrLineTerminator {
static inline bool Is(uc32 c) { static inline bool Is(uc32 c) {
return WhiteSpace::Is(c) || unibrow::LineTerminator::Is(c); return WhiteSpace::Is(c) || unibrow::IsLineTerminator(c);
} }
}; };
......
...@@ -438,7 +438,7 @@ Token::Value Scanner::SkipWhiteSpace() { ...@@ -438,7 +438,7 @@ Token::Value Scanner::SkipWhiteSpace() {
// Advance as long as character is a WhiteSpace or LineTerminator. // Advance as long as character is a WhiteSpace or LineTerminator.
// Remember if the latter is the case. // Remember if the latter is the case.
if (unicode_cache_->IsLineTerminator(c0_)) { if (unibrow::IsLineTerminator(c0_)) {
has_line_terminator_before_next_ = true; has_line_terminator_before_next_ = true;
} else if (!unicode_cache_->IsWhiteSpace(c0_)) { } else if (!unicode_cache_->IsWhiteSpace(c0_)) {
break; break;
...@@ -495,7 +495,7 @@ Token::Value Scanner::SkipSingleLineComment() { ...@@ -495,7 +495,7 @@ Token::Value Scanner::SkipSingleLineComment() {
// separately by the lexical grammar and becomes part of the // separately by the lexical grammar and becomes part of the
// stream of input elements for the syntactic grammar (see // stream of input elements for the syntactic grammar (see
// ECMA-262, section 7.4). // ECMA-262, section 7.4).
while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) { while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
Advance(); Advance();
} }
...@@ -505,7 +505,7 @@ Token::Value Scanner::SkipSingleLineComment() { ...@@ -505,7 +505,7 @@ Token::Value Scanner::SkipSingleLineComment() {
Token::Value Scanner::SkipSourceURLComment() { Token::Value Scanner::SkipSourceURLComment() {
TryToParseSourceURLComment(); TryToParseSourceURLComment();
while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) { while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
Advance(); Advance();
} }
...@@ -541,7 +541,7 @@ void Scanner::TryToParseSourceURLComment() { ...@@ -541,7 +541,7 @@ void Scanner::TryToParseSourceURLComment() {
while (c0_ != kEndOfInput && unicode_cache_->IsWhiteSpace(c0_)) { while (c0_ != kEndOfInput && unicode_cache_->IsWhiteSpace(c0_)) {
Advance(); Advance();
} }
while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) { while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
// Disallowed characters. // Disallowed characters.
if (c0_ == '"' || c0_ == '\'') { if (c0_ == '"' || c0_ == '\'') {
value->Reset(); value->Reset();
...@@ -554,7 +554,7 @@ void Scanner::TryToParseSourceURLComment() { ...@@ -554,7 +554,7 @@ void Scanner::TryToParseSourceURLComment() {
Advance(); Advance();
} }
// Allow whitespace at the end. // Allow whitespace at the end.
while (c0_ != kEndOfInput && !unicode_cache_->IsLineTerminator(c0_)) { while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
if (!unicode_cache_->IsWhiteSpace(c0_)) { if (!unicode_cache_->IsWhiteSpace(c0_)) {
value->Reset(); value->Reset();
break; break;
...@@ -571,7 +571,7 @@ Token::Value Scanner::SkipMultiLineComment() { ...@@ -571,7 +571,7 @@ Token::Value Scanner::SkipMultiLineComment() {
while (c0_ != kEndOfInput) { while (c0_ != kEndOfInput) {
uc32 ch = c0_; uc32 ch = c0_;
Advance(); Advance();
if (c0_ != kEndOfInput && unicode_cache_->IsLineTerminator(ch)) { if (c0_ != kEndOfInput && unibrow::IsLineTerminator(ch)) {
// Following ECMA-262, section 7.4, a comment containing // Following ECMA-262, section 7.4, a comment containing
// a newline will make the comment count as a line-terminator. // a newline will make the comment count as a line-terminator.
has_multiline_comment_before_next_ = true; has_multiline_comment_before_next_ = true;
...@@ -968,7 +968,7 @@ bool Scanner::ScanEscape() { ...@@ -968,7 +968,7 @@ bool Scanner::ScanEscape() {
// Skip escaped newlines. // Skip escaped newlines.
if (!in_template_literal && c0_ != kEndOfInput && if (!in_template_literal && c0_ != kEndOfInput &&
unicode_cache_->IsLineTerminator(c)) { unibrow::IsLineTerminator(c)) {
// Allow escaped CR+LF newlines in multiline string literals. // Allow escaped CR+LF newlines in multiline string literals.
if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance<capture_raw>(); if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance<capture_raw>();
return true; return true;
...@@ -1062,7 +1062,7 @@ Token::Value Scanner::ScanString() { ...@@ -1062,7 +1062,7 @@ Token::Value Scanner::ScanString() {
} }
while (c0_ != quote && c0_ != kEndOfInput && while (c0_ != quote && c0_ != kEndOfInput &&
!unicode_cache_->IsLineTerminator(c0_)) { !unibrow::IsLineTerminator(c0_)) {
uc32 c = c0_; uc32 c = c0_;
Advance(); Advance();
if (c == '\\') { if (c == '\\') {
...@@ -1118,7 +1118,7 @@ Token::Value Scanner::ScanTemplateSpan() { ...@@ -1118,7 +1118,7 @@ Token::Value Scanner::ScanTemplateSpan() {
ReduceRawLiteralLength(2); ReduceRawLiteralLength(2);
break; break;
} else if (c == '\\') { } else if (c == '\\') {
if (c0_ != kEndOfInput && unicode_cache_->IsLineTerminator(c0_)) { if (c0_ != kEndOfInput && unibrow::IsLineTerminator(c0_)) {
// The TV of LineContinuation :: \ LineTerminatorSequence is the empty // The TV of LineContinuation :: \ LineTerminatorSequence is the empty
// code unit sequence. // code unit sequence.
uc32 lastChar = c0_; uc32 lastChar = c0_;
...@@ -1659,12 +1659,14 @@ bool Scanner::ScanRegExpPattern() { ...@@ -1659,12 +1659,14 @@ bool Scanner::ScanRegExpPattern() {
} }
while (c0_ != '/' || in_character_class) { while (c0_ != '/' || in_character_class) {
if (c0_ == kEndOfInput || unicode_cache_->IsLineTerminator(c0_)) if (c0_ == kEndOfInput || unibrow::IsLineTerminator(c0_)) {
return false; return false;
}
if (c0_ == '\\') { // Escape sequence. if (c0_ == '\\') { // Escape sequence.
AddLiteralCharAdvance(); AddLiteralCharAdvance();
if (c0_ == kEndOfInput || unicode_cache_->IsLineTerminator(c0_)) if (c0_ == kEndOfInput || unibrow::IsLineTerminator(c0_)) {
return false; return false;
}
AddLiteralCharAdvance(); AddLiteralCharAdvance();
// If the escape allows more characters, i.e., \x??, \u????, or \c?, // If the escape allows more characters, i.e., \x??, \u????, or \c?,
// only "safe" characters are allowed (letters, digits, underscore), // only "safe" characters are allowed (letters, digits, underscore),
......
...@@ -20,15 +20,9 @@ bool UnicodeCache::IsIdentifierPart(unibrow::uchar c) { ...@@ -20,15 +20,9 @@ bool UnicodeCache::IsIdentifierPart(unibrow::uchar c) {
return kIsIdentifierPart.get(c); return kIsIdentifierPart.get(c);
} }
bool UnicodeCache::IsLineTerminator(unibrow::uchar c) {
return kIsLineTerminator.get(c);
}
bool UnicodeCache::IsLineTerminatorSequence(unibrow::uchar c, bool UnicodeCache::IsLineTerminatorSequence(unibrow::uchar c,
unibrow::uchar next) { unibrow::uchar next) {
if (!IsLineTerminator(c)) return false; if (!unibrow::IsLineTerminator(c)) return false;
if (c == 0x000d && next == 0x000a) return false; // CR with following LF. if (c == 0x000d && next == 0x000a) return false; // CR with following LF.
return true; return true;
} }
......
...@@ -32,7 +32,6 @@ class UnicodeCache { ...@@ -32,7 +32,6 @@ class UnicodeCache {
private: private:
unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart; unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart;
unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart; unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart;
unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator;
unibrow::Predicate<WhiteSpace, 128> kIsWhiteSpace; unibrow::Predicate<WhiteSpace, 128> kIsWhiteSpace;
unibrow::Predicate<WhiteSpaceOrLineTerminator, 128> unibrow::Predicate<WhiteSpaceOrLineTerminator, 128>
kIsWhiteSpaceOrLineTerminator; kIsWhiteSpaceOrLineTerminator;
......
...@@ -1163,14 +1163,6 @@ bool WhiteSpace::Is(uchar c) { ...@@ -1163,14 +1163,6 @@ bool WhiteSpace::Is(uchar c) {
} }
#endif // !V8_INTL_SUPPORT #endif // !V8_INTL_SUPPORT
// LineTerminator: 'JS_Line_Terminator' in point.properties
// ES#sec-line-terminators lists exactly 4 code points:
// LF (U+000A), CR (U+000D), LS(U+2028), PS(U+2029)
bool LineTerminator::Is(uchar c) {
return c == 0xA || c == 0xD || c == 0x2028 || c == 0x2029;
}
#ifndef V8_INTL_SUPPORT #ifndef V8_INTL_SUPPORT
static const MultiCharacterSpecialCase<2> kToLowercaseMultiStrings0[2] = { // NOLINT static const MultiCharacterSpecialCase<2> kToLowercaseMultiStrings0[2] = { // NOLINT
{{105, 775}}, {{kSentinel}} }; // NOLINT {{105, 775}}, {{kSentinel}} }; // NOLINT
......
...@@ -194,9 +194,14 @@ struct V8_EXPORT_PRIVATE WhiteSpace { ...@@ -194,9 +194,14 @@ struct V8_EXPORT_PRIVATE WhiteSpace {
static bool Is(uchar c); static bool Is(uchar c);
}; };
#endif // !V8_INTL_SUPPORT #endif // !V8_INTL_SUPPORT
struct V8_EXPORT_PRIVATE LineTerminator {
static bool Is(uchar c); // LineTerminator: 'JS_Line_Terminator' in point.properties
}; // ES#sec-line-terminators lists exactly 4 code points:
// LF (U+000A), CR (U+000D), LS(U+2028), PS(U+2029)
V8_INLINE bool IsLineTerminator(uchar c) {
return c == 0xA || c == 0xD || c == 0x2028 || c == 0x2029;
}
#ifndef V8_INTL_SUPPORT #ifndef V8_INTL_SUPPORT
struct ToLowercase { struct ToLowercase {
static const int kMaxWidth = 3; static const int kMaxWidth = 3;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment