Commit a6f226e8 authored by Toon Verwaest, committed by Commit Bot

[scanner] Speed up multiline comment scanning using AdvanceUntil + ScanFlags

This roughly takes 40% less time.

Change-Id: I7886bd85cd33c9b2d2225e0f207a3e3bf89f1ef3
Reviewed-on: https://chromium-review.googlesource.com/c/1491511
Commit-Queue: Toon Verwaest <verwaest@chromium.org>
Reviewed-by: Leszek Swirski <leszeks@chromium.org>
Cr-Commit-Position: refs/heads/master@{#59898}
parent 78c7cf4b
......@@ -191,7 +191,8 @@ enum class ScanFlags : uint8_t {
kCannotBeKeyword = 1 << 1,
kCannotBeKeywordStart = 1 << 2,
kStringTerminator = 1 << 3,
kNeedsSlowPath = 1 << 4,
kIdentifierNeedsSlowPath = 1 << 4,
kMultilineCommentCharacterNeedsSlowPath = 1 << 5,
};
constexpr uint8_t GetScanFlags(char c) {
return
......@@ -215,7 +216,14 @@ constexpr uint8_t GetScanFlags(char c) {
? static_cast<uint8_t>(ScanFlags::kStringTerminator)
: 0) |
// Escapes are processed on the slow path.
(c == '\\' ? static_cast<uint8_t>(ScanFlags::kNeedsSlowPath) : 0);
(c == '\\' ? static_cast<uint8_t>(ScanFlags::kIdentifierNeedsSlowPath)
: 0) |
// Newlines and * are interesting characters for multiline comment
// scanning.
(c == '\n' || c == '\r' || c == '*'
? static_cast<uint8_t>(
ScanFlags::kMultilineCommentCharacterNeedsSlowPath)
: 0);
}
inline bool TerminatesLiteral(uint8_t scan_flags) {
return (scan_flags & static_cast<uint8_t>(ScanFlags::kTerminatesLiteral));
......@@ -223,8 +231,13 @@ inline bool TerminatesLiteral(uint8_t scan_flags) {
// Returns true when the ScanFlags::kCannotBeKeyword bit is clear in
// |scan_flags|, i.e. nothing scanned so far has ruled out a keyword.
inline bool CanBeKeyword(uint8_t scan_flags) {
return !(scan_flags & static_cast<uint8_t>(ScanFlags::kCannotBeKeyword));
}
inline bool NeedsSlowPath(uint8_t scan_flags) {
return (scan_flags & static_cast<uint8_t>(ScanFlags::kNeedsSlowPath));
// Returns true when the ScanFlags::kIdentifierNeedsSlowPath bit is set in
// |scan_flags|. GetScanFlags sets this bit for a backslash (escapes are
// processed on the slow path), and identifier scanning ORs it in when it meets
// a character above kMaxAscii.
inline bool IdentifierNeedsSlowPath(uint8_t scan_flags) {
return (scan_flags &
static_cast<uint8_t>(ScanFlags::kIdentifierNeedsSlowPath));
}
// Returns true when the ScanFlags::kMultilineCommentCharacterNeedsSlowPath bit
// is set in |scan_flags|. GetScanFlags sets this bit for '\n', '\r' and '*',
// the characters that multi-line comment scanning has to stop and inspect.
inline bool MultilineCommentCharacterNeedsSlowPath(uint8_t scan_flags) {
return (scan_flags & static_cast<uint8_t>(
ScanFlags::kMultilineCommentCharacterNeedsSlowPath));
}
inline bool MayTerminateString(uint8_t scan_flags) {
return (scan_flags & static_cast<uint8_t>(ScanFlags::kStringTerminator));
......@@ -255,9 +268,9 @@ V8_INLINE Token::Value Scanner::ScanIdentifierOrKeywordInner() {
STATIC_ASSERT(static_cast<uint8_t>(ScanFlags::kCannotBeKeywordStart) ==
static_cast<uint8_t>(ScanFlags::kCannotBeKeyword) << 1);
scan_flags >>= 1;
// Make sure the shifting above doesn't set NeedsSlowPath. Otherwise we'll
// fall into the slow path after scanning the identifier.
DCHECK(!NeedsSlowPath(scan_flags));
// Make sure the shifting above doesn't set IdentifierNeedsSlowPath.
// Otherwise we'll fall into the slow path after scanning the identifier.
DCHECK(!IdentifierNeedsSlowPath(scan_flags));
AddLiteralChar(static_cast<char>(c0_));
AdvanceUntil([this, &scan_flags](uc32 c0) {
if (V8_UNLIKELY(static_cast<uint32_t>(c0) > kMaxAscii)) {
......@@ -265,7 +278,8 @@ V8_INLINE Token::Value Scanner::ScanIdentifierOrKeywordInner() {
// path.
// TODO(leszeks): This would be most efficient as a goto to the slow
// path, check codegen and maybe use a bool instead.
scan_flags |= static_cast<uint8_t>(ScanFlags::kNeedsSlowPath);
scan_flags |=
static_cast<uint8_t>(ScanFlags::kIdentifierNeedsSlowPath);
return true;
}
uint8_t char_flags = character_scan_flags[c0];
......@@ -278,7 +292,7 @@ V8_INLINE Token::Value Scanner::ScanIdentifierOrKeywordInner() {
}
});
if (V8_LIKELY(!NeedsSlowPath(scan_flags))) {
if (V8_LIKELY(!IdentifierNeedsSlowPath(scan_flags))) {
if (!CanBeKeyword(scan_flags)) return Token::IDENTIFIER;
// Could be a keyword or identifier.
Vector<const uint8_t> chars = next().literal_chars.one_byte_literal();
......
......@@ -339,27 +339,46 @@ void Scanner::TryToParseSourceURLComment() {
// Skips over a multi-line comment. On entry c0_ must be '*' (presumably the
// '*' of the opening "/*" -- confirm against the caller). Returns
// Token::WHITESPACE once a '*' followed by '/' has been consumed, or
// Token::ILLEGAL if kEndOfInput is reached first (unterminated comment).
// Sets next().after_line_terminator when a line terminator is seen inside the
// comment.
// NOTE(review): this listing comes from a diff rendering that appears to
// interleave removed and added lines (e.g. the two consecutive
// `while (... c0_ == '*' ...)` headers near the end), so the braces do not
// balance as shown. Consult the real scanner source before relying on the
// exact control flow.
Token::Value Scanner::SkipMultiLineComment() {
DCHECK_EQ(c0_, '*');
Advance();
// Until we see the first newline, check for * and newline characters.
if (!next().after_line_terminator) {
do {
AdvanceUntil([](uc32 c0) {
if (V8_UNLIKELY(static_cast<uint32_t>(c0) > kMaxAscii)) {
return unibrow::IsLineTerminator(c0);
}
uint8_t char_flags = character_scan_flags[c0];
return MultilineCommentCharacterNeedsSlowPath(char_flags);
});
while (c0_ == '*') {
Advance();
if (c0_ == '/') {
Advance();
return Token::WHITESPACE;
}
}
if (unibrow::IsLineTerminator(c0_)) {
next().after_line_terminator = true;
break;
}
} while (c0_ != kEndOfInput);
}
// After we've seen newline, simply try to find '*/'.
while (c0_ != kEndOfInput) {
DCHECK(!unibrow::IsLineTerminator(kEndOfInput));
if (!HasLineTerminatorBeforeNext() && unibrow::IsLineTerminator(c0_)) {
// Following ECMA-262, section 7.4, a comment containing
// a newline will make the comment count as a line-terminator.
next().after_line_terminator = true;
}
AdvanceUntil([](uc32 c0) { return c0 == '*'; });
while (V8_UNLIKELY(c0_ == '*')) {
while (c0_ == '*') {
Advance();
if (c0_ == '/') {
Advance();
return Token::WHITESPACE;
}
}
Advance();
}
// Unterminated multi-line comment.
return Token::ILLEGAL;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment