Commit 1756e1c9 authored by Toon Verwaest, committed by Commit Bot

[scanner] Micro-optimize string scanning

For small strings there's not much of a difference, but this allows faster
scanning over long strings by directly accessing the underlying buffer and by
typically needing only 2 branches per non-special character.

Change-Id: I9c2c513177d9bf5e3f10c14a07bf6ecc2872b2f0
Reviewed-on: https://chromium-review.googlesource.com/c/1329206
Commit-Queue: Toon Verwaest <verwaest@chromium.org>
Commit-Queue: Igor Sheludko <ishell@chromium.org>
Reviewed-by: Igor Sheludko <ishell@chromium.org>
Cr-Commit-Position: refs/heads/master@{#57402}
parent e6e301d5
......@@ -294,7 +294,8 @@ enum class ScanFlags : uint8_t {
// "Cannot" rather than "can" so that this flag can be ORed together across
// multiple characters.
kCannotBeKeyword = 1 << 1,
kNeedsSlowPath = 1 << 2,
kStringTerminator = 1 << 2,
kNeedsSlowPath = 1 << 3,
};
constexpr uint8_t GetScanFlags(char c) {
return
......@@ -312,6 +313,10 @@ constexpr uint8_t GetScanFlags(char c) {
(!IsAsciiIdentifier(c)
? static_cast<uint8_t>(ScanFlags::kTerminatesLiteral)
: 0) |
// Possible string termination characters.
((c == '\'' || c == '"' || c == '\n' || c == '\r' || c == '\\')
? static_cast<uint8_t>(ScanFlags::kStringTerminator)
: 0) |
// Escapes are processed on the slow path.
(c == '\\' ? static_cast<uint8_t>(ScanFlags::kNeedsSlowPath) : 0);
}
......@@ -324,6 +329,9 @@ inline bool CanBeKeyword(uint8_t scan_flags) {
// Reports whether the kNeedsSlowPath bit is set in |scan_flags|.
inline bool NeedsSlowPath(uint8_t scan_flags) {
  constexpr uint8_t kSlowPathBit =
      static_cast<uint8_t>(ScanFlags::kNeedsSlowPath);
  return (scan_flags & kSlowPathBit) != 0;
}
// Reports whether the kStringTerminator bit is set in |scan_flags|.
inline bool MayTerminateString(uint8_t scan_flags) {
  constexpr uint8_t kTerminatorBit =
      static_cast<uint8_t>(ScanFlags::kStringTerminator);
  return (scan_flags & kTerminatorBit) != 0;
}
// Table of precomputed scan flags for the 128 ASCII characters, for branchless
// flag calculation during the scan.
static constexpr const uint8_t character_scan_flags[128] = {
......
......@@ -548,22 +548,43 @@ Token::Value Scanner::ScanString() {
LiteralScope literal(this);
while (true) {
if ((V8_UNLIKELY(static_cast<uint32_t>(c0_) >= kMaxAscii) &&
!unibrow::IsStringLiteralLineTerminator(c0_)) ||
!MayTerminateString(character_scan_flags[c0_])) {
AddLiteralChar(c0_);
AdvanceUntil([this](uc32 c0) {
if (V8_UNLIKELY(static_cast<uint32_t>(c0) > kMaxAscii)) {
if (V8_UNLIKELY(unibrow::IsStringLiteralLineTerminator(c0))) {
return true;
}
AddLiteralChar(c0);
return false;
}
uint8_t char_flags = character_scan_flags[c0];
if (MayTerminateString(char_flags)) return true;
AddLiteralChar(c0);
return false;
});
}
if (c0_ == quote) {
literal.Complete();
Advance();
return Token::STRING;
}
if (c0_ == kEndOfInput || unibrow::IsStringLiteralLineTerminator(c0_)) {
return Token::ILLEGAL;
}
if (c0_ == '\\') {
Advance();
// TODO(verwaest): Check whether we can remove the additional check.
if (c0_ == kEndOfInput || !ScanEscape<false>()) {
if (V8_UNLIKELY(c0_ == kEndOfInput || !ScanEscape<false>())) {
return Token::ILLEGAL;
}
continue;
}
if (V8_UNLIKELY(c0_ == kEndOfInput ||
unibrow::IsStringLiteralLineTerminator(c0_))) {
return Token::ILLEGAL;
}
DCHECK_NE(quote, c0_);
DCHECK((c0_ == '\'' || c0_ == '"'));
AddLiteralCharAdvance();
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment