Commit 02b6178c authored by Toon Verwaest, committed by Commit Bot

[scanner] Push surrogate pair handling down into identifier scanning

Most scanner logic doesn't need to care about surrogate pairs, so we can just
push it down to identifier scanning.

This CL additionally drops some explicit kEndOfInput checks that are subsumed
by predicates seamlessly returning false for kEndOfInput (-1).

Change-Id: If031a9355ab5fbca0c3b647045e3034f42923979
Reviewed-on: https://chromium-review.googlesource.com/1124447
Reviewed-by: Marja Hölttä <marja@chromium.org>
Commit-Queue: Toon Verwaest <verwaest@chromium.org>
Cr-Commit-Position: refs/heads/master@{#54172}
parent 9bbb0cd2
This diff is collapsed.
......@@ -608,26 +608,26 @@ class Scanner {
}
// Low-level scanning support.
template <bool capture_raw = false, bool check_surrogate = true>
template <bool capture_raw = false>
void Advance() {
if (capture_raw) {
AddRawLiteralChar(c0_);
}
c0_ = source_->Advance();
if (check_surrogate) HandleLeadSurrogate();
}
void HandleLeadSurrogate() {
bool CombineSurrogatePair() {
DCHECK(!unibrow::Utf16::IsLeadSurrogate(kEndOfInput));
if (unibrow::Utf16::IsLeadSurrogate(c0_)) {
uc32 c1 = source_->Advance();
DCHECK(!unibrow::Utf16::IsTrailSurrogate(kEndOfInput));
if (!unibrow::Utf16::IsTrailSurrogate(c1)) {
source_->Back();
} else {
if (unibrow::Utf16::IsTrailSurrogate(c1)) {
c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1);
return true;
}
source_->Back();
}
return false;
}
void PushBack(uc32 ch) {
......@@ -752,7 +752,6 @@ class Scanner {
Token::Value ScanNumber(bool seen_period);
Token::Value ScanIdentifierOrKeyword();
Token::Value ScanIdentifierOrKeywordInner(LiteralScope* literal);
Token::Value ScanIdentifierSuffix(LiteralScope* literal, bool escaped);
Token::Value ScanString();
Token::Value ScanPrivateName();
......
......@@ -39,8 +39,12 @@ class Predicate {
inline CacheEntry()
: bit_field_(CodePointField::encode(0) | ValueField::encode(0)) {}
inline CacheEntry(uchar code_point, bool value)
: bit_field_(CodePointField::encode(code_point) |
ValueField::encode(value)) {}
: bit_field_(
CodePointField::encode(CodePointField::kMask & code_point) |
ValueField::encode(value)) {
DCHECK_IMPLIES((CodePointField::kMask & code_point) != code_point,
code_point == static_cast<uchar>(-1));
}
uchar code_point() const { return CodePointField::decode(bit_field_); }
bool value() const { return ValueField::decode(bit_field_); }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment