Commit 06587445, authored by Toon Verwaest, committed by Commit Bot

[scanner] Optimize lifetime management of literal buffers

Change-Id: I084b57a3a32c9ecb01bb17c9339dfeada436ea66
Reviewed-on: https://chromium-review.googlesource.com/c/1335449
Commit-Queue: Toon Verwaest <verwaest@chromium.org>
Reviewed-by: Leszek Swirski <leszeks@chromium.org>
Cr-Commit-Position: refs/heads/master@{#57493}
parent 680cdd0c
......@@ -262,8 +262,8 @@ V8_INLINE Token::Value KeywordOrIdentifierToken(const uint8_t* input,
}
V8_INLINE Token::Value Scanner::ScanIdentifierOrKeyword() {
LiteralScope literal(this);
return ScanIdentifierOrKeywordInner(&literal);
next().literal_chars.Start();
return ScanIdentifierOrKeywordInner();
}
// Character flags for the fast path of scanning a keyword or identifier token.
......@@ -318,8 +318,7 @@ static constexpr const uint8_t character_scan_flags[128] = {
#undef CALL_GET_SCAN_FLAGS
};
V8_INLINE Token::Value Scanner::ScanIdentifierOrKeywordInner(
LiteralScope* literal) {
V8_INLINE Token::Value Scanner::ScanIdentifierOrKeywordInner() {
DCHECK(unicode_cache_->IsIdentifierStart(c0_));
bool escaped = false;
......@@ -349,20 +348,10 @@ V8_INLINE Token::Value Scanner::ScanIdentifierOrKeywordInner(
});
if (V8_LIKELY(!NeedsSlowPath(scan_flags))) {
if (CanBeKeyword(scan_flags)) {
// Could be a keyword or identifier.
Vector<const uint8_t> chars = next().literal_chars.one_byte_literal();
Token::Value token =
KeywordOrIdentifierToken(chars.start(), chars.length());
if (token == Token::IDENTIFIER ||
token == Token::FUTURE_STRICT_RESERVED_WORD) {
literal->Complete();
}
return token;
} else {
literal->Complete();
return Token::IDENTIFIER;
}
if (!CanBeKeyword(scan_flags)) return Token::IDENTIFIER;
// Could be a keyword or identifier.
Vector<const uint8_t> chars = next().literal_chars.one_byte_literal();
return KeywordOrIdentifierToken(chars.start(), chars.length());
}
} else {
// Special case for escapes at the start of an identifier.
......@@ -376,7 +365,7 @@ V8_INLINE Token::Value Scanner::ScanIdentifierOrKeywordInner(
}
}
return ScanIdentifierOrKeywordInnerSlow(literal, escaped);
return ScanIdentifierOrKeywordInnerSlow(escaped);
}
V8_INLINE Token::Value Scanner::SkipWhiteSpace() {
......@@ -572,8 +561,6 @@ V8_INLINE Token::Value Scanner::ScanSingleToken() {
void Scanner::Scan(TokenDesc* next_desc) {
DCHECK_EQ(next_desc, &next());
next_desc->literal_chars.Drop();
next_desc->raw_literal_chars.Drop();
next_desc->invalid_template_escape_message = MessageTemplate::kNone;
next_desc->token = ScanSingleToken();
......
......@@ -58,7 +58,6 @@ class Scanner::ErrorState {
// Scanner::LiteralBuffer
Handle<String> Scanner::LiteralBuffer::Internalize(Isolate* isolate) const {
DCHECK(is_used());
if (is_one_byte()) {
return isolate->factory()->InternalizeOneByteString(one_byte_literal());
}
......@@ -99,7 +98,7 @@ void Scanner::LiteralBuffer::ConvertToTwoByte() {
backing_store_ = new_store;
}
position_ = new_content_size;
flags_ = IsOneByte::update(flags_, false);
is_one_byte_ = false;
}
void Scanner::LiteralBuffer::AddTwoByteChar(uc32 code_unit) {
......@@ -328,7 +327,6 @@ void Scanner::TryToParseSourceURLComment() {
}
if (c0_ != '=')
return;
value->Drop();
value->Start();
Advance();
while (unicode_cache_->IsWhiteSpace(c0_)) {
......@@ -337,7 +335,7 @@ void Scanner::TryToParseSourceURLComment() {
while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
// Disallowed characters.
if (c0_ == '"' || c0_ == '\'') {
value->Drop();
value->Start();
return;
}
if (unicode_cache_->IsWhiteSpace(c0_)) {
......@@ -349,7 +347,7 @@ void Scanner::TryToParseSourceURLComment() {
// Allow whitespace at the end.
while (c0_ != kEndOfInput && !unibrow::IsLineTerminator(c0_)) {
if (!unicode_cache_->IsWhiteSpace(c0_)) {
value->Drop();
value->Start();
break;
}
Advance();
......@@ -398,42 +396,17 @@ Token::Value Scanner::ScanHtmlComment() {
#ifdef DEBUG
void Scanner::SanityCheckTokenDesc(const TokenDesc& token) const {
// Most tokens should not have literal_chars or even raw_literal chars.
// The rules are:
// - UNINITIALIZED: we don't care.
// - TEMPLATE_*: need both literal + raw literal chars.
// - IDENTIFIERS, STRINGS, etc.: need a literal, but no raw literal.
// - all others: should have neither.
// Furthermore, only TEMPLATE_* tokens can have a
// invalid_template_escape_message.
// Only TEMPLATE_* tokens can have an invalid_template_escape_message.
// ILLEGAL and UNINITIALIZED can have garbage for the field.
switch (token.token) {
case Token::UNINITIALIZED:
case Token::ILLEGAL:
// token.literal_chars & other members might be garbage. That's ok.
break;
case Token::TEMPLATE_SPAN:
case Token::TEMPLATE_TAIL:
DCHECK(token.raw_literal_chars.is_used());
DCHECK(token.literal_chars.is_used());
break;
case Token::ESCAPED_KEYWORD:
case Token::ESCAPED_STRICT_RESERVED_WORD:
case Token::FUTURE_STRICT_RESERVED_WORD:
case Token::IDENTIFIER:
case Token::NUMBER:
case Token::BIGINT:
case Token::REGEXP_LITERAL:
case Token::SMI:
case Token::STRING:
case Token::PRIVATE_NAME:
DCHECK(token.literal_chars.is_used());
DCHECK(!token.raw_literal_chars.is_used());
DCHECK_EQ(token.invalid_template_escape_message, MessageTemplate::kNone);
break;
default:
DCHECK(!token.literal_chars.is_used());
DCHECK(!token.raw_literal_chars.is_used());
DCHECK_EQ(token.invalid_template_escape_message, MessageTemplate::kNone);
break;
}
......@@ -540,7 +513,7 @@ Token::Value Scanner::ScanString() {
uc32 quote = c0_;
Advance(); // consume quote
LiteralScope literal(this);
next().literal_chars.Start();
while (true) {
if (V8_UNLIKELY(c0_ == kEndOfInput)) return Token::ILLEGAL;
if ((V8_UNLIKELY(static_cast<uint32_t>(c0_) >= kMaxAscii) &&
......@@ -562,7 +535,6 @@ Token::Value Scanner::ScanString() {
});
}
if (c0_ == quote) {
literal.Complete();
Advance();
return Token::STRING;
}
......@@ -591,7 +563,7 @@ Token::Value Scanner::ScanPrivateName() {
return Token::ILLEGAL;
}
LiteralScope literal(this);
next().literal_chars.Start();
DCHECK_EQ(c0_, '#');
DCHECK(!unicode_cache_->IsIdentifierStart(kEndOfInput));
if (!unicode_cache_->IsIdentifierStart(Peek())) {
......@@ -601,7 +573,7 @@ Token::Value Scanner::ScanPrivateName() {
}
AddLiteralCharAdvance();
Token::Value token = ScanIdentifierOrKeywordInner(&literal);
Token::Value token = ScanIdentifierOrKeywordInner();
return token == Token::ILLEGAL ? Token::ILLEGAL : Token::PRIVATE_NAME;
}
......@@ -626,7 +598,7 @@ Token::Value Scanner::ScanTemplateSpan() {
ErrorState octal_error_state(&octal_message_, &octal_pos_);
Token::Value result = Token::TEMPLATE_SPAN;
LiteralScope literal(this);
next().literal_chars.Start();
next().raw_literal_chars.Start();
const bool capture_raw = true;
while (true) {
......@@ -679,7 +651,6 @@ Token::Value Scanner::ScanTemplateSpan() {
AddLiteralChar(c);
}
}
literal.Complete();
next().location.end_pos = source_pos();
next().token = result;
......@@ -689,7 +660,6 @@ Token::Value Scanner::ScanTemplateSpan() {
Handle<String> Scanner::SourceUrl(Isolate* isolate) const {
Handle<String> tmp;
if (source_url_.length() > 0) {
DCHECK(source_url_.is_used());
tmp = source_url_.Internalize(isolate);
}
return tmp;
......@@ -698,7 +668,6 @@ Handle<String> Scanner::SourceUrl(Isolate* isolate) const {
Handle<String> Scanner::SourceMappingUrl(Isolate* isolate) const {
Handle<String> tmp;
if (source_mapping_url_.length() > 0) {
DCHECK(source_mapping_url_.is_used());
tmp = source_mapping_url_.Internalize(isolate);
}
return tmp;
......@@ -867,7 +836,7 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
NumberKind kind = DECIMAL;
LiteralScope literal(this);
next().literal_chars.Start();
bool at_start = !seen_period;
int start_pos = source_pos(); // For reporting octal positions.
if (seen_period) {
......@@ -928,7 +897,6 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
value <= Smi::kMaxValue && c0_ != '.' &&
!unicode_cache_->IsIdentifierStart(c0_)) {
next().smi_value_ = static_cast<uint32_t>(value);
literal.Complete();
if (kind == DECIMAL_WITH_LEADING_ZERO) {
octal_pos_ = Location(start_pos, source_pos());
......@@ -987,8 +955,6 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
return Token::ILLEGAL;
}
literal.Complete();
if (kind == DECIMAL_WITH_LEADING_ZERO) {
octal_pos_ = Location(start_pos, source_pos());
octal_message_ = MessageTemplate::kStrictDecimalWithLeadingZero;
......@@ -1024,8 +990,7 @@ uc32 Scanner::ScanUnicodeEscape() {
return ScanHexNumber<capture_raw, unicode>(4);
}
Token::Value Scanner::ScanIdentifierOrKeywordInnerSlow(LiteralScope* literal,
bool escaped) {
Token::Value Scanner::ScanIdentifierOrKeywordInnerSlow(bool escaped) {
while (true) {
if (c0_ == '\\') {
escaped = true;
......@@ -1053,25 +1018,19 @@ Token::Value Scanner::ScanIdentifierOrKeywordInnerSlow(LiteralScope* literal,
KeywordOrIdentifierToken(chars.start(), chars.length());
/* TODO(adamk): YIELD should be handled specially. */
if (token == Token::FUTURE_STRICT_RESERVED_WORD) {
literal->Complete();
if (escaped) return Token::ESCAPED_STRICT_RESERVED_WORD;
return token;
}
if (token == Token::IDENTIFIER) {
literal->Complete();
return token;
}
if (token == Token::IDENTIFIER) return token;
if (!escaped) return token;
literal->Complete();
if (token == Token::LET || token == Token::STATIC) {
return Token::ESCAPED_STRICT_RESERVED_WORD;
}
return Token::ESCAPED_KEYWORD;
}
literal->Complete();
return Token::IDENTIFIER;
}
......@@ -1085,7 +1044,7 @@ bool Scanner::ScanRegExpPattern() {
// Scan regular expression body: According to ECMA-262, 3rd, 7.8.5,
// the scanner should pass uninterpreted bodies to the RegExp
// constructor.
LiteralScope literal(this);
next().literal_chars.Start();
if (next().token == Token::ASSIGN_DIV) {
AddLiteralChar('=');
}
......@@ -1118,7 +1077,6 @@ bool Scanner::ScanRegExpPattern() {
}
Advance(); // consume '/'
literal.Complete();
next().token = Token::REGEXP_LITERAL;
return true;
}
......
......@@ -416,21 +416,16 @@ class Scanner {
// LiteralBuffer - Collector of chars of literals.
class LiteralBuffer {
public:
LiteralBuffer()
: backing_store_(),
position_(0),
flags_(IsOneByte::encode(true) | IsUsedByte::encode(false)) {}
LiteralBuffer() : backing_store_(), position_(0), is_one_byte_(true) {}
~LiteralBuffer() { backing_store_.Dispose(); }
V8_INLINE void AddChar(char code_unit) {
DCHECK(is_used());
DCHECK(IsValidAscii(code_unit));
AddOneByteChar(static_cast<byte>(code_unit));
}
V8_INLINE void AddChar(uc32 code_unit) {
DCHECK(is_used());
if (is_one_byte()) {
if (code_unit <= static_cast<uc32>(unibrow::Latin1::kMaxChar)) {
AddOneByteChar(static_cast<byte>(code_unit));
......@@ -441,17 +436,15 @@ class Scanner {
AddTwoByteChar(code_unit);
}
bool is_one_byte() const { return IsOneByte::decode(flags_); }
bool is_one_byte() const { return is_one_byte_; }
bool Equals(Vector<const char> keyword) const {
DCHECK(is_used());
return is_one_byte() && keyword.length() == position_ &&
(memcmp(keyword.start(), backing_store_.start(), position_) == 0);
}
Vector<const uint16_t> two_byte_literal() const {
DCHECK(!is_one_byte());
DCHECK(is_used());
DCHECK_EQ(position_ & 0x1, 0);
return Vector<const uint16_t>(
reinterpret_cast<const uint16_t*>(backing_store_.start()),
......@@ -460,7 +453,6 @@ class Scanner {
Vector<const uint8_t> one_byte_literal() const {
DCHECK(is_one_byte());
DCHECK(is_used());
return Vector<const uint8_t>(
reinterpret_cast<const uint8_t*>(backing_store_.start()), position_);
}
......@@ -468,16 +460,8 @@ class Scanner {
int length() const { return is_one_byte() ? position_ : (position_ >> 1); }
void Start() {
DCHECK(!is_used());
DCHECK_EQ(0, position_);
flags_ = IsUsedByte::update(flags_, true);
}
bool is_used() const { return IsUsedByte::decode(flags_); }
void Drop() {
position_ = 0;
flags_ = IsOneByte::encode(true) | IsUsedByte::encode(false);
is_one_byte_ = true;
}
Handle<String> Internalize(Isolate* isolate) const;
......@@ -510,34 +494,12 @@ class Scanner {
Vector<byte> backing_store_;
int position_;
uint8_t flags_;
// Flags
typedef BitField8<bool, 0, 1> IsOneByte;
typedef BitField8<bool, 1, 2> IsUsedByte;
bool is_one_byte_;
DISALLOW_COPY_AND_ASSIGN(LiteralBuffer);
};
// Scoped helper for literal recording. Automatically drops the literal
// if aborting the scanning before it's complete.
class LiteralScope {
public:
// Starts recording into scanner->next().literal_chars. The bool payload
// stored alongside the buffer pointer starts out false ("not complete").
explicit LiteralScope(Scanner* scanner)
: buffer_and_complete_(&scanner->next().literal_chars, false) {
buffer()->Start();
}
// Drops the buffer's contents unless Complete() set the payload to true.
~LiteralScope() {
if (!buffer_and_complete_.GetPayload()) buffer()->Drop();
}
// Marks the literal as complete so the destructor leaves the buffer alone.
void Complete() { buffer_and_complete_.SetPayload(true); }
private:
// The tracked buffer, i.e. the pointer part of buffer_and_complete_.
LiteralBuffer* buffer() const { return buffer_and_complete_.GetPointer(); }
// Buffer pointer plus a bool payload recording whether Complete() was called.
PointerWithPayload<LiteralBuffer, bool, 1> buffer_and_complete_;
};
// The current and look-ahead token.
struct TokenDesc {
Location location = {0, 0};
......@@ -548,6 +510,21 @@ class Scanner {
Location invalid_template_escape_location;
uint32_t smi_value_ = 0;
bool after_line_terminator = false;
#ifdef DEBUG
// Debug-only predicate (compiled under #ifdef DEBUG): true when `token` is
// PRIVATE_NAME, ILLEGAL, UNINITIALIZED, REGEXP_LITERAL, ESCAPED_KEYWORD,
// within NUMBER..STRING or TEMPLATE_SPAN..TEMPLATE_TAIL as tested by
// IsInRange, or an identifier token that is not a keyword.
// The literal accessors in this file DCHECK it before reading literal_chars.
bool CanAccessLiteral() const {
return token == Token::PRIVATE_NAME || token == Token::ILLEGAL ||
token == Token::UNINITIALIZED || token == Token::REGEXP_LITERAL ||
token == Token::ESCAPED_KEYWORD ||
IsInRange(token, Token::NUMBER, Token::STRING) ||
(Token::IsAnyIdentifier(token) && !Token::IsKeyword(token)) ||
IsInRange(token, Token::TEMPLATE_SPAN, Token::TEMPLATE_TAIL);
}
// Debug-only predicate (compiled under #ifdef DEBUG): true when `token` is
// ILLEGAL, UNINITIALIZED, or within TEMPLATE_SPAN..TEMPLATE_TAIL as tested by
// IsInRange. The raw-literal accessors in this file DCHECK it before reading
// raw_literal_chars.
bool CanAccessRawLiteral() const {
return token == Token::ILLEGAL || token == Token::UNINITIALIZED ||
IsInRange(token, Token::TEMPLATE_SPAN, Token::TEMPLATE_TAIL);
}
#endif // DEBUG
};
enum NumberKind {
......@@ -672,45 +649,41 @@ class Scanner {
// token as a one-byte literal. E.g. Token::FUNCTION pretends to have a
// literal "function".
Vector<const uint8_t> literal_one_byte_string() const {
if (current().literal_chars.is_used())
return current().literal_chars.one_byte_literal();
const char* str = Token::String(current().token);
const uint8_t* str_as_uint8 = reinterpret_cast<const uint8_t*>(str);
return Vector<const uint8_t>(str_as_uint8,
Token::StringLength(current().token));
DCHECK(current().CanAccessLiteral() || Token::IsKeyword(current().token));
return current().literal_chars.one_byte_literal();
}
Vector<const uint16_t> literal_two_byte_string() const {
DCHECK(current().literal_chars.is_used());
DCHECK(current().CanAccessLiteral() || Token::IsKeyword(current().token));
return current().literal_chars.two_byte_literal();
}
bool is_literal_one_byte() const {
return !current().literal_chars.is_used() ||
current().literal_chars.is_one_byte();
DCHECK(current().CanAccessLiteral() || Token::IsKeyword(current().token));
return current().literal_chars.is_one_byte();
}
// Returns the literal string for the next token (the token that
// would be returned if Next() were called).
Vector<const uint8_t> next_literal_one_byte_string() const {
DCHECK(next().literal_chars.is_used());
DCHECK(next().CanAccessLiteral());
return next().literal_chars.one_byte_literal();
}
Vector<const uint16_t> next_literal_two_byte_string() const {
DCHECK(next().literal_chars.is_used());
DCHECK(next().CanAccessLiteral());
return next().literal_chars.two_byte_literal();
}
bool is_next_literal_one_byte() const {
DCHECK(next().literal_chars.is_used());
DCHECK(next().CanAccessLiteral());
return next().literal_chars.is_one_byte();
}
Vector<const uint8_t> raw_literal_one_byte_string() const {
DCHECK(current().raw_literal_chars.is_used());
DCHECK(current().CanAccessRawLiteral());
return current().raw_literal_chars.one_byte_literal();
}
Vector<const uint16_t> raw_literal_two_byte_string() const {
DCHECK(current().raw_literal_chars.is_used());
DCHECK(current().CanAccessRawLiteral());
return current().raw_literal_chars.two_byte_literal();
}
bool is_raw_literal_one_byte() const {
DCHECK(current().raw_literal_chars.is_used());
DCHECK(current().CanAccessRawLiteral());
return current().raw_literal_chars.is_one_byte();
}
......@@ -754,9 +727,8 @@ class Scanner {
Token::Value ScanNumber(bool seen_period);
V8_INLINE Token::Value ScanIdentifierOrKeyword();
V8_INLINE Token::Value ScanIdentifierOrKeywordInner(LiteralScope* literal);
Token::Value ScanIdentifierOrKeywordInnerSlow(LiteralScope* literal,
bool escaped);
V8_INLINE Token::Value ScanIdentifierOrKeywordInner();
Token::Value ScanIdentifierOrKeywordInnerSlow(bool escaped);
Token::Value ScanString();
Token::Value ScanPrivateName();
......@@ -788,8 +760,7 @@ class Scanner {
// Subtract delimiters.
source_length -= 2;
}
return token.literal_chars.is_used() &&
(token.literal_chars.length() != source_length);
return token.literal_chars.length() != source_length;
}
#ifdef DEBUG
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment