Commit ae1e8759 authored by Daniel Vogelheim, committed by Commit Bot

[parser] Introduce 'contextual keyword tokens'.

Introduce 'contextual keyword' tokens, which are parsed as identifiers but
in some contexts are treated by the parser like proper keywords. These are
usually keywords introduced by recent ECMAScript versions, which for reasons
of backwards compatibility are still permissible as regular identifiers in
most contexts.

The current usage is to check for Token::IDENTIFIER and then do a string
comparison. With this change the scanner still scans these words as usual,
but records the token as IDENTIFIER plus a secondary token carrying the
'contextual' value.
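
For illustration only (this snippet is not part of the patch; the names below
are a toy model, not the V8 API), the mechanism boils down to scanning a word
as an identifier while also recording a secondary 'contextual' token value, so
that later checks compare enum values instead of strings:

    // Toy model of a scanner that records a secondary 'contextual' token.
    #include <iostream>
    #include <string>
    #include <unordered_map>

    enum class Tok { IDENTIFIER, OF, AS, FROM, UNINITIALIZED };

    struct Scanned {
      Tok token;             // primary token: still IDENTIFIER
      Tok contextual_token;  // secondary token: OF/AS/FROM, or UNINITIALIZED
      std::string literal;
    };

    Scanned ScanIdentifier(const std::string& word) {
      static const std::unordered_map<std::string, Tok> kContextual = {
          {"of", Tok::OF}, {"as", Tok::AS}, {"from", Tok::FROM}};
      auto it = kContextual.find(word);
      Tok contextual =
          (it == kContextual.end()) ? Tok::UNINITIALIZED : it->second;
      return {Tok::IDENTIFIER, contextual, word};
    }

    // Parser-side check: compare token values, no string comparison needed.
    bool IsContextual(const Scanned& s, Tok expected) {
      return s.token == Tok::IDENTIFIER && s.contextual_token == expected;
    }

    int main() {
      std::cout << IsContextual(ScanIdentifier("of"), Tok::OF) << "\n";   // 1
      std::cout << IsContextual(ScanIdentifier("bla"), Tok::OF) << "\n";  // 0
    }

In the actual patch this shows up at the parser's call sites as, e.g.,
CheckContextualKeyword(Token::OF) replacing
CheckContextualKeyword(CStrVector("of")).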

BUG=v8:6902

Change-Id: I6ae390382998cf756a23720bd481cb9c0eb78a72
Reviewed-on: https://chromium-review.googlesource.com/459479
Commit-Queue: Daniel Vogelheim <vogelheim@chromium.org>
Reviewed-by: Marja Hölttä <marja@chromium.org>
Cr-Commit-Position: refs/heads/master@{#44189}
parent 1826f09c
@@ -742,25 +742,27 @@ class ParserBase {
   }
   bool peek_any_identifier() { return is_any_identifier(peek()); }
-  bool CheckContextualKeyword(Vector<const char> keyword) {
-    if (PeekContextualKeyword(keyword)) {
+  bool CheckContextualKeyword(Token::Value token) {
+    if (PeekContextualKeyword(token)) {
       Consume(Token::IDENTIFIER);
       return true;
     }
     return false;
   }
-  bool PeekContextualKeyword(Vector<const char> keyword) {
+  bool PeekContextualKeyword(Token::Value token) {
+    DCHECK(Token::IsContextualKeyword(token));
     return peek() == Token::IDENTIFIER &&
-           scanner()->is_next_contextual_keyword(keyword);
+           scanner()->next_contextual_token() == token;
   }
-  void ExpectMetaProperty(Vector<const char> property_name,
-                          const char* full_name, int pos, bool* ok);
+  void ExpectMetaProperty(Token::Value property_name, const char* full_name,
+                          int pos, bool* ok);
-  void ExpectContextualKeyword(Vector<const char> keyword, bool* ok) {
+  void ExpectContextualKeyword(Token::Value token, bool* ok) {
+    DCHECK(Token::IsContextualKeyword(token));
     Expect(Token::IDENTIFIER, CHECK_OK_CUSTOM(Void));
-    if (!scanner()->is_literal_contextual_keyword(keyword)) {
+    if (scanner()->current_contextual_token() != token) {
       ReportUnexpectedToken(scanner()->current_token());
       *ok = false;
     }
@@ -770,7 +772,7 @@ class ParserBase {
     if (Check(Token::IN)) {
       *visit_mode = ForEachStatement::ENUMERATE;
       return true;
-    } else if (CheckContextualKeyword(CStrVector("of"))) {
+    } else if (CheckContextualKeyword(Token::OF)) {
       *visit_mode = ForEachStatement::ITERATE;
       return true;
     }
@@ -778,7 +780,7 @@ class ParserBase {
   }
   bool PeekInOrOf() {
-    return peek() == Token::IN || PeekContextualKeyword(CStrVector("of"));
+    return peek() == Token::IN || PeekContextualKeyword(Token::OF);
   }
   // Checks whether an octal literal was last seen between beg_pos and end_pos.
@@ -1365,7 +1367,10 @@ class ParserBase {
    void CheckDuplicateProto(Token::Value property);
   private:
-   bool IsProto() { return this->scanner()->LiteralMatches("__proto__", 9); }
+   bool IsProto() const {
+     return this->scanner()->CurrentMatchesContextualEscaped(
+         Token::PROTO_UNDERSCORED);
+   }
    ParserBase* parser() const { return parser_; }
    Scanner* scanner() const { return parser_->scanner(); }
@@ -1386,10 +1391,11 @@ class ParserBase {
   private:
    bool IsConstructor() {
-     return this->scanner()->LiteralMatches("constructor", 11);
+     return this->scanner()->CurrentMatchesContextualEscaped(
+         Token::CONSTRUCTOR);
    }
    bool IsPrototype() {
-     return this->scanner()->LiteralMatches("prototype", 9);
+     return this->scanner()->CurrentMatchesContextualEscaped(Token::PROTOTYPE);
    }
    ParserBase* parser() const { return parser_; }
@@ -1653,9 +1659,7 @@ ParserBase<Impl>::ParseAndClassifyIdentifier(bool* ok) {
     *ok = false;
     return impl()->EmptyIdentifier();
   }
-  if (next == Token::LET ||
-      (next == Token::ESCAPED_STRICT_RESERVED_WORD &&
-       scanner()->is_literal_contextual_keyword(CStrVector("let")))) {
+  if (scanner()->IsLet()) {
     classifier()->RecordLetPatternError(
        scanner()->location(), MessageTemplate::kLetInLexicalBinding);
   }
@@ -3369,7 +3373,7 @@ typename ParserBase<Impl>::ExpressionT ParserBase<Impl>::ParseMemberExpression(
   if (allow_harmony_function_sent() && peek() == Token::PERIOD) {
     // function.sent
     int pos = position();
-    ExpectMetaProperty(CStrVector("sent"), "function.sent", pos, CHECK_OK);
+    ExpectMetaProperty(Token::SENT, "function.sent", pos, CHECK_OK);
     if (!is_generator()) {
       // TODO(neis): allow escaping into closures?
@@ -3394,7 +3398,7 @@ typename ParserBase<Impl>::ExpressionT ParserBase<Impl>::ParseMemberExpression(
       // We don't want dynamic functions to actually declare their name
       // "anonymous". We just want that name in the toString().
       Consume(Token::IDENTIFIER);
-      DCHECK(scanner()->UnescapedLiteralMatches("anonymous", 9));
+      DCHECK(scanner()->CurrentMatchesContextual(Token::ANONYMOUS));
     } else if (peek_any_identifier()) {
       name = ParseIdentifierOrStrictReservedWord(
           function_kind, &is_strict_reserved_name, CHECK_OK);
@@ -3464,7 +3468,7 @@ typename ParserBase<Impl>::ExpressionT ParserBase<Impl>::ParseSuperExpression(
 }
 template <typename Impl>
-void ParserBase<Impl>::ExpectMetaProperty(Vector<const char> property_name,
+void ParserBase<Impl>::ExpectMetaProperty(Token::Value property_name,
                                           const char* full_name, int pos,
                                           bool* ok) {
   Consume(Token::PERIOD);
@@ -3481,7 +3485,7 @@ template <typename Impl>
 typename ParserBase<Impl>::ExpressionT
 ParserBase<Impl>::ParseNewTargetExpression(bool* ok) {
   int pos = position();
-  ExpectMetaProperty(CStrVector("target"), "new.target", pos, CHECK_OK);
+  ExpectMetaProperty(Token::TARGET, "new.target", pos, CHECK_OK);
   if (!GetReceiverScope()->is_function_scope()) {
     impl()->ReportMessageAt(scanner()->location(),
@@ -4419,7 +4423,7 @@ ParserBase<Impl>::ParseAsyncFunctionLiteral(bool* ok) {
     // We don't want dynamic functions to actually declare their name
     // "anonymous". We just want that name in the toString().
     Consume(Token::IDENTIFIER);
-    DCHECK(scanner()->UnescapedLiteralMatches("anonymous", 9));
+    DCHECK(scanner()->CurrentMatchesContextual(Token::ANONYMOUS));
   } else if (peek_any_identifier()) {
     type = FunctionLiteral::kNamedExpression;
     name = ParseIdentifierOrStrictReservedWord(kind, &is_strict_reserved,
@@ -5781,7 +5785,7 @@ typename ParserBase<Impl>::StatementT ParserBase<Impl>::ParseForAwaitStatement(
     }
   }
-  ExpectContextualKeyword(CStrVector("of"), CHECK_OK);
+  ExpectContextualKeyword(Token::OF, CHECK_OK);
   int each_keyword_pos = scanner()->location().beg_pos;
   const bool kAllowIn = true;
...
@@ -1051,7 +1051,7 @@ void Parser::ParseExportClause(ZoneList<const AstRawString*>* export_names,
     const AstRawString* local_name = ParseIdentifierName(CHECK_OK_VOID);
     const AstRawString* export_name = NULL;
     Scanner::Location location = scanner()->location();
-    if (CheckContextualKeyword(CStrVector("as"))) {
+    if (CheckContextualKeyword(Token::AS)) {
       export_name = ParseIdentifierName(CHECK_OK_VOID);
       // Set the location to the whole "a as b" string, so that it makes sense
       // both for errors due to "a" and for errors due to "b".
@@ -1096,7 +1096,7 @@ ZoneList<const Parser::NamedImport*>* Parser::ParseNamedImports(
     // In the presence of 'as', the left-side of the 'as' can
     // be any IdentifierName. But without 'as', it must be a valid
     // BindingIdentifier.
-    if (CheckContextualKeyword(CStrVector("as"))) {
+    if (CheckContextualKeyword(Token::AS)) {
       local_name = ParseIdentifierName(CHECK_OK);
     }
     if (!Token::IsIdentifier(scanner()->current_token(), STRICT, false,
@@ -1173,7 +1173,7 @@ void Parser::ParseImportDeclaration(bool* ok) {
   switch (peek()) {
     case Token::MUL: {
       Consume(Token::MUL);
-      ExpectContextualKeyword(CStrVector("as"), CHECK_OK_VOID);
+      ExpectContextualKeyword(Token::AS, CHECK_OK_VOID);
       module_namespace_binding =
           ParseIdentifier(kDontAllowRestrictedIdentifiers, CHECK_OK_VOID);
       module_namespace_binding_loc = scanner()->location();
@@ -1193,7 +1193,7 @@ void Parser::ParseImportDeclaration(bool* ok) {
     }
   }
-  ExpectContextualKeyword(CStrVector("from"), CHECK_OK_VOID);
+  ExpectContextualKeyword(Token::FROM, CHECK_OK_VOID);
   const AstRawString* module_specifier = ParseModuleSpecifier(CHECK_OK_VOID);
   ExpectSemicolon(CHECK_OK_VOID);
@@ -1316,7 +1316,7 @@ Statement* Parser::ParseExportDeclaration(bool* ok) {
     case Token::MUL: {
       Consume(Token::MUL);
       loc = scanner()->location();
-      ExpectContextualKeyword(CStrVector("from"), CHECK_OK);
+      ExpectContextualKeyword(Token::FROM, CHECK_OK);
       const AstRawString* module_specifier = ParseModuleSpecifier(CHECK_OK);
       ExpectSemicolon(CHECK_OK);
       module()->AddStarExport(module_specifier, loc, zone());
@@ -1342,7 +1342,7 @@ Statement* Parser::ParseExportDeclaration(bool* ok) {
       ParseExportClause(&export_names, &export_locations, &original_names,
                         &reserved_loc, CHECK_OK);
       const AstRawString* module_specifier = nullptr;
-      if (CheckContextualKeyword(CStrVector("from"))) {
+      if (CheckContextualKeyword(Token::FROM)) {
        module_specifier = ParseModuleSpecifier(CHECK_OK);
      } else if (reserved_loc.IsValid()) {
        // No FromClause, so reserved words are invalid in ExportClause.
...
@@ -41,6 +41,10 @@ namespace internal {
 namespace {
 PreParserIdentifier GetSymbolHelper(Scanner* scanner) {
+  // These symbols require slightly different treatment:
+  // - regular keywords (enum, await, etc.; handled by the 1st switch),
+  // - 'contextual' keywords that may contain escapes (2nd switch),
+  // - 'contextual' keywords that may not be escaped (3rd switch).
   switch (scanner->current_token()) {
     case Token::ENUM:
       return PreParserIdentifier::Enum();
@@ -57,20 +61,31 @@ PreParserIdentifier GetSymbolHelper(Scanner* scanner) {
     case Token::ASYNC:
       return PreParserIdentifier::Async();
     default:
-      if (scanner->UnescapedLiteralMatches("eval", 4))
-        return PreParserIdentifier::Eval();
-      if (scanner->UnescapedLiteralMatches("arguments", 9))
-        return PreParserIdentifier::Arguments();
-      if (scanner->UnescapedLiteralMatches("undefined", 9))
-        return PreParserIdentifier::Undefined();
-      if (scanner->LiteralMatches("prototype", 9))
-        return PreParserIdentifier::Prototype();
-      if (scanner->LiteralMatches("constructor", 11))
-        return PreParserIdentifier::Constructor();
-      if (scanner->LiteralMatches("name", 4))
-        return PreParserIdentifier::Name();
-      return PreParserIdentifier::Default();
+      break;
   }
+  switch (scanner->current_contextual_token()) {
+    case Token::PROTOTYPE:
+      return PreParserIdentifier::Prototype();
+    case Token::CONSTRUCTOR:
+      return PreParserIdentifier::Constructor();
+    case Token::NAME:
+      return PreParserIdentifier::Name();
+    default:
+      break;
+  }
+  if (scanner->literal_contains_escapes())
+    return PreParserIdentifier::Default();
+  switch (scanner->current_contextual_token()) {
+    case Token::EVAL:
+      return PreParserIdentifier::Eval();
+    case Token::ARGUMENTS:
+      return PreParserIdentifier::Arguments();
+    case Token::UNDEFINED:
+      return PreParserIdentifier::Undefined();
+    default:
+      break;
+  }
+  return PreParserIdentifier::Default();
 }
 }  // unnamed namespace
...
@@ -1531,7 +1531,7 @@ class PreParser : public ParserBase<PreParser> {
                                InferName infer = InferName::kYes);
   V8_INLINE PreParserExpression ExpressionFromString(int pos) {
-    if (scanner()->UnescapedLiteralMatches("use strict", 10)) {
+    if (scanner()->IsUseStrict()) {
       return PreParserExpression::UseStrictStringLiteral();
     }
     return PreParserExpression::StringLiteral();
...
@@ -205,6 +205,10 @@ class Scanner {
   Token::Value PeekAhead();
   // Returns the current token again.
   Token::Value current_token() { return current_.token; }
+  Token::Value current_contextual_token() { return current_.contextual_token; }
+  Token::Value next_contextual_token() { return next_.contextual_token; }
   // Returns the location information for the current token
   // (the token last returned by Next()).
   Location location() const { return current_.location; }
@@ -236,16 +240,6 @@ class Scanner {
   bool literal_contains_escapes() const {
     return LiteralContainsEscapes(current_);
   }
-  bool is_literal_contextual_keyword(Vector<const char> keyword) {
-    DCHECK(current_.token == Token::IDENTIFIER ||
-           current_.token == Token::ESCAPED_STRICT_RESERVED_WORD);
-    DCHECK_NOT_NULL(current_.literal_chars);
-    return current_.literal_chars->is_contextual_keyword(keyword);
-  }
-  bool is_next_contextual_keyword(Vector<const char> keyword) {
-    DCHECK_NOT_NULL(next_.literal_chars);
-    return next_.literal_chars->is_contextual_keyword(keyword);
-  }
   const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory) const;
   const AstRawString* NextSymbol(AstValueFactory* ast_value_factory) const;
@@ -254,32 +248,44 @@ class Scanner {
   double DoubleValue();
   bool ContainsDot();
-  bool LiteralMatches(const char* data, int length, bool allow_escapes = true) {
-    if (!current_.literal_chars) {
-      return !strncmp(Token::Name(current_.token), data, length);
-    } else if (is_literal_one_byte() && literal_length() == length &&
-               (allow_escapes || !literal_contains_escapes())) {
-      const char* token =
-          reinterpret_cast<const char*>(literal_one_byte_string().start());
-      return !strncmp(token, data, length);
-    }
-    return false;
+  inline bool CurrentMatches(Token::Value token) const {
+    DCHECK(Token::IsKeyword(token));
+    return current_.token == token;
   }
-  inline bool UnescapedLiteralMatches(const char* data, int length) {
-    return LiteralMatches(data, length, false);
+  inline bool CurrentMatchesContextual(Token::Value token) const {
+    DCHECK(Token::IsContextualKeyword(token));
+    return current_.contextual_token == token;
   }
-  bool IsGetOrSet(bool* is_get, bool* is_set) {
-    if (is_literal_one_byte() &&
-        literal_length() == 3 &&
-        !literal_contains_escapes()) {
-      const char* token =
-          reinterpret_cast<const char*>(literal_one_byte_string().start());
-      *is_get = strncmp(token, "get", 3) == 0;
-      *is_set = !*is_get && strncmp(token, "set", 3) == 0;
-      return *is_get || *is_set;
-    }
-    return false;
+  // Match the token against the contextual keyword or literal buffer.
+  inline bool CurrentMatchesContextualEscaped(Token::Value token) const {
+    DCHECK(Token::IsContextualKeyword(token) || token == Token::LET);
+    // Escaped keywords are not matched as tokens. So if we require escape
+    // and/or string processing we need to look at the literal content
+    // (which was escape-processed already).
+    // Conveniently, current_.literal_chars == nullptr for all proper keywords,
+    // so this second condition should exit early in common cases.
+    return (current_.contextual_token == token) ||
+           (current_.literal_chars &&
+            current_.literal_chars->Equals(Vector<const char>(
+                Token::String(token), Token::StringLength(token))));
+  }
+  bool IsUseStrict() const {
+    return current_.token == Token::STRING &&
+           current_.literal_chars->Equals(
+               Vector<const char>("use strict", strlen("use strict")));
+  }
+  bool IsGetOrSet(bool* is_get, bool* is_set) const {
+    *is_get = CurrentMatchesContextual(Token::GET);
+    *is_set = CurrentMatchesContextual(Token::SET);
+    return *is_get || *is_set;
+  }
+  bool IsLet() const {
+    return CurrentMatches(Token::LET) ||
+           CurrentMatchesContextualEscaped(Token::LET);
   }
   // Check whether the CurrentSymbol() has already been seen.
@@ -381,7 +387,7 @@ class Scanner {
     bool is_one_byte() const { return is_one_byte_; }
-    bool is_contextual_keyword(Vector<const char> keyword) const {
+    bool Equals(Vector<const char> keyword) const {
       return is_one_byte() && keyword.length() == position_ &&
              (memcmp(keyword.start(), backing_store_.start(), position_) == 0);
     }
@@ -455,6 +461,7 @@ class Scanner {
     Token::Value token;
     MessageTemplate::Template invalid_template_escape_message;
     Location invalid_template_escape_location;
+    Token::Value contextual_token;
   };
   static const int kCharacterLookaheadBufferSize = 1;
@@ -471,14 +478,17 @@
     Advance();
     // Initialize current_ to not refer to a literal.
     current_.token = Token::UNINITIALIZED;
+    current_.contextual_token = Token::UNINITIALIZED;
     current_.literal_chars = NULL;
     current_.raw_literal_chars = NULL;
     current_.invalid_template_escape_message = MessageTemplate::kNone;
     next_.token = Token::UNINITIALIZED;
+    next_.contextual_token = Token::UNINITIALIZED;
     next_.literal_chars = NULL;
     next_.raw_literal_chars = NULL;
     next_.invalid_template_escape_message = MessageTemplate::kNone;
     next_next_.token = Token::UNINITIALIZED;
+    next_next_.contextual_token = Token::UNINITIALIZED;
     next_next_.literal_chars = NULL;
     next_next_.raw_literal_chars = NULL;
     next_next_.invalid_template_escape_message = MessageTemplate::kNone;
@@ -609,7 +619,6 @@ class Scanner {
       return else_;
     }
   }
-
   // Returns the literal string, if any, for the current token (the
   // token last returned by Next()). The string is 0-terminated.
   // Literal strings are collected for identifiers, strings, numbers as well
...
@@ -10,39 +10,33 @@ namespace v8 {
 namespace internal {
 #define T(name, string, precedence) #name,
-const char* const Token::name_[NUM_TOKENS] = {
-  TOKEN_LIST(T, T)
-};
+const char* const Token::name_[NUM_TOKENS] = {TOKEN_LIST(T, T, T)};
 #undef T
 #define T(name, string, precedence) string,
-const char* const Token::string_[NUM_TOKENS] = {
-  TOKEN_LIST(T, T)
-};
+const char* const Token::string_[NUM_TOKENS] = {TOKEN_LIST(T, T, T)};
 #undef T
 constexpr uint8_t length(const char* str) {
   return str ? static_cast<uint8_t>(strlen(str)) : 0;
 }
 #define T(name, string, precedence) length(string),
-const uint8_t Token::string_length_[NUM_TOKENS] = {TOKEN_LIST(T, T)};
+const uint8_t Token::string_length_[NUM_TOKENS] = {TOKEN_LIST(T, T, T)};
 #undef T
 #define T(name, string, precedence) precedence,
-const int8_t Token::precedence_[NUM_TOKENS] = {
-  TOKEN_LIST(T, T)
-};
+const int8_t Token::precedence_[NUM_TOKENS] = {TOKEN_LIST(T, T, T)};
 #undef T
 #define KT(a, b, c) 'T',
 #define KK(a, b, c) 'K',
-const char Token::token_type[] = {
-  TOKEN_LIST(KT, KK)
-};
+#define KC(a, b, c) 'C',
+const char Token::token_type[] = {TOKEN_LIST(KT, KK, KC)};
 #undef KT
 #undef KK
+#undef KC
 }  // namespace internal
 }  // namespace v8
...
@@ -19,6 +19,12 @@ namespace internal {
 //
 // T: Non-keyword tokens
 // K: Keyword tokens
+// C: Contextual keyword tokens
+//
+// Contextual keyword tokens are tokens that are scanned as Token::IDENTIFIER,
+// but that in some contexts are treated as keywords. This mostly happens
+// when ECMAScript introduces new keywords, but for backwards compatibility
+// still allows them to be used as identifiers in most contexts.

 // IGNORE_TOKEN is a convenience macro that can be supplied as
 // an argument (at any position) for a TOKEN_LIST call. It does
@@ -26,7 +32,7 @@ namespace internal {
 #define IGNORE_TOKEN(name, string, precedence)
-#define TOKEN_LIST(T, K) \
+#define TOKEN_LIST(T, K, C) \
   /* End of source indicator. */ \
   T(EOS, "EOS", 0) \
   \
@@ -175,16 +181,30 @@ namespace internal {
   \
   /* ES6 Template Literals */ \
   T(TEMPLATE_SPAN, NULL, 0) \
-  T(TEMPLATE_TAIL, NULL, 0)
+  T(TEMPLATE_TAIL, NULL, 0) \
+  \
+  /* Contextual keyword tokens */ \
+  C(GET, "get", 0) \
+  C(SET, "set", 0) \
+  C(OF, "of", 0) \
+  C(TARGET, "target", 0) \
+  C(SENT, "sent", 0) \
+  C(AS, "as", 0) \
+  C(FROM, "from", 0) \
+  C(NAME, "name", 0) \
+  C(PROTO_UNDERSCORED, "__proto__", 0) \
+  C(CONSTRUCTOR, "constructor", 0) \
+  C(PROTOTYPE, "prototype", 0) \
+  C(EVAL, "eval", 0) \
+  C(ARGUMENTS, "arguments", 0) \
+  C(UNDEFINED, "undefined", 0) \
+  C(ANONYMOUS, "anonymous", 0)
 class Token {
  public:
   // All token values.
 #define T(name, string, precedence) name,
-  enum Value {
-    TOKEN_LIST(T, T)
-    NUM_TOKENS
-  };
+  enum Value { TOKEN_LIST(T, T, T) NUM_TOKENS };
 #undef T
   // Returns a string corresponding to the C++ token name
@@ -198,6 +218,7 @@ class Token {
   static bool IsKeyword(Value tok) {
     return token_type[tok] == 'K';
   }
+  static bool IsContextualKeyword(Value tok) { return token_type[tok] == 'C'; }
   static bool IsIdentifier(Value tok, LanguageMode language_mode,
                            bool is_generator, bool disallow_await) {
...
@@ -45,9 +45,9 @@ ScannerTestHelper make_scanner(const char* src) {
 }  // anonymous namespace
-// DCHECK_TOK checks token equality, but by checking for equality of the token
+// CHECK_TOK checks token equality, but by checking for equality of the token
 // names. That should have the same result, but has much nicer error messages.
-#define DCHECK_TOK(a, b) DCHECK_EQ(Token::Name(a), Token::Name(b))
+#define CHECK_TOK(a, b) CHECK_EQ(Token::Name(a), Token::Name(b))
 TEST(Bookmarks) {
   // Scan through the given source and record the tokens for use as reference
@@ -75,12 +75,12 @@ TEST(Bookmarks) {
     if (i == bookmark_pos) {
       bookmark.Set();
     }
-    DCHECK_TOK(tokens[i], scanner->Next());
+    CHECK_TOK(tokens[i], scanner->Next());
   }
   bookmark.Apply();
   for (size_t i = bookmark_pos; i < tokens.size(); i++) {
-    DCHECK_TOK(tokens[i], scanner->Next());
+    CHECK_TOK(tokens[i], scanner->Next());
   }
 }
 }
@@ -100,8 +100,32 @@ TEST(AllThePushbacks) {
   for (const auto& test_case : test_cases) {
     auto scanner = make_scanner(test_case.src);
     for (size_t i = 0; test_case.tokens[i] != Token::EOS; i++) {
-      DCHECK_TOK(test_case.tokens[i], scanner->Next());
+      CHECK_TOK(test_case.tokens[i], scanner->Next());
     }
-    DCHECK_TOK(Token::EOS, scanner->Next());
+    CHECK_TOK(Token::EOS, scanner->Next());
   }
 }
+TEST(ContextualKeywordTokens) {
+  auto scanner = make_scanner("function of get bla");
+  // function (regular keyword)
+  scanner->Next();
+  CHECK_TOK(Token::FUNCTION, scanner->current_token());
+  CHECK_TOK(Token::UNINITIALIZED, scanner->current_contextual_token());
+  // of (contextual keyword)
+  scanner->Next();
+  CHECK_TOK(Token::IDENTIFIER, scanner->current_token());
+  CHECK_TOK(Token::OF, scanner->current_contextual_token());
+  // get (contextual keyword)
+  scanner->Next();
+  CHECK_TOK(Token::IDENTIFIER, scanner->current_token());
+  CHECK_TOK(Token::GET, scanner->current_contextual_token());
+  // bla (identifier, not any sort of keyword)
+  scanner->Next();
+  CHECK_TOK(Token::IDENTIFIER, scanner->current_token());
+  CHECK_TOK(Token::UNINITIALIZED, scanner->current_contextual_token());
+}
@@ -64,10 +64,9 @@ TEST(ScanKeywords) {
   static const KeywordToken keywords[] = {
 #define KEYWORD(t, s, d) { s, i::Token::t },
-      TOKEN_LIST(IGNORE_TOKEN, KEYWORD)
+      TOKEN_LIST(IGNORE_TOKEN, KEYWORD, IGNORE_TOKEN)
 #undef KEYWORD
-      { NULL, i::Token::IDENTIFIER }
-  };
+      {NULL, i::Token::IDENTIFIER}};
   KeywordToken key_token;
   i::UnicodeCache unicode_cache;
@@ -9625,13 +9624,16 @@ TEST(EscapedStrictReservedWord) {
   // strict mode are accepted in non-strict mode.
   const char* context_data[][2] = {{"", ""}, {NULL, NULL}};
-  const char* statement_data[] = {"if (true) l\u0065t: ;",
-                                  "function l\u0065t() { }",
-                                  "(function l\u0065t() { })",
-                                  "async function l\u0065t() { }",
-                                  "(async function l\u0065t() { })",
-                                  "l\u0065t => 42",
-                                  "async l\u0065t => 42",
+  const char* statement_data[] = {"if (true) l\\u0065t: ;",
+                                  "function l\\u0065t() { }",
+                                  "(function l\\u0065t() { })",
+                                  "async function l\\u0065t() { }",
+                                  "(async function l\\u0065t() { })",
+                                  "l\\u0065t => 42",
+                                  "async l\\u0065t => 42",
+                                  "function packag\\u0065() {}",
+                                  "function impl\\u0065ments() {}",
+                                  "function privat\\u0065() {}",
                                   NULL};
   RunParserSyncTest(context_data, statement_data, kSuccess);
...