Commit e6f4b749, authored by caitpotter88 and committed by Commit bot

[parser] implement error reporting for Scanner

Enables the Scanner to provide a better error message when errors occur
in escape sequences, numbers, strings, etc.

BUG=v8:4829, v8:3230
LOG=N
R=adamk@chromium.org, littledan@chromium.org

Review URL: https://codereview.chromium.org/1793913002

Cr-Commit-Position: refs/heads/master@{#34966}
parent 34fe5ee9
......@@ -464,6 +464,9 @@ class CallSite {
T(UnterminatedTemplate, "Unterminated template literal") \
T(UnterminatedTemplateExpr, "Missing } in template expression") \
T(FoundNonCallableHasInstance, "Found non-callable @@hasInstance") \
T(InvalidHexEscapeSequence, "Invalid hexadecimal escape sequence") \
T(InvalidUnicodeEscapeSequence, "Invalid Unicode escape sequence") \
T(UndefinedUnicodeCodePoint, "Undefined Unicode code-point") \
/* EvalError */ \
T(CodeGenFromStrings, "%") \
/* URIError */ \
......
......@@ -586,7 +586,8 @@ class ParserBase : public Traits {
}
void GetUnexpectedTokenMessage(
Token::Value token, MessageTemplate::Template* message, const char** arg,
Token::Value token, MessageTemplate::Template* message,
Scanner::Location* location, const char** arg,
MessageTemplate::Template default_ = MessageTemplate::kUnexpectedToken);
void ReportUnexpectedToken(Token::Value token);
......@@ -687,33 +688,34 @@ class ParserBase : public Traits {
// Records an expression error on |classifier| for the upcoming (peeked)
// token. The message and its argument are filled in by
// GetUnexpectedTokenMessage. The location starts out as the scanner's peek
// location and is passed by pointer so that GetUnexpectedTokenMessage can
// replace it.
void ExpressionUnexpectedToken(ExpressionClassifier* classifier) {
MessageTemplate::Template message = MessageTemplate::kUnexpectedToken;
const char* arg;
GetUnexpectedTokenMessage(peek(), &message, &arg);
classifier->RecordExpressionError(scanner()->peek_location(), message, arg);
Scanner::Location location = scanner()->peek_location();
GetUnexpectedTokenMessage(peek(), &message, &location, &arg);
classifier->RecordExpressionError(location, message, arg);
}
// Records a binding-pattern error on |classifier| for the upcoming (peeked)
// token. The message and its argument are filled in by
// GetUnexpectedTokenMessage. The location starts out as the scanner's peek
// location and is passed by pointer so that GetUnexpectedTokenMessage can
// replace it.
void BindingPatternUnexpectedToken(ExpressionClassifier* classifier) {
MessageTemplate::Template message = MessageTemplate::kUnexpectedToken;
const char* arg;
GetUnexpectedTokenMessage(peek(), &message, &arg);
classifier->RecordBindingPatternError(scanner()->peek_location(), message,
arg);
Scanner::Location location = scanner()->peek_location();
GetUnexpectedTokenMessage(peek(), &message, &location, &arg);
classifier->RecordBindingPatternError(location, message, arg);
}
// Records an arrow-formal-parameters error on |classifier| for the upcoming
// (peeked) token. The message and its argument are filled in by
// GetUnexpectedTokenMessage. The location starts out as the scanner's peek
// location and is passed by pointer so that GetUnexpectedTokenMessage can
// replace it.
void ArrowFormalParametersUnexpectedToken(ExpressionClassifier* classifier) {
MessageTemplate::Template message = MessageTemplate::kUnexpectedToken;
const char* arg;
GetUnexpectedTokenMessage(peek(), &message, &arg);
classifier->RecordArrowFormalParametersError(scanner()->peek_location(),
message, arg);
Scanner::Location location = scanner()->peek_location();
GetUnexpectedTokenMessage(peek(), &message, &location, &arg);
classifier->RecordArrowFormalParametersError(location, message, arg);
}
// Records a formal-parameter-initializer error on |classifier| for the
// upcoming (peeked) token. The message and its argument are filled in by
// GetUnexpectedTokenMessage. The location starts out as the scanner's peek
// location and is passed by pointer so that GetUnexpectedTokenMessage can
// replace it.
void FormalParameterInitializerUnexpectedToken(
ExpressionClassifier* classifier) {
MessageTemplate::Template message = MessageTemplate::kUnexpectedToken;
const char* arg;
GetUnexpectedTokenMessage(peek(), &message, &arg);
classifier->RecordFormalParameterInitializerError(
scanner()->peek_location(), message, arg);
Scanner::Location location = scanner()->peek_location();
GetUnexpectedTokenMessage(peek(), &message, &location, &arg);
classifier->RecordFormalParameterInitializerError(location, message, arg);
}
// Recursive descent functions:
......@@ -962,10 +964,10 @@ ParserBase<Traits>::FunctionState::~FunctionState() {
*function_state_stack_ = outer_function_state_;
}
template <class Traits>
void ParserBase<Traits>::GetUnexpectedTokenMessage(
Token::Value token, MessageTemplate::Template* message, const char** arg,
Token::Value token, MessageTemplate::Template* message,
Scanner::Location* location, const char** arg,
MessageTemplate::Template default_) {
*arg = nullptr;
switch (token) {
......@@ -1002,7 +1004,12 @@ void ParserBase<Traits>::GetUnexpectedTokenMessage(
*message = MessageTemplate::kInvalidEscapedReservedWord;
break;
case Token::ILLEGAL:
*message = MessageTemplate::kInvalidOrUnexpectedToken;
if (scanner()->has_error()) {
*message = scanner()->error();
*location = scanner()->error_location();
} else {
*message = MessageTemplate::kInvalidOrUnexpectedToken;
}
break;
default:
const char* name = Token::String(token);
......@@ -1024,7 +1031,7 @@ void ParserBase<Traits>::ReportUnexpectedTokenAt(
Scanner::Location source_location, Token::Value token,
MessageTemplate::Template message) {
const char* arg;
GetUnexpectedTokenMessage(token, &message, &arg);
GetUnexpectedTokenMessage(token, &message, &source_location, &arg);
Traits::ReportMessageAt(source_location, message, arg);
}
......
......@@ -61,15 +61,19 @@ void Scanner::Initialize(Utf16CharacterStream* source) {
Scan();
}
template <bool capture_raw>
template <bool capture_raw, bool unicode>
uc32 Scanner::ScanHexNumber(int expected_length) {
DCHECK(expected_length <= 4); // prevent overflow
int begin = source_pos() - 2;
uc32 x = 0;
for (int i = 0; i < expected_length; i++) {
int d = HexValue(c0_);
if (d < 0) {
ReportScannerError(Location(begin, begin + expected_length + 2),
unicode
? MessageTemplate::kInvalidUnicodeEscapeSequence
: MessageTemplate::kInvalidHexEscapeSequence);
return -1;
}
x = x * 16 + d;
......@@ -79,20 +83,23 @@ uc32 Scanner::ScanHexNumber(int expected_length) {
return x;
}
// Scans consecutive hex digits starting at the current character (c0_) and
// accumulates them into a single value, advancing past each digit consumed.
// Stops at the first character that is not a hex digit.
//
// Returns the accumulated value, or -1 when the current character is not a
// hex digit or when the value grows beyond |max_value|. |beg_pos| is used only
// as the start of the location reported for an over-large value.
template <bool capture_raw>
uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value) {
uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value, int beg_pos) {
uc32 x = 0;
int d = HexValue(c0_);
if (d < 0) {
return -1;
}
// Not a hex digit at all: fail without recording a scanner error here.
if (d < 0) return -1;
while (d >= 0) {
x = x * 16 + d;
if (x > max_value) return -1;
if (x > max_value) {
// The value exceeded max_value: record kUndefinedUnicodeCodePoint over the
// span from beg_pos to source_pos() + 1 before failing.
ReportScannerError(Location(beg_pos, source_pos() + 1),
MessageTemplate::kUndefinedUnicodeCodePoint);
return -1;
}
Advance<capture_raw>();
d = HexValue(c0_);
}
return x;
}
......@@ -855,7 +862,9 @@ Token::Value Scanner::ScanString() {
uc32 c = c0_;
Advance();
if (c == '\\') {
if (c0_ < 0 || !ScanEscape<false, false>()) return Token::ILLEGAL;
if (c0_ < 0 || !ScanEscape<false, false>()) {
return Token::ILLEGAL;
}
} else {
AddLiteralChar(c);
}
......@@ -887,7 +896,6 @@ Token::Value Scanner::ScanTemplateSpan() {
StartRawLiteral();
const bool capture_raw = true;
const bool in_template_literal = true;
while (true) {
uc32 c = c0_;
Advance<capture_raw>();
......@@ -1107,18 +1115,19 @@ uc32 Scanner::ScanUnicodeEscape() {
// Accept both \uxxxx and \u{xxxxxx}. In the latter case, the number of
// hex digits between { } is arbitrary. \ and u have already been read.
if (c0_ == '{') {
int begin = source_pos() - 2;
Advance<capture_raw>();
uc32 cp = ScanUnlimitedLengthHexNumber<capture_raw>(0x10ffff);
if (cp < 0) {
return -1;
}
if (c0_ != '}') {
uc32 cp = ScanUnlimitedLengthHexNumber<capture_raw>(0x10ffff, begin);
if (cp < 0 || c0_ != '}') {
ReportScannerError(source_pos(),
MessageTemplate::kInvalidUnicodeEscapeSequence);
return -1;
}
Advance<capture_raw>();
return cp;
}
return ScanHexNumber<capture_raw>(4);
const bool unicode = true;
return ScanHexNumber<capture_raw, unicode>(4);
}
......
......@@ -14,6 +14,7 @@
#include "src/globals.h"
#include "src/hashmap.h"
#include "src/list.h"
#include "src/messages.h"
#include "src/parsing/token.h"
#include "src/unicode.h"
#include "src/unicode-decoder.h"
......@@ -354,6 +355,10 @@ class Scanner {
// (the token last returned by Next()).
Location location() const { return current_.location; }
// True when a scanner error is currently recorded, i.e. scanner_error_ is
// anything other than MessageTemplate::kNone.
bool has_error() const { return scanner_error_ != MessageTemplate::kNone; }
// The recorded scanner error message template (kNone when nothing is
// recorded).
MessageTemplate::Template error() const { return scanner_error_; }
// The source location stored together with the recorded scanner error.
Location error_location() const { return scanner_error_location_; }
// Similar functions for the upcoming token.
// One token look-ahead (past the token returned by Next()).
......@@ -482,6 +487,7 @@ class Scanner {
current_.raw_literal_chars = NULL;
next_next_.token = Token::UNINITIALIZED;
found_html_comment_ = false;
scanner_error_ = MessageTemplate::kNone;
}
// Support BookmarkScope functionality.
......@@ -492,6 +498,19 @@ class Scanner {
void DropBookmark();
static void CopyTokenDesc(TokenDesc* to, TokenDesc* from);
// Records |error| together with |location| as the scanner error. If an error
// is already recorded (has_error() is true) the call does nothing, so the
// first reported error is the one that is kept.
void ReportScannerError(const Location& location,
                        MessageTemplate::Template error) {
  if (!has_error()) {
    scanner_error_location_ = location;
    scanner_error_ = error;
  }
}
void ReportScannerError(int pos, MessageTemplate::Template error) {
if (has_error()) return;
scanner_error_ = error;
scanner_error_location_ = Location(pos, pos + 1);
}
// Literal buffer support
inline void StartLiteral() {
LiteralBuffer* free_buffer =
......@@ -637,13 +656,13 @@ class Scanner {
return current_.raw_literal_chars->is_one_byte();
}
template <bool capture_raw>
template <bool capture_raw, bool unicode = false>
uc32 ScanHexNumber(int expected_length);
// Scan a number of any length but not bigger than max_value. For example, the
// number can be 000000001, so it's very long in characters but its value is
// small.
template <bool capture_raw>
uc32 ScanUnlimitedLengthHexNumber(int max_value);
uc32 ScanUnlimitedLengthHexNumber(int max_value, int beg_pos);
// Scans a single JavaScript token.
void Scan();
......@@ -766,6 +785,9 @@ class Scanner {
bool found_html_comment_;
bool allow_harmony_exponentiation_operator_;
MessageTemplate::Template scanner_error_;
Location scanner_error_location_;
};
} // namespace internal
......
......@@ -2,7 +2,7 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
*%(basename)s:7: SyntaxError: Invalid or unexpected token
*%(basename)s:7: SyntaxError: Invalid hexadecimal escape sequence
tag(tag`\xyy`);
^^^
SyntaxError: Invalid or unexpected token
\ No newline at end of file
^^^^
SyntaxError: Invalid hexadecimal escape sequence
......@@ -2,7 +2,7 @@
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
*%(basename)s:7: SyntaxError: Invalid or unexpected token
*%(basename)s:7: SyntaxError: Invalid hexadecimal escape sequence
`${tag`\xyy`}`;
^^^
SyntaxError: Invalid or unexpected token
\ No newline at end of file
^^^^
SyntaxError: Invalid hexadecimal escape sequence
// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
print("\u162P");
# Copyright 2016 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
*%(basename)s:5: SyntaxError: Invalid Unicode escape sequence
print("\u162P");
^^^^^^
SyntaxError: Invalid Unicode escape sequence
// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
print("\u{FFYZ}");
# Copyright 2016 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
*%(basename)s:5: SyntaxError: Invalid Unicode escape sequence
print("\u{FFYZ}");
^
SyntaxError: Invalid Unicode escape sequence
// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
print("\u{110000}");
# Copyright 2016 the V8 project authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
*%(basename)s:5: SyntaxError: Undefined Unicode code-point
print("\u{110000}");
^^^^^^^^^
SyntaxError: Undefined Unicode code-point
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment