Commit a56874d3 authored by Jakob Gruber, committed by V8 LUCI CQ

[regexp] Early errors 🤯

This CL implements early SyntaxErrors for regular expressions. Early
errors are thrown when a malformed pattern is parsed, rather than when
the code first runs.

We do this by having the JS parser call into the regexp parser when
a regexp pattern is found. Regexps are expected to be relatively
rare, small, and cheap to parse - that's why we currently accept that
the regexp parser does unnecessary work (e.g. creating the AST
structures).

If needed, we can optimize in the future. Ideas:

- Split up the regexp parser to avoid useless work for syntax validation.
- Preserve parser results to avoid reparsing later.

Bug: v8:896
Change-Id: I3d1ec18c980ba94439576ac3764138552418b85d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3106647
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: Leszek Swirski <leszeks@chromium.org>
Reviewed-by: Patrick Thier <pthier@chromium.org>
Cr-Commit-Position: refs/heads/main@{#76502}
parent 862391b9
...@@ -118,17 +118,17 @@ base::Optional<JSRegExp::Flags> JSRegExp::FlagsFromString( ...@@ -118,17 +118,17 @@ base::Optional<JSRegExp::Flags> JSRegExp::FlagsFromString(
// A longer flags string cannot be valid. // A longer flags string cannot be valid.
if (length > JSRegExp::kFlagCount) return {}; if (length > JSRegExp::kFlagCount) return {};
JSRegExp::Flags value; RegExpFlags value;
FlatStringReader reader(isolate, String::Flatten(isolate, flags)); FlatStringReader reader(isolate, String::Flatten(isolate, flags));
for (int i = 0; i < length; i++) { for (int i = 0; i < length; i++) {
base::Optional<JSRegExp::Flag> flag = JSRegExp::FlagFromChar(reader.Get(i)); base::Optional<RegExpFlag> flag = JSRegExp::FlagFromChar(reader.Get(i));
if (!flag.has_value()) return {}; if (!flag.has_value()) return {};
if (value & flag.value()) return {}; // Duplicate. if (value & flag.value()) return {}; // Duplicate.
value |= flag.value(); value |= flag.value();
} }
return value; return JSRegExp::AsJSRegExpFlags(value);
} }
// static // static
......
...@@ -48,11 +48,6 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> { ...@@ -48,11 +48,6 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> {
static constexpr Flag AsJSRegExpFlag(RegExpFlag f) { static constexpr Flag AsJSRegExpFlag(RegExpFlag f) {
return static_cast<Flag>(f); return static_cast<Flag>(f);
} }
static constexpr base::Optional<Flag> AsOptionalJSRegExpFlag(
base::Optional<RegExpFlag> f) {
return f.has_value() ? base::Optional<Flag>{AsJSRegExpFlag(f.value())}
: base::Optional<Flag>{};
}
static constexpr Flags AsJSRegExpFlags(RegExpFlags f) { static constexpr Flags AsJSRegExpFlags(RegExpFlags f) {
return Flags{static_cast<int>(f)}; return Flags{static_cast<int>(f)};
} }
...@@ -60,10 +55,11 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> { ...@@ -60,10 +55,11 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> {
return RegExpFlags{static_cast<int>(f)}; return RegExpFlags{static_cast<int>(f)};
} }
static base::Optional<Flag> FlagFromChar(char c) { static base::Optional<RegExpFlag> FlagFromChar(char c) {
base::Optional<Flag> f = AsOptionalJSRegExpFlag(TryRegExpFlagFromChar(c)); base::Optional<RegExpFlag> f = TryRegExpFlagFromChar(c);
if (!f.has_value()) return f; if (!f.has_value()) return f;
if (f.value() == kLinear && !FLAG_enable_experimental_regexp_engine) { if (f.value() == RegExpFlag::kLinear &&
!FLAG_enable_experimental_regexp_engine) {
return {}; return {};
} }
return f; return f;
......
...@@ -27,12 +27,15 @@ ...@@ -27,12 +27,15 @@
#include "src/parsing/parse-info.h" #include "src/parsing/parse-info.h"
#include "src/parsing/scanner.h" #include "src/parsing/scanner.h"
#include "src/parsing/token.h" #include "src/parsing/token.h"
#include "src/regexp/regexp.h"
#include "src/utils/pointer-with-payload.h" #include "src/utils/pointer-with-payload.h"
#include "src/zone/zone-chunk-list.h" #include "src/zone/zone-chunk-list.h"
namespace v8 { namespace v8 {
namespace internal { namespace internal {
class PreParserIdentifier;
enum FunctionNameValidity { enum FunctionNameValidity {
kFunctionNameIsStrictReserved, kFunctionNameIsStrictReserved,
kSkipFunctionNameCheck, kSkipFunctionNameCheck,
...@@ -1074,22 +1077,24 @@ class ParserBase { ...@@ -1074,22 +1077,24 @@ class ParserBase {
} }
// Report syntax errors. // Report syntax errors.
V8_NOINLINE void ReportMessage(MessageTemplate message) { template <typename... Ts>
Scanner::Location source_location = scanner()->location(); V8_NOINLINE void ReportMessage(MessageTemplate message, const Ts&... args) {
impl()->ReportMessageAt(source_location, message, ReportMessageAt(scanner()->location(), message, args...);
static_cast<const char*>(nullptr));
} }
template <typename T> template <typename... Ts>
V8_NOINLINE void ReportMessage(MessageTemplate message, T arg) { V8_NOINLINE void ReportMessageAt(Scanner::Location source_location,
Scanner::Location source_location = scanner()->location(); MessageTemplate message, const Ts&... args) {
impl()->ReportMessageAt(source_location, message, arg); impl()->pending_error_handler()->ReportMessageAt(
source_location.beg_pos, source_location.end_pos, message, args...);
scanner()->set_parser_error();
} }
V8_NOINLINE void ReportMessageAt(Scanner::Location location, V8_NOINLINE void ReportMessageAt(Scanner::Location source_location,
MessageTemplate message) { MessageTemplate message,
impl()->ReportMessageAt(location, message, const PreParserIdentifier& arg0) {
static_cast<const char*>(nullptr)); ReportMessageAt(source_location, message,
impl()->PreParserIdentifierToAstRawString(arg0));
} }
V8_NOINLINE void ReportUnexpectedToken(Token::Value token); V8_NOINLINE void ReportUnexpectedToken(Token::Value token);
...@@ -1140,6 +1145,11 @@ class ParserBase { ...@@ -1140,6 +1145,11 @@ class ParserBase {
ExpressionT ParsePropertyOrPrivatePropertyName(); ExpressionT ParsePropertyOrPrivatePropertyName();
const AstRawString* GetNextSymbolForRegExpLiteral() const {
return scanner()->NextSymbol(ast_value_factory());
}
bool ValidateRegExpLiteral(const AstRawString* pattern, RegExpFlags flags,
const char** error_message);
ExpressionT ParseRegExpLiteral(); ExpressionT ParseRegExpLiteral();
ExpressionT ParseBindingPattern(); ExpressionT ParseBindingPattern();
...@@ -1745,6 +1755,25 @@ ParserBase<Impl>::ParsePropertyOrPrivatePropertyName() { ...@@ -1745,6 +1755,25 @@ ParserBase<Impl>::ParsePropertyOrPrivatePropertyName() {
return key; return key;
} }
template <typename Impl>
bool ParserBase<Impl>::ValidateRegExpLiteral(const AstRawString* pattern,
RegExpFlags flags,
const char** error_message) {
// TODO(jgruber): If already validated in the preparser, skip validation in
// the parser.
DisallowGarbageCollection no_gc;
const unsigned char* d = pattern->raw_data();
if (pattern->is_one_byte()) {
return RegExp::VerifySyntax(zone(), stack_limit(),
static_cast<const uint8_t*>(d),
pattern->length(), flags, error_message, no_gc);
} else {
return RegExp::VerifySyntax(zone(), stack_limit(),
reinterpret_cast<const uint16_t*>(d),
pattern->length(), flags, error_message, no_gc);
}
}
template <typename Impl> template <typename Impl>
typename ParserBase<Impl>::ExpressionT ParserBase<Impl>::ParseRegExpLiteral() { typename ParserBase<Impl>::ExpressionT ParserBase<Impl>::ParseRegExpLiteral() {
int pos = peek_position(); int pos = peek_position();
...@@ -1754,15 +1783,20 @@ typename ParserBase<Impl>::ExpressionT ParserBase<Impl>::ParseRegExpLiteral() { ...@@ -1754,15 +1783,20 @@ typename ParserBase<Impl>::ExpressionT ParserBase<Impl>::ParseRegExpLiteral() {
return impl()->FailureExpression(); return impl()->FailureExpression();
} }
IdentifierT js_pattern = impl()->GetNextSymbol(); const AstRawString* js_pattern = GetNextSymbolForRegExpLiteral();
Maybe<int> flags = scanner()->ScanRegExpFlags(); base::Optional<RegExpFlags> flags = scanner()->ScanRegExpFlags();
if (flags.IsNothing()) { if (!flags.has_value()) {
Next(); Next();
ReportMessage(MessageTemplate::kMalformedRegExpFlags); ReportMessage(MessageTemplate::kMalformedRegExpFlags);
return impl()->FailureExpression(); return impl()->FailureExpression();
} }
Next(); Next();
return factory()->NewRegExpLiteral(js_pattern, flags.FromJust(), pos); const char* error_message;
if (!ValidateRegExpLiteral(js_pattern, flags.value(), &error_message)) {
ReportMessage(MessageTemplate::kMalformedRegExp, js_pattern, error_message);
return impl()->FailureExpression();
}
return factory()->NewRegExpLiteral(js_pattern, flags.value(), pos);
} }
template <typename Impl> template <typename Impl>
......
...@@ -701,25 +701,10 @@ class V8_EXPORT_PRIVATE Parser : public NON_EXPORTED_BASE(ParserBase<Parser>) { ...@@ -701,25 +701,10 @@ class V8_EXPORT_PRIVATE Parser : public NON_EXPORTED_BASE(ParserBase<Parser>) {
return NewThrowError(Runtime::kNewTypeError, message, arg, pos); return NewThrowError(Runtime::kNewTypeError, message, arg, pos);
} }
// Reporting errors.
void ReportMessageAt(Scanner::Location source_location,
MessageTemplate message, const char* arg = nullptr) {
pending_error_handler()->ReportMessageAt(
source_location.beg_pos, source_location.end_pos, message, arg);
scanner_.set_parser_error();
}
// Dummy implementation. The parser should never have a unidentifiable // Dummy implementation. The parser should never have a unidentifiable
// error. // error.
V8_INLINE void ReportUnidentifiableError() { UNREACHABLE(); } V8_INLINE void ReportUnidentifiableError() { UNREACHABLE(); }
void ReportMessageAt(Scanner::Location source_location,
MessageTemplate message, const AstRawString* arg) {
pending_error_handler()->ReportMessageAt(
source_location.beg_pos, source_location.end_pos, message, arg);
scanner_.set_parser_error();
}
const AstRawString* GetRawNameFromIdentifier(const AstRawString* arg) { const AstRawString* GetRawNameFromIdentifier(const AstRawString* arg) {
return arg; return arg;
} }
......
...@@ -19,28 +19,28 @@ namespace internal { ...@@ -19,28 +19,28 @@ namespace internal {
void PendingCompilationErrorHandler::MessageDetails::SetString( void PendingCompilationErrorHandler::MessageDetails::SetString(
Handle<String> string, Isolate* isolate) { Handle<String> string, Isolate* isolate) {
DCHECK_NE(type_, kMainThreadHandle); DCHECK_NE(arg0_type_, kMainThreadHandle);
type_ = kMainThreadHandle; arg0_type_ = kMainThreadHandle;
arg_handle_ = string; arg0_handle_ = string;
} }
void PendingCompilationErrorHandler::MessageDetails::SetString( void PendingCompilationErrorHandler::MessageDetails::SetString(
Handle<String> string, LocalIsolate* isolate) { Handle<String> string, LocalIsolate* isolate) {
DCHECK_NE(type_, kMainThreadHandle); DCHECK_NE(arg0_type_, kMainThreadHandle);
type_ = kMainThreadHandle; arg0_type_ = kMainThreadHandle;
arg_handle_ = isolate->heap()->NewPersistentHandle(string); arg0_handle_ = isolate->heap()->NewPersistentHandle(string);
} }
template <typename IsolateT> template <typename IsolateT>
void PendingCompilationErrorHandler::MessageDetails::Prepare( void PendingCompilationErrorHandler::MessageDetails::Prepare(
IsolateT* isolate) { IsolateT* isolate) {
switch (type_) { switch (arg0_type_) {
case kAstRawString: case kAstRawString:
return SetString(arg_->string(), isolate); return SetString(arg0_->string(), isolate);
case kNone: case kNone:
case kConstCharString: case kConstCharString:
// We can delay allocation until ArgumentString(isolate). // We can delay allocation until Arg0String(isolate).
// TODO(leszeks): We don't actually have to transfer this string, since // TODO(leszeks): We don't actually have to transfer this string, since
// it's a root. // it's a root.
return; return;
...@@ -52,22 +52,31 @@ void PendingCompilationErrorHandler::MessageDetails::Prepare( ...@@ -52,22 +52,31 @@ void PendingCompilationErrorHandler::MessageDetails::Prepare(
} }
} }
Handle<String> PendingCompilationErrorHandler::MessageDetails::ArgumentString( Handle<String> PendingCompilationErrorHandler::MessageDetails::Arg0String(
Isolate* isolate) const { Isolate* isolate) const {
switch (type_) { switch (arg0_type_) {
case kMainThreadHandle: case kMainThreadHandle:
return arg_handle_; return arg0_handle_;
case kNone: case kNone:
return isolate->factory()->undefined_string(); return isolate->factory()->undefined_string();
case kConstCharString: case kConstCharString:
return isolate->factory() return isolate->factory()
->NewStringFromUtf8(base::CStrVector(char_arg_), AllocationType::kOld) ->NewStringFromUtf8(base::CStrVector(char_arg0_),
AllocationType::kOld)
.ToHandleChecked(); .ToHandleChecked();
case kAstRawString: case kAstRawString:
UNREACHABLE(); UNREACHABLE();
} }
} }
Handle<String> PendingCompilationErrorHandler::MessageDetails::Arg1String(
Isolate* isolate) const {
if (arg1_ == nullptr) return Handle<String>::null();
return isolate->factory()
->NewStringFromUtf8(base::CStrVector(arg1_), AllocationType::kOld)
.ToHandleChecked();
}
MessageLocation PendingCompilationErrorHandler::MessageDetails::GetLocation( MessageLocation PendingCompilationErrorHandler::MessageDetails::GetLocation(
Handle<Script> script) const { Handle<Script> script) const {
return MessageLocation(script, start_position_, end_position_); return MessageLocation(script, start_position_, end_position_);
...@@ -93,6 +102,17 @@ void PendingCompilationErrorHandler::ReportMessageAt(int start_position, ...@@ -93,6 +102,17 @@ void PendingCompilationErrorHandler::ReportMessageAt(int start_position,
error_details_ = MessageDetails(start_position, end_position, message, arg); error_details_ = MessageDetails(start_position, end_position, message, arg);
} }
void PendingCompilationErrorHandler::ReportMessageAt(int start_position,
int end_position,
MessageTemplate message,
const AstRawString* arg0,
const char* arg1) {
if (has_pending_error_) return;
has_pending_error_ = true;
error_details_ =
MessageDetails(start_position, end_position, message, arg0, arg1);
}
void PendingCompilationErrorHandler::ReportWarningAt(int start_position, void PendingCompilationErrorHandler::ReportWarningAt(int start_position,
int end_position, int end_position,
MessageTemplate message, MessageTemplate message,
...@@ -119,7 +139,8 @@ void PendingCompilationErrorHandler::ReportWarnings( ...@@ -119,7 +139,8 @@ void PendingCompilationErrorHandler::ReportWarnings(
for (const MessageDetails& warning : warning_messages_) { for (const MessageDetails& warning : warning_messages_) {
MessageLocation location = warning.GetLocation(script); MessageLocation location = warning.GetLocation(script);
Handle<String> argument = warning.ArgumentString(isolate); Handle<String> argument = warning.Arg0String(isolate);
DCHECK(warning.Arg1String(isolate).is_null()); // Only used for errors.
Handle<JSMessageObject> message = Handle<JSMessageObject> message =
MessageHandler::MakeMessageObject(isolate, warning.message(), &location, MessageHandler::MakeMessageObject(isolate, warning.message(), &location,
argument, Handle<FixedArray>::null()); argument, Handle<FixedArray>::null());
...@@ -160,12 +181,13 @@ void PendingCompilationErrorHandler::ThrowPendingError( ...@@ -160,12 +181,13 @@ void PendingCompilationErrorHandler::ThrowPendingError(
if (!has_pending_error_) return; if (!has_pending_error_) return;
MessageLocation location = error_details_.GetLocation(script); MessageLocation location = error_details_.GetLocation(script);
Handle<String> argument = error_details_.ArgumentString(isolate); Handle<String> arg0 = error_details_.Arg0String(isolate);
Handle<String> arg1 = error_details_.Arg1String(isolate);
isolate->debug()->OnCompileError(script); isolate->debug()->OnCompileError(script);
Factory* factory = isolate->factory(); Factory* factory = isolate->factory();
Handle<JSObject> error = Handle<JSObject> error =
factory->NewSyntaxError(error_details_.message(), argument); factory->NewSyntaxError(error_details_.message(), arg0, arg1);
isolate->ThrowAt(error, &location); isolate->ThrowAt(error, &location);
} }
...@@ -173,7 +195,8 @@ Handle<String> PendingCompilationErrorHandler::FormatErrorMessageForTest( ...@@ -173,7 +195,8 @@ Handle<String> PendingCompilationErrorHandler::FormatErrorMessageForTest(
Isolate* isolate) { Isolate* isolate) {
error_details_.Prepare(isolate); error_details_.Prepare(isolate);
return MessageFormatter::Format(isolate, error_details_.message(), return MessageFormatter::Format(isolate, error_details_.message(),
error_details_.ArgumentString(isolate)); error_details_.Arg0String(isolate),
error_details_.Arg1String(isolate));
} }
} // namespace internal } // namespace internal
......
...@@ -39,6 +39,10 @@ class PendingCompilationErrorHandler { ...@@ -39,6 +39,10 @@ class PendingCompilationErrorHandler {
void ReportMessageAt(int start_position, int end_position, void ReportMessageAt(int start_position, int end_position,
MessageTemplate message, const AstRawString* arg); MessageTemplate message, const AstRawString* arg);
void ReportMessageAt(int start_position, int end_position,
MessageTemplate message, const AstRawString* arg0,
const char* arg1);
void ReportWarningAt(int start_position, int end_position, void ReportWarningAt(int start_position, int end_position,
MessageTemplate message, const char* arg = nullptr); MessageTemplate message, const char* arg = nullptr);
...@@ -86,23 +90,39 @@ class PendingCompilationErrorHandler { ...@@ -86,23 +90,39 @@ class PendingCompilationErrorHandler {
: start_position_(-1), : start_position_(-1),
end_position_(-1), end_position_(-1),
message_(MessageTemplate::kNone), message_(MessageTemplate::kNone),
type_(kNone) {} arg1_(nullptr),
arg0_type_(kNone) {}
MessageDetails(int start_position, int end_position, MessageDetails(int start_position, int end_position,
MessageTemplate message, const AstRawString* arg) MessageTemplate message, const AstRawString* arg)
: start_position_(start_position), : start_position_(start_position),
end_position_(end_position), end_position_(end_position),
message_(message), message_(message),
arg_(arg), arg0_(arg),
type_(arg ? kAstRawString : kNone) {} arg1_(nullptr),
arg0_type_(arg ? kAstRawString : kNone) {}
MessageDetails(int start_position, int end_position,
MessageTemplate message, const AstRawString* arg0,
const char* arg1)
: start_position_(start_position),
end_position_(end_position),
message_(message),
arg0_(arg0),
arg1_(arg1),
arg0_type_(kAstRawString) {
DCHECK_NOT_NULL(arg0);
DCHECK_NOT_NULL(arg1);
}
MessageDetails(int start_position, int end_position, MessageDetails(int start_position, int end_position,
MessageTemplate message, const char* char_arg) MessageTemplate message, const char* char_arg)
: start_position_(start_position), : start_position_(start_position),
end_position_(end_position), end_position_(end_position),
message_(message), message_(message),
char_arg_(char_arg), char_arg0_(char_arg),
type_(char_arg_ ? kConstCharString : kNone) {} arg1_(nullptr),
arg0_type_(char_arg0_ ? kConstCharString : kNone) {}
Handle<String> ArgumentString(Isolate* isolate) const; Handle<String> Arg0String(Isolate* isolate) const;
Handle<String> Arg1String(Isolate* isolate) const;
MessageLocation GetLocation(Handle<Script> script) const; MessageLocation GetLocation(Handle<Script> script) const;
MessageTemplate message() const { return message_; } MessageTemplate message() const { return message_; }
...@@ -119,11 +139,14 @@ class PendingCompilationErrorHandler { ...@@ -119,11 +139,14 @@ class PendingCompilationErrorHandler {
int end_position_; int end_position_;
MessageTemplate message_; MessageTemplate message_;
union { union {
const AstRawString* arg_; const AstRawString* arg0_;
const char* char_arg_; const char* char_arg0_;
Handle<String> arg_handle_; Handle<String> arg0_handle_;
}; };
Type type_; // TODO(jgruber): If we ever extend functionality of arg1, refactor it to
// be more consistent with arg0.
const char* arg1_;
Type arg0_type_;
}; };
void ThrowPendingError(Isolate* isolate, Handle<Script> script) const; void ThrowPendingError(Isolate* isolate, Handle<Script> script) const;
......
...@@ -537,7 +537,7 @@ class PreParserFactory { ...@@ -537,7 +537,7 @@ class PreParserFactory {
PreParserExpression NewTheHoleLiteral() { PreParserExpression NewTheHoleLiteral() {
return PreParserExpression::Default(); return PreParserExpression::Default();
} }
PreParserExpression NewRegExpLiteral(const PreParserIdentifier& js_pattern, PreParserExpression NewRegExpLiteral(const AstRawString* js_pattern,
int js_flags, int pos) { int js_flags, int pos) {
return PreParserExpression::Default(); return PreParserExpression::Default();
} }
...@@ -1455,12 +1455,9 @@ class PreParser : public ParserBase<PreParser> { ...@@ -1455,12 +1455,9 @@ class PreParser : public ParserBase<PreParser> {
return PreParserExpression::Default(); return PreParserExpression::Default();
} }
// Reporting errors. V8_INLINE const AstRawString* PreParserIdentifierToAstRawString(
void ReportMessageAt(Scanner::Location source_location, const PreParserIdentifier& x) {
MessageTemplate message, const char* arg = nullptr) { return x.string_;
pending_error_handler()->ReportMessageAt(
source_location.beg_pos, source_location.end_pos, message, arg);
scanner()->set_parser_error();
} }
V8_INLINE void ReportUnidentifiableError() { V8_INLINE void ReportUnidentifiableError() {
...@@ -1468,19 +1465,6 @@ class PreParser : public ParserBase<PreParser> { ...@@ -1468,19 +1465,6 @@ class PreParser : public ParserBase<PreParser> {
scanner()->set_parser_error(); scanner()->set_parser_error();
} }
V8_INLINE void ReportMessageAt(Scanner::Location source_location,
MessageTemplate message,
const PreParserIdentifier& arg) {
ReportMessageAt(source_location, message, arg.string_);
}
void ReportMessageAt(Scanner::Location source_location,
MessageTemplate message, const AstRawString* arg) {
pending_error_handler()->ReportMessageAt(
source_location.beg_pos, source_location.end_pos, message, arg);
scanner()->set_parser_error();
}
const AstRawString* GetRawNameFromIdentifier(const PreParserIdentifier& arg) { const AstRawString* GetRawNameFromIdentifier(const PreParserIdentifier& arg) {
return arg.string_; return arg.string_;
} }
......
...@@ -978,9 +978,6 @@ bool Scanner::ScanRegExpPattern() { ...@@ -978,9 +978,6 @@ bool Scanner::ScanRegExpPattern() {
// worrying whether the following characters are part of the escape // worrying whether the following characters are part of the escape
// or not, since any '/', '\\' or '[' is guaranteed to not be part // or not, since any '/', '\\' or '[' is guaranteed to not be part
// of the escape sequence. // of the escape sequence.
// TODO(896): At some point, parse RegExps more thoroughly to capture
// octal esacpes in strict mode.
} else { // Unescaped character. } else { // Unescaped character.
if (c0_ == '[') in_character_class = true; if (c0_ == '[') in_character_class = true;
if (c0_ == ']') in_character_class = false; if (c0_ == ']') in_character_class = false;
...@@ -993,22 +990,21 @@ bool Scanner::ScanRegExpPattern() { ...@@ -993,22 +990,21 @@ bool Scanner::ScanRegExpPattern() {
return true; return true;
} }
Maybe<int> Scanner::ScanRegExpFlags() { base::Optional<RegExpFlags> Scanner::ScanRegExpFlags() {
DCHECK_EQ(Token::REGEXP_LITERAL, next().token); DCHECK_EQ(Token::REGEXP_LITERAL, next().token);
// Scan regular expression flags. RegExpFlags flags;
JSRegExp::Flags flags;
while (IsIdentifierPart(c0_)) { while (IsIdentifierPart(c0_)) {
base::Optional<JSRegExp::Flags> maybe_flag = JSRegExp::FlagFromChar(c0_); base::Optional<RegExpFlag> maybe_flag = JSRegExp::FlagFromChar(c0_);
if (!maybe_flag.has_value()) return Nothing<int>(); if (!maybe_flag.has_value()) return {};
JSRegExp::Flags flag = *maybe_flag; RegExpFlag flag = maybe_flag.value();
if (flags & flag) return Nothing<int>(); if (flags & flag) return {};
Advance(); Advance();
flags |= flag; flags |= flag;
} }
next().location.end_pos = source_pos(); next().location.end_pos = source_pos();
return Just<int>(flags); return flags;
} }
const AstRawString* Scanner::CurrentSymbol( const AstRawString* Scanner::CurrentSymbol(
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include "src/parsing/literal-buffer.h" #include "src/parsing/literal-buffer.h"
#include "src/parsing/parse-info.h" #include "src/parsing/parse-info.h"
#include "src/parsing/token.h" #include "src/parsing/token.h"
#include "src/regexp/regexp-flags.h"
#include "src/strings/char-predicates.h" #include "src/strings/char-predicates.h"
#include "src/strings/unicode.h" #include "src/strings/unicode.h"
#include "src/utils/allocation.h" #include "src/utils/allocation.h"
...@@ -398,7 +399,7 @@ class V8_EXPORT_PRIVATE Scanner { ...@@ -398,7 +399,7 @@ class V8_EXPORT_PRIVATE Scanner {
// Returns true if a pattern is scanned. // Returns true if a pattern is scanned.
bool ScanRegExpPattern(); bool ScanRegExpPattern();
// Scans the input as regular expression flags. Returns the flags on success. // Scans the input as regular expression flags. Returns the flags on success.
Maybe<int> ScanRegExpFlags(); base::Optional<RegExpFlags> ScanRegExpFlags();
// Scans the input as a template literal // Scans the input as a template literal
Token::Value ScanTemplateContinuation() { Token::Value ScanTemplateContinuation() {
......
...@@ -242,7 +242,7 @@ template <class CharT> ...@@ -242,7 +242,7 @@ template <class CharT>
class RegExpParserImpl final { class RegExpParserImpl final {
private: private:
RegExpParserImpl(const CharT* input, int input_length, RegExpFlags flags, RegExpParserImpl(const CharT* input, int input_length, RegExpFlags flags,
Isolate* isolate, Zone* zone, uintptr_t stack_limit, Zone* zone,
const DisallowGarbageCollection& no_gc); const DisallowGarbageCollection& no_gc);
bool Parse(RegExpCompileData* result); bool Parse(RegExpCompileData* result);
...@@ -337,7 +337,6 @@ class RegExpParserImpl final { ...@@ -337,7 +337,6 @@ class RegExpParserImpl final {
// ScanForCaptures to look ahead at the remaining pattern. // ScanForCaptures to look ahead at the remaining pattern.
bool HasNamedCaptures(); bool HasNamedCaptures();
Isolate* isolate() { return isolate_; }
Zone* zone() const { return zone_; } Zone* zone() const { return zone_; }
base::uc32 current() { return current_; } base::uc32 current() { return current_; }
...@@ -362,7 +361,6 @@ class RegExpParserImpl final { ...@@ -362,7 +361,6 @@ class RegExpParserImpl final {
}; };
const DisallowGarbageCollection no_gc_; const DisallowGarbageCollection no_gc_;
Isolate* const isolate_;
Zone* const zone_; Zone* const zone_;
RegExpError error_ = RegExpError::kNone; RegExpError error_ = RegExpError::kNone;
int error_pos_ = 0; int error_pos_ = 0;
...@@ -382,19 +380,22 @@ class RegExpParserImpl final { ...@@ -382,19 +380,22 @@ class RegExpParserImpl final {
bool is_scanned_for_captures_; bool is_scanned_for_captures_;
bool has_named_captures_; // Only valid after we have scanned for captures. bool has_named_captures_; // Only valid after we have scanned for captures.
bool failed_; bool failed_;
const uintptr_t stack_limit_;
friend bool RegExpParser::ParseRegExpFromHeapString(Isolate*, Zone*, friend bool RegExpParser::ParseRegExpFromHeapString(Isolate*, Zone*,
Handle<String>, Handle<String>,
RegExpFlags, RegExpFlags,
RegExpCompileData*); RegExpCompileData*);
friend bool RegExpParser::VerifyRegExpSyntax<CharT>(
Zone*, uintptr_t, const CharT*, int, RegExpFlags, RegExpCompileData*,
const DisallowGarbageCollection&);
}; };
template <class CharT> template <class CharT>
RegExpParserImpl<CharT>::RegExpParserImpl( RegExpParserImpl<CharT>::RegExpParserImpl(
const CharT* input, int input_length, RegExpFlags flags, Isolate* isolate, const CharT* input, int input_length, RegExpFlags flags,
Zone* zone, const DisallowGarbageCollection& no_gc) uintptr_t stack_limit, Zone* zone, const DisallowGarbageCollection& no_gc)
: isolate_(isolate), : zone_(zone),
zone_(zone),
captures_(nullptr), captures_(nullptr),
named_captures_(nullptr), named_captures_(nullptr),
named_back_references_(nullptr), named_back_references_(nullptr),
...@@ -410,7 +411,8 @@ RegExpParserImpl<CharT>::RegExpParserImpl( ...@@ -410,7 +411,8 @@ RegExpParserImpl<CharT>::RegExpParserImpl(
contains_anchor_(false), contains_anchor_(false),
is_scanned_for_captures_(false), is_scanned_for_captures_(false),
has_named_captures_(false), has_named_captures_(false),
failed_(false) { failed_(false),
stack_limit_(stack_limit) {
Advance(); Advance();
} }
...@@ -457,8 +459,7 @@ base::uc32 RegExpParserImpl<CharT>::Next() { ...@@ -457,8 +459,7 @@ base::uc32 RegExpParserImpl<CharT>::Next() {
template <class CharT> template <class CharT>
void RegExpParserImpl<CharT>::Advance() { void RegExpParserImpl<CharT>::Advance() {
if (has_next()) { if (has_next()) {
StackLimitCheck check(isolate()); if (GetCurrentStackPosition() < stack_limit_) {
if (check.HasOverflowed()) {
if (FLAG_correctness_fuzzer_suppressions) { if (FLAG_correctness_fuzzer_suppressions) {
FATAL("Aborting on stack overflow"); FATAL("Aborting on stack overflow");
} }
...@@ -2400,20 +2401,40 @@ bool RegExpParser::ParseRegExpFromHeapString(Isolate* isolate, Zone* zone, ...@@ -2400,20 +2401,40 @@ bool RegExpParser::ParseRegExpFromHeapString(Isolate* isolate, Zone* zone,
RegExpFlags flags, RegExpFlags flags,
RegExpCompileData* result) { RegExpCompileData* result) {
DisallowGarbageCollection no_gc; DisallowGarbageCollection no_gc;
uintptr_t stack_limit = isolate->stack_guard()->real_climit();
String::FlatContent content = input->GetFlatContent(no_gc); String::FlatContent content = input->GetFlatContent(no_gc);
if (content.IsOneByte()) { if (content.IsOneByte()) {
base::Vector<const uint8_t> v = content.ToOneByteVector(); base::Vector<const uint8_t> v = content.ToOneByteVector();
return RegExpParserImpl<uint8_t>{v.begin(), v.length(), flags, return RegExpParserImpl<uint8_t>{v.begin(), v.length(), flags,
isolate, zone, no_gc} stack_limit, zone, no_gc}
.Parse(result); .Parse(result);
} else { } else {
base::Vector<const base::uc16> v = content.ToUC16Vector(); base::Vector<const base::uc16> v = content.ToUC16Vector();
return RegExpParserImpl<base::uc16>{v.begin(), v.length(), flags, return RegExpParserImpl<base::uc16>{v.begin(), v.length(), flags,
isolate, zone, no_gc} stack_limit, zone, no_gc}
.Parse(result); .Parse(result);
} }
} }
// static
template <class CharT>
bool RegExpParser::VerifyRegExpSyntax(Zone* zone, uintptr_t stack_limit,
const CharT* input, int input_length,
RegExpFlags flags,
RegExpCompileData* result,
const DisallowGarbageCollection& no_gc) {
return RegExpParserImpl<CharT>{input, input_length, flags,
stack_limit, zone, no_gc}
.Parse(result);
}
template bool RegExpParser::VerifyRegExpSyntax<uint8_t>(
Zone*, uintptr_t, const uint8_t*, int, RegExpFlags, RegExpCompileData*,
const DisallowGarbageCollection&);
template bool RegExpParser::VerifyRegExpSyntax<base::uc16>(
Zone*, uintptr_t, const base::uc16*, int, RegExpFlags, RegExpCompileData*,
const DisallowGarbageCollection&);
// static // static
bool RegExpParser::VerifyRegExpSyntax(Isolate* isolate, Zone* zone, bool RegExpParser::VerifyRegExpSyntax(Isolate* isolate, Zone* zone,
Handle<String> input, RegExpFlags flags, Handle<String> input, RegExpFlags flags,
......
...@@ -23,6 +23,12 @@ class V8_EXPORT_PRIVATE RegExpParser : public AllStatic { ...@@ -23,6 +23,12 @@ class V8_EXPORT_PRIVATE RegExpParser : public AllStatic {
Handle<String> input, RegExpFlags flags, Handle<String> input, RegExpFlags flags,
RegExpCompileData* result); RegExpCompileData* result);
template <class CharT>
static bool VerifyRegExpSyntax(Zone* zone, uintptr_t stack_limit,
const CharT* input, int input_length,
RegExpFlags flags, RegExpCompileData* result,
const DisallowGarbageCollection& no_gc);
// Used by the SpiderMonkey embedding of irregexp. // Used by the SpiderMonkey embedding of irregexp.
static bool VerifyRegExpSyntax(Isolate* isolate, Zone* zone, static bool VerifyRegExpSyntax(Isolate* isolate, Zone* zone,
Handle<String> input, RegExpFlags flags, Handle<String> input, RegExpFlags flags,
......
...@@ -107,6 +107,27 @@ bool RegExp::CanGenerateBytecode() { ...@@ -107,6 +107,27 @@ bool RegExp::CanGenerateBytecode() {
return FLAG_regexp_interpret_all || FLAG_regexp_tier_up; return FLAG_regexp_interpret_all || FLAG_regexp_tier_up;
} }
// static
// Runs the regexp parser over `input` purely to validate it. Returns true
// when the pattern parses; otherwise stores the textual description of the
// parse error in `*error_message_out` and returns false.
template <class CharT>
bool RegExp::VerifySyntax(Zone* zone, uintptr_t stack_limit, const CharT* input,
                          int input_length, RegExpFlags flags,
                          const char** error_message_out,
                          const DisallowGarbageCollection& no_gc) {
  RegExpCompileData parse_data;
  if (RegExpParser::VerifyRegExpSyntax(zone, stack_limit, input, input_length,
                                       flags, &parse_data, no_gc)) {
    return true;
  }
  // Only the failure path writes to the out-parameter.
  *error_message_out = RegExpErrorString(parse_data.error);
  return false;
}
// Explicit instantiations of the templated VerifySyntax for one-byte
// (uint8_t) and two-byte (base::uc16) pattern buffers.
template bool RegExp::VerifySyntax<uint8_t>(Zone*, uintptr_t, const uint8_t*,
int, RegExpFlags,
const char** error_message_out,
const DisallowGarbageCollection&);
template bool RegExp::VerifySyntax<base::uc16>(
Zone*, uintptr_t, const base::uc16*, int, RegExpFlags,
const char** error_message_out, const DisallowGarbageCollection&);
MaybeHandle<Object> RegExp::ThrowRegExpException(Isolate* isolate, MaybeHandle<Object> RegExp::ThrowRegExpException(Isolate* isolate,
Handle<JSRegExp> re, Handle<JSRegExp> re,
Handle<String> pattern, Handle<String> pattern,
......
...@@ -5,6 +5,7 @@ ...@@ -5,6 +5,7 @@
#ifndef V8_REGEXP_REGEXP_H_ #ifndef V8_REGEXP_REGEXP_H_
#define V8_REGEXP_REGEXP_H_ #define V8_REGEXP_REGEXP_H_
#include "src/common/assert-scope.h"
#include "src/handles/handles.h" #include "src/handles/handles.h"
#include "src/regexp/regexp-error.h" #include "src/regexp/regexp-error.h"
#include "src/regexp/regexp-flags.h" #include "src/regexp/regexp-flags.h"
...@@ -69,6 +70,14 @@ class RegExp final : public AllStatic { ...@@ -69,6 +70,14 @@ class RegExp final : public AllStatic {
// Whether the irregexp engine generates interpreter bytecode. // Whether the irregexp engine generates interpreter bytecode.
static bool CanGenerateBytecode(); static bool CanGenerateBytecode();
// Verify the given pattern, i.e. check that parsing succeeds. Returns true
// if the `input_length` characters at `input` parse as a regular expression
// under `flags`. If verification fails, `*error_message_out` is set to a
// string describing the error; on success it is not written.
template <class CharT>
static bool VerifySyntax(Zone* zone, uintptr_t stack_limit,
const CharT* input, int input_length,
RegExpFlags flags, const char** error_message_out,
const DisallowGarbageCollection& no_gc);
// Parses the RegExp pattern and prepares the JSRegExp object with // Parses the RegExp pattern and prepares the JSRegExp object with
// generic data and choice of implementation - as well as what // generic data and choice of implementation - as well as what
// the implementation wants to store in the data field. // the implementation wants to store in the data field.
......
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --validate-asm --lazy-inner-functions
// Function carrying the "use asm" directive; the flags line of this file
// enables --validate-asm and --lazy-inner-functions.
function outer() {
"use asm";
function inner() {
// Regexp literal whose group is opened by "(" but never closed.
// NOTE(review): presumably the malformed pattern inside a nested function is
// the point of this test — confirm the expected outcome with the test runner.
/f(/
}
}
// Only `outer` is invoked; `inner` is declared but never called.
outer();
...@@ -496,7 +496,7 @@ test(function() { ...@@ -496,7 +496,7 @@ test(function() {
// kMalformedRegExp // kMalformedRegExp
test(function() { test(function() {
/(/.test("a"); new Function('/(/.test("a");');
}, "Invalid regular expression: /(/: Unterminated group", SyntaxError); }, "Invalid regular expression: /(/: Unterminated group", SyntaxError);
// kParenthesisInArgString // kParenthesisInArgString
......
...@@ -280,14 +280,8 @@ re.compile(void 0); ...@@ -280,14 +280,8 @@ re.compile(void 0);
assertEquals('/(?:)/', re.toString()); assertEquals('/(?:)/', re.toString());
// Check for lazy RegExp literal creation // Check for early syntax errors.
function lazyLiteral(doit) { assertThrows("/foo(/gi");
if (doit) return "".replace(/foo(/gi, "");
return true;
}
assertTrue(lazyLiteral(false));
assertThrows("lazyLiteral(true)");
// Check $01 and $10 // Check $01 and $10
re = new RegExp("(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)"); re = new RegExp("(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)");
......
...@@ -5,7 +5,8 @@ ...@@ -5,7 +5,8 @@
Error.prepareStackTrace = function (a,b) { return b; }; Error.prepareStackTrace = function (a,b) { return b; };
try { try {
/(invalid regexp/; eval("/(invalid regexp/;");
assertUnreachable();
} catch (e) { } catch (e) {
assertEquals("[object global]", e.stack[0].getThis().toString()); assertEquals("[object global]", e.stack[0].getThis().toString());
} }
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment