Commit a56874d3, authored by Jakob Gruber, committed by V8 LUCI CQ

[regexp] Early errors 🤯

This CL implements early SyntaxErrors for regular expressions. Early
errors are thrown when a malformed pattern is parsed, rather than when
the code first runs.

We do this by having the JS parser call into the regexp parser when
a regexp pattern is found. Regexps are expected to be relatively
rare, small, and cheap to parse - that's why we currently accept that
the regexp parser does unnecessary work (e.g. creating the AST
structures).

If needed, we can optimize in the future. Ideas:

- Split up the regexp parser to avoid useless work for syntax validation.
- Preserve parser results to avoid reparsing later.

Bug: v8:896
Change-Id: I3d1ec18c980ba94439576ac3764138552418b85d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3106647
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: Leszek Swirski <leszeks@chromium.org>
Reviewed-by: Patrick Thier <pthier@chromium.org>
Cr-Commit-Position: refs/heads/main@{#76502}
parent 862391b9
......@@ -118,17 +118,17 @@ base::Optional<JSRegExp::Flags> JSRegExp::FlagsFromString(
// A longer flags string cannot be valid.
if (length > JSRegExp::kFlagCount) return {};
JSRegExp::Flags value;
RegExpFlags value;
FlatStringReader reader(isolate, String::Flatten(isolate, flags));
for (int i = 0; i < length; i++) {
base::Optional<JSRegExp::Flag> flag = JSRegExp::FlagFromChar(reader.Get(i));
base::Optional<RegExpFlag> flag = JSRegExp::FlagFromChar(reader.Get(i));
if (!flag.has_value()) return {};
if (value & flag.value()) return {}; // Duplicate.
value |= flag.value();
}
return value;
return JSRegExp::AsJSRegExpFlags(value);
}
// static
......
......@@ -48,11 +48,6 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> {
// Converts a single RegExpFlag into the equivalent Flag value by means of a
// plain static_cast.
static constexpr Flag AsJSRegExpFlag(RegExpFlag f) {
  const Flag converted = static_cast<Flag>(f);
  return converted;
}
static constexpr base::Optional<Flag> AsOptionalJSRegExpFlag(
base::Optional<RegExpFlag> f) {
return f.has_value() ? base::Optional<Flag>{AsJSRegExpFlag(f.value())}
: base::Optional<Flag>{};
}
// Converts a RegExpFlags set into a Flags set by round-tripping through its
// integer representation.
static constexpr Flags AsJSRegExpFlags(RegExpFlags f) {
  const int bits = static_cast<int>(f);
  return Flags{bits};
}
......@@ -60,10 +55,11 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> {
return RegExpFlags{static_cast<int>(f)};
}
static base::Optional<Flag> FlagFromChar(char c) {
base::Optional<Flag> f = AsOptionalJSRegExpFlag(TryRegExpFlagFromChar(c));
static base::Optional<RegExpFlag> FlagFromChar(char c) {
base::Optional<RegExpFlag> f = TryRegExpFlagFromChar(c);
if (!f.has_value()) return f;
if (f.value() == kLinear && !FLAG_enable_experimental_regexp_engine) {
if (f.value() == RegExpFlag::kLinear &&
!FLAG_enable_experimental_regexp_engine) {
return {};
}
return f;
......
......@@ -27,12 +27,15 @@
#include "src/parsing/parse-info.h"
#include "src/parsing/scanner.h"
#include "src/parsing/token.h"
#include "src/regexp/regexp.h"
#include "src/utils/pointer-with-payload.h"
#include "src/zone/zone-chunk-list.h"
namespace v8 {
namespace internal {
class PreParserIdentifier;
enum FunctionNameValidity {
kFunctionNameIsStrictReserved,
kSkipFunctionNameCheck,
......@@ -1074,22 +1077,24 @@ class ParserBase {
}
// Report syntax errors.
V8_NOINLINE void ReportMessage(MessageTemplate message) {
Scanner::Location source_location = scanner()->location();
impl()->ReportMessageAt(source_location, message,
static_cast<const char*>(nullptr));
template <typename... Ts>
V8_NOINLINE void ReportMessage(MessageTemplate message, const Ts&... args) {
ReportMessageAt(scanner()->location(), message, args...);
}
template <typename T>
V8_NOINLINE void ReportMessage(MessageTemplate message, T arg) {
Scanner::Location source_location = scanner()->location();
impl()->ReportMessageAt(source_location, message, arg);
template <typename... Ts>
V8_NOINLINE void ReportMessageAt(Scanner::Location source_location,
MessageTemplate message, const Ts&... args) {
impl()->pending_error_handler()->ReportMessageAt(
source_location.beg_pos, source_location.end_pos, message, args...);
scanner()->set_parser_error();
}
V8_NOINLINE void ReportMessageAt(Scanner::Location location,
MessageTemplate message) {
impl()->ReportMessageAt(location, message,
static_cast<const char*>(nullptr));
V8_NOINLINE void ReportMessageAt(Scanner::Location source_location,
MessageTemplate message,
const PreParserIdentifier& arg0) {
ReportMessageAt(source_location, message,
impl()->PreParserIdentifierToAstRawString(arg0));
}
V8_NOINLINE void ReportUnexpectedToken(Token::Value token);
......@@ -1140,6 +1145,11 @@ class ParserBase {
ExpressionT ParsePropertyOrPrivatePropertyName();
// Fetches the scanner's next symbol as an AstRawString, using this parser's
// AST value factory.
const AstRawString* GetNextSymbolForRegExpLiteral() const {
  const AstRawString* symbol = scanner()->NextSymbol(ast_value_factory());
  return symbol;
}
bool ValidateRegExpLiteral(const AstRawString* pattern, RegExpFlags flags,
const char** error_message);
ExpressionT ParseRegExpLiteral();
ExpressionT ParseBindingPattern();
......@@ -1745,6 +1755,25 @@ ParserBase<Impl>::ParsePropertyOrPrivatePropertyName() {
return key;
}
// Checks the syntax of a regexp literal's pattern by forwarding its raw
// characters to RegExp::VerifySyntax. The pattern is handed over either as
// one-byte or as two-byte data, depending on how the AstRawString stores it.
// Returns the result of RegExp::VerifySyntax; `error_message` is passed
// through to it unchanged.
template <typename Impl>
bool ParserBase<Impl>::ValidateRegExpLiteral(const AstRawString* pattern,
                                             RegExpFlags flags,
                                             const char** error_message) {
  // TODO(jgruber): If already validated in the preparser, skip validation in
  // the parser.
  DisallowGarbageCollection no_gc;
  const unsigned char* raw = pattern->raw_data();

  if (!pattern->is_one_byte()) {
    // Two-byte pattern: reinterpret the raw bytes as 16-bit code units.
    const uint16_t* two_byte = reinterpret_cast<const uint16_t*>(raw);
    return RegExp::VerifySyntax(zone(), stack_limit(), two_byte,
                                pattern->length(), flags, error_message,
                                no_gc);
  }

  const uint8_t* one_byte = static_cast<const uint8_t*>(raw);
  return RegExp::VerifySyntax(zone(), stack_limit(), one_byte,
                              pattern->length(), flags, error_message, no_gc);
}
template <typename Impl>
typename ParserBase<Impl>::ExpressionT ParserBase<Impl>::ParseRegExpLiteral() {
int pos = peek_position();
......@@ -1754,15 +1783,20 @@ typename ParserBase<Impl>::ExpressionT ParserBase<Impl>::ParseRegExpLiteral() {
return impl()->FailureExpression();
}
IdentifierT js_pattern = impl()->GetNextSymbol();
Maybe<int> flags = scanner()->ScanRegExpFlags();
if (flags.IsNothing()) {
const AstRawString* js_pattern = GetNextSymbolForRegExpLiteral();
base::Optional<RegExpFlags> flags = scanner()->ScanRegExpFlags();
if (!flags.has_value()) {
Next();
ReportMessage(MessageTemplate::kMalformedRegExpFlags);
return impl()->FailureExpression();
}
Next();
return factory()->NewRegExpLiteral(js_pattern, flags.FromJust(), pos);
const char* error_message;
if (!ValidateRegExpLiteral(js_pattern, flags.value(), &error_message)) {
ReportMessage(MessageTemplate::kMalformedRegExp, js_pattern, error_message);
return impl()->FailureExpression();
}
return factory()->NewRegExpLiteral(js_pattern, flags.value(), pos);
}
template <typename Impl>
......
......@@ -701,25 +701,10 @@ class V8_EXPORT_PRIVATE Parser : public NON_EXPORTED_BASE(ParserBase<Parser>) {
return NewThrowError(Runtime::kNewTypeError, message, arg, pos);
}
// Reporting errors.
void ReportMessageAt(Scanner::Location source_location,
MessageTemplate message, const char* arg = nullptr) {
pending_error_handler()->ReportMessageAt(
source_location.beg_pos, source_location.end_pos, message, arg);
scanner_.set_parser_error();
}
// Dummy implementation. The parser should never have a unidentifiable
// error.
V8_INLINE void ReportUnidentifiableError() { UNREACHABLE(); }
void ReportMessageAt(Scanner::Location source_location,
MessageTemplate message, const AstRawString* arg) {
pending_error_handler()->ReportMessageAt(
source_location.beg_pos, source_location.end_pos, message, arg);
scanner_.set_parser_error();
}
const AstRawString* GetRawNameFromIdentifier(const AstRawString* arg) {
return arg;
}
......
......@@ -19,28 +19,28 @@ namespace internal {
void PendingCompilationErrorHandler::MessageDetails::SetString(
Handle<String> string, Isolate* isolate) {
DCHECK_NE(type_, kMainThreadHandle);
type_ = kMainThreadHandle;
arg_handle_ = string;
DCHECK_NE(arg0_type_, kMainThreadHandle);
arg0_type_ = kMainThreadHandle;
arg0_handle_ = string;
}
void PendingCompilationErrorHandler::MessageDetails::SetString(
Handle<String> string, LocalIsolate* isolate) {
DCHECK_NE(type_, kMainThreadHandle);
type_ = kMainThreadHandle;
arg_handle_ = isolate->heap()->NewPersistentHandle(string);
DCHECK_NE(arg0_type_, kMainThreadHandle);
arg0_type_ = kMainThreadHandle;
arg0_handle_ = isolate->heap()->NewPersistentHandle(string);
}
template <typename IsolateT>
void PendingCompilationErrorHandler::MessageDetails::Prepare(
IsolateT* isolate) {
switch (type_) {
switch (arg0_type_) {
case kAstRawString:
return SetString(arg_->string(), isolate);
return SetString(arg0_->string(), isolate);
case kNone:
case kConstCharString:
// We can delay allocation until ArgumentString(isolate).
// We can delay allocation until Arg0String(isolate).
// TODO(leszeks): We don't actually have to transfer this string, since
// it's a root.
return;
......@@ -52,22 +52,31 @@ void PendingCompilationErrorHandler::MessageDetails::Prepare(
}
}
Handle<String> PendingCompilationErrorHandler::MessageDetails::ArgumentString(
Handle<String> PendingCompilationErrorHandler::MessageDetails::Arg0String(
Isolate* isolate) const {
switch (type_) {
switch (arg0_type_) {
case kMainThreadHandle:
return arg_handle_;
return arg0_handle_;
case kNone:
return isolate->factory()->undefined_string();
case kConstCharString:
return isolate->factory()
->NewStringFromUtf8(base::CStrVector(char_arg_), AllocationType::kOld)
->NewStringFromUtf8(base::CStrVector(char_arg0_),
AllocationType::kOld)
.ToHandleChecked();
case kAstRawString:
UNREACHABLE();
}
}
// Materializes the second message argument as a heap string.
// Returns a null handle when no second argument was recorded; otherwise
// allocates an old-space string from the UTF-8 C string stored in `arg1_`.
Handle<String> PendingCompilationErrorHandler::MessageDetails::Arg1String(
    Isolate* isolate) const {
  if (arg1_ != nullptr) {
    Factory* factory = isolate->factory();
    return factory
        ->NewStringFromUtf8(base::CStrVector(arg1_), AllocationType::kOld)
        .ToHandleChecked();
  }
  return Handle<String>::null();
}
MessageLocation PendingCompilationErrorHandler::MessageDetails::GetLocation(
Handle<Script> script) const {
return MessageLocation(script, start_position_, end_position_);
......@@ -93,6 +102,17 @@ void PendingCompilationErrorHandler::ReportMessageAt(int start_position,
error_details_ = MessageDetails(start_position, end_position, message, arg);
}
// Records an error with two message arguments (an AstRawString and a C
// string) for the given source range. Only the first reported error is kept:
// if an error is already pending, this call does nothing.
void PendingCompilationErrorHandler::ReportMessageAt(int start_position,
                                                     int end_position,
                                                     MessageTemplate message,
                                                     const AstRawString* arg0,
                                                     const char* arg1) {
  if (!has_pending_error_) {
    has_pending_error_ = true;
    error_details_ =
        MessageDetails(start_position, end_position, message, arg0, arg1);
  }
}
void PendingCompilationErrorHandler::ReportWarningAt(int start_position,
int end_position,
MessageTemplate message,
......@@ -119,7 +139,8 @@ void PendingCompilationErrorHandler::ReportWarnings(
for (const MessageDetails& warning : warning_messages_) {
MessageLocation location = warning.GetLocation(script);
Handle<String> argument = warning.ArgumentString(isolate);
Handle<String> argument = warning.Arg0String(isolate);
DCHECK(warning.Arg1String(isolate).is_null()); // Only used for errors.
Handle<JSMessageObject> message =
MessageHandler::MakeMessageObject(isolate, warning.message(), &location,
argument, Handle<FixedArray>::null());
......@@ -160,12 +181,13 @@ void PendingCompilationErrorHandler::ThrowPendingError(
if (!has_pending_error_) return;
MessageLocation location = error_details_.GetLocation(script);
Handle<String> argument = error_details_.ArgumentString(isolate);
Handle<String> arg0 = error_details_.Arg0String(isolate);
Handle<String> arg1 = error_details_.Arg1String(isolate);
isolate->debug()->OnCompileError(script);
Factory* factory = isolate->factory();
Handle<JSObject> error =
factory->NewSyntaxError(error_details_.message(), argument);
factory->NewSyntaxError(error_details_.message(), arg0, arg1);
isolate->ThrowAt(error, &location);
}
......@@ -173,7 +195,8 @@ Handle<String> PendingCompilationErrorHandler::FormatErrorMessageForTest(
Isolate* isolate) {
error_details_.Prepare(isolate);
return MessageFormatter::Format(isolate, error_details_.message(),
error_details_.ArgumentString(isolate));
error_details_.Arg0String(isolate),
error_details_.Arg1String(isolate));
}
} // namespace internal
......
......@@ -39,6 +39,10 @@ class PendingCompilationErrorHandler {
void ReportMessageAt(int start_position, int end_position,
MessageTemplate message, const AstRawString* arg);
void ReportMessageAt(int start_position, int end_position,
MessageTemplate message, const AstRawString* arg0,
const char* arg1);
void ReportWarningAt(int start_position, int end_position,
MessageTemplate message, const char* arg = nullptr);
......@@ -86,23 +90,39 @@ class PendingCompilationErrorHandler {
: start_position_(-1),
end_position_(-1),
message_(MessageTemplate::kNone),
type_(kNone) {}
arg1_(nullptr),
arg0_type_(kNone) {}
MessageDetails(int start_position, int end_position,
MessageTemplate message, const AstRawString* arg)
: start_position_(start_position),
end_position_(end_position),
message_(message),
arg_(arg),
type_(arg ? kAstRawString : kNone) {}
arg0_(arg),
arg1_(nullptr),
arg0_type_(arg ? kAstRawString : kNone) {}
MessageDetails(int start_position, int end_position,
MessageTemplate message, const AstRawString* arg0,
const char* arg1)
: start_position_(start_position),
end_position_(end_position),
message_(message),
arg0_(arg0),
arg1_(arg1),
arg0_type_(kAstRawString) {
DCHECK_NOT_NULL(arg0);
DCHECK_NOT_NULL(arg1);
}
MessageDetails(int start_position, int end_position,
MessageTemplate message, const char* char_arg)
: start_position_(start_position),
end_position_(end_position),
message_(message),
char_arg_(char_arg),
type_(char_arg_ ? kConstCharString : kNone) {}
char_arg0_(char_arg),
arg1_(nullptr),
arg0_type_(char_arg0_ ? kConstCharString : kNone) {}
Handle<String> ArgumentString(Isolate* isolate) const;
Handle<String> Arg0String(Isolate* isolate) const;
Handle<String> Arg1String(Isolate* isolate) const;
MessageLocation GetLocation(Handle<Script> script) const;
MessageTemplate message() const { return message_; }
......@@ -119,11 +139,14 @@ class PendingCompilationErrorHandler {
int end_position_;
MessageTemplate message_;
union {
const AstRawString* arg_;
const char* char_arg_;
Handle<String> arg_handle_;
const AstRawString* arg0_;
const char* char_arg0_;
Handle<String> arg0_handle_;
};
Type type_;
// TODO(jgruber): If we ever extend functionality of arg1, refactor it to
// be more consistent with arg0.
const char* arg1_;
Type arg0_type_;
};
void ThrowPendingError(Isolate* isolate, Handle<Script> script) const;
......
......@@ -537,7 +537,7 @@ class PreParserFactory {
PreParserExpression NewTheHoleLiteral() {
return PreParserExpression::Default();
}
PreParserExpression NewRegExpLiteral(const PreParserIdentifier& js_pattern,
PreParserExpression NewRegExpLiteral(const AstRawString* js_pattern,
int js_flags, int pos) {
return PreParserExpression::Default();
}
......@@ -1455,12 +1455,9 @@ class PreParser : public ParserBase<PreParser> {
return PreParserExpression::Default();
}
// Reporting errors.
void ReportMessageAt(Scanner::Location source_location,
MessageTemplate message, const char* arg = nullptr) {
pending_error_handler()->ReportMessageAt(
source_location.beg_pos, source_location.end_pos, message, arg);
scanner()->set_parser_error();
// Exposes the AstRawString stored in a PreParserIdentifier's `string_` field.
V8_INLINE const AstRawString* PreParserIdentifierToAstRawString(
    const PreParserIdentifier& x) {
  const AstRawString* raw_string = x.string_;
  return raw_string;
}
V8_INLINE void ReportUnidentifiableError() {
......@@ -1468,19 +1465,6 @@ class PreParser : public ParserBase<PreParser> {
scanner()->set_parser_error();
}
V8_INLINE void ReportMessageAt(Scanner::Location source_location,
MessageTemplate message,
const PreParserIdentifier& arg) {
ReportMessageAt(source_location, message, arg.string_);
}
void ReportMessageAt(Scanner::Location source_location,
MessageTemplate message, const AstRawString* arg) {
pending_error_handler()->ReportMessageAt(
source_location.beg_pos, source_location.end_pos, message, arg);
scanner()->set_parser_error();
}
const AstRawString* GetRawNameFromIdentifier(const PreParserIdentifier& arg) {
return arg.string_;
}
......
......@@ -978,9 +978,6 @@ bool Scanner::ScanRegExpPattern() {
// worrying whether the following characters are part of the escape
// or not, since any '/', '\\' or '[' is guaranteed to not be part
// of the escape sequence.
// TODO(896): At some point, parse RegExps more thoroughly to capture
// octal esacpes in strict mode.
} else { // Unescaped character.
if (c0_ == '[') in_character_class = true;
if (c0_ == ']') in_character_class = false;
......@@ -993,22 +990,21 @@ bool Scanner::ScanRegExpPattern() {
return true;
}
Maybe<int> Scanner::ScanRegExpFlags() {
base::Optional<RegExpFlags> Scanner::ScanRegExpFlags() {
DCHECK_EQ(Token::REGEXP_LITERAL, next().token);
// Scan regular expression flags.
JSRegExp::Flags flags;
RegExpFlags flags;
while (IsIdentifierPart(c0_)) {
base::Optional<JSRegExp::Flags> maybe_flag = JSRegExp::FlagFromChar(c0_);
if (!maybe_flag.has_value()) return Nothing<int>();
JSRegExp::Flags flag = *maybe_flag;
if (flags & flag) return Nothing<int>();
base::Optional<RegExpFlag> maybe_flag = JSRegExp::FlagFromChar(c0_);
if (!maybe_flag.has_value()) return {};
RegExpFlag flag = maybe_flag.value();
if (flags & flag) return {};
Advance();
flags |= flag;
}
next().location.end_pos = source_pos();
return Just<int>(flags);
return flags;
}
const AstRawString* Scanner::CurrentSymbol(
......
......@@ -17,6 +17,7 @@
#include "src/parsing/literal-buffer.h"
#include "src/parsing/parse-info.h"
#include "src/parsing/token.h"
#include "src/regexp/regexp-flags.h"
#include "src/strings/char-predicates.h"
#include "src/strings/unicode.h"
#include "src/utils/allocation.h"
......@@ -398,7 +399,7 @@ class V8_EXPORT_PRIVATE Scanner {
// Returns true if a pattern is scanned.
bool ScanRegExpPattern();
// Scans the input as regular expression flags. Returns the flags on success.
Maybe<int> ScanRegExpFlags();
base::Optional<RegExpFlags> ScanRegExpFlags();
// Scans the input as a template literal
Token::Value ScanTemplateContinuation() {
......
......@@ -242,7 +242,7 @@ template <class CharT>
class RegExpParserImpl final {
private:
RegExpParserImpl(const CharT* input, int input_length, RegExpFlags flags,
Isolate* isolate, Zone* zone,
uintptr_t stack_limit, Zone* zone,
const DisallowGarbageCollection& no_gc);
bool Parse(RegExpCompileData* result);
......@@ -337,7 +337,6 @@ class RegExpParserImpl final {
// ScanForCaptures to look ahead at the remaining pattern.
bool HasNamedCaptures();
Isolate* isolate() { return isolate_; }
Zone* zone() const { return zone_; }
base::uc32 current() { return current_; }
......@@ -362,7 +361,6 @@ class RegExpParserImpl final {
};
const DisallowGarbageCollection no_gc_;
Isolate* const isolate_;
Zone* const zone_;
RegExpError error_ = RegExpError::kNone;
int error_pos_ = 0;
......@@ -382,19 +380,22 @@ class RegExpParserImpl final {
bool is_scanned_for_captures_;
bool has_named_captures_; // Only valid after we have scanned for captures.
bool failed_;
const uintptr_t stack_limit_;
friend bool RegExpParser::ParseRegExpFromHeapString(Isolate*, Zone*,
Handle<String>,
RegExpFlags,
RegExpCompileData*);
friend bool RegExpParser::VerifyRegExpSyntax<CharT>(
Zone*, uintptr_t, const CharT*, int, RegExpFlags, RegExpCompileData*,
const DisallowGarbageCollection&);
};
template <class CharT>
RegExpParserImpl<CharT>::RegExpParserImpl(
const CharT* input, int input_length, RegExpFlags flags, Isolate* isolate,
Zone* zone, const DisallowGarbageCollection& no_gc)
: isolate_(isolate),
zone_(zone),
const CharT* input, int input_length, RegExpFlags flags,
uintptr_t stack_limit, Zone* zone, const DisallowGarbageCollection& no_gc)
: zone_(zone),
captures_(nullptr),
named_captures_(nullptr),
named_back_references_(nullptr),
......@@ -410,7 +411,8 @@ RegExpParserImpl<CharT>::RegExpParserImpl(
contains_anchor_(false),
is_scanned_for_captures_(false),
has_named_captures_(false),
failed_(false) {
failed_(false),
stack_limit_(stack_limit) {
Advance();
}
......@@ -457,8 +459,7 @@ base::uc32 RegExpParserImpl<CharT>::Next() {
template <class CharT>
void RegExpParserImpl<CharT>::Advance() {
if (has_next()) {
StackLimitCheck check(isolate());
if (check.HasOverflowed()) {
if (GetCurrentStackPosition() < stack_limit_) {
if (FLAG_correctness_fuzzer_suppressions) {
FATAL("Aborting on stack overflow");
}
......@@ -2400,20 +2401,40 @@ bool RegExpParser::ParseRegExpFromHeapString(Isolate* isolate, Zone* zone,
RegExpFlags flags,
RegExpCompileData* result) {
DisallowGarbageCollection no_gc;
uintptr_t stack_limit = isolate->stack_guard()->real_climit();
String::FlatContent content = input->GetFlatContent(no_gc);
if (content.IsOneByte()) {
base::Vector<const uint8_t> v = content.ToOneByteVector();
return RegExpParserImpl<uint8_t>{v.begin(), v.length(), flags,
isolate, zone, no_gc}
return RegExpParserImpl<uint8_t>{v.begin(), v.length(), flags,
stack_limit, zone, no_gc}
.Parse(result);
} else {
base::Vector<const base::uc16> v = content.ToUC16Vector();
return RegExpParserImpl<base::uc16>{v.begin(), v.length(), flags,
isolate, zone, no_gc}
return RegExpParserImpl<base::uc16>{v.begin(), v.length(), flags,
stack_limit, zone, no_gc}
.Parse(result);
}
}
// static
// Parses `input` (`input_length` characters of type CharT) with a freshly
// constructed RegExpParserImpl and returns the outcome of its Parse() call.
// Parse() receives `result`.
template <class CharT>
bool RegExpParser::VerifyRegExpSyntax(Zone* zone, uintptr_t stack_limit,
                                      const CharT* input, int input_length,
                                      RegExpFlags flags,
                                      RegExpCompileData* result,
                                      const DisallowGarbageCollection& no_gc) {
  RegExpParserImpl<CharT> parser{input,       input_length, flags,
                                 stack_limit, zone,         no_gc};
  return parser.Parse(result);
}
template bool RegExpParser::VerifyRegExpSyntax<uint8_t>(
Zone*, uintptr_t, const uint8_t*, int, RegExpFlags, RegExpCompileData*,
const DisallowGarbageCollection&);
template bool RegExpParser::VerifyRegExpSyntax<base::uc16>(
Zone*, uintptr_t, const base::uc16*, int, RegExpFlags, RegExpCompileData*,
const DisallowGarbageCollection&);
// static
bool RegExpParser::VerifyRegExpSyntax(Isolate* isolate, Zone* zone,
Handle<String> input, RegExpFlags flags,
......
......@@ -23,6 +23,12 @@ class V8_EXPORT_PRIVATE RegExpParser : public AllStatic {
Handle<String> input, RegExpFlags flags,
RegExpCompileData* result);
template <class CharT>
static bool VerifyRegExpSyntax(Zone* zone, uintptr_t stack_limit,
const CharT* input, int input_length,
RegExpFlags flags, RegExpCompileData* result,
const DisallowGarbageCollection& no_gc);
// Used by the SpiderMonkey embedding of irregexp.
static bool VerifyRegExpSyntax(Isolate* isolate, Zone* zone,
Handle<String> input, RegExpFlags flags,
......
......@@ -107,6 +107,27 @@ bool RegExp::CanGenerateBytecode() {
return FLAG_regexp_interpret_all || FLAG_regexp_tier_up;
}
// static
// Runs RegExpParser::VerifyRegExpSyntax on the given pattern characters.
// Returns true when the parser accepts the pattern. Otherwise returns false
// and stores the string for the recorded parse error in `*error_message_out`;
// on success `*error_message_out` is left untouched.
template <class CharT>
bool RegExp::VerifySyntax(Zone* zone, uintptr_t stack_limit, const CharT* input,
                          int input_length, RegExpFlags flags,
                          const char** error_message_out,
                          const DisallowGarbageCollection& no_gc) {
  RegExpCompileData data;
  if (RegExpParser::VerifyRegExpSyntax(zone, stack_limit, input, input_length,
                                       flags, &data, no_gc)) {
    return true;
  }
  *error_message_out = RegExpErrorString(data.error);
  return false;
}
template bool RegExp::VerifySyntax<uint8_t>(Zone*, uintptr_t, const uint8_t*,
int, RegExpFlags,
const char** error_message_out,
const DisallowGarbageCollection&);
template bool RegExp::VerifySyntax<base::uc16>(
Zone*, uintptr_t, const base::uc16*, int, RegExpFlags,
const char** error_message_out, const DisallowGarbageCollection&);
MaybeHandle<Object> RegExp::ThrowRegExpException(Isolate* isolate,
Handle<JSRegExp> re,
Handle<String> pattern,
......
......@@ -5,6 +5,7 @@
#ifndef V8_REGEXP_REGEXP_H_
#define V8_REGEXP_REGEXP_H_
#include "src/common/assert-scope.h"
#include "src/handles/handles.h"
#include "src/regexp/regexp-error.h"
#include "src/regexp/regexp-flags.h"
......@@ -69,6 +70,14 @@ class RegExp final : public AllStatic {
// Whether the irregexp engine generates interpreter bytecode.
static bool CanGenerateBytecode();
// Verify the given pattern, i.e. check that parsing succeeds. If
// verification fails, `error_message_out` is set.
template <class CharT>
static bool VerifySyntax(Zone* zone, uintptr_t stack_limit,
const CharT* input, int input_length,
RegExpFlags flags, const char** error_message_out,
const DisallowGarbageCollection& no_gc);
// Parses the RegExp pattern and prepares the JSRegExp object with
// generic data and choice of implementation - as well as what
// the implementation wants to store in the data field.
......
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --validate-asm --lazy-inner-functions
// `outer` carries the "use asm" directive and declares a nested function
// `inner` that is never called.
function outer() {
"use asm";
function inner() {
// The only statement is a regexp literal whose group is left unclosed.
/f(/
}
}
outer();
......@@ -496,7 +496,7 @@ test(function() {
// kMalformedRegExp
test(function() {
/(/.test("a");
new Function('/(/.test("a");');
}, "Invalid regular expression: /(/: Unterminated group", SyntaxError);
// kParenthesisInArgString
......
......@@ -280,14 +280,8 @@ re.compile(void 0);
assertEquals('/(?:)/', re.toString());
// Check for lazy RegExp literal creation
function lazyLiteral(doit) {
if (doit) return "".replace(/foo(/gi, "");
return true;
}
assertTrue(lazyLiteral(false));
assertThrows("lazyLiteral(true)");
// Check for early syntax errors.
assertThrows("/foo(/gi");
// Check $01 and $10
re = new RegExp("(.)(.)(.)(.)(.)(.)(.)(.)(.)(.)");
......
......@@ -5,7 +5,8 @@
Error.prepareStackTrace = function (a,b) { return b; };
try {
/(invalid regexp/;
eval("/(invalid regexp/;");
assertUnreachable();
} catch (e) {
assertEquals("[object global]", e.stack[0].getThis().toString());
}
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment