Commit 2193f691 authored by Leszek Swirski's avatar Leszek Swirski Committed by Commit Bot

Revert "[regexp] Rewrite error handling"

This reverts commit e80ca24c.

Reason for revert: Causes failures in the fast/regex/non-pattern-characters.html Blink web test (https://ci.chromium.org/p/v8/builders/ci/V8%20Blink%20Linux/3679)

Original change's description:
> [regexp] Rewrite error handling
> 
> This patch modifies irregexp's error handling. Instead of representing
> errors as C strings, they are represented as an enumeration value
> (RegExpError), and only converted to strings when throwing the error
> object in regexp.cc. This makes it significantly easier to integrate
> into SpiderMonkey. A few notes:
> 
> 1. Depending on whether the stack overflows during parsing or
>    analysis, the stack overflow message can vary ("Stack overflow" or
>    "Maximum call stack size exceeded"). I kept that behaviour in this
>    patch, under the assumption that stack overflow messages are
>    (sadly) the sorts of things that real world code ends up depending
>    on.
> 
> 2. Depending on the point in code where the error was identified,
>    invalid unicode escapes could be reported as "Invalid Unicode
>    escape", "Invalid unicode escape", or "Invalid Unicode escape
>    sequence". I fervently hope that nobody depends on the specific
>    wording of a syntax error, so I standardized on the first one. (It
>    was both the most common, and the most consistent with other
>    "Invalid X escape" messages.)
> 
> 3. In addition to changing the representation, this patch also adds an
>    error_pos field to RegExpParser and RegExpCompileData, which stores
>    the position at which an error occurred. This is used by
>    SpiderMonkey to provide more helpful messages about where a syntax
>    error occurred in large regular expressions.
> 
> 4. This model is closer to V8's existing MessageTemplate
>    infrastructure. I considered trying to integrate it more closely
>    with MessageTemplate, but since one of our stated goals for this
>    project was to make it easier to use irregexp outside of V8, I
>    decided to hold off.
> 
> R=​jgruber@chromium.org
> 
> Bug: v8:10303
> Change-Id: I62605fd2def2fc539f38a7e0eefa04d36e14bbde
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2091863
> Commit-Queue: Jakob Gruber <jgruber@chromium.org>
> Reviewed-by: Jakob Gruber <jgruber@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#66784}

TBR=jgruber@chromium.org,iireland@mozilla.com

Change-Id: I9247635f3c5b17c943b9c4abaf82ebe7b2de165e
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: v8:10303
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2108550Reviewed-by: 's avatarLeszek Swirski <leszeks@chromium.org>
Commit-Queue: Leszek Swirski <leszeks@chromium.org>
Cr-Commit-Position: refs/heads/master@{#66786}
parent 38c3bd48
......@@ -2833,8 +2833,6 @@ v8_source_set("v8_base_without_compiler") {
"src/regexp/regexp-compiler.h",
"src/regexp/regexp-dotprinter.cc",
"src/regexp/regexp-dotprinter.h",
"src/regexp/regexp-error.cc",
"src/regexp/regexp-error.h",
"src/regexp/regexp-interpreter.cc",
"src/regexp/regexp-interpreter.h",
"src/regexp/regexp-macro-assembler-arch.h",
......
......@@ -3594,9 +3594,7 @@ template <typename... Propagators>
class Analysis : public NodeVisitor {
public:
Analysis(Isolate* isolate, bool is_one_byte)
: isolate_(isolate),
is_one_byte_(is_one_byte),
error_(RegExpError::kNone) {}
: isolate_(isolate), is_one_byte_(is_one_byte), error_message_(nullptr) {}
void EnsureAnalyzed(RegExpNode* that) {
StackLimitCheck check(isolate());
......@@ -3604,7 +3602,7 @@ class Analysis : public NodeVisitor {
if (FLAG_correctness_fuzzer_suppressions) {
FATAL("Analysis: Aborting on stack overflow");
}
fail(RegExpError::kAnalysisStackOverflow);
fail("Stack overflow");
return;
}
if (that->info()->been_analyzed || that->info()->being_analyzed) return;
......@@ -3614,12 +3612,12 @@ class Analysis : public NodeVisitor {
that->info()->been_analyzed = true;
}
bool has_failed() { return error_ != RegExpError::kNone; }
RegExpError error() {
DCHECK(error_ != RegExpError::kNone);
return error_;
bool has_failed() { return error_message_ != nullptr; }
const char* error_message() {
DCHECK(error_message_ != nullptr);
return error_message_;
}
void fail(RegExpError error) { error_ = error; }
void fail(const char* error_message) { error_message_ = error_message; }
Isolate* isolate() const { return isolate_; }
......@@ -3704,19 +3702,19 @@ class Analysis : public NodeVisitor {
private:
Isolate* isolate_;
bool is_one_byte_;
RegExpError error_;
const char* error_message_;
DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis);
};
RegExpError AnalyzeRegExp(Isolate* isolate, bool is_one_byte,
const char* AnalyzeRegExp(Isolate* isolate, bool is_one_byte,
RegExpNode* node) {
Analysis<AssertionPropagator, EatsAtLeastPropagator> analysis(isolate,
is_one_byte);
DCHECK_EQ(node->info()->been_analyzed, false);
analysis.EnsureAnalyzed(node);
DCHECK_IMPLIES(analysis.has_failed(), analysis.error() != RegExpError::kNone);
return analysis.has_failed() ? analysis.error() : RegExpError::kNone;
DCHECK_IMPLIES(analysis.has_failed(), analysis.error_message() != nullptr);
return analysis.has_failed() ? analysis.error_message() : nullptr;
}
void BackReferenceNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
......
......@@ -423,7 +423,10 @@ struct PreloadState {
// Analysis performs assertion propagation and computes eats_at_least_ values.
// See the comments on AssertionPropagator and EatsAtLeastPropagator for more
// details.
RegExpError AnalyzeRegExp(Isolate* isolate, bool is_one_byte, RegExpNode* node);
//
// This method returns nullptr on success or a null-terminated failure message
// on failure.
const char* AnalyzeRegExp(Isolate* isolate, bool is_one_byte, RegExpNode* node);
class FrequencyCollator {
public:
......@@ -500,17 +503,18 @@ class RegExpCompiler {
}
struct CompilationResult final {
explicit CompilationResult(RegExpError err) : error(err) {}
explicit CompilationResult(const char* error_message)
: error_message(error_message) {}
CompilationResult(Object code, int registers)
: code(code), num_registers(registers) {}
static CompilationResult RegExpTooBig() {
return CompilationResult(RegExpError::kTooLarge);
return CompilationResult("RegExp too big");
}
bool Succeeded() const { return error == RegExpError::kNone; }
bool Succeeded() const { return error_message == nullptr; }
const RegExpError error = RegExpError::kNone;
const char* const error_message = nullptr;
Object code;
int num_registers = 0;
};
......
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/regexp/regexp-error.h"
namespace v8 {
namespace internal {
const char* kRegExpErrorStrings[] = {
#define TEMPLATE(NAME, STRING) STRING,
REGEXP_ERROR_MESSAGES(TEMPLATE)
#undef TEMPLATE
};
const char* RegExpErrorString(RegExpError error) {
DCHECK_LT(error, RegExpError::NumErrors);
return kRegExpErrorStrings[static_cast<int>(error)];
}
} // namespace internal
} // namespace v8
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_REGEXP_REGEXP_ERROR_H_
#define V8_REGEXP_REGEXP_ERROR_H_
#include "src/base/logging.h"
#include "src/base/macros.h"
namespace v8 {
namespace internal {
#define REGEXP_ERROR_MESSAGES(T) \
T(None, "") \
T(StackOverflow, "Maximum call stack size exceeded") \
T(AnalysisStackOverflow, "Stack overflow") \
T(TooLarge, "Regular expression too large") \
T(UnterminatedGroup, "Unterminated group") \
T(UnmatchedParen, "Unmatched ')'") \
T(EscapeAtEndOfPattern, "\\ at end of pattern") \
T(InvalidPropertyName, "Invalid property name") \
T(InvalidEscape, "Invalid escape") \
T(InvalidDecimalEscape, "Invalid decimal escape") \
T(InvalidUnicodeEscape, "Invalid Unicode escape") \
T(NothingToRepeat, "Nothing to repeat") \
T(LoneQuantifierBrackets, "Lone quantifier brackets") \
T(RangeOutOfOrder, "Numbers out of order in {} quantifier") \
T(IncompleteQuantifier, "Incomplete quantifier") \
T(InvalidQuantifier, "Invalid quantifier") \
T(InvalidGroup, "Invalid group") \
T(MultipleFlagDashes, "Multiple dashes in flag group") \
T(RepeatedFlag, "Repeated flag in flag group") \
T(InvalidFlagGroup, "Invalid flag group") \
T(TooManyCaptures, "Too many captures") \
T(InvalidCaptureGroupName, "Invalid capture group name") \
T(DuplicateCaptureGroupName, "Duplicate capture group name") \
T(InvalidNamedReference, "Invalid named reference") \
T(InvalidNamedCaptureReference, "Invalid named capture referenced") \
T(InvalidClassEscape, "Invalid class escape") \
T(InvalidClassPropertyName, "Invalid property name in character class") \
T(InvalidCharacterClass, "Invalid character class") \
T(UnterminatedCharacterClass, "Unterminated character class") \
T(OutOfOrderCharacterClass, "Range out of order in character class")
enum class RegExpError : uint32_t {
#define TEMPLATE(NAME, STRING) k##NAME,
REGEXP_ERROR_MESSAGES(TEMPLATE)
#undef TEMPLATE
NumErrors
};
V8_EXPORT_PRIVATE const char* RegExpErrorString(RegExpError error);
} // namespace internal
} // namespace v8
#endif // V8_REGEXP_REGEXP_ERROR_H_
......@@ -24,10 +24,11 @@
namespace v8 {
namespace internal {
RegExpParser::RegExpParser(FlatStringReader* in, JSRegExp::Flags flags,
Isolate* isolate, Zone* zone)
RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error,
JSRegExp::Flags flags, Isolate* isolate, Zone* zone)
: isolate_(isolate),
zone_(zone),
error_(error),
captures_(nullptr),
named_captures_(nullptr),
named_back_references_(nullptr),
......@@ -80,12 +81,13 @@ void RegExpParser::Advance() {
if (FLAG_correctness_fuzzer_suppressions) {
FATAL("Aborting on stack overflow");
}
ReportError(RegExpError::kStackOverflow);
ReportError(CStrVector(
MessageFormatter::TemplateString(MessageTemplate::kStackOverflow)));
} else if (zone()->excess_allocation()) {
if (FLAG_correctness_fuzzer_suppressions) {
FATAL("Aborting on excess zone allocation");
}
ReportError(RegExpError::kTooLarge);
ReportError(CStrVector("Regular expression too large"));
} else {
current_ = ReadNext<true>();
}
......@@ -137,12 +139,15 @@ bool RegExpParser::IsSyntaxCharacterOrSlash(uc32 c) {
return false;
}
RegExpTree* RegExpParser::ReportError(RegExpError error) {
RegExpTree* RegExpParser::ReportError(Vector<const char> message) {
if (failed_) return nullptr; // Do not overwrite any existing error.
failed_ = true;
error_ = error;
error_pos_ = position();
// Zip to the end to make sure no more input is read.
*error_ = isolate()
->factory()
->NewStringFromOneByte(Vector<const uint8_t>::cast(message))
.ToHandleChecked();
// Zip to the end to make sure the no more input is read.
current_ = kEndMarker;
next_pos_ = in()->length();
return nullptr;
......@@ -189,14 +194,14 @@ RegExpTree* RegExpParser::ParseDisjunction() {
case kEndMarker:
if (state->IsSubexpression()) {
// Inside a parenthesized group when hitting end of input.
return ReportError(RegExpError::kUnterminatedGroup);
return ReportError(CStrVector("Unterminated group"));
}
DCHECK_EQ(INITIAL, state->group_type());
// Parsing completed successfully.
return builder->ToRegExp();
case ')': {
if (!state->IsSubexpression()) {
return ReportError(RegExpError::kUnmatchedParen);
return ReportError(CStrVector("Unmatched ')'"));
}
DCHECK_NE(INITIAL, state->group_type());
......@@ -247,7 +252,7 @@ RegExpTree* RegExpParser::ParseDisjunction() {
case '*':
case '+':
case '?':
return ReportError(RegExpError::kNothingToRepeat);
return ReportError(CStrVector("Nothing to repeat"));
case '^': {
Advance();
if (builder->multiline()) {
......@@ -302,7 +307,7 @@ RegExpTree* RegExpParser::ParseDisjunction() {
case '\\':
switch (Next()) {
case kEndMarker:
return ReportError(RegExpError::kEscapeAtEndOfPattern);
return ReportError(CStrVector("\\ at end of pattern"));
case 'b':
Advance(2);
builder->AddAssertion(new (zone()) RegExpAssertion(
......@@ -359,7 +364,7 @@ RegExpTree* RegExpParser::ParseDisjunction() {
}
}
}
return ReportError(RegExpError::kInvalidPropertyName);
return ReportError(CStrVector("Invalid property name"));
} else {
builder->AddCharacter(p);
}
......@@ -395,7 +400,7 @@ RegExpTree* RegExpParser::ParseDisjunction() {
// With /u, no identity escapes except for syntax characters
// are allowed. Otherwise, all identity escapes are allowed.
if (unicode()) {
return ReportError(RegExpError::kInvalidEscape);
return ReportError(CStrVector("Invalid escape"));
}
uc32 first_digit = Next();
if (first_digit == '8' || first_digit == '9') {
......@@ -409,7 +414,7 @@ RegExpTree* RegExpParser::ParseDisjunction() {
Advance();
if (unicode() && Next() >= '0' && Next() <= '9') {
// With /u, decimal escape with leading 0 are not parsed as octal.
return ReportError(RegExpError::kInvalidDecimalEscape);
return ReportError(CStrVector("Invalid decimal escape"));
}
uc32 octal = ParseOctalLiteral();
builder->AddCharacter(octal);
......@@ -450,7 +455,7 @@ RegExpTree* RegExpParser::ParseDisjunction() {
// ES#prod-annexB-ExtendedPatternCharacter
if (unicode()) {
// With /u, invalid escapes are not treated as identity escapes.
return ReportError(RegExpError::kInvalidUnicodeEscape);
return ReportError(CStrVector("Invalid unicode escape"));
}
builder->AddCharacter('\\');
} else {
......@@ -468,7 +473,7 @@ RegExpTree* RegExpParser::ParseDisjunction() {
builder->AddCharacter('x');
} else {
// With /u, invalid escapes are not treated as identity escapes.
return ReportError(RegExpError::kInvalidEscape);
return ReportError(CStrVector("Invalid escape"));
}
break;
}
......@@ -481,7 +486,7 @@ RegExpTree* RegExpParser::ParseDisjunction() {
builder->AddCharacter('u');
} else {
// With /u, invalid escapes are not treated as identity escapes.
return ReportError(RegExpError::kInvalidUnicodeEscape);
return ReportError(CStrVector("Invalid Unicode escape"));
}
break;
}
......@@ -505,7 +510,7 @@ RegExpTree* RegExpParser::ParseDisjunction() {
builder->AddCharacter(current());
Advance();
} else {
return ReportError(RegExpError::kInvalidEscape);
return ReportError(CStrVector("Invalid escape"));
}
break;
}
......@@ -513,13 +518,13 @@ RegExpTree* RegExpParser::ParseDisjunction() {
case '{': {
int dummy;
bool parsed = ParseIntervalQuantifier(&dummy, &dummy CHECK_FAILED);
if (parsed) return ReportError(RegExpError::kNothingToRepeat);
if (parsed) return ReportError(CStrVector("Nothing to repeat"));
V8_FALLTHROUGH;
}
case '}':
case ']':
if (unicode()) {
return ReportError(RegExpError::kLoneQuantifierBrackets);
return ReportError(CStrVector("Lone quantifier brackets"));
}
V8_FALLTHROUGH;
default:
......@@ -554,12 +559,13 @@ RegExpTree* RegExpParser::ParseDisjunction() {
case '{':
if (ParseIntervalQuantifier(&min, &max)) {
if (max < min) {
return ReportError(RegExpError::kRangeOutOfOrder);
return ReportError(
CStrVector("numbers out of order in {} quantifier"));
}
break;
} else if (unicode()) {
// With /u, incomplete quantifiers are not allowed.
return ReportError(RegExpError::kIncompleteQuantifier);
return ReportError(CStrVector("Incomplete quantifier"));
}
continue;
default:
......@@ -575,7 +581,7 @@ RegExpTree* RegExpParser::ParseDisjunction() {
Advance();
}
if (!builder->AddQuantifierToAtom(min, max, quantifier_type)) {
return ReportError(RegExpError::kInvalidQuantifier);
return ReportError(CStrVector("Invalid quantifier"));
}
}
}
......@@ -610,7 +616,7 @@ RegExpParser::RegExpParserState* RegExpParser::ParseOpenParenthesis(
case 's':
case 'm': {
if (!FLAG_regexp_mode_modifiers) {
ReportError(RegExpError::kInvalidGroup);
ReportError(CStrVector("Invalid group"));
return nullptr;
}
Advance();
......@@ -619,7 +625,7 @@ RegExpParser::RegExpParserState* RegExpParser::ParseOpenParenthesis(
switch (current()) {
case '-':
if (!flags_sense) {
ReportError(RegExpError::kMultipleFlagDashes);
ReportError(CStrVector("Multiple dashes in flag group"));
return nullptr;
}
flags_sense = false;
......@@ -633,7 +639,7 @@ RegExpParser::RegExpParserState* RegExpParser::ParseOpenParenthesis(
if (current() == 'm') bit = JSRegExp::kMultiline;
if (current() == 's') bit = JSRegExp::kDotAll;
if (((switch_on | switch_off) & bit) != 0) {
ReportError(RegExpError::kRepeatedFlag);
ReportError(CStrVector("Repeated flag in flag group"));
return nullptr;
}
if (flags_sense) {
......@@ -661,7 +667,7 @@ RegExpParser::RegExpParserState* RegExpParser::ParseOpenParenthesis(
subexpr_type = GROUPING; // Will break us out of the outer loop.
continue;
default:
ReportError(RegExpError::kInvalidFlagGroup);
ReportError(CStrVector("Invalid flag group"));
return nullptr;
}
}
......@@ -685,13 +691,13 @@ RegExpParser::RegExpParserState* RegExpParser::ParseOpenParenthesis(
Advance();
break;
default:
ReportError(RegExpError::kInvalidGroup);
ReportError(CStrVector("Invalid group"));
return nullptr;
}
}
if (subexpr_type == CAPTURE) {
if (captures_started_ >= JSRegExp::kMaxCaptures) {
ReportError(RegExpError::kTooManyCaptures);
ReportError(CStrVector("Too many captures"));
return nullptr;
}
captures_started_++;
......@@ -840,20 +846,20 @@ const ZoneVector<uc16>* RegExpParser::ParseCaptureGroupName() {
if (c == '\\' && current() == 'u') {
Advance();
if (!ParseUnicodeEscape(&c)) {
ReportError(RegExpError::kInvalidUnicodeEscape);
ReportError(CStrVector("Invalid Unicode escape sequence"));
return nullptr;
}
}
// The backslash char is misclassified as both ID_Start and ID_Continue.
if (c == '\\') {
ReportError(RegExpError::kInvalidCaptureGroupName);
ReportError(CStrVector("Invalid capture group name"));
return nullptr;
}
if (at_start) {
if (!IsIdentifierStart(c)) {
ReportError(RegExpError::kInvalidCaptureGroupName);
ReportError(CStrVector("Invalid capture group name"));
return nullptr;
}
push_code_unit(name, c);
......@@ -864,7 +870,7 @@ const ZoneVector<uc16>* RegExpParser::ParseCaptureGroupName() {
} else if (IsIdentifierPart(c)) {
push_code_unit(name, c);
} else {
ReportError(RegExpError::kInvalidCaptureGroupName);
ReportError(CStrVector("Invalid capture group name"));
return nullptr;
}
}
......@@ -891,7 +897,7 @@ bool RegExpParser::CreateNamedCaptureAtIndex(const ZoneVector<uc16>* name,
const auto& named_capture_it = named_captures_->find(capture);
if (named_capture_it != named_captures_->end()) {
ReportError(RegExpError::kDuplicateCaptureGroupName);
ReportError(CStrVector("Duplicate capture group name"));
return false;
}
}
......@@ -905,7 +911,7 @@ bool RegExpParser::ParseNamedBackReference(RegExpBuilder* builder,
RegExpParserState* state) {
// The parser is assumed to be on the '<' in \k<name>.
if (current() != '<') {
ReportError(RegExpError::kInvalidNamedReference);
ReportError(CStrVector("Invalid named reference"));
return false;
}
......@@ -938,7 +944,7 @@ void RegExpParser::PatchNamedBackReferences() {
if (named_back_references_ == nullptr) return;
if (named_captures_ == nullptr) {
ReportError(RegExpError::kInvalidNamedCaptureReference);
ReportError(CStrVector("Invalid named capture referenced"));
return;
}
......@@ -959,7 +965,7 @@ void RegExpParser::PatchNamedBackReferences() {
if (capture_it != named_captures_->end()) {
index = (*capture_it)->index();
} else {
ReportError(RegExpError::kInvalidNamedCaptureReference);
ReportError(CStrVector("Invalid named capture referenced"));
return;
}
......@@ -1600,7 +1606,7 @@ uc32 RegExpParser::ParseClassCharacterEscape() {
}
if (unicode()) {
// With /u, invalid escapes are not treated as identity escapes.
ReportError(RegExpError::kInvalidClassEscape);
ReportError(CStrVector("Invalid class escape"));
return 0;
}
if ((controlLetter >= '0' && controlLetter <= '9') ||
......@@ -1633,7 +1639,7 @@ uc32 RegExpParser::ParseClassCharacterEscape() {
// ES#prod-annexB-LegacyOctalEscapeSequence
if (unicode()) {
// With /u, decimal escape is not interpreted as octal character code.
ReportError(RegExpError::kInvalidClassEscape);
ReportError(CStrVector("Invalid class escape"));
return 0;
}
return ParseOctalLiteral();
......@@ -1643,7 +1649,7 @@ uc32 RegExpParser::ParseClassCharacterEscape() {
if (ParseHexEscape(2, &value)) return value;
if (unicode()) {
// With /u, invalid escapes are not treated as identity escapes.
ReportError(RegExpError::kInvalidEscape);
ReportError(CStrVector("Invalid escape"));
return 0;
}
// If \x is not followed by a two-digit hexadecimal, treat it
......@@ -1656,7 +1662,7 @@ uc32 RegExpParser::ParseClassCharacterEscape() {
if (ParseUnicodeEscape(&value)) return value;
if (unicode()) {
// With /u, invalid escapes are not treated as identity escapes.
ReportError(RegExpError::kInvalidUnicodeEscape);
ReportError(CStrVector("Invalid unicode escape"));
return 0;
}
// If \u is not followed by a two-digit hexadecimal, treat it
......@@ -1671,7 +1677,7 @@ uc32 RegExpParser::ParseClassCharacterEscape() {
Advance();
return result;
}
ReportError(RegExpError::kInvalidEscape);
ReportError(CStrVector("Invalid escape"));
return 0;
}
}
......@@ -1698,7 +1704,7 @@ void RegExpParser::ParseClassEscape(ZoneList<CharacterRange>* ranges,
return;
}
case kEndMarker:
ReportError(RegExpError::kEscapeAtEndOfPattern);
ReportError(CStrVector("\\ at end of pattern"));
return;
case 'p':
case 'P':
......@@ -1709,7 +1715,7 @@ void RegExpParser::ParseClassEscape(ZoneList<CharacterRange>* ranges,
ZoneVector<char> name_2(zone);
if (!ParsePropertyClassName(&name_1, &name_2) ||
!AddPropertyClassRange(ranges, negate, name_1, name_2)) {
ReportError(RegExpError::kInvalidClassPropertyName);
ReportError(CStrVector("Invalid property name in character class"));
}
*is_class_escape = true;
return;
......@@ -1728,6 +1734,10 @@ void RegExpParser::ParseClassEscape(ZoneList<CharacterRange>* ranges,
}
RegExpTree* RegExpParser::ParseCharacterClass(const RegExpBuilder* builder) {
static const char* kUnterminated = "Unterminated character class";
static const char* kRangeInvalid = "Invalid character class";
static const char* kRangeOutOfOrder = "Range out of order in character class";
DCHECK_EQ(current(), '[');
Advance();
bool is_negated = false;
......@@ -1760,7 +1770,7 @@ RegExpTree* RegExpParser::ParseCharacterClass(const RegExpBuilder* builder) {
// Either end is an escaped character class. Treat the '-' verbatim.
if (unicode()) {
// ES2015 21.2.2.15.1 step 1.
return ReportError(RegExpError::kInvalidCharacterClass);
return ReportError(CStrVector(kRangeInvalid));
}
if (!is_class_1) ranges->Add(CharacterRange::Singleton(char_1), zone());
ranges->Add(CharacterRange::Singleton('-'), zone());
......@@ -1769,7 +1779,7 @@ RegExpTree* RegExpParser::ParseCharacterClass(const RegExpBuilder* builder) {
}
// ES2015 21.2.2.15.1 step 6.
if (char_1 > char_2) {
return ReportError(RegExpError::kOutOfOrderCharacterClass);
return ReportError(CStrVector(kRangeOutOfOrder));
}
ranges->Add(CharacterRange::Range(char_1, char_2), zone());
} else {
......@@ -1777,7 +1787,7 @@ RegExpTree* RegExpParser::ParseCharacterClass(const RegExpBuilder* builder) {
}
}
if (!has_more()) {
return ReportError(RegExpError::kUnterminatedCharacterClass);
return ReportError(CStrVector(kUnterminated));
}
Advance();
RegExpCharacterClass::CharacterClassFlags character_class_flags;
......@@ -1794,16 +1804,14 @@ bool RegExpParser::ParseRegExp(Isolate* isolate, Zone* zone,
FlatStringReader* input, JSRegExp::Flags flags,
RegExpCompileData* result) {
DCHECK(result != nullptr);
RegExpParser parser(input, flags, isolate, zone);
RegExpParser parser(input, &result->error, flags, isolate, zone);
RegExpTree* tree = parser.ParsePattern();
if (parser.failed()) {
DCHECK(tree == nullptr);
DCHECK(parser.error_ != RegExpError::kNone);
result->error = parser.error_;
result->error_pos = parser.error_pos_;
DCHECK(!result->error.is_null());
} else {
DCHECK(tree != nullptr);
DCHECK(parser.error_ == RegExpError::kNone);
DCHECK(result->error.is_null());
if (FLAG_trace_regexp_parser) {
StdoutStream os;
tree->Print(os, zone);
......
......@@ -8,7 +8,6 @@
#include "src/objects/js-regexp.h"
#include "src/objects/objects.h"
#include "src/regexp/regexp-ast.h"
#include "src/regexp/regexp-error.h"
#include "src/zone/zone.h"
namespace v8 {
......@@ -154,8 +153,8 @@ class RegExpBuilder : public ZoneObject {
class V8_EXPORT_PRIVATE RegExpParser {
public:
RegExpParser(FlatStringReader* in, JSRegExp::Flags flags, Isolate* isolate,
Zone* zone);
RegExpParser(FlatStringReader* in, Handle<String>* error,
JSRegExp::Flags flags, Isolate* isolate, Zone* zone);
static bool ParseRegExp(Isolate* isolate, Zone* zone, FlatStringReader* input,
JSRegExp::Flags flags, RegExpCompileData* result);
......@@ -203,7 +202,7 @@ class V8_EXPORT_PRIVATE RegExpParser {
char ParseClassEscape();
RegExpTree* ReportError(RegExpError error);
RegExpTree* ReportError(Vector<const char> message);
void Advance();
void Advance(int dist);
void Reset(int pos);
......@@ -336,8 +335,7 @@ class V8_EXPORT_PRIVATE RegExpParser {
Isolate* isolate_;
Zone* zone_;
RegExpError error_ = RegExpError::kNone;
int error_pos_ = 0;
Handle<String>* error_;
ZoneList<RegExpCapture*>* captures_;
ZoneSet<RegExpCapture*, RegExpCaptureNameLess>* named_captures_;
ZoneList<RegExpBackReference*>* named_back_references_;
......
......@@ -92,15 +92,9 @@ class RegExpImpl final : public AllStatic {
};
V8_WARN_UNUSED_RESULT
static inline MaybeHandle<Object> ThrowRegExpException(Isolate* isolate,
Handle<JSRegExp> re,
Handle<String> pattern,
RegExpError error) {
Vector<const char> error_data = CStrVector(RegExpErrorString(error));
Handle<String> error_text =
isolate->factory()
->NewStringFromOneByte(Vector<const uint8_t>::cast(error_data))
.ToHandleChecked();
static inline MaybeHandle<Object> ThrowRegExpException(
Isolate* isolate, Handle<JSRegExp> re, Handle<String> pattern,
Handle<String> error_text) {
THROW_NEW_ERROR(
isolate,
NewSyntaxError(MessageTemplate::kMalformedRegExp, pattern, error_text),
......@@ -108,7 +102,7 @@ static inline MaybeHandle<Object> ThrowRegExpException(Isolate* isolate,
}
inline void ThrowRegExpException(Isolate* isolate, Handle<JSRegExp> re,
RegExpError error_text) {
Handle<String> error_text) {
USE(ThrowRegExpException(isolate, re, Handle<String>(re->Pattern(), isolate),
error_text));
}
......@@ -414,7 +408,7 @@ bool RegExpImpl::CompileIrregexp(Isolate* isolate, Handle<JSRegExp> re,
Compile(isolate, &zone, &compile_data, flags, pattern, sample_subject,
is_one_byte, re->BacktrackLimit());
if (!compilation_succeeded) {
DCHECK(compile_data.error != RegExpError::kNone);
DCHECK(!compile_data.error.is_null());
ThrowRegExpException(isolate, re, compile_data.error);
return false;
}
......@@ -747,7 +741,8 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
Handle<String> sample_subject, bool is_one_byte,
uint32_t backtrack_limit) {
if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
data->error = RegExpError::kTooLarge;
data->error =
isolate->factory()->NewStringFromAsciiChecked("RegExp too big");
return false;
}
......@@ -815,8 +810,8 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
if (node == nullptr) node = new (zone) EndNode(EndNode::BACKTRACK, zone);
data->node = node;
data->error = AnalyzeRegExp(isolate, is_one_byte, node);
if (data->error != RegExpError::kNone) {
if (const char* error_message = AnalyzeRegExp(isolate, is_one_byte, node)) {
data->error = isolate->factory()->NewStringFromAsciiChecked(error_message);
return false;
}
......@@ -918,12 +913,13 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
}
}
if (result.error != RegExpError::kNone) {
if (result.error_message != nullptr) {
if (FLAG_correctness_fuzzer_suppressions &&
result.error == RegExpError::kStackOverflow) {
strncmp(result.error_message, "Stack overflow", 15) == 0) {
FATAL("Aborting on stack overflow");
}
data->error = result.error;
data->error =
isolate->factory()->NewStringFromAsciiChecked(result.error_message);
}
data->code = result.code;
......
......@@ -6,7 +6,6 @@
#define V8_REGEXP_REGEXP_H_
#include "src/objects/js-regexp.h"
#include "src/regexp/regexp-error.h"
namespace v8 {
namespace internal {
......@@ -43,11 +42,7 @@ struct RegExpCompileData {
// The error message. Only used if an error occurred during parsing or
// compilation.
RegExpError error = RegExpError::kNone;
// The position at which the error was detected. Only used if an
// error occurred.
int error_pos = 0;
Handle<String> error;
// The number of capture groups, without the global capture \0.
int capture_count = 0;
......
......@@ -76,7 +76,7 @@ static void CheckParseEq(const char* input, const char* expected,
CHECK(v8::internal::RegExpParser::ParseRegExp(CcTest::i_isolate(), &zone,
&reader, flags, &result));
CHECK_NOT_NULL(result.tree);
CHECK(result.error == RegExpError::kNone);
CHECK(result.error.is_null());
std::ostringstream os;
result.tree->Print(os, &zone);
if (strcmp(expected, os.str().c_str()) != 0) {
......@@ -94,7 +94,7 @@ static bool CheckSimple(const char* input) {
CHECK(v8::internal::RegExpParser::ParseRegExp(
CcTest::i_isolate(), &zone, &reader, JSRegExp::kNone, &result));
CHECK_NOT_NULL(result.tree);
CHECK(result.error == RegExpError::kNone);
CHECK(result.error.is_null());
return result.simple;
}
......@@ -112,7 +112,7 @@ static MinMaxPair CheckMinMaxMatch(const char* input) {
CHECK(v8::internal::RegExpParser::ParseRegExp(
CcTest::i_isolate(), &zone, &reader, JSRegExp::kNone, &result));
CHECK_NOT_NULL(result.tree);
CHECK(result.error == RegExpError::kNone);
CHECK(result.error.is_null());
int min_match = result.tree->min_match();
int max_match = result.tree->max_match();
MinMaxPair pair = { min_match, max_match };
......@@ -428,8 +428,9 @@ static void ExpectError(const char* input, const char* expected,
CHECK(!v8::internal::RegExpParser::ParseRegExp(isolate, &zone, &reader, flags,
&result));
CHECK_NULL(result.tree);
CHECK(result.error != RegExpError::kNone);
CHECK_EQ(0, strcmp(expected, RegExpErrorString(result.error)));
CHECK(!result.error.is_null());
std::unique_ptr<char[]> str = result.error->ToCString(ALLOW_NULLS);
CHECK_EQ(0, strcmp(expected, str.get()));
}
......@@ -467,7 +468,7 @@ TEST(Errors) {
ExpectError("\\k<a", kInvalidCaptureName, true);
const char* kDuplicateCaptureName = "Duplicate capture group name";
ExpectError("(?<a>.)(?<a>.)", kDuplicateCaptureName, true);
const char* kInvalidUnicodeEscape = "Invalid Unicode escape";
const char* kInvalidUnicodeEscape = "Invalid Unicode escape sequence";
ExpectError("(?<\\u{FISK}", kInvalidUnicodeEscape, true);
const char* kInvalidCaptureReferenced = "Invalid named capture referenced";
ExpectError("\\k<a>", kInvalidCaptureReferenced, true);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment