Commit 2193f691 authored by Leszek Swirski's avatar Leszek Swirski Committed by Commit Bot

Revert "[regexp] Rewrite error handling"

This reverts commit e80ca24c.

Reason for revert: Causes failures in the fast/regex/non-pattern-characters.html Blink web test (https://ci.chromium.org/p/v8/builders/ci/V8%20Blink%20Linux/3679)

Original change's description:
> [regexp] Rewrite error handling
> 
> This patch modifies irregexp's error handling. Instead of representing
> errors as C strings, they are represented as an enumeration value
> (RegExpError), and only converted to strings when throwing the error
> object in regexp.cc. This makes it significantly easier to integrate
> into SpiderMonkey. A few notes:
> 
> 1. Depending on whether the stack overflows during parsing or
>    analysis, the stack overflow message can vary ("Stack overflow" or
>    "Maximum call stack size exceeded"). I kept that behaviour in this
>    patch, under the assumption that stack overflow messages are
>    (sadly) the sorts of things that real world code ends up depending
>    on.
> 
> 2. Depending on the point in code where the error was identified,
>    invalid unicode escapes could be reported as "Invalid Unicode
>    escape", "Invalid unicode escape", or "Invalid Unicode escape
>    sequence". I fervently hope that nobody depends on the specific
>    wording of a syntax error, so I standardized on the first one. (It
>    was both the most common, and the most consistent with other
>    "Invalid X escape" messages.)
> 
> 3. In addition to changing the representation, this patch also adds an
>    error_pos field to RegExpParser and RegExpCompileData, which stores
>    the position at which an error occurred. This is used by
>    SpiderMonkey to provide more helpful messages about where a syntax
>    error occurred in large regular expressions.
> 
> 4. This model is closer to V8's existing MessageTemplate
>    infrastructure. I considered trying to integrate it more closely
>    with MessageTemplate, but since one of our stated goals for this
>    project was to make it easier to use irregexp outside of V8, I
>    decided to hold off.
> 
> R=​jgruber@chromium.org
> 
> Bug: v8:10303
> Change-Id: I62605fd2def2fc539f38a7e0eefa04d36e14bbde
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2091863
> Commit-Queue: Jakob Gruber <jgruber@chromium.org>
> Reviewed-by: Jakob Gruber <jgruber@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#66784}

TBR=jgruber@chromium.org,iireland@mozilla.com

Change-Id: I9247635f3c5b17c943b9c4abaf82ebe7b2de165e
No-Presubmit: true
No-Tree-Checks: true
No-Try: true
Bug: v8:10303
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2108550Reviewed-by: 's avatarLeszek Swirski <leszeks@chromium.org>
Commit-Queue: Leszek Swirski <leszeks@chromium.org>
Cr-Commit-Position: refs/heads/master@{#66786}
parent 38c3bd48
......@@ -2833,8 +2833,6 @@ v8_source_set("v8_base_without_compiler") {
"src/regexp/regexp-compiler.h",
"src/regexp/regexp-dotprinter.cc",
"src/regexp/regexp-dotprinter.h",
"src/regexp/regexp-error.cc",
"src/regexp/regexp-error.h",
"src/regexp/regexp-interpreter.cc",
"src/regexp/regexp-interpreter.h",
"src/regexp/regexp-macro-assembler-arch.h",
......
......@@ -3594,9 +3594,7 @@ template <typename... Propagators>
class Analysis : public NodeVisitor {
public:
Analysis(Isolate* isolate, bool is_one_byte)
: isolate_(isolate),
is_one_byte_(is_one_byte),
error_(RegExpError::kNone) {}
: isolate_(isolate), is_one_byte_(is_one_byte), error_message_(nullptr) {}
void EnsureAnalyzed(RegExpNode* that) {
StackLimitCheck check(isolate());
......@@ -3604,7 +3602,7 @@ class Analysis : public NodeVisitor {
if (FLAG_correctness_fuzzer_suppressions) {
FATAL("Analysis: Aborting on stack overflow");
}
fail(RegExpError::kAnalysisStackOverflow);
fail("Stack overflow");
return;
}
if (that->info()->been_analyzed || that->info()->being_analyzed) return;
......@@ -3614,12 +3612,12 @@ class Analysis : public NodeVisitor {
that->info()->been_analyzed = true;
}
bool has_failed() { return error_ != RegExpError::kNone; }
RegExpError error() {
DCHECK(error_ != RegExpError::kNone);
return error_;
bool has_failed() { return error_message_ != nullptr; }
const char* error_message() {
DCHECK(error_message_ != nullptr);
return error_message_;
}
void fail(RegExpError error) { error_ = error; }
void fail(const char* error_message) { error_message_ = error_message; }
Isolate* isolate() const { return isolate_; }
......@@ -3704,19 +3702,19 @@ class Analysis : public NodeVisitor {
private:
Isolate* isolate_;
bool is_one_byte_;
RegExpError error_;
const char* error_message_;
DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis);
};
RegExpError AnalyzeRegExp(Isolate* isolate, bool is_one_byte,
const char* AnalyzeRegExp(Isolate* isolate, bool is_one_byte,
RegExpNode* node) {
Analysis<AssertionPropagator, EatsAtLeastPropagator> analysis(isolate,
is_one_byte);
DCHECK_EQ(node->info()->been_analyzed, false);
analysis.EnsureAnalyzed(node);
DCHECK_IMPLIES(analysis.has_failed(), analysis.error() != RegExpError::kNone);
return analysis.has_failed() ? analysis.error() : RegExpError::kNone;
DCHECK_IMPLIES(analysis.has_failed(), analysis.error_message() != nullptr);
return analysis.has_failed() ? analysis.error_message() : nullptr;
}
void BackReferenceNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
......
......@@ -423,7 +423,10 @@ struct PreloadState {
// Analysis performs assertion propagation and computes eats_at_least_ values.
// See the comments on AssertionPropagator and EatsAtLeastPropagator for more
// details.
RegExpError AnalyzeRegExp(Isolate* isolate, bool is_one_byte, RegExpNode* node);
//
// This method returns nullptr on success or a null-terminated failure message
// on failure.
const char* AnalyzeRegExp(Isolate* isolate, bool is_one_byte, RegExpNode* node);
class FrequencyCollator {
public:
......@@ -500,17 +503,18 @@ class RegExpCompiler {
}
struct CompilationResult final {
explicit CompilationResult(RegExpError err) : error(err) {}
explicit CompilationResult(const char* error_message)
: error_message(error_message) {}
CompilationResult(Object code, int registers)
: code(code), num_registers(registers) {}
static CompilationResult RegExpTooBig() {
return CompilationResult(RegExpError::kTooLarge);
return CompilationResult("RegExp too big");
}
bool Succeeded() const { return error == RegExpError::kNone; }
bool Succeeded() const { return error_message == nullptr; }
const RegExpError error = RegExpError::kNone;
const char* const error_message = nullptr;
Object code;
int num_registers = 0;
};
......
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/regexp/regexp-error.h"
namespace v8 {
namespace internal {
const char* kRegExpErrorStrings[] = {
#define TEMPLATE(NAME, STRING) STRING,
REGEXP_ERROR_MESSAGES(TEMPLATE)
#undef TEMPLATE
};
const char* RegExpErrorString(RegExpError error) {
DCHECK_LT(error, RegExpError::NumErrors);
return kRegExpErrorStrings[static_cast<int>(error)];
}
} // namespace internal
} // namespace v8
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_REGEXP_REGEXP_ERROR_H_
#define V8_REGEXP_REGEXP_ERROR_H_
#include "src/base/logging.h"
#include "src/base/macros.h"
namespace v8 {
namespace internal {
#define REGEXP_ERROR_MESSAGES(T) \
T(None, "") \
T(StackOverflow, "Maximum call stack size exceeded") \
T(AnalysisStackOverflow, "Stack overflow") \
T(TooLarge, "Regular expression too large") \
T(UnterminatedGroup, "Unterminated group") \
T(UnmatchedParen, "Unmatched ')'") \
T(EscapeAtEndOfPattern, "\\ at end of pattern") \
T(InvalidPropertyName, "Invalid property name") \
T(InvalidEscape, "Invalid escape") \
T(InvalidDecimalEscape, "Invalid decimal escape") \
T(InvalidUnicodeEscape, "Invalid Unicode escape") \
T(NothingToRepeat, "Nothing to repeat") \
T(LoneQuantifierBrackets, "Lone quantifier brackets") \
T(RangeOutOfOrder, "Numbers out of order in {} quantifier") \
T(IncompleteQuantifier, "Incomplete quantifier") \
T(InvalidQuantifier, "Invalid quantifier") \
T(InvalidGroup, "Invalid group") \
T(MultipleFlagDashes, "Multiple dashes in flag group") \
T(RepeatedFlag, "Repeated flag in flag group") \
T(InvalidFlagGroup, "Invalid flag group") \
T(TooManyCaptures, "Too many captures") \
T(InvalidCaptureGroupName, "Invalid capture group name") \
T(DuplicateCaptureGroupName, "Duplicate capture group name") \
T(InvalidNamedReference, "Invalid named reference") \
T(InvalidNamedCaptureReference, "Invalid named capture referenced") \
T(InvalidClassEscape, "Invalid class escape") \
T(InvalidClassPropertyName, "Invalid property name in character class") \
T(InvalidCharacterClass, "Invalid character class") \
T(UnterminatedCharacterClass, "Unterminated character class") \
T(OutOfOrderCharacterClass, "Range out of order in character class")
enum class RegExpError : uint32_t {
#define TEMPLATE(NAME, STRING) k##NAME,
REGEXP_ERROR_MESSAGES(TEMPLATE)
#undef TEMPLATE
NumErrors
};
V8_EXPORT_PRIVATE const char* RegExpErrorString(RegExpError error);
} // namespace internal
} // namespace v8
#endif // V8_REGEXP_REGEXP_ERROR_H_
This diff is collapsed.
......@@ -8,7 +8,6 @@
#include "src/objects/js-regexp.h"
#include "src/objects/objects.h"
#include "src/regexp/regexp-ast.h"
#include "src/regexp/regexp-error.h"
#include "src/zone/zone.h"
namespace v8 {
......@@ -154,8 +153,8 @@ class RegExpBuilder : public ZoneObject {
class V8_EXPORT_PRIVATE RegExpParser {
public:
RegExpParser(FlatStringReader* in, JSRegExp::Flags flags, Isolate* isolate,
Zone* zone);
RegExpParser(FlatStringReader* in, Handle<String>* error,
JSRegExp::Flags flags, Isolate* isolate, Zone* zone);
static bool ParseRegExp(Isolate* isolate, Zone* zone, FlatStringReader* input,
JSRegExp::Flags flags, RegExpCompileData* result);
......@@ -203,7 +202,7 @@ class V8_EXPORT_PRIVATE RegExpParser {
char ParseClassEscape();
RegExpTree* ReportError(RegExpError error);
RegExpTree* ReportError(Vector<const char> message);
void Advance();
void Advance(int dist);
void Reset(int pos);
......@@ -336,8 +335,7 @@ class V8_EXPORT_PRIVATE RegExpParser {
Isolate* isolate_;
Zone* zone_;
RegExpError error_ = RegExpError::kNone;
int error_pos_ = 0;
Handle<String>* error_;
ZoneList<RegExpCapture*>* captures_;
ZoneSet<RegExpCapture*, RegExpCaptureNameLess>* named_captures_;
ZoneList<RegExpBackReference*>* named_back_references_;
......
......@@ -92,15 +92,9 @@ class RegExpImpl final : public AllStatic {
};
V8_WARN_UNUSED_RESULT
static inline MaybeHandle<Object> ThrowRegExpException(Isolate* isolate,
Handle<JSRegExp> re,
Handle<String> pattern,
RegExpError error) {
Vector<const char> error_data = CStrVector(RegExpErrorString(error));
Handle<String> error_text =
isolate->factory()
->NewStringFromOneByte(Vector<const uint8_t>::cast(error_data))
.ToHandleChecked();
static inline MaybeHandle<Object> ThrowRegExpException(
Isolate* isolate, Handle<JSRegExp> re, Handle<String> pattern,
Handle<String> error_text) {
THROW_NEW_ERROR(
isolate,
NewSyntaxError(MessageTemplate::kMalformedRegExp, pattern, error_text),
......@@ -108,7 +102,7 @@ static inline MaybeHandle<Object> ThrowRegExpException(Isolate* isolate,
}
inline void ThrowRegExpException(Isolate* isolate, Handle<JSRegExp> re,
RegExpError error_text) {
Handle<String> error_text) {
USE(ThrowRegExpException(isolate, re, Handle<String>(re->Pattern(), isolate),
error_text));
}
......@@ -414,7 +408,7 @@ bool RegExpImpl::CompileIrregexp(Isolate* isolate, Handle<JSRegExp> re,
Compile(isolate, &zone, &compile_data, flags, pattern, sample_subject,
is_one_byte, re->BacktrackLimit());
if (!compilation_succeeded) {
DCHECK(compile_data.error != RegExpError::kNone);
DCHECK(!compile_data.error.is_null());
ThrowRegExpException(isolate, re, compile_data.error);
return false;
}
......@@ -747,7 +741,8 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
Handle<String> sample_subject, bool is_one_byte,
uint32_t backtrack_limit) {
if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
data->error = RegExpError::kTooLarge;
data->error =
isolate->factory()->NewStringFromAsciiChecked("RegExp too big");
return false;
}
......@@ -815,8 +810,8 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
if (node == nullptr) node = new (zone) EndNode(EndNode::BACKTRACK, zone);
data->node = node;
data->error = AnalyzeRegExp(isolate, is_one_byte, node);
if (data->error != RegExpError::kNone) {
if (const char* error_message = AnalyzeRegExp(isolate, is_one_byte, node)) {
data->error = isolate->factory()->NewStringFromAsciiChecked(error_message);
return false;
}
......@@ -918,12 +913,13 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
}
}
if (result.error != RegExpError::kNone) {
if (result.error_message != nullptr) {
if (FLAG_correctness_fuzzer_suppressions &&
result.error == RegExpError::kStackOverflow) {
strncmp(result.error_message, "Stack overflow", 15) == 0) {
FATAL("Aborting on stack overflow");
}
data->error = result.error;
data->error =
isolate->factory()->NewStringFromAsciiChecked(result.error_message);
}
data->code = result.code;
......
......@@ -6,7 +6,6 @@
#define V8_REGEXP_REGEXP_H_
#include "src/objects/js-regexp.h"
#include "src/regexp/regexp-error.h"
namespace v8 {
namespace internal {
......@@ -43,11 +42,7 @@ struct RegExpCompileData {
// The error message. Only used if an error occurred during parsing or
// compilation.
RegExpError error = RegExpError::kNone;
// The position at which the error was detected. Only used if an
// error occurred.
int error_pos = 0;
Handle<String> error;
// The number of capture groups, without the global capture \0.
int capture_count = 0;
......
......@@ -76,7 +76,7 @@ static void CheckParseEq(const char* input, const char* expected,
CHECK(v8::internal::RegExpParser::ParseRegExp(CcTest::i_isolate(), &zone,
&reader, flags, &result));
CHECK_NOT_NULL(result.tree);
CHECK(result.error == RegExpError::kNone);
CHECK(result.error.is_null());
std::ostringstream os;
result.tree->Print(os, &zone);
if (strcmp(expected, os.str().c_str()) != 0) {
......@@ -94,7 +94,7 @@ static bool CheckSimple(const char* input) {
CHECK(v8::internal::RegExpParser::ParseRegExp(
CcTest::i_isolate(), &zone, &reader, JSRegExp::kNone, &result));
CHECK_NOT_NULL(result.tree);
CHECK(result.error == RegExpError::kNone);
CHECK(result.error.is_null());
return result.simple;
}
......@@ -112,7 +112,7 @@ static MinMaxPair CheckMinMaxMatch(const char* input) {
CHECK(v8::internal::RegExpParser::ParseRegExp(
CcTest::i_isolate(), &zone, &reader, JSRegExp::kNone, &result));
CHECK_NOT_NULL(result.tree);
CHECK(result.error == RegExpError::kNone);
CHECK(result.error.is_null());
int min_match = result.tree->min_match();
int max_match = result.tree->max_match();
MinMaxPair pair = { min_match, max_match };
......@@ -428,8 +428,9 @@ static void ExpectError(const char* input, const char* expected,
CHECK(!v8::internal::RegExpParser::ParseRegExp(isolate, &zone, &reader, flags,
&result));
CHECK_NULL(result.tree);
CHECK(result.error != RegExpError::kNone);
CHECK_EQ(0, strcmp(expected, RegExpErrorString(result.error)));
CHECK(!result.error.is_null());
std::unique_ptr<char[]> str = result.error->ToCString(ALLOW_NULLS);
CHECK_EQ(0, strcmp(expected, str.get()));
}
......@@ -467,7 +468,7 @@ TEST(Errors) {
ExpectError("\\k<a", kInvalidCaptureName, true);
const char* kDuplicateCaptureName = "Duplicate capture group name";
ExpectError("(?<a>.)(?<a>.)", kDuplicateCaptureName, true);
const char* kInvalidUnicodeEscape = "Invalid Unicode escape";
const char* kInvalidUnicodeEscape = "Invalid Unicode escape sequence";
ExpectError("(?<\\u{FISK}", kInvalidUnicodeEscape, true);
const char* kInvalidCaptureReferenced = "Invalid named capture referenced";
ExpectError("\\k<a>", kInvalidCaptureReferenced, true);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment