Commit 560f2d8b authored by Iain Ireland's avatar Iain Ireland Committed by Commit Bot

Reland "[regexp] Rewrite error handling"

This is a reland of e80ca24c

Original change's description:
> [regexp] Rewrite error handling
>
> This patch modifies irregexp's error handling. Instead of representing
> errors as C strings, they are represented as an enumeration value
> (RegExpError), and only converted to strings when throwing the error
> object in regexp.cc. This makes it significantly easier to integrate
> into SpiderMonkey. A few notes:
>
> 1. Depending on whether the stack overflows during parsing or
>    analysis, the stack overflow message can vary ("Stack overflow" or
>    "Maximum call stack size exceeded"). I kept that behaviour in this
>    patch, under the assumption that stack overflow messages are
>    (sadly) the sorts of things that real world code ends up depending
>    on.
>
> 2. Depending on the point in code where the error was identified,
>    invalid unicode escapes could be reported as "Invalid Unicode
>    escape", "Invalid unicode escape", or "Invalid Unicode escape
>    sequence". I fervently hope that nobody depends on the specific
>    wording of a syntax error, so I standardized on the first one. (It
>    was both the most common, and the most consistent with other
>    "Invalid X escape" messages.)
>
> 3. In addition to changing the representation, this patch also adds an
>    error_pos field to RegExpParser and RegExpCompileData, which stores
>    the position at which an error occurred. This is used by
>    SpiderMonkey to provide more helpful messages about where a syntax
>    error occurred in large regular expressions.
>
> 4. This model is closer to V8's existing MessageTemplate
>    infrastructure. I considered trying to integrate it more closely
>    with MessageTemplate, but since one of our stated goals for this
>    project was to make it easier to use irregexp outside of V8, I
>    decided to hold off.
>
> R=jgruber@chromium.org
>
> Bug: v8:10303
> Change-Id: I62605fd2def2fc539f38a7e0eefa04d36e14bbde
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2091863
> Commit-Queue: Jakob Gruber <jgruber@chromium.org>
> Reviewed-by: Jakob Gruber <jgruber@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#66784}

R=jgruber@chromium.org

Bug: v8:10303
Change-Id: Iad1f11a0e0b9e525d7499aacb56c27eff9e7c7b5
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2109952Reviewed-by: 's avatarJakob Gruber <jgruber@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#66798}
parent 1e1d4d82
......@@ -2833,6 +2833,8 @@ v8_source_set("v8_base_without_compiler") {
"src/regexp/regexp-compiler.h",
"src/regexp/regexp-dotprinter.cc",
"src/regexp/regexp-dotprinter.h",
"src/regexp/regexp-error.cc",
"src/regexp/regexp-error.h",
"src/regexp/regexp-interpreter.cc",
"src/regexp/regexp-interpreter.h",
"src/regexp/regexp-macro-assembler-arch.h",
......
......@@ -3594,7 +3594,9 @@ template <typename... Propagators>
class Analysis : public NodeVisitor {
public:
Analysis(Isolate* isolate, bool is_one_byte)
: isolate_(isolate), is_one_byte_(is_one_byte), error_message_(nullptr) {}
: isolate_(isolate),
is_one_byte_(is_one_byte),
error_(RegExpError::kNone) {}
void EnsureAnalyzed(RegExpNode* that) {
StackLimitCheck check(isolate());
......@@ -3602,7 +3604,7 @@ class Analysis : public NodeVisitor {
if (FLAG_correctness_fuzzer_suppressions) {
FATAL("Analysis: Aborting on stack overflow");
}
fail("Stack overflow");
fail(RegExpError::kAnalysisStackOverflow);
return;
}
if (that->info()->been_analyzed || that->info()->being_analyzed) return;
......@@ -3612,12 +3614,12 @@ class Analysis : public NodeVisitor {
that->info()->been_analyzed = true;
}
bool has_failed() { return error_message_ != nullptr; }
const char* error_message() {
DCHECK(error_message_ != nullptr);
return error_message_;
bool has_failed() { return error_ != RegExpError::kNone; }
RegExpError error() {
DCHECK(error_ != RegExpError::kNone);
return error_;
}
void fail(const char* error_message) { error_message_ = error_message; }
void fail(RegExpError error) { error_ = error; }
Isolate* isolate() const { return isolate_; }
......@@ -3702,19 +3704,19 @@ class Analysis : public NodeVisitor {
private:
Isolate* isolate_;
bool is_one_byte_;
const char* error_message_;
RegExpError error_;
DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis);
};
const char* AnalyzeRegExp(Isolate* isolate, bool is_one_byte,
RegExpError AnalyzeRegExp(Isolate* isolate, bool is_one_byte,
RegExpNode* node) {
Analysis<AssertionPropagator, EatsAtLeastPropagator> analysis(isolate,
is_one_byte);
DCHECK_EQ(node->info()->been_analyzed, false);
analysis.EnsureAnalyzed(node);
DCHECK_IMPLIES(analysis.has_failed(), analysis.error_message() != nullptr);
return analysis.has_failed() ? analysis.error_message() : nullptr;
DCHECK_IMPLIES(analysis.has_failed(), analysis.error() != RegExpError::kNone);
return analysis.has_failed() ? analysis.error() : RegExpError::kNone;
}
void BackReferenceNode::FillInBMInfo(Isolate* isolate, int offset, int budget,
......
......@@ -423,10 +423,7 @@ struct PreloadState {
// Analysis performs assertion propagation and computes eats_at_least_ values.
// See the comments on AssertionPropagator and EatsAtLeastPropagator for more
// details.
//
// This method returns nullptr on success or a null-terminated failure message
// on failure.
const char* AnalyzeRegExp(Isolate* isolate, bool is_one_byte, RegExpNode* node);
RegExpError AnalyzeRegExp(Isolate* isolate, bool is_one_byte, RegExpNode* node);
class FrequencyCollator {
public:
......@@ -503,18 +500,17 @@ class RegExpCompiler {
}
struct CompilationResult final {
explicit CompilationResult(const char* error_message)
: error_message(error_message) {}
explicit CompilationResult(RegExpError err) : error(err) {}
CompilationResult(Object code, int registers)
: code(code), num_registers(registers) {}
static CompilationResult RegExpTooBig() {
return CompilationResult("RegExp too big");
return CompilationResult(RegExpError::kTooLarge);
}
bool Succeeded() const { return error_message == nullptr; }
bool Succeeded() const { return error == RegExpError::kNone; }
const char* const error_message = nullptr;
const RegExpError error = RegExpError::kNone;
Object code;
int num_registers = 0;
};
......
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/regexp/regexp-error.h"
namespace v8 {
namespace internal {
const char* kRegExpErrorStrings[] = {
#define TEMPLATE(NAME, STRING) STRING,
REGEXP_ERROR_MESSAGES(TEMPLATE)
#undef TEMPLATE
};
const char* RegExpErrorString(RegExpError error) {
DCHECK_LT(error, RegExpError::NumErrors);
return kRegExpErrorStrings[static_cast<int>(error)];
}
} // namespace internal
} // namespace v8
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_REGEXP_REGEXP_ERROR_H_
#define V8_REGEXP_REGEXP_ERROR_H_
#include "src/base/logging.h"
#include "src/base/macros.h"
namespace v8 {
namespace internal {
#define REGEXP_ERROR_MESSAGES(T) \
T(None, "") \
T(StackOverflow, "Maximum call stack size exceeded") \
T(AnalysisStackOverflow, "Stack overflow") \
T(TooLarge, "Regular expression too large") \
T(UnterminatedGroup, "Unterminated group") \
T(UnmatchedParen, "Unmatched ')'") \
T(EscapeAtEndOfPattern, "\\ at end of pattern") \
T(InvalidPropertyName, "Invalid property name") \
T(InvalidEscape, "Invalid escape") \
T(InvalidDecimalEscape, "Invalid decimal escape") \
T(InvalidUnicodeEscape, "Invalid Unicode escape") \
T(NothingToRepeat, "Nothing to repeat") \
T(LoneQuantifierBrackets, "Lone quantifier brackets") \
T(RangeOutOfOrder, "numbers out of order in {} quantifier") \
T(IncompleteQuantifier, "Incomplete quantifier") \
T(InvalidQuantifier, "Invalid quantifier") \
T(InvalidGroup, "Invalid group") \
T(MultipleFlagDashes, "Multiple dashes in flag group") \
T(RepeatedFlag, "Repeated flag in flag group") \
T(InvalidFlagGroup, "Invalid flag group") \
T(TooManyCaptures, "Too many captures") \
T(InvalidCaptureGroupName, "Invalid capture group name") \
T(DuplicateCaptureGroupName, "Duplicate capture group name") \
T(InvalidNamedReference, "Invalid named reference") \
T(InvalidNamedCaptureReference, "Invalid named capture referenced") \
T(InvalidClassEscape, "Invalid class escape") \
T(InvalidClassPropertyName, "Invalid property name in character class") \
T(InvalidCharacterClass, "Invalid character class") \
T(UnterminatedCharacterClass, "Unterminated character class") \
T(OutOfOrderCharacterClass, "Range out of order in character class")
enum class RegExpError : uint32_t {
#define TEMPLATE(NAME, STRING) k##NAME,
REGEXP_ERROR_MESSAGES(TEMPLATE)
#undef TEMPLATE
NumErrors
};
V8_EXPORT_PRIVATE const char* RegExpErrorString(RegExpError error);
} // namespace internal
} // namespace v8
#endif // V8_REGEXP_REGEXP_ERROR_H_
This diff is collapsed.
......@@ -8,6 +8,7 @@
#include "src/objects/js-regexp.h"
#include "src/objects/objects.h"
#include "src/regexp/regexp-ast.h"
#include "src/regexp/regexp-error.h"
#include "src/zone/zone.h"
namespace v8 {
......@@ -153,8 +154,8 @@ class RegExpBuilder : public ZoneObject {
class V8_EXPORT_PRIVATE RegExpParser {
public:
RegExpParser(FlatStringReader* in, Handle<String>* error,
JSRegExp::Flags flags, Isolate* isolate, Zone* zone);
RegExpParser(FlatStringReader* in, JSRegExp::Flags flags, Isolate* isolate,
Zone* zone);
static bool ParseRegExp(Isolate* isolate, Zone* zone, FlatStringReader* input,
JSRegExp::Flags flags, RegExpCompileData* result);
......@@ -202,7 +203,7 @@ class V8_EXPORT_PRIVATE RegExpParser {
char ParseClassEscape();
RegExpTree* ReportError(Vector<const char> message);
RegExpTree* ReportError(RegExpError error);
void Advance();
void Advance(int dist);
void Reset(int pos);
......@@ -335,7 +336,8 @@ class V8_EXPORT_PRIVATE RegExpParser {
Isolate* isolate_;
Zone* zone_;
Handle<String>* error_;
RegExpError error_ = RegExpError::kNone;
int error_pos_ = 0;
ZoneList<RegExpCapture*>* captures_;
ZoneSet<RegExpCapture*, RegExpCaptureNameLess>* named_captures_;
ZoneList<RegExpBackReference*>* named_back_references_;
......
......@@ -92,9 +92,15 @@ class RegExpImpl final : public AllStatic {
};
V8_WARN_UNUSED_RESULT
static inline MaybeHandle<Object> ThrowRegExpException(
Isolate* isolate, Handle<JSRegExp> re, Handle<String> pattern,
Handle<String> error_text) {
static inline MaybeHandle<Object> ThrowRegExpException(Isolate* isolate,
Handle<JSRegExp> re,
Handle<String> pattern,
RegExpError error) {
Vector<const char> error_data = CStrVector(RegExpErrorString(error));
Handle<String> error_text =
isolate->factory()
->NewStringFromOneByte(Vector<const uint8_t>::cast(error_data))
.ToHandleChecked();
THROW_NEW_ERROR(
isolate,
NewSyntaxError(MessageTemplate::kMalformedRegExp, pattern, error_text),
......@@ -102,7 +108,7 @@ static inline MaybeHandle<Object> ThrowRegExpException(
}
inline void ThrowRegExpException(Isolate* isolate, Handle<JSRegExp> re,
Handle<String> error_text) {
RegExpError error_text) {
USE(ThrowRegExpException(isolate, re, Handle<String>(re->Pattern(), isolate),
error_text));
}
......@@ -408,7 +414,7 @@ bool RegExpImpl::CompileIrregexp(Isolate* isolate, Handle<JSRegExp> re,
Compile(isolate, &zone, &compile_data, flags, pattern, sample_subject,
is_one_byte, re->BacktrackLimit());
if (!compilation_succeeded) {
DCHECK(!compile_data.error.is_null());
DCHECK(compile_data.error != RegExpError::kNone);
ThrowRegExpException(isolate, re, compile_data.error);
return false;
}
......@@ -741,8 +747,7 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
Handle<String> sample_subject, bool is_one_byte,
uint32_t backtrack_limit) {
if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
data->error =
isolate->factory()->NewStringFromAsciiChecked("RegExp too big");
data->error = RegExpError::kTooLarge;
return false;
}
......@@ -810,8 +815,8 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
if (node == nullptr) node = new (zone) EndNode(EndNode::BACKTRACK, zone);
data->node = node;
if (const char* error_message = AnalyzeRegExp(isolate, is_one_byte, node)) {
data->error = isolate->factory()->NewStringFromAsciiChecked(error_message);
data->error = AnalyzeRegExp(isolate, is_one_byte, node);
if (data->error != RegExpError::kNone) {
return false;
}
......@@ -913,13 +918,12 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
}
}
if (result.error_message != nullptr) {
if (result.error != RegExpError::kNone) {
if (FLAG_correctness_fuzzer_suppressions &&
strncmp(result.error_message, "Stack overflow", 15) == 0) {
result.error == RegExpError::kStackOverflow) {
FATAL("Aborting on stack overflow");
}
data->error =
isolate->factory()->NewStringFromAsciiChecked(result.error_message);
data->error = result.error;
}
data->code = result.code;
......
......@@ -6,6 +6,7 @@
#define V8_REGEXP_REGEXP_H_
#include "src/objects/js-regexp.h"
#include "src/regexp/regexp-error.h"
namespace v8 {
namespace internal {
......@@ -42,7 +43,11 @@ struct RegExpCompileData {
// The error message. Only used if an error occurred during parsing or
// compilation.
Handle<String> error;
RegExpError error = RegExpError::kNone;
// The position at which the error was detected. Only used if an
// error occurred.
int error_pos = 0;
// The number of capture groups, without the global capture \0.
int capture_count = 0;
......
......@@ -76,7 +76,7 @@ static void CheckParseEq(const char* input, const char* expected,
CHECK(v8::internal::RegExpParser::ParseRegExp(CcTest::i_isolate(), &zone,
&reader, flags, &result));
CHECK_NOT_NULL(result.tree);
CHECK(result.error.is_null());
CHECK(result.error == RegExpError::kNone);
std::ostringstream os;
result.tree->Print(os, &zone);
if (strcmp(expected, os.str().c_str()) != 0) {
......@@ -94,7 +94,7 @@ static bool CheckSimple(const char* input) {
CHECK(v8::internal::RegExpParser::ParseRegExp(
CcTest::i_isolate(), &zone, &reader, JSRegExp::kNone, &result));
CHECK_NOT_NULL(result.tree);
CHECK(result.error.is_null());
CHECK(result.error == RegExpError::kNone);
return result.simple;
}
......@@ -112,7 +112,7 @@ static MinMaxPair CheckMinMaxMatch(const char* input) {
CHECK(v8::internal::RegExpParser::ParseRegExp(
CcTest::i_isolate(), &zone, &reader, JSRegExp::kNone, &result));
CHECK_NOT_NULL(result.tree);
CHECK(result.error.is_null());
CHECK(result.error == RegExpError::kNone);
int min_match = result.tree->min_match();
int max_match = result.tree->max_match();
MinMaxPair pair = { min_match, max_match };
......@@ -428,9 +428,8 @@ static void ExpectError(const char* input, const char* expected,
CHECK(!v8::internal::RegExpParser::ParseRegExp(isolate, &zone, &reader, flags,
&result));
CHECK_NULL(result.tree);
CHECK(!result.error.is_null());
std::unique_ptr<char[]> str = result.error->ToCString(ALLOW_NULLS);
CHECK_EQ(0, strcmp(expected, str.get()));
CHECK(result.error != RegExpError::kNone);
CHECK_EQ(0, strcmp(expected, RegExpErrorString(result.error)));
}
......@@ -468,7 +467,7 @@ TEST(Errors) {
ExpectError("\\k<a", kInvalidCaptureName, true);
const char* kDuplicateCaptureName = "Duplicate capture group name";
ExpectError("(?<a>.)(?<a>.)", kDuplicateCaptureName, true);
const char* kInvalidUnicodeEscape = "Invalid Unicode escape sequence";
const char* kInvalidUnicodeEscape = "Invalid Unicode escape";
ExpectError("(?<\\u{FISK}", kInvalidUnicodeEscape, true);
const char* kInvalidCaptureReferenced = "Invalid named capture referenced";
ExpectError("\\k<a>", kInvalidCaptureReferenced, true);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment