Commit ae23436c authored by jgruber's avatar jgruber Committed by Commit bot

[regexp] Experimental support for regexp named captures

Named capture groups may be specified using the /(?<name>pattern)/u
syntax, with named backreferences specified as /\k<name>/u. They're
hidden behind the --harmony-regexp-named-captures flag, and are only
enabled for unicode regexps.

R=yangguo@chromium.org
BUG=

Review-Url: https://codereview.chromium.org/2050343002
Cr-Commit-Position: refs/heads/master@{#36986}
parent 5c5985b8
...@@ -2645,6 +2645,7 @@ EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_for_in) ...@@ -2645,6 +2645,7 @@ EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_for_in)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_iterator_close) EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_iterator_close)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_regexp_exec) EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_regexp_exec)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_regexp_lookbehind) EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_regexp_lookbehind)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_regexp_named_captures)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_regexp_property) EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_regexp_property)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_function_name) EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_function_name)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_function_sent) EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_function_sent)
...@@ -3243,6 +3244,7 @@ bool Genesis::InstallExperimentalNatives() { ...@@ -3243,6 +3244,7 @@ bool Genesis::InstallExperimentalNatives() {
static const char* harmony_regexp_lookbehind_natives[] = {nullptr}; static const char* harmony_regexp_lookbehind_natives[] = {nullptr};
static const char* harmony_instanceof_natives[] = {nullptr}; static const char* harmony_instanceof_natives[] = {nullptr};
static const char* harmony_restrictive_declarations_natives[] = {nullptr}; static const char* harmony_restrictive_declarations_natives[] = {nullptr};
static const char* harmony_regexp_named_captures_natives[] = {nullptr};
static const char* harmony_regexp_property_natives[] = {nullptr}; static const char* harmony_regexp_property_natives[] = {nullptr};
static const char* harmony_function_name_natives[] = {nullptr}; static const char* harmony_function_name_natives[] = {nullptr};
static const char* harmony_function_sent_natives[] = {nullptr}; static const char* harmony_function_sent_natives[] = {nullptr};
......
...@@ -2385,6 +2385,7 @@ void Factory::SetRegExpIrregexpData(Handle<JSRegExp> regexp, ...@@ -2385,6 +2385,7 @@ void Factory::SetRegExpIrregexpData(Handle<JSRegExp> regexp,
store->set(JSRegExp::kIrregexpMaxRegisterCountIndex, Smi::FromInt(0)); store->set(JSRegExp::kIrregexpMaxRegisterCountIndex, Smi::FromInt(0));
store->set(JSRegExp::kIrregexpCaptureCountIndex, store->set(JSRegExp::kIrregexpCaptureCountIndex,
Smi::FromInt(capture_count)); Smi::FromInt(capture_count));
store->set(JSRegExp::kIrregexpCaptureNameMapIndex, uninitialized);
regexp->set_data(*store); regexp->set_data(*store);
} }
......
...@@ -205,6 +205,7 @@ DEFINE_IMPLICATION(es_staging, harmony_async_await) ...@@ -205,6 +205,7 @@ DEFINE_IMPLICATION(es_staging, harmony_async_await)
V(harmony_do_expressions, "harmony do-expressions") \ V(harmony_do_expressions, "harmony do-expressions") \
V(harmony_restrictive_generators, \ V(harmony_restrictive_generators, \
"harmony restrictions on generator declarations") \ "harmony restrictions on generator declarations") \
V(harmony_regexp_named_captures, "harmony regexp named captures") \
V(harmony_regexp_property, "harmony unicode regexp property classes") \ V(harmony_regexp_property, "harmony unicode regexp property classes") \
V(harmony_for_in, "harmony for-in syntax") \ V(harmony_for_in, "harmony for-in syntax") \
V(harmony_async_await, "harmony async-await") V(harmony_async_await, "harmony async-await")
......
...@@ -7935,7 +7935,6 @@ class JSRegExp: public JSObject { ...@@ -7935,7 +7935,6 @@ class JSRegExp: public JSObject {
// NOT_COMPILED: Initial value. No data has been stored in the JSRegExp yet. // NOT_COMPILED: Initial value. No data has been stored in the JSRegExp yet.
// ATOM: A simple string to match against using an indexOf operation. // ATOM: A simple string to match against using an indexOf operation.
// IRREGEXP: Compiled with Irregexp. // IRREGEXP: Compiled with Irregexp.
// IRREGEXP_NATIVE: Compiled to native code with Irregexp.
enum Type { NOT_COMPILED, ATOM, IRREGEXP }; enum Type { NOT_COMPILED, ATOM, IRREGEXP };
enum Flag { enum Flag {
kNone = 0, kNone = 0,
...@@ -8028,8 +8027,11 @@ class JSRegExp: public JSObject { ...@@ -8028,8 +8027,11 @@ class JSRegExp: public JSObject {
static const int kIrregexpMaxRegisterCountIndex = kDataIndex + 4; static const int kIrregexpMaxRegisterCountIndex = kDataIndex + 4;
// Number of captures in the compiled regexp. // Number of captures in the compiled regexp.
static const int kIrregexpCaptureCountIndex = kDataIndex + 5; static const int kIrregexpCaptureCountIndex = kDataIndex + 5;
// Maps names of named capture groups (at indices 2i) to their corresponding
// capture group indices (at indices 2i + 1).
static const int kIrregexpCaptureNameMapIndex = kDataIndex + 6;
static const int kIrregexpDataSize = kIrregexpCaptureCountIndex + 1; static const int kIrregexpDataSize = kIrregexpCaptureNameMapIndex + 1;
// Offsets directly into the data fixed array. // Offsets directly into the data fixed array.
static const int kDataTagOffset = static const int kDataTagOffset =
......
...@@ -397,6 +397,7 @@ bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re, ...@@ -397,6 +397,7 @@ bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re,
Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data())); Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data()));
data->set(JSRegExp::code_index(is_one_byte), result.code); data->set(JSRegExp::code_index(is_one_byte), result.code);
SetIrregexpCaptureNameMap(*data, compile_data.capture_name_map);
int register_max = IrregexpMaxRegisterCount(*data); int register_max = IrregexpMaxRegisterCount(*data);
if (result.num_registers > register_max) { if (result.num_registers > register_max) {
SetIrregexpMaxRegisterCount(*data, result.num_registers); SetIrregexpMaxRegisterCount(*data, result.num_registers);
...@@ -416,6 +417,14 @@ void RegExpImpl::SetIrregexpMaxRegisterCount(FixedArray* re, int value) { ...@@ -416,6 +417,14 @@ void RegExpImpl::SetIrregexpMaxRegisterCount(FixedArray* re, int value) {
re->set(JSRegExp::kIrregexpMaxRegisterCountIndex, Smi::FromInt(value)); re->set(JSRegExp::kIrregexpMaxRegisterCountIndex, Smi::FromInt(value));
} }
// Writes the capture-name map into the regexp data array |re| at
// JSRegExp::kIrregexpCaptureNameMapIndex. A null |value| handle is recorded
// as Smi zero instead of a FixedArray.
void RegExpImpl::SetIrregexpCaptureNameMap(FixedArray* re,
                                           Handle<FixedArray> value) {
  const int slot = JSRegExp::kIrregexpCaptureNameMapIndex;
  if (!value.is_null()) {
    // A map was supplied: store the underlying FixedArray directly.
    re->set(slot, *value);
    return;
  }
  // No map supplied: store the Smi zero placeholder.
  re->set(slot, Smi::FromInt(0));
}
int RegExpImpl::IrregexpNumberOfCaptures(FixedArray* re) { int RegExpImpl::IrregexpNumberOfCaptures(FixedArray* re) {
return Smi::cast(re->get(JSRegExp::kIrregexpCaptureCountIndex))->value(); return Smi::cast(re->get(JSRegExp::kIrregexpCaptureCountIndex))->value();
......
...@@ -196,6 +196,8 @@ class RegExpImpl { ...@@ -196,6 +196,8 @@ class RegExpImpl {
// For acting on the JSRegExp data FixedArray. // For acting on the JSRegExp data FixedArray.
static int IrregexpMaxRegisterCount(FixedArray* re); static int IrregexpMaxRegisterCount(FixedArray* re);
static void SetIrregexpMaxRegisterCount(FixedArray* re, int value); static void SetIrregexpMaxRegisterCount(FixedArray* re, int value);
static void SetIrregexpCaptureNameMap(FixedArray* re,
Handle<FixedArray> value);
static int IrregexpNumberOfCaptures(FixedArray* re); static int IrregexpNumberOfCaptures(FixedArray* re);
static int IrregexpNumberOfRegisters(FixedArray* re); static int IrregexpNumberOfRegisters(FixedArray* re);
static ByteArray* IrregexpByteCode(FixedArray* re, bool is_one_byte); static ByteArray* IrregexpByteCode(FixedArray* re, bool is_one_byte);
...@@ -1530,6 +1532,7 @@ struct RegExpCompileData { ...@@ -1530,6 +1532,7 @@ struct RegExpCompileData {
RegExpNode* node; RegExpNode* node;
bool simple; bool simple;
bool contains_anchor; bool contains_anchor;
Handle<FixedArray> capture_name_map;
Handle<String> error; Handle<String> error;
int capture_count; int capture_count;
}; };
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#include "src/objects.h" #include "src/objects.h"
#include "src/utils.h" #include "src/utils.h"
#include "src/zone-containers.h"
#include "src/zone.h" #include "src/zone.h"
namespace v8 { namespace v8 {
...@@ -412,7 +413,8 @@ class RegExpQuantifier final : public RegExpTree { ...@@ -412,7 +413,8 @@ class RegExpQuantifier final : public RegExpTree {
class RegExpCapture final : public RegExpTree { class RegExpCapture final : public RegExpTree {
public: public:
explicit RegExpCapture(int index) : body_(NULL), index_(index) {} explicit RegExpCapture(int index)
: body_(NULL), index_(index), name_(nullptr) {}
void* Accept(RegExpVisitor* visitor, void* data) override; void* Accept(RegExpVisitor* visitor, void* data) override;
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
static RegExpNode* ToNode(RegExpTree* body, int index, static RegExpNode* ToNode(RegExpTree* body, int index,
...@@ -427,12 +429,15 @@ class RegExpCapture final : public RegExpTree { ...@@ -427,12 +429,15 @@ class RegExpCapture final : public RegExpTree {
RegExpTree* body() { return body_; } RegExpTree* body() { return body_; }
void set_body(RegExpTree* body) { body_ = body; } void set_body(RegExpTree* body) { body_ = body; }
int index() { return index_; } int index() { return index_; }
const ZoneVector<uc16>* name() const { return name_; }
void set_name(const ZoneVector<uc16>* name) { name_ = name; }
static int StartRegister(int index) { return index * 2; } static int StartRegister(int index) { return index * 2; }
static int EndRegister(int index) { return index * 2 + 1; } static int EndRegister(int index) { return index * 2 + 1; }
private: private:
RegExpTree* body_; RegExpTree* body_;
int index_; int index_;
const ZoneVector<uc16>* name_;
}; };
...@@ -489,7 +494,9 @@ class RegExpLookaround final : public RegExpTree { ...@@ -489,7 +494,9 @@ class RegExpLookaround final : public RegExpTree {
class RegExpBackReference final : public RegExpTree { class RegExpBackReference final : public RegExpTree {
public: public:
explicit RegExpBackReference(RegExpCapture* capture) : capture_(capture) {} RegExpBackReference() : capture_(nullptr), name_(nullptr) {}
explicit RegExpBackReference(RegExpCapture* capture)
: capture_(capture), name_(nullptr) {}
void* Accept(RegExpVisitor* visitor, void* data) override; void* Accept(RegExpVisitor* visitor, void* data) override;
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override; RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
RegExpBackReference* AsBackReference() override; RegExpBackReference* AsBackReference() override;
...@@ -500,9 +507,13 @@ class RegExpBackReference final : public RegExpTree { ...@@ -500,9 +507,13 @@ class RegExpBackReference final : public RegExpTree {
int max_match() override { return kInfinity; } int max_match() override { return kInfinity; }
int index() { return capture_->index(); } int index() { return capture_->index(); }
RegExpCapture* capture() { return capture_; } RegExpCapture* capture() { return capture_; }
void set_capture(RegExpCapture* capture) { capture_ = capture; }
const ZoneVector<uc16>* name() const { return name_; }
void set_name(const ZoneVector<uc16>* name) { name_ = name; }
private: private:
RegExpCapture* capture_; RegExpCapture* capture_;
const ZoneVector<uc16>* name_;
}; };
......
This diff is collapsed.
...@@ -222,13 +222,15 @@ class RegExpParser BASE_EMBEDDED { ...@@ -222,13 +222,15 @@ class RegExpParser BASE_EMBEDDED {
RegExpParserState(RegExpParserState* previous_state, RegExpParserState(RegExpParserState* previous_state,
SubexpressionType group_type, SubexpressionType group_type,
RegExpLookaround::Type lookaround_type, RegExpLookaround::Type lookaround_type,
int disjunction_capture_index, bool ignore_case, int disjunction_capture_index,
const ZoneVector<uc16>* capture_name, bool ignore_case,
bool unicode, Zone* zone) bool unicode, Zone* zone)
: previous_state_(previous_state), : previous_state_(previous_state),
builder_(new (zone) RegExpBuilder(zone, ignore_case, unicode)), builder_(new (zone) RegExpBuilder(zone, ignore_case, unicode)),
group_type_(group_type), group_type_(group_type),
lookaround_type_(lookaround_type), lookaround_type_(lookaround_type),
disjunction_capture_index_(disjunction_capture_index) {} disjunction_capture_index_(disjunction_capture_index),
capture_name_(capture_name) {}
// Parser state of containing expression, if any. // Parser state of containing expression, if any.
RegExpParserState* previous_state() { return previous_state_; } RegExpParserState* previous_state() { return previous_state_; }
bool IsSubexpression() { return previous_state_ != NULL; } bool IsSubexpression() { return previous_state_ != NULL; }
...@@ -242,9 +244,16 @@ class RegExpParser BASE_EMBEDDED { ...@@ -242,9 +244,16 @@ class RegExpParser BASE_EMBEDDED {
// Also the capture index of this sub-expression itself, if group_type // Also the capture index of this sub-expression itself, if group_type
// is CAPTURE. // is CAPTURE.
int capture_index() { return disjunction_capture_index_; } int capture_index() { return disjunction_capture_index_; }
// The name of the current sub-expression, if group_type is CAPTURE. Only
// used for named captures.
const ZoneVector<uc16>* capture_name() { return capture_name_; }
bool IsNamedCapture() const { return capture_name_ != nullptr; }
// Check whether the parser is inside a capture group with the given index. // Check whether the parser is inside a capture group with the given index.
bool IsInsideCaptureGroup(int index); bool IsInsideCaptureGroup(int index);
// Check whether the parser is inside a capture group with the given name.
bool IsInsideCaptureGroup(const ZoneVector<uc16>* name);
private: private:
// Linked list implementation of stack of states. // Linked list implementation of stack of states.
...@@ -257,11 +266,32 @@ class RegExpParser BASE_EMBEDDED { ...@@ -257,11 +266,32 @@ class RegExpParser BASE_EMBEDDED {
RegExpLookaround::Type lookaround_type_; RegExpLookaround::Type lookaround_type_;
// Stored disjunction's capture index (if any). // Stored disjunction's capture index (if any).
int disjunction_capture_index_; int disjunction_capture_index_;
// Stored capture name (if any).
const ZoneVector<uc16>* capture_name_;
}; };
// Return the 1-indexed RegExpCapture object, allocate if necessary. // Return the 1-indexed RegExpCapture object, allocate if necessary.
RegExpCapture* GetCapture(int index); RegExpCapture* GetCapture(int index);
// Creates a new named capture at the specified index. Must be called exactly
// once for each named capture. Fails if a capture with the same name is
// encountered.
bool CreateNamedCaptureAtIndex(const ZoneVector<uc16>* name, int index);
// Parses the name of a capture group (?<name>pattern). The name must adhere
// to IdentifierName in the ECMAScript standard.
const ZoneVector<uc16>* ParseCaptureGroupName();
bool ParseNamedBackReference(RegExpBuilder* builder,
RegExpParserState* state);
// After the initial parsing pass, patch corresponding RegExpCapture objects
// into all RegExpBackReferences. This is done after initial parsing in order
// to avoid complicating cases in which references come before the capture.
void PatchNamedBackReferences();
Handle<FixedArray> CreateCaptureNameMap();
Isolate* isolate() { return isolate_; } Isolate* isolate() { return isolate_; }
Zone* zone() const { return zone_; } Zone* zone() const { return zone_; }
...@@ -278,6 +308,8 @@ class RegExpParser BASE_EMBEDDED { ...@@ -278,6 +308,8 @@ class RegExpParser BASE_EMBEDDED {
Zone* zone_; Zone* zone_;
Handle<String>* error_; Handle<String>* error_;
ZoneList<RegExpCapture*>* captures_; ZoneList<RegExpCapture*>* captures_;
ZoneList<RegExpCapture*>* named_captures_;
ZoneList<RegExpBackReference*>* named_back_references_;
FlatStringReader* in_; FlatStringReader* in_;
uc32 current_; uc32 current_;
bool ignore_case_; bool ignore_case_;
......
...@@ -438,6 +438,23 @@ void TestRegExpParser(bool lookbehind) { ...@@ -438,6 +438,23 @@ void TestRegExpParser(bool lookbehind) {
CHECK_MIN_MAX("a(?=b)c", 2, 2); CHECK_MIN_MAX("a(?=b)c", 2, 2);
CHECK_MIN_MAX("a(?=bbb|bb)c", 2, 2); CHECK_MIN_MAX("a(?=bbb|bb)c", 2, 2);
CHECK_MIN_MAX("a(?!bbb|bb)c", 2, 2); CHECK_MIN_MAX("a(?!bbb|bb)c", 2, 2);
FLAG_harmony_regexp_named_captures = true;
CheckParseEq("(?<a>x)(?<b>x)(?<c>x)\\k<a>",
"(: (^ 'x') (^ 'x') (^ 'x') (<- 1))", true);
CheckParseEq("(?<a>x)(?<b>x)(?<c>x)\\k<b>",
"(: (^ 'x') (^ 'x') (^ 'x') (<- 2))", true);
CheckParseEq("(?<a>x)(?<b>x)(?<c>x)\\k<c>",
"(: (^ 'x') (^ 'x') (^ 'x') (<- 3))", true);
CheckParseEq("(?<a>a)\\k<a>", "(: (^ 'a') (<- 1))", true);
CheckParseEq("(?<a>a\\k<a>)", "(^ 'a')", true);
CheckParseEq("(?<a>\\k<a>a)", "(^ 'a')", true);
CheckParseEq("(?<a>\\k<b>)(?<b>\\k<a>)", "(: (^ (<- 2)) (^ (<- 1)))", true);
CheckParseEq("\\k<a>(?<a>a)", "(: (<- 1) (^ 'a'))", true);
CheckParseEq("(?<\\u{03C0}>a)", "(^ 'a')", true);
CheckParseEq("(?<\\u03C0>a)", "(^ 'a')", true);
FLAG_harmony_regexp_named_captures = false;
} }
...@@ -450,7 +467,6 @@ TEST(ParserWithoutLookbehind) { ...@@ -450,7 +467,6 @@ TEST(ParserWithoutLookbehind) {
TestRegExpParser(true); // Lookbehind enabled. TestRegExpParser(true); // Lookbehind enabled.
} }
TEST(ParserRegression) { TEST(ParserRegression) {
CheckParseEq("[A-Z$-][x]", "(! [A-Z $ -] [x])"); CheckParseEq("[A-Z$-][x]", "(! [A-Z $ -] [x])");
CheckParseEq("a{3,4*}", "(: 'a{3,' (# 0 - g '4') '}')"); CheckParseEq("a{3,4*}", "(: 'a{3,' (# 0 - g '4') '}')");
...@@ -458,14 +474,16 @@ TEST(ParserRegression) { ...@@ -458,14 +474,16 @@ TEST(ParserRegression) {
CheckParseEq("a|", "(| 'a' %)"); CheckParseEq("a|", "(| 'a' %)");
} }
static void ExpectError(const char* input, static void ExpectError(const char* input, const char* expected,
const char* expected) { bool unicode = false) {
v8::HandleScope scope(CcTest::isolate()); v8::HandleScope scope(CcTest::isolate());
Zone zone(CcTest::i_isolate()->allocator()); Zone zone(CcTest::i_isolate()->allocator());
FlatStringReader reader(CcTest::i_isolate(), CStrVector(input)); FlatStringReader reader(CcTest::i_isolate(), CStrVector(input));
RegExpCompileData result; RegExpCompileData result;
CHECK(!v8::internal::RegExpParser::ParseRegExp( JSRegExp::Flags flags = JSRegExp::kNone;
CcTest::i_isolate(), &zone, &reader, JSRegExp::kNone, &result)); if (unicode) flags |= JSRegExp::kUnicode;
CHECK(!v8::internal::RegExpParser::ParseRegExp(CcTest::i_isolate(), &zone,
&reader, flags, &result));
CHECK(result.tree == NULL); CHECK(result.tree == NULL);
CHECK(!result.error.is_null()); CHECK(!result.error.is_null());
v8::base::SmartArrayPointer<char> str = result.error->ToCString(ALLOW_NULLS); v8::base::SmartArrayPointer<char> str = result.error->ToCString(ALLOW_NULLS);
...@@ -499,6 +517,23 @@ TEST(Errors) { ...@@ -499,6 +517,23 @@ TEST(Errors) {
os << "()"; os << "()";
} }
ExpectError(os.str().c_str(), kTooManyCaptures); ExpectError(os.str().c_str(), kTooManyCaptures);
FLAG_harmony_regexp_named_captures = true;
const char* kInvalidCaptureName = "Invalid capture group name";
ExpectError("(?<>.)", kInvalidCaptureName, true);
ExpectError("(?<1>.)", kInvalidCaptureName, true);
ExpectError("(?<_%>.)", kInvalidCaptureName, true);
ExpectError("\\k<a", kInvalidCaptureName, true);
const char* kDuplicateCaptureName = "Duplicate capture group name";
ExpectError("(?<a>.)(?<a>.)", kDuplicateCaptureName, true);
const char* kInvalidUnicodeEscape = "Invalid Unicode escape sequence";
ExpectError("(?<\\u{FISK}", kInvalidUnicodeEscape, true);
const char* kInvalidCaptureReferenced = "Invalid named capture referenced";
ExpectError("\\k<a>", kInvalidCaptureReferenced, true);
ExpectError("(?<b>)\\k<a>", kInvalidCaptureReferenced, true);
const char* kInvalidNamedReference = "Invalid named reference";
ExpectError("\\ka", kInvalidNamedReference, true);
FLAG_harmony_regexp_named_captures = false;
} }
......
// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-regexp-named-captures
// Malformed named captures.
// Every case hands a unicode-mode (/u) regexp literal to assertThrows as a
// source string and expects an exception.
assertThrows("/(?<>a)/u"); // Empty name.
assertThrows("/(?<aa)/u"); // Unterminated name.
assertThrows("/(?<42a>a)/u"); // Name starting with digits.
assertThrows("/(?<:a>a)/u"); // Name starting with invalid char.
assertThrows("/(?<a:>a)/u"); // Name containing invalid char.
assertThrows("/(?<a>a)(?<a>a)/u"); // Duplicate name.
assertThrows("/(?<a>a)(?<b>b)(?<a>a)/u"); // Duplicate name.
// References to a name that no group in the pattern declares, including
// names that are only a prefix or an extension of a declared name.
assertThrows("/\\k<a>/u"); // Invalid reference.
assertThrows("/(?<a>a)\\k<ab>/u"); // Invalid reference.
assertThrows("/(?<ab>a)\\k<a>/u"); // Invalid reference.
assertThrows("/\\k<a>(?<ab>a)/u"); // Invalid reference.
// Fallback behavior in non-unicode mode.
// The same malformed patterns as in the unicode section, without the u flag,
// are still expected to throw.
assertThrows("/(?<>a)/");
assertThrows("/(?<aa)/");
assertThrows("/(?<42a>a)/");
assertThrows("/(?<:a>a)/");
assertThrows("/(?<a:>a)/");
assertThrows("/(?<a>a)(?<a>a)/");
assertThrows("/(?<a>a)(?<b>b)(?<a>a)/");
assertThrows("/(?<a>a)\\k<ab>/");
assertThrows("/(?<ab>a)\\k<a>/");
// With no named group in the pattern, \k<a> (and the unterminated \k<a) is
// expected to match the literal text "k<a>" ("k<a") instead of throwing.
assertEquals(["k<a>"], "xxxk<a>xxx".match(/\k<a>/));
assertEquals(["k<a"], "xxxk<a>xxx".match(/\k<a/));
// Basic named groups.
// Names made of letters, letters followed by digits, "_" and "$" are
// accepted; the expected arrays list the full match followed by each capture.
assertEquals(["a", "a"], "bab".match(/(?<a>a)/u));
assertEquals(["a", "a"], "bab".match(/(?<a42>a)/u));
assertEquals(["a", "a"], "bab".match(/(?<_>a)/u));
assertEquals(["a", "a"], "bab".match(/(?<$>a)/u));
assertEquals(["bab", "a"], "bab".match(/.(?<$>a)./u));
assertEquals(["bab", "a", "b"], "bab".match(/.(?<a>a)(.)/u));
assertEquals(["bab", "a", "b"], "bab".match(/.(?<a>a)(?<b>.)/u));
assertEquals(["bab", "ab"], "bab".match(/.(?<a>\w\w)/u));
assertEquals(["bab", "bab"], "bab".match(/(?<a>\w\w\w)/u));
assertEquals(["bab", "ba", "b"], "bab".match(/(?<a>\w\w)(?<b>\w)/u));
// A named group must produce the same match result as the equivalent
// unnamed group in the same position.
assertEquals("bab".match(/(a)/u), "bab".match(/(?<a>a)/u));
assertEquals("bab".match(/(a)/u), "bab".match(/(?<a42>a)/u));
assertEquals("bab".match(/(a)/u), "bab".match(/(?<_>a)/u));
assertEquals("bab".match(/(a)/u), "bab".match(/(?<$>a)/u));
assertEquals("bab".match(/.(a)./u), "bab".match(/.(?<$>a)./u));
assertEquals("bab".match(/.(a)(.)/u), "bab".match(/.(?<a>a)(.)/u));
assertEquals("bab".match(/.(a)(.)/u), "bab".match(/.(?<a>a)(?<b>.)/u));
assertEquals("bab".match(/.(\w\w)/u), "bab".match(/.(?<a>\w\w)/u));
assertEquals("bab".match(/(\w\w\w)/u), "bab".match(/(?<a>\w\w\w)/u));
assertEquals("bab".match(/(\w\w)(\w)/u), "bab".match(/(?<a>\w\w)(?<b>\w)/u));
// Numbered back references (\1, \2) still resolve when the referenced or
// surrounding groups are named.
assertEquals(["bab", "b"], "bab".match(/(?<b>b).\1/u));
assertEquals(["baba", "b", "a"], "baba".match(/(.)(?<a>a)\1\2/u));
assertEquals(["baba", "b", "a", "b", "a"],
"baba".match(/(.)(?<a>a)(?<b>\1)(\2)/u));
// The group body may itself begin with "<" or ">" right after the name.
assertEquals(["<a", "<"], "<a".match(/(?<lt><)a/u));
assertEquals([">a", ">"], ">a".match(/(?<gt>>)a/u));
// Named references.
// \k<b> must match the text captured by group b: "bab" matches, "baa" does
// not.
assertEquals(["bab", "b"], "bab".match(/(?<b>.).\k<b>/u));
assertNull("baa".match(/(?<b>.).\k<b>/u));
// Nested groups.
// Captures are listed in the order the groups open: a, then b, then c.
assertEquals(["bab", "bab", "ab", "b"], "bab".match(/(?<a>.(?<b>.(?<c>.)))/u));
// Reference inside group.
// Per the expected result, the self-reference contributes nothing to the
// capture: group a ends up holding only the "b" matched by \w.
assertEquals(["bab", "b"], "bab".match(/(?<a>\k<a>\w)../u));
// Reference before group.
// A reference that precedes its group is accepted; the expected results show
// it matching the empty string until the group has captured.
assertEquals(["bab", "b"], "bab".match(/\k<a>(?<a>b)\w\k<a>/u));
assertEquals(["bab", "b", "a"], "bab".match(/(?<b>b)\k<a>(?<a>a)\k<b>/u));
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment