Commit 44a8fec8 authored by yangguo, committed by Commit bot

[regexp] break recursion in mutually recursive capture/back references.

TBR=adamk@chromium.org
R=erik.corry@gmail.com
BUG=v8:4616
LOG=N

Review URL: https://codereview.chromium.org/1522353002

Cr-Commit-Position: refs/heads/master@{#32879}
parent f910ed8e
......@@ -3186,14 +3186,9 @@ class RegExpBackReference final : public RegExpTree {
RegExpBackReference* AsBackReference() override;
bool IsBackReference() override;
int min_match() override { return 0; }
// The capture may not be completely parsed yet, if the reference occurs
// before the capture. In the ordinary case, nothing has been captured yet,
// so the back reference must have the length 0. If the back reference is
// inside a lookbehind, effectively making it a forward reference, we return
// 0 since lookbehinds have a length of 0.
int max_match() override {
return capture_->body() ? capture_->max_match() : 0;
}
// The back reference may be recursive, e.g. /(\2)(\1)/, so asking the
// referenced capture for its maximum match length could recurse without end.
// To avoid that, no attempt is made to compute a bound here: kInfinity is
// returned unconditionally.
int max_match() override { return kInfinity; }
int index() { return capture_->index(); }
RegExpCapture* capture() { return capture_; }
private:
......
......@@ -5565,8 +5565,11 @@ RegExpTree* RegExpParser::ParseDisjunction() {
int index = 0;
if (ParseBackReferenceIndex(&index)) {
if (state->IsInsideCaptureGroup(index)) {
// The backreference is inside the capture group it refers to.
// Nothing can possibly have been captured yet.
// The back reference is inside the capture group it refers to.
// Nothing can possibly have been captured yet, so we use empty
// instead. This ensures that, when checking a back reference,
// the capture registers of the referenced capture are either
// both set or both cleared.
builder->AddEmpty();
} else {
RegExpCapture* capture = GetCapture(index);
......
......@@ -110,6 +110,9 @@ static void CheckParseEq(const char* input, const char* expected) {
CHECK(result.error.is_null());
std::ostringstream os;
result.tree->Print(os, &zone);
if (strcmp(expected, os.str().c_str()) != 0) {
printf("%s | %s\n", expected, os.str().c_str());
}
CHECK_EQ(0, strcmp(expected, os.str().c_str()));
}
......@@ -269,6 +272,7 @@ void TestRegExpParser(bool lookbehind) {
CheckParseEq("(a)\\1", "(: (^ 'a') (<- 1))");
CheckParseEq("(a\\1)", "(^ 'a')");
CheckParseEq("(\\1a)", "(^ 'a')");
CheckParseEq("(\\2)(\\1)", "(: (^ (<- 2)) (^ (<- 1)))");
CheckParseEq("(?=a)?a", "'a'");
CheckParseEq("(?=a){0,10}a", "'a'");
CheckParseEq("(?=a){1,10}a", "(: (-> + 'a') 'a')");
......@@ -375,8 +379,8 @@ void TestRegExpParser(bool lookbehind) {
CHECK_MIN_MAX("(?:ab)|cde", 2, 3);
CHECK_MIN_MAX("(ab)", 2, 2);
CHECK_MIN_MAX("(ab|cde)", 2, 3);
CHECK_MIN_MAX("(ab)\\1", 2, 4);
CHECK_MIN_MAX("(ab|cde)\\1", 2, 6);
CHECK_MIN_MAX("(ab)\\1", 2, RegExpTree::kInfinity);
CHECK_MIN_MAX("(ab|cde)\\1", 2, RegExpTree::kInfinity);
CHECK_MIN_MAX("(?:ab)?", 0, 2);
CHECK_MIN_MAX("(?:ab)*", 0, RegExpTree::kInfinity);
CHECK_MIN_MAX("(?:ab)+", 2, RegExpTree::kInfinity);
......
......@@ -157,3 +157,9 @@ assertEquals(["abc", "abc"], /(abc\1)/i.exec("abc\u1234"));
var oob_subject = "abcdefghijklmnabcdefghijklmn".substr(14);
assertNull(oob_subject.match(/(?=(abcdefghijklmn))(?<=\1)a/i));
assertNull(oob_subject.match(/(?=(abcdefghijklmn))(?<=\1)a/));
// Mutually recursive capture/back references.
assertEquals(["cacb", "a", ""], /(?<=a(.\2)b(\1)).{4}/.exec("aabcacbc"));
assertEquals(["b", "ac", "ac"], /(?<=a(\2)b(..\1))b/.exec("aacbacb"));
assertEquals(["x", "aa"], /(?<=(?:\1b)(aa))./.exec("aabaax"));
assertEquals(["x", "aa"], /(?<=(?:\1|b)(aa))./.exec("aaaax"));
......@@ -722,3 +722,10 @@ assertThrows("RegExp.prototype.toString.call(true)", TypeError);
assertThrows("RegExp.prototype.toString.call([])", TypeError);
assertThrows("RegExp.prototype.toString.call({})", TypeError);
assertThrows("RegExp.prototype.toString.call(function(){})", TypeError);
// Test mutually recursive capture and backreferences.
assertEquals(["b", "", ""], /(\2)b(\1)/.exec("aba"));
assertEquals(["a", "", ""], /(\2).(\1)/.exec("aba"));
assertEquals(["aba", "a", "a"], /(.\2).(\1)/.exec("aba"));
assertEquals(["acbc", "c", "c"], /a(.\2)b(\1)$/.exec("acbc"));
assertEquals(["acbc", "c", "c"], /a(.\2)b(\1)/.exec("aabcacbc"));
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment