Commit baf7ebd6 authored by erik.corry@gmail.com's avatar erik.corry@gmail.com

Fix assert triggered in fast/regex/pcre-test-4.html.

We were not filtering out all the nodes that had non-ASCII characters.  That
has been fixed, but because of the protection against over-deep recursion when
filtering, it is wrong to assert that all nodes were filtered.  This change
therefore also makes sure we can cope with non-filtered nodes by adding back
some code removed in https://chromiumcodereview.appspot.com/10174017/
Review URL: https://chromiumcodereview.appspot.com/10358008

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@11487 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 10b0eee0
...@@ -2426,9 +2426,15 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, ...@@ -2426,9 +2426,15 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
QuickCheckDetails::Position* pos = QuickCheckDetails::Position* pos =
details->positions(characters_filled_in); details->positions(characters_filled_in);
uc16 c = quarks[i]; uc16 c = quarks[i];
// We should already have filtered out nodes that have non-ASCII if (c > char_mask) {
// characters if we are matching against an ASCII string. // If we expect a non-ASCII character from an ASCII string,
ASSERT(c <= char_mask); // there is no way we can match. Not even case independent
// matching can turn an ASCII character into non-ASCII or
// vice versa.
details->set_cannot_match();
pos->determines_perfectly = false;
return;
}
if (compiler->ignore_case()) { if (compiler->ignore_case()) {
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
int length = GetCaseIndependentLetters(isolate, c, compiler->ascii(), int length = GetCaseIndependentLetters(isolate, c, compiler->ascii(),
...@@ -2490,9 +2496,11 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, ...@@ -2490,9 +2496,11 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
int first_range = 0; int first_range = 0;
while (ranges->at(first_range).from() > char_mask) { while (ranges->at(first_range).from() > char_mask) {
first_range++; first_range++;
// We should already have filtered out nodes that cannot match if (first_range == ranges->length()) {
// so the first range should be a valid range. details->set_cannot_match();
ASSERT(first_range != ranges->length()); pos->determines_perfectly = false;
return;
}
} }
CharacterRange range = ranges->at(first_range); CharacterRange range = ranges->at(first_range);
uc16 from = range.from(); uc16 from = range.from();
...@@ -2540,10 +2548,12 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, ...@@ -2540,10 +2548,12 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
} }
} }
ASSERT(characters_filled_in != details->characters()); ASSERT(characters_filled_in != details->characters());
on_success()-> GetQuickCheckDetails(details, if (!details->cannot_match()) {
compiler, on_success()-> GetQuickCheckDetails(details,
characters_filled_in, compiler,
true); characters_filled_in,
true);
}
} }
...@@ -2687,12 +2697,14 @@ RegExpNode* LoopChoiceNode::FilterASCII(int depth) { ...@@ -2687,12 +2697,14 @@ RegExpNode* LoopChoiceNode::FilterASCII(int depth) {
if (info()->replacement_calculated) return replacement(); if (info()->replacement_calculated) return replacement();
if (depth < 0) return this; if (depth < 0) return this;
if (info()->visited) return this; if (info()->visited) return this;
VisitMarker marker(info()); {
VisitMarker marker(info());
RegExpNode* continue_replacement = continue_node_->FilterASCII(depth - 1); RegExpNode* continue_replacement = continue_node_->FilterASCII(depth - 1);
// If we can't continue after the loop then there is no sense in doing the // If we can't continue after the loop then there is no sense in doing the
// loop. // loop.
if (continue_replacement == NULL) return set_replacement(NULL); if (continue_replacement == NULL) return set_replacement(NULL);
}
return ChoiceNode::FilterASCII(depth - 1); return ChoiceNode::FilterASCII(depth - 1);
} }
......
...@@ -187,3 +187,28 @@ NoHang(/(a|(((.*)*)*x)ø|(((.*)*)*x)å)/); // 2 out of 3 branches pruned. ...@@ -187,3 +187,28 @@ NoHang(/(a|(((.*)*)*x)ø|(((.*)*)*x)å)/); // 2 out of 3 branches pruned.
var s = "Don't prune based on a repetition of length 0"; var s = "Don't prune based on a repetition of length 0";
assertEquals(null, s.match(/å{1,1}prune/)); assertEquals(null, s.match(/å{1,1}prune/));
assertEquals("prune", (s.match(/å{0,0}prune/)[0])); assertEquals("prune", (s.match(/å{0,0}prune/)[0]));
// Some very deep regexps where FilterASCII gives up in order not to make the
// stack overflow.
// None of the exec() calls below check a result: each one only has to run to
// completion (e.g. without tripping a debug assertion while the regexp is
// compiled for an ASCII subject string).
// Shallow case first: a non-ASCII character (\u0100) in the pattern, matched
// against an ASCII-only input.
var regex6 = /a*\u0100*\w/;
var input0 = "a";
regex6.exec(input0);
// Same pattern shape, but prefixed with 200 copies of "a*" so that the regexp
// is deep.
var re = "\u0100*\\w";
for (var i = 0; i < 200; i++) re = "a*" + re;
var regex7 = new RegExp(re);
regex7.exec(input0);
// Case-insensitive variant of the same deep pattern.
var regex8 = new RegExp(re, "i");
regex8.exec(input0);
// Repeat both variants with the non-ASCII character written as a character
// class ([\u0100]) rather than as a plain literal character.
re = "[\u0100]*\\w";
for (var i = 0; i < 200; i++) re = "a*" + re;
var regex9 = new RegExp(re);
regex9.exec(input0);
var regex10 = new RegExp(re, "i");
regex10.exec(input0);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment