Commit c95ecb1f authored by erik.corry@gmail.com's avatar erik.corry@gmail.com

Refix issue 1472. The previous fix worked for the example in the bug

report, but was not general enough to catch all cases.  This is a new
approach.  Includes regression test!
Review URL: http://codereview.chromium.org/7193007

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@8318 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent ee59eff1
......@@ -925,26 +925,6 @@ bool RegExpCapture::IsAnchoredAtEnd() {
}
bool RegExpDisjunction::ContainsExpandedQuantifier() {
if (contains_expanded_quantifier_) return true;
int len = alternatives_->length();
for (int i = 0; i < len; i++) {
if (alternatives_->at(i)->ContainsExpandedQuantifier()) return true;
}
return false;
}
bool RegExpAlternative::ContainsExpandedQuantifier() {
if (contains_expanded_quantifier_) return true;
int len = nodes_->length();
for (int i = 0; i < len; i++) {
if (nodes_->at(i)->ContainsExpandedQuantifier()) return true;
}
return false;
}
// Convert regular expression trees to a simple sexp representation.
// This representation should be different from the input grammar
// in as many cases as possible, to make it more difficult for incorrect
......
......@@ -1751,7 +1751,6 @@ class RegExpVisitor BASE_EMBEDDED {
class RegExpTree: public ZoneObject {
public:
static const int kInfinity = kMaxInt;
RegExpTree() : contains_expanded_quantifier_(false) { }
virtual ~RegExpTree() { }
virtual void* Accept(RegExpVisitor* visitor, void* data) = 0;
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
......@@ -1761,12 +1760,6 @@ class RegExpTree: public ZoneObject {
virtual bool IsAnchoredAtEnd() { return false; }
virtual int min_match() = 0;
virtual int max_match() = 0;
virtual bool ContainsExpandedQuantifier() {
return contains_expanded_quantifier_;
}
void set_contains_expanded_quantifier(bool value) {
contains_expanded_quantifier_ = value;
}
// Returns the interval of registers used for captures within this
// expression.
virtual Interval CaptureRegisters() { return Interval::Empty(); }
......@@ -1777,9 +1770,6 @@ class RegExpTree: public ZoneObject {
virtual bool Is##Name();
FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ASTYPE)
#undef MAKE_ASTYPE
protected:
bool contains_expanded_quantifier_;
};
......@@ -1796,7 +1786,6 @@ class RegExpDisjunction: public RegExpTree {
virtual bool IsAnchoredAtEnd();
virtual int min_match() { return min_match_; }
virtual int max_match() { return max_match_; }
virtual bool ContainsExpandedQuantifier();
ZoneList<RegExpTree*>* alternatives() { return alternatives_; }
private:
ZoneList<RegExpTree*>* alternatives_;
......@@ -1818,7 +1807,6 @@ class RegExpAlternative: public RegExpTree {
virtual bool IsAnchoredAtEnd();
virtual int min_match() { return min_match_; }
virtual int max_match() { return max_match_; }
virtual bool ContainsExpandedQuantifier();
ZoneList<RegExpTree*>* nodes() { return nodes_; }
private:
ZoneList<RegExpTree*>* nodes_;
......@@ -1968,8 +1956,7 @@ class RegExpQuantifier: public RegExpTree {
min_(min),
max_(max),
min_match_(min * body->min_match()),
type_(type),
contains_expanded_quantifier_(false) {
type_(type) {
if (max > 0 && body->max_match() > kInfinity / max) {
max_match_ = kInfinity;
} else {
......@@ -1991,9 +1978,6 @@ class RegExpQuantifier: public RegExpTree {
virtual bool IsQuantifier();
virtual int min_match() { return min_match_; }
virtual int max_match() { return max_match_; }
virtual bool ContainsExpandedQuantifier() {
return contains_expanded_quantifier_ || body_->ContainsExpandedQuantifier();
}
int min() { return min_; }
int max() { return max_; }
bool is_possessive() { return type_ == POSSESSIVE; }
......@@ -2008,7 +1992,6 @@ class RegExpQuantifier: public RegExpTree {
int min_match_;
int max_match_;
Type type_;
bool contains_expanded_quantifier_;
};
......@@ -2030,9 +2013,6 @@ class RegExpCapture: public RegExpTree {
virtual bool IsCapture();
virtual int min_match() { return body_->min_match(); }
virtual int max_match() { return body_->max_match(); }
virtual bool ContainsExpandedQuantifier() {
return contains_expanded_quantifier_ || body_->ContainsExpandedQuantifier();
}
RegExpTree* body() { return body_; }
int index() { return index_; }
static int StartRegister(int index) { return index * 2; }
......@@ -2064,9 +2044,6 @@ class RegExpLookahead: public RegExpTree {
virtual bool IsAnchoredAtStart();
virtual int min_match() { return 0; }
virtual int max_match() { return 0; }
virtual bool ContainsExpandedQuantifier() {
return contains_expanded_quantifier_ || body_->ContainsExpandedQuantifier();
}
RegExpTree* body() { return body_; }
bool is_positive() { return is_positive_; }
int capture_count() { return capture_count_; }
......
......@@ -810,6 +810,11 @@ class RegExpCompiler {
inline bool ignore_case() { return ignore_case_; }
inline bool ascii() { return ascii_; }
int current_expansion_factor() { return current_expansion_factor_; }
void set_current_expansion_factor(int value) {
current_expansion_factor_ = value;
}
static const int kNoRegister = -1;
private:
......@@ -821,6 +826,7 @@ class RegExpCompiler {
bool ignore_case_;
bool ascii_;
bool reg_exp_too_big_;
int current_expansion_factor_;
};
......@@ -848,7 +854,8 @@ RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii)
recursion_depth_(0),
ignore_case_(ignore_case),
ascii_(ascii),
reg_exp_too_big_(false) {
reg_exp_too_big_(false),
current_expansion_factor_(1) {
accept_ = new EndNode(EndNode::ACCEPT);
ASSERT(next_register_ - 1 <= RegExpMacroAssembler::kMaxRegister);
}
......@@ -3730,6 +3737,44 @@ RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler,
}
// Scoped object to keep track of how much we unroll quantifier loops in the
// regexp graph generator.
class RegExpExpansionLimiter {
public:
static const int kMaxExpansionFactor = 6;
RegExpExpansionLimiter(RegExpCompiler* compiler, int factor)
: compiler_(compiler),
saved_expansion_factor_(compiler->current_expansion_factor()),
ok_to_expand_(saved_expansion_factor_ <= kMaxExpansionFactor) {
ASSERT(factor > 0);
if (ok_to_expand_) {
if (factor > kMaxExpansionFactor) {
// Avoid integer overflow of the current expansion factor.
ok_to_expand_ = false;
compiler->set_current_expansion_factor(kMaxExpansionFactor + 1);
} else {
int new_factor = saved_expansion_factor_ * factor;
ok_to_expand_ = (new_factor <= kMaxExpansionFactor);
compiler->set_current_expansion_factor(new_factor);
}
}
}
~RegExpExpansionLimiter() {
compiler_->set_current_expansion_factor(saved_expansion_factor_);
}
bool ok_to_expand() { return ok_to_expand_; }
private:
RegExpCompiler* compiler_;
int saved_expansion_factor_;
bool ok_to_expand_;
DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter);
};
RegExpNode* RegExpQuantifier::ToNode(int min,
int max,
bool is_greedy,
......@@ -3766,45 +3811,49 @@ RegExpNode* RegExpQuantifier::ToNode(int min,
bool needs_capture_clearing = !capture_registers.is_empty();
if (body_can_be_empty) {
body_start_reg = compiler->AllocateRegister();
} else if (FLAG_regexp_optimization &&
!body->ContainsExpandedQuantifier() &&
!needs_capture_clearing) {
} else if (FLAG_regexp_optimization && !needs_capture_clearing) {
// Only unroll if there are no captures and the body can't be
// empty.
if (min > 0 && min <= kMaxUnrolledMinMatches) {
int new_max = (max == kInfinity) ? max : max - min;
// Recurse once to get the loop or optional matches after the fixed ones.
RegExpNode* answer = ToNode(
0, new_max, is_greedy, body, compiler, on_success, true);
// Unroll the forced matches from 0 to min. This can cause chains of
// TextNodes (which the parser does not generate). These should be
// combined if it turns out they hinder good code generation.
for (int i = 0; i < min; i++) {
answer = body->ToNode(compiler, answer);
{
RegExpExpansionLimiter limiter(
compiler, min + ((max != min) ? 1 : 0));
if (min > 0 && min <= kMaxUnrolledMinMatches && limiter.ok_to_expand()) {
int new_max = (max == kInfinity) ? max : max - min;
// Recurse once to get the loop or optional matches after the fixed
// ones.
RegExpNode* answer = ToNode(
0, new_max, is_greedy, body, compiler, on_success, true);
// Unroll the forced matches from 0 to min. This can cause chains of
// TextNodes (which the parser does not generate). These should be
// combined if it turns out they hinder good code generation.
for (int i = 0; i < min; i++) {
answer = body->ToNode(compiler, answer);
}
return answer;
}
if (min > 1) body->set_contains_expanded_quantifier(true);
return answer;
}
if (max <= kMaxUnrolledMaxMatches) {
ASSERT(min == 0);
// Unroll the optional matches up to max.
RegExpNode* answer = on_success;
for (int i = 0; i < max; i++) {
ChoiceNode* alternation = new ChoiceNode(2);
if (is_greedy) {
alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler,
answer)));
alternation->AddAlternative(GuardedAlternative(on_success));
} else {
alternation->AddAlternative(GuardedAlternative(on_success));
alternation->AddAlternative(GuardedAlternative(body->ToNode(compiler,
answer)));
if (max <= kMaxUnrolledMaxMatches && min == 0) {
ASSERT(max > 0); // Due to the 'if' above.
RegExpExpansionLimiter limiter(compiler, max);
if (limiter.ok_to_expand()) {
// Unroll the optional matches up to max.
RegExpNode* answer = on_success;
for (int i = 0; i < max; i++) {
ChoiceNode* alternation = new ChoiceNode(2);
if (is_greedy) {
alternation->AddAlternative(
GuardedAlternative(body->ToNode(compiler, answer)));
alternation->AddAlternative(GuardedAlternative(on_success));
} else {
alternation->AddAlternative(GuardedAlternative(on_success));
alternation->AddAlternative(
GuardedAlternative(body->ToNode(compiler, answer)));
}
answer = alternation;
if (not_at_start) alternation->set_not_at_start();
}
answer = alternation;
if (not_at_start) alternation->set_not_at_start();
return answer;
}
if (max > 1) body->set_contains_expanded_quantifier(true);
return answer;
}
}
bool has_min = min > 0;
......@@ -4130,12 +4179,6 @@ void CharacterRange::Split(ZoneList<CharacterRange>* base,
}
static void AddUncanonicals(Isolate* isolate,
ZoneList<CharacterRange>* ranges,
int bottom,
int top);
void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,
bool is_ascii) {
Isolate* isolate = Isolate::Current();
......@@ -4296,101 +4339,6 @@ SetRelation CharacterRange::WordCharacterRelation(
}
static void AddUncanonicals(Isolate* isolate,
ZoneList<CharacterRange>* ranges,
int bottom,
int top) {
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
// Zones with no case mappings. There is a DEBUG-mode loop to assert that
// this table is correct.
// 0x0600 - 0x0fff
// 0x1100 - 0x1cff
// 0x2000 - 0x20ff
// 0x2200 - 0x23ff
// 0x2500 - 0x2bff
// 0x2e00 - 0xa5ff
// 0xa800 - 0xfaff
// 0xfc00 - 0xfeff
const int boundary_count = 18;
int boundaries[] = {
0x600, 0x1000, 0x1100, 0x1d00, 0x2000, 0x2100, 0x2200, 0x2400, 0x2500,
0x2c00, 0x2e00, 0xa600, 0xa800, 0xfb00, 0xfc00, 0xff00};
// Special ASCII rule from spec can save us some work here.
if (bottom == 0x80 && top == 0xffff) return;
if (top <= boundaries[0]) {
CharacterRange range(bottom, top);
range.AddCaseEquivalents(ranges, false);
return;
}
// Split up very large ranges. This helps remove ranges where there are no
// case mappings.
for (int i = 0; i < boundary_count; i++) {
if (bottom < boundaries[i] && top >= boundaries[i]) {
AddUncanonicals(isolate, ranges, bottom, boundaries[i] - 1);
AddUncanonicals(isolate, ranges, boundaries[i], top);
return;
}
}
// If we are completely in a zone with no case mappings then we are done.
for (int i = 0; i < boundary_count; i += 2) {
if (bottom >= boundaries[i] && top < boundaries[i + 1]) {
#ifdef DEBUG
for (int j = bottom; j <= top; j++) {
unsigned current_char = j;
int length = isolate->jsregexp_uncanonicalize()->get(current_char,
'\0', chars);
for (int k = 0; k < length; k++) {
ASSERT(chars[k] == current_char);
}
}
#endif
return;
}
}
// Step through the range finding equivalent characters.
ZoneList<unibrow::uchar> *characters = new ZoneList<unibrow::uchar>(100);
for (int i = bottom; i <= top; i++) {
int length = isolate->jsregexp_uncanonicalize()->get(i, '\0', chars);
for (int j = 0; j < length; j++) {
uc32 chr = chars[j];
if (chr != i && (chr < bottom || chr > top)) {
characters->Add(chr);
}
}
}
// Step through the equivalent characters finding simple ranges and
// adding ranges to the character class.
if (characters->length() > 0) {
int new_from = characters->at(0);
int new_to = new_from;
for (int i = 1; i < characters->length(); i++) {
int chr = characters->at(i);
if (chr == new_to + 1) {
new_to++;
} else {
if (new_to == new_from) {
ranges->Add(CharacterRange::Singleton(new_from));
} else {
ranges->Add(CharacterRange(new_from, new_to));
}
new_from = new_to = chr;
}
}
if (new_to == new_from) {
ranges->Add(CharacterRange::Singleton(new_from));
} else {
ranges->Add(CharacterRange(new_from, new_to));
}
}
}
ZoneList<CharacterRange>* CharacterSet::ranges() {
if (ranges_ == NULL) {
ranges_ = new ZoneList<CharacterRange>(2);
......
// Copyright 2011 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Avoid excessive expansions of regexp repetitions inside regexp repetitions.
// Some of these caused stack overflows, others cause out-of-memory.
var r1 = /(?:a(?:b(?:c(?:d(?:e(?:f(?:g(?:h(?:i(?:j(?:k(?:l(?:m(?:n(?:o(?:p(?:q(?:r(?:s(?:t(?:u(?:v(?:w(?:x(?:y(?:z(?:FooBar)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)+)/;
"xxx".match(r1);
var r2 = /(?:a(?:b(?:c(?:d(?:e(?:f(?:g(?:h(?:i(?:j(?:k(?:l(?:FooBar){0,2}){0,2}){0,2}){0,2}){0,2}){0,2}){0,2}){0,2}){0,2}){0,2}){0,2}){0,2}){0,2}/;
"xxx".match(r2);
var r3 = /(?:a(?:b(?:c(?:d(?:e(?:f(?:g(?:h(?:i(?:j(?:k(?:l(?:FooBar){2}){2}){2}){2}){2}){2}){2}){2}){2}){2}){2}){2}){2}/;
"xxx".match(r3);
var r4 = /(?:a(?:b(?:c(?:d(?:e(?:f(?:g(?:h(?:i(?:FooBar){3,6}){3,6}){3,6}){3,6}){3,6}){3,6}){3,6}){3,6}){3,6}){3,6}/;
"xxx".match(r4);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment