Commit c5c852f6 authored by erik.corry@gmail.com

Irregexp: Preload more characters when we are not at the start of the input
and some alternations in the disjunction are anchored.
Review URL: http://codereview.chromium.org/5524006

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5915 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent dd1a7fa2
...@@ -1650,41 +1650,64 @@ RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler, ...@@ -1650,41 +1650,64 @@ RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,
} }
int ActionNode::EatsAtLeast(int still_to_find, int recursion_depth) { int ActionNode::EatsAtLeast(int still_to_find,
int recursion_depth,
bool not_at_start) {
if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
if (type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input! if (type_ == POSITIVE_SUBMATCH_SUCCESS) return 0; // Rewinds input!
return on_success()->EatsAtLeast(still_to_find, recursion_depth + 1); return on_success()->EatsAtLeast(still_to_find,
recursion_depth + 1,
not_at_start);
} }
int AssertionNode::EatsAtLeast(int still_to_find, int recursion_depth) { int AssertionNode::EatsAtLeast(int still_to_find,
int recursion_depth,
bool not_at_start) {
if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
return on_success()->EatsAtLeast(still_to_find, recursion_depth + 1); // If we know we are not at the start and we are asked "how many characters
// will you match if you succeed?" then we can answer anything since false
// implies false. So let's just return the max answer (still_to_find) since
// that won't prevent us from preloading a lot of characters for the other
// branches in the node graph.
if (type() == AT_START && not_at_start) return still_to_find;
return on_success()->EatsAtLeast(still_to_find,
recursion_depth + 1,
not_at_start);
} }
int BackReferenceNode::EatsAtLeast(int still_to_find, int recursion_depth) { int BackReferenceNode::EatsAtLeast(int still_to_find,
int recursion_depth,
bool not_at_start) {
if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
return on_success()->EatsAtLeast(still_to_find, recursion_depth + 1); return on_success()->EatsAtLeast(still_to_find,
recursion_depth + 1,
not_at_start);
} }
int TextNode::EatsAtLeast(int still_to_find, int recursion_depth) { int TextNode::EatsAtLeast(int still_to_find,
int recursion_depth,
bool not_at_start) {
int answer = Length(); int answer = Length();
if (answer >= still_to_find) return answer; if (answer >= still_to_find) return answer;
if (recursion_depth > RegExpCompiler::kMaxRecursion) return answer; if (recursion_depth > RegExpCompiler::kMaxRecursion) return answer;
// We are not at start after this node so we set the last argument to 'true'.
return answer + on_success()->EatsAtLeast(still_to_find - answer, return answer + on_success()->EatsAtLeast(still_to_find - answer,
recursion_depth + 1); recursion_depth + 1,
true);
} }
int NegativeLookaheadChoiceNode::EatsAtLeast(int still_to_find, int NegativeLookaheadChoiceNode::EatsAtLeast(int still_to_find,
int recursion_depth) { int recursion_depth,
bool not_at_start) {
if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
// Alternative 0 is the negative lookahead, alternative 1 is what comes // Alternative 0 is the negative lookahead, alternative 1 is what comes
// afterwards. // afterwards.
RegExpNode* node = alternatives_->at(1).node(); RegExpNode* node = alternatives_->at(1).node();
return node->EatsAtLeast(still_to_find, recursion_depth + 1); return node->EatsAtLeast(still_to_find, recursion_depth + 1, not_at_start);
} }
...@@ -1702,7 +1725,8 @@ void NegativeLookaheadChoiceNode::GetQuickCheckDetails( ...@@ -1702,7 +1725,8 @@ void NegativeLookaheadChoiceNode::GetQuickCheckDetails(
int ChoiceNode::EatsAtLeastHelper(int still_to_find, int ChoiceNode::EatsAtLeastHelper(int still_to_find,
int recursion_depth, int recursion_depth,
RegExpNode* ignore_this_node) { RegExpNode* ignore_this_node,
bool not_at_start) {
if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0; if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
int min = 100; int min = 100;
int choice_count = alternatives_->length(); int choice_count = alternatives_->length();
...@@ -1710,20 +1734,31 @@ int ChoiceNode::EatsAtLeastHelper(int still_to_find, ...@@ -1710,20 +1734,31 @@ int ChoiceNode::EatsAtLeastHelper(int still_to_find,
RegExpNode* node = alternatives_->at(i).node(); RegExpNode* node = alternatives_->at(i).node();
if (node == ignore_this_node) continue; if (node == ignore_this_node) continue;
int node_eats_at_least = node->EatsAtLeast(still_to_find, int node_eats_at_least = node->EatsAtLeast(still_to_find,
recursion_depth + 1); recursion_depth + 1,
not_at_start);
if (node_eats_at_least < min) min = node_eats_at_least; if (node_eats_at_least < min) min = node_eats_at_least;
} }
return min; return min;
} }
int LoopChoiceNode::EatsAtLeast(int still_to_find, int recursion_depth) { int LoopChoiceNode::EatsAtLeast(int still_to_find,
return EatsAtLeastHelper(still_to_find, recursion_depth, loop_node_); int recursion_depth,
bool not_at_start) {
return EatsAtLeastHelper(still_to_find,
recursion_depth,
loop_node_,
not_at_start);
} }
int ChoiceNode::EatsAtLeast(int still_to_find, int recursion_depth) { int ChoiceNode::EatsAtLeast(int still_to_find,
return EatsAtLeastHelper(still_to_find, recursion_depth, NULL); int recursion_depth,
bool not_at_start) {
return EatsAtLeastHelper(still_to_find,
recursion_depth,
NULL,
not_at_start);
} }
...@@ -2630,8 +2665,9 @@ void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { ...@@ -2630,8 +2665,9 @@ void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
} }
int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler) { int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler,
int preload_characters = EatsAtLeast(4, 0); bool not_at_start) {
int preload_characters = EatsAtLeast(4, 0, not_at_start);
if (compiler->macro_assembler()->CanReadUnaligned()) { if (compiler->macro_assembler()->CanReadUnaligned()) {
bool ascii = compiler->ascii(); bool ascii = compiler->ascii();
if (ascii) { if (ascii) {
...@@ -2839,7 +2875,9 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) { ...@@ -2839,7 +2875,9 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
int first_normal_choice = greedy_loop ? 1 : 0; int first_normal_choice = greedy_loop ? 1 : 0;
int preload_characters = CalculatePreloadCharacters(compiler); int preload_characters =
CalculatePreloadCharacters(compiler,
current_trace->at_start() == Trace::FALSE);
bool preload_is_current = bool preload_is_current =
(current_trace->characters_preloaded() == preload_characters); (current_trace->characters_preloaded() == preload_characters);
bool preload_has_checked_bounds = preload_is_current; bool preload_has_checked_bounds = preload_is_current;
......
...@@ -596,8 +596,13 @@ class RegExpNode: public ZoneObject { ...@@ -596,8 +596,13 @@ class RegExpNode: public ZoneObject {
// How many characters must this node consume at a minimum in order to // How many characters must this node consume at a minimum in order to
// succeed. If we have found at least 'still_to_find' characters that // succeed. If we have found at least 'still_to_find' characters that
// must be consumed there is no need to ask any following nodes whether // must be consumed there is no need to ask any following nodes whether
// they are sure to eat any more characters. // they are sure to eat any more characters. The not_at_start argument is
virtual int EatsAtLeast(int still_to_find, int recursion_depth) = 0; // used to indicate that we know we are not at the start of the input. In
// this case anchored branches will always fail and can be ignored when
// determining how many characters are consumed on success.
virtual int EatsAtLeast(int still_to_find,
int recursion_depth,
bool not_at_start) = 0;
// Emits some quick code that checks whether the preloaded characters match. // Emits some quick code that checks whether the preloaded characters match.
// Falls through on certain failure, jumps to the label on possible success. // Falls through on certain failure, jumps to the label on possible success.
// If the node cannot make a quick check it does nothing and returns false. // If the node cannot make a quick check it does nothing and returns false.
...@@ -765,7 +770,9 @@ class ActionNode: public SeqRegExpNode { ...@@ -765,7 +770,9 @@ class ActionNode: public SeqRegExpNode {
RegExpNode* on_success); RegExpNode* on_success);
virtual void Accept(NodeVisitor* visitor); virtual void Accept(NodeVisitor* visitor);
virtual void Emit(RegExpCompiler* compiler, Trace* trace); virtual void Emit(RegExpCompiler* compiler, Trace* trace);
virtual int EatsAtLeast(int still_to_find, int recursion_depth); virtual int EatsAtLeast(int still_to_find,
int recursion_depth,
bool not_at_start);
virtual void GetQuickCheckDetails(QuickCheckDetails* details, virtual void GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler, RegExpCompiler* compiler,
int filled_in, int filled_in,
...@@ -829,7 +836,9 @@ class TextNode: public SeqRegExpNode { ...@@ -829,7 +836,9 @@ class TextNode: public SeqRegExpNode {
} }
virtual void Accept(NodeVisitor* visitor); virtual void Accept(NodeVisitor* visitor);
virtual void Emit(RegExpCompiler* compiler, Trace* trace); virtual void Emit(RegExpCompiler* compiler, Trace* trace);
virtual int EatsAtLeast(int still_to_find, int recursion_depth); virtual int EatsAtLeast(int still_to_find,
int recursion_depth,
bool not_at_start);
virtual void GetQuickCheckDetails(QuickCheckDetails* details, virtual void GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler, RegExpCompiler* compiler,
int characters_filled_in, int characters_filled_in,
...@@ -897,7 +906,9 @@ class AssertionNode: public SeqRegExpNode { ...@@ -897,7 +906,9 @@ class AssertionNode: public SeqRegExpNode {
} }
virtual void Accept(NodeVisitor* visitor); virtual void Accept(NodeVisitor* visitor);
virtual void Emit(RegExpCompiler* compiler, Trace* trace); virtual void Emit(RegExpCompiler* compiler, Trace* trace);
virtual int EatsAtLeast(int still_to_find, int recursion_depth); virtual int EatsAtLeast(int still_to_find,
int recursion_depth,
bool not_at_start);
virtual void GetQuickCheckDetails(QuickCheckDetails* details, virtual void GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler, RegExpCompiler* compiler,
int filled_in, int filled_in,
...@@ -925,7 +936,9 @@ class BackReferenceNode: public SeqRegExpNode { ...@@ -925,7 +936,9 @@ class BackReferenceNode: public SeqRegExpNode {
int start_register() { return start_reg_; } int start_register() { return start_reg_; }
int end_register() { return end_reg_; } int end_register() { return end_reg_; }
virtual void Emit(RegExpCompiler* compiler, Trace* trace); virtual void Emit(RegExpCompiler* compiler, Trace* trace);
virtual int EatsAtLeast(int still_to_find, int recursion_depth); virtual int EatsAtLeast(int still_to_find,
int recursion_depth,
bool not_at_start);
virtual void GetQuickCheckDetails(QuickCheckDetails* details, virtual void GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler, RegExpCompiler* compiler,
int characters_filled_in, int characters_filled_in,
...@@ -946,7 +959,9 @@ class EndNode: public RegExpNode { ...@@ -946,7 +959,9 @@ class EndNode: public RegExpNode {
explicit EndNode(Action action) : action_(action) { } explicit EndNode(Action action) : action_(action) { }
virtual void Accept(NodeVisitor* visitor); virtual void Accept(NodeVisitor* visitor);
virtual void Emit(RegExpCompiler* compiler, Trace* trace); virtual void Emit(RegExpCompiler* compiler, Trace* trace);
virtual int EatsAtLeast(int still_to_find, int recursion_depth) { return 0; } virtual int EatsAtLeast(int still_to_find,
int recursion_depth,
bool not_at_start) { return 0; }
virtual void GetQuickCheckDetails(QuickCheckDetails* details, virtual void GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler, RegExpCompiler* compiler,
int characters_filled_in, int characters_filled_in,
...@@ -1028,10 +1043,13 @@ class ChoiceNode: public RegExpNode { ...@@ -1028,10 +1043,13 @@ class ChoiceNode: public RegExpNode {
ZoneList<GuardedAlternative>* alternatives() { return alternatives_; } ZoneList<GuardedAlternative>* alternatives() { return alternatives_; }
DispatchTable* GetTable(bool ignore_case); DispatchTable* GetTable(bool ignore_case);
virtual void Emit(RegExpCompiler* compiler, Trace* trace); virtual void Emit(RegExpCompiler* compiler, Trace* trace);
virtual int EatsAtLeast(int still_to_find, int recursion_depth); virtual int EatsAtLeast(int still_to_find,
int recursion_depth,
bool not_at_start);
int EatsAtLeastHelper(int still_to_find, int EatsAtLeastHelper(int still_to_find,
int recursion_depth, int recursion_depth,
RegExpNode* ignore_this_node); RegExpNode* ignore_this_node,
bool not_at_start);
virtual void GetQuickCheckDetails(QuickCheckDetails* details, virtual void GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler, RegExpCompiler* compiler,
int characters_filled_in, int characters_filled_in,
...@@ -1054,7 +1072,7 @@ class ChoiceNode: public RegExpNode { ...@@ -1054,7 +1072,7 @@ class ChoiceNode: public RegExpNode {
void GenerateGuard(RegExpMacroAssembler* macro_assembler, void GenerateGuard(RegExpMacroAssembler* macro_assembler,
Guard* guard, Guard* guard,
Trace* trace); Trace* trace);
int CalculatePreloadCharacters(RegExpCompiler* compiler); int CalculatePreloadCharacters(RegExpCompiler* compiler, bool not_at_start);
void EmitOutOfLineContinuation(RegExpCompiler* compiler, void EmitOutOfLineContinuation(RegExpCompiler* compiler,
Trace* trace, Trace* trace,
GuardedAlternative alternative, GuardedAlternative alternative,
...@@ -1077,7 +1095,9 @@ class NegativeLookaheadChoiceNode: public ChoiceNode { ...@@ -1077,7 +1095,9 @@ class NegativeLookaheadChoiceNode: public ChoiceNode {
AddAlternative(this_must_fail); AddAlternative(this_must_fail);
AddAlternative(then_do_this); AddAlternative(then_do_this);
} }
virtual int EatsAtLeast(int still_to_find, int recursion_depth); virtual int EatsAtLeast(int still_to_find,
int recursion_depth,
bool not_at_start);
virtual void GetQuickCheckDetails(QuickCheckDetails* details, virtual void GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler, RegExpCompiler* compiler,
int characters_filled_in, int characters_filled_in,
...@@ -1102,7 +1122,9 @@ class LoopChoiceNode: public ChoiceNode { ...@@ -1102,7 +1122,9 @@ class LoopChoiceNode: public ChoiceNode {
void AddLoopAlternative(GuardedAlternative alt); void AddLoopAlternative(GuardedAlternative alt);
void AddContinueAlternative(GuardedAlternative alt); void AddContinueAlternative(GuardedAlternative alt);
virtual void Emit(RegExpCompiler* compiler, Trace* trace); virtual void Emit(RegExpCompiler* compiler, Trace* trace);
virtual int EatsAtLeast(int still_to_find, int recursion_depth); virtual int EatsAtLeast(int still_to_find,
int recursion_depth,
bool not_at_start);
virtual void GetQuickCheckDetails(QuickCheckDetails* details, virtual void GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler, RegExpCompiler* compiler,
int characters_filled_in, int characters_filled_in,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment