Commit a13d748e authored by erik.corry@gmail.com's avatar erik.corry@gmail.com

Regexp: Refactor ChoiceNode::Emit

BUG=
R=yangguo@chromium.org

Review URL: https://codereview.chromium.org/537913002

git-svn-id: https://v8.googlecode.com/svn/branches/bleeding_edge@23694 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 8f59d82f
......@@ -3493,6 +3493,7 @@ void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) {
void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
if (trace->stop_node() == this) {
// Back edge of greedy optimized loop node graph.
int text_length =
GreedyLoopTextLengthForAlternative(&(alternatives_->at(0)));
DCHECK(text_length != kNodeIsTooComplexForGreedyLoops);
......@@ -3759,13 +3760,13 @@ int BoyerMooreLookahead::GetSkipTable(int min_lookahead,
// See comment above on the implementation of GetSkipTable.
bool BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
void BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
const int kSize = RegExpMacroAssembler::kTableSize;
int min_lookahead = 0;
int max_lookahead = 0;
if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return false;
if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return;
bool found_single_character = false;
int single_character = 0;
......@@ -3789,7 +3790,7 @@ bool BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
if (found_single_character && lookahead_width == 1 && max_lookahead < 3) {
// The mask-compare can probably handle this better.
return false;
return;
}
if (found_single_character) {
......@@ -3806,7 +3807,7 @@ bool BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
masm->AdvanceCurrentPosition(lookahead_width);
masm->GoTo(&again);
masm->Bind(&cont);
return true;
return;
}
Factory* factory = masm->zone()->isolate()->factory();
......@@ -3822,8 +3823,6 @@ bool BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
masm->AdvanceCurrentPosition(skip_distance);
masm->GoTo(&again);
masm->Bind(&cont);
return true;
}
......@@ -3905,10 +3904,15 @@ bool BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
* \______________/
*/
void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
int choice_count = alternatives_->length();
GreedyLoopState::GreedyLoopState(bool not_at_start) {
counter_backtrack_trace_.set_backtrack(&label_);
if (not_at_start) counter_backtrack_trace_.set_at_start(false);
}
void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) {
#ifdef DEBUG
int choice_count = alternatives_->length();
for (int i = 0; i < choice_count - 1; i++) {
GuardedAlternative alternative = alternatives_->at(i);
ZoneList<Guard*>* guards = alternative.guards();
......@@ -3918,12 +3922,39 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
}
}
#endif
}
void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler,
Trace* current_trace,
PreloadState* state) {
if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) {
// Save some time by looking at most one machine word ahead.
state->eats_at_least_ =
EatsAtLeast(compiler->ascii() ? 4 : 2,
kRecursionBudget,
current_trace->at_start() == Trace::FALSE_VALUE);
}
state->preload_characters_ =
CalculatePreloadCharacters(compiler, state->eats_at_least_);
state->preload_is_current_ =
(current_trace->characters_preloaded() == state->preload_characters_);
state->preload_has_checked_bounds_ = state->preload_is_current_;
}
void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
int choice_count = alternatives_->length();
AssertGuardsMentionRegisters(trace);
LimitResult limit_result = LimitVersions(compiler, trace);
if (limit_result == DONE) return;
DCHECK(limit_result == CONTINUE);
int new_flush_budget = trace->flush_budget() / choice_count;
// For loop nodes we already flushed (see LoopChoiceNode::Emit), but for
// other choice nodes we only flush if we are out of code size budget.
if (trace->flush_budget() == 0 && trace->actions() != NULL) {
trace->Flush(compiler, this);
return;
......@@ -3931,143 +3962,216 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
RecursionCheck rc(compiler);
Trace* current_trace = trace;
PreloadState preload;
preload.init();
GreedyLoopState greedy_loop_state(not_at_start());
int text_length = GreedyLoopTextLengthForAlternative(&(alternatives_->at(0)));
bool greedy_loop = false;
Label greedy_loop_label;
Trace counter_backtrack_trace;
counter_backtrack_trace.set_backtrack(&greedy_loop_label);
if (not_at_start()) counter_backtrack_trace.set_at_start(false);
int text_length = GreedyLoopTextLengthForAlternative(&alternatives_->at(0));
AlternativeGenerationList alt_gens(choice_count, zone());
if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
// Here we have special handling for greedy loops containing only text nodes
// and other simple nodes. These are handled by pushing the current
// position on the stack and then incrementing the current position each
// time around the switch. On backtrack we decrement the current position
// and check it against the pushed value. This avoids pushing backtrack
// information for each iteration of the loop, which could take up a lot of
// space.
greedy_loop = true;
DCHECK(trace->stop_node() == NULL);
macro_assembler->PushCurrentPosition();
current_trace = &counter_backtrack_trace;
Label greedy_match_failed;
Trace greedy_match_trace;
if (not_at_start()) greedy_match_trace.set_at_start(false);
greedy_match_trace.set_backtrack(&greedy_match_failed);
Label loop_label;
macro_assembler->Bind(&loop_label);
greedy_match_trace.set_stop_node(this);
greedy_match_trace.set_loop_label(&loop_label);
alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace);
macro_assembler->Bind(&greedy_match_failed);
trace = EmitGreedyLoop(compiler,
trace,
&alt_gens,
&preload,
&greedy_loop_state,
text_length);
} else {
// TODO(erikcorry): Delete this. We don't need this label, but it makes us
// match the traces produced pre-cleanup.
Label second_choice;
compiler->macro_assembler()->Bind(&second_choice);
preload.eats_at_least_ = EmitOptimizedUnanchoredSearch(compiler, trace);
EmitChoices(compiler,
&alt_gens,
0,
trace,
false, // Not greedy loop.
&preload);
}
// At this point we need to generate slow checks for the alternatives where
// the quick check was inlined. We can recognize these because the associated
// label was bound.
int new_flush_budget = trace->flush_budget() / choice_count;
for (int i = 0; i < choice_count - 1; i++) {
AlternativeGeneration* alt_gen = alt_gens.at(i);
Trace new_trace(*trace);
// If there are actions to be flushed we have to limit how many times
// they are flushed. Take the budget of the parent trace and distribute
// it fairly amongst the children.
if (new_trace.actions() != NULL) {
new_trace.set_flush_budget(new_flush_budget);
}
EmitOutOfLineContinuation(compiler,
&new_trace,
alternatives_->at(i),
alt_gen,
preload.preload_characters_,
alt_gens.at(i + 1)->expects_preload);
}
}
Trace* ChoiceNode::EmitGreedyLoop(RegExpCompiler* compiler,
Trace* trace,
AlternativeGenerationList* alt_gens,
PreloadState* preload,
GreedyLoopState* greedy_loop_state,
int text_length) {
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
// Here we have special handling for greedy loops containing only text nodes
// and other simple nodes. These are handled by pushing the current
// position on the stack and then incrementing the current position each
// time around the switch. On backtrack we decrement the current position
// and check it against the pushed value. This avoids pushing backtrack
// information for each iteration of the loop, which could take up a lot of
// space.
DCHECK(trace->stop_node() == NULL);
macro_assembler->PushCurrentPosition();
Label greedy_match_failed;
Trace greedy_match_trace;
if (not_at_start()) greedy_match_trace.set_at_start(false);
greedy_match_trace.set_backtrack(&greedy_match_failed);
Label loop_label;
macro_assembler->Bind(&loop_label);
greedy_match_trace.set_stop_node(this);
greedy_match_trace.set_loop_label(&loop_label);
alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace);
macro_assembler->Bind(&greedy_match_failed);
Label second_choice; // For use in greedy matches.
macro_assembler->Bind(&second_choice);
int first_normal_choice = greedy_loop ? 1 : 0;
bool not_at_start = current_trace->at_start() == Trace::FALSE_VALUE;
const int kEatsAtLeastNotYetInitialized = -1;
int eats_at_least = kEatsAtLeastNotYetInitialized;
bool skip_was_emitted = false;
if (!greedy_loop && choice_count == 2) {
GuardedAlternative alt1 = alternatives_->at(1);
if (alt1.guards() == NULL || alt1.guards()->length() == 0) {
RegExpNode* eats_anything_node = alt1.node();
if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) ==
this) {
// At this point we know that we are at a non-greedy loop that will eat
// any character one at a time. Any non-anchored regexp has such a
// loop prepended to it in order to find where it starts. We look for
// a pattern of the form ...abc... where we can look 6 characters ahead
// and step forwards 3 if the character is not one of abc. Abc need
// not be atoms, they can be any reasonably limited character class or
// small alternation.
DCHECK(trace->is_trivial()); // This is the case on LoopChoiceNodes.
BoyerMooreLookahead* lookahead = bm_info(not_at_start);
if (lookahead == NULL) {
eats_at_least = Min(kMaxLookaheadForBoyerMoore,
EatsAtLeast(kMaxLookaheadForBoyerMoore,
kRecursionBudget,
not_at_start));
if (eats_at_least >= 1) {
BoyerMooreLookahead* bm =
new(zone()) BoyerMooreLookahead(eats_at_least,
compiler,
zone());
GuardedAlternative alt0 = alternatives_->at(0);
alt0.node()->FillInBMInfo(0, kRecursionBudget, bm, not_at_start);
skip_was_emitted = bm->EmitSkipInstructions(macro_assembler);
}
} else {
skip_was_emitted = lookahead->EmitSkipInstructions(macro_assembler);
}
}
}
Trace* new_trace = greedy_loop_state->counter_backtrack_trace();
EmitChoices(compiler,
alt_gens,
1,
new_trace,
true, // Is greedy loop.
preload);
macro_assembler->Bind(greedy_loop_state->label());
// If we have unwound to the bottom then backtrack.
macro_assembler->CheckGreedyLoop(trace->backtrack());
// Otherwise try the second priority at an earlier position.
macro_assembler->AdvanceCurrentPosition(-text_length);
macro_assembler->GoTo(&second_choice);
return new_trace;
}
int ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler,
Trace* trace) {
int eats_at_least = PreloadState::kEatsAtLeastNotYetInitialized;
if (alternatives_->length() != 2) return eats_at_least;
GuardedAlternative alt1 = alternatives_->at(1);
if (alt1.guards() != NULL && alt1.guards()->length() != 0) {
return eats_at_least;
}
RegExpNode* eats_anything_node = alt1.node();
if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) != this) {
return eats_at_least;
}
// Really we should be creating a new trace when we execute this function,
// but there is no need, because the code it generates cannot backtrack, and
// we always arrive here with a trivial trace (since it's the entry to a
// loop. That also implies that there are no preloaded characters, which is
// good, because it means we won't be violating any assumptions by
// overwriting those characters with new load instructions.
DCHECK(trace->is_trivial());
if (eats_at_least == kEatsAtLeastNotYetInitialized) {
// Save some time by looking at most one machine word ahead.
eats_at_least =
EatsAtLeast(compiler->ascii() ? 4 : 2, kRecursionBudget, not_at_start);
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
// At this point we know that we are at a non-greedy loop that will eat
// any character one at a time. Any non-anchored regexp has such a
// loop prepended to it in order to find where it starts. We look for
// a pattern of the form ...abc... where we can look 6 characters ahead
// and step forwards 3 if the character is not one of abc. Abc need
// not be atoms, they can be any reasonably limited character class or
// small alternation.
BoyerMooreLookahead* bm = bm_info(false);
if (bm == NULL) {
eats_at_least = Min(kMaxLookaheadForBoyerMoore,
EatsAtLeast(kMaxLookaheadForBoyerMoore,
kRecursionBudget,
false));
if (eats_at_least >= 1) {
bm = new(zone()) BoyerMooreLookahead(eats_at_least,
compiler,
zone());
GuardedAlternative alt0 = alternatives_->at(0);
alt0.node()->FillInBMInfo(0, kRecursionBudget, bm, false);
}
}
if (bm != NULL) {
bm->EmitSkipInstructions(macro_assembler);
}
int preload_characters = CalculatePreloadCharacters(compiler, eats_at_least);
return eats_at_least;
}
bool preload_is_current = !skip_was_emitted &&
(current_trace->characters_preloaded() == preload_characters);
bool preload_has_checked_bounds = preload_is_current;
AlternativeGenerationList alt_gens(choice_count, zone());
void ChoiceNode::EmitChoices(RegExpCompiler* compiler,
AlternativeGenerationList* alt_gens,
int first_choice,
Trace* trace,
bool is_greedy_loop,
PreloadState* preload) {
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
SetUpPreLoad(compiler, trace, preload);
// For now we just call all choices one after the other. The idea ultimately
// is to use the Dispatch table to try only the relevant ones.
for (int i = first_normal_choice; i < choice_count; i++) {
int choice_count = alternatives_->length();
int new_flush_budget = trace->flush_budget() / choice_count;
for (int i = first_choice; i < choice_count; i++) {
bool is_last = i == choice_count - 1;
GuardedAlternative alternative = alternatives_->at(i);
AlternativeGeneration* alt_gen = alt_gens.at(i);
alt_gen->quick_check_details.set_characters(preload_characters);
AlternativeGeneration* alt_gen = alt_gens->at(i);
alt_gen->quick_check_details.set_characters(preload->preload_characters_);
ZoneList<Guard*>* guards = alternative.guards();
int guard_count = (guards == NULL) ? 0 : guards->length();
Trace new_trace(*current_trace);
new_trace.set_characters_preloaded(preload_is_current ?
preload_characters :
Trace new_trace(*trace);
new_trace.set_characters_preloaded(preload->preload_is_current_ ?
preload->preload_characters_ :
0);
if (preload_has_checked_bounds) {
new_trace.set_bound_checked_up_to(preload_characters);
if (preload->preload_has_checked_bounds_) {
new_trace.set_bound_checked_up_to(preload->preload_characters_);
}
new_trace.quick_check_performed()->Clear();
if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE);
alt_gen->expects_preload = preload_is_current;
alt_gen->expects_preload = preload->preload_is_current_;
bool generate_full_check_inline = false;
if (FLAG_regexp_optimization &&
try_to_emit_quick_check_for_alternative(i) &&
try_to_emit_quick_check_for_alternative(i == 0) &&
alternative.node()->EmitQuickCheck(compiler,
&new_trace,
preload_has_checked_bounds,
preload->preload_has_checked_bounds_,
&alt_gen->possible_success,
&alt_gen->quick_check_details,
i < choice_count - 1)) {
!is_last)) {
// Quick check was generated for this choice.
preload_is_current = true;
preload_has_checked_bounds = true;
preload->preload_is_current_ = true;
preload->preload_has_checked_bounds_ = true;
// On the last choice in the ChoiceNode we generated the quick
// check to fall through on possible success. So now we need to
// generate the full check inline.
if (i == choice_count - 1) {
if (is_last) {
macro_assembler->Bind(&alt_gen->possible_success);
new_trace.set_quick_check_performed(&alt_gen->quick_check_details);
new_trace.set_characters_preloaded(preload_characters);
new_trace.set_bound_checked_up_to(preload_characters);
new_trace.set_characters_preloaded(preload->preload_characters_);
new_trace.set_bound_checked_up_to(preload->preload_characters_);
generate_full_check_inline = true;
}
} else if (alt_gen->quick_check_details.cannot_match()) {
if (i == choice_count - 1 && !greedy_loop) {
if (is_last && !is_greedy_loop) {
macro_assembler->GoTo(trace->backtrack());
}
} // Else just fall through to the next test.
continue;
} else {
// No quick check was generated. Put the full code here.
......@@ -4075,11 +4179,11 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
// previous cases that go here when they fail. There's no reason to
// insist that they preload characters since the slow check we are about
// to generate probably can't use it.
if (i != first_normal_choice) {
if (i != first_choice) {
alt_gen->expects_preload = false;
new_trace.InvalidateCurrentCharacter();
}
if (i < choice_count - 1) {
if (!is_last) {
new_trace.set_backtrack(&alt_gen->after);
}
generate_full_check_inline = true;
......@@ -4092,38 +4196,10 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
GenerateGuard(macro_assembler, guards->at(j), &new_trace);
}
alternative.node()->Emit(compiler, &new_trace);
preload_is_current = false;
preload->preload_is_current_ = false;
}
macro_assembler->Bind(&alt_gen->after);
}
if (greedy_loop) {
macro_assembler->Bind(&greedy_loop_label);
// If we have unwound to the bottom then backtrack.
macro_assembler->CheckGreedyLoop(trace->backtrack());
// Otherwise try the second priority at an earlier position.
macro_assembler->AdvanceCurrentPosition(-text_length);
macro_assembler->GoTo(&second_choice);
}
// At this point we need to generate slow checks for the alternatives where
// the quick check was inlined. We can recognize these because the associated
// label was bound.
for (int i = first_normal_choice; i < choice_count - 1; i++) {
AlternativeGeneration* alt_gen = alt_gens.at(i);
Trace new_trace(*current_trace);
// If there are actions to be flushed we have to limit how many times
// they are flushed. Take the budget of the parent trace and distribute
// it fairly amongst the children.
if (new_trace.actions() != NULL) {
new_trace.set_flush_budget(new_flush_budget);
}
EmitOutOfLineContinuation(compiler,
&new_trace,
alternatives_->at(i),
alt_gen,
preload_characters,
alt_gens.at(i + 1)->expects_preload);
}
}
......
......@@ -445,7 +445,9 @@ class TextElement FINAL BASE_EMBEDDED {
class Trace;
struct PreloadState;
class GreedyLoopState;
class AlternativeGenerationList;
struct NodeInfo {
NodeInfo()
......@@ -1076,7 +1078,9 @@ class ChoiceNode: public RegExpNode {
bool not_at_start() { return not_at_start_; }
void set_not_at_start() { not_at_start_ = true; }
void set_being_calculated(bool b) { being_calculated_ = b; }
virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; }
virtual bool try_to_emit_quick_check_for_alternative(bool is_first) {
return true;
}
virtual RegExpNode* FilterASCII(int depth, bool ignore_case);
protected:
......@@ -1096,6 +1100,23 @@ class ChoiceNode: public RegExpNode {
AlternativeGeneration* alt_gen,
int preload_characters,
bool next_expects_preload);
void SetUpPreLoad(RegExpCompiler* compiler,
Trace* current_trace,
PreloadState* preloads);
void AssertGuardsMentionRegisters(Trace* trace);
int EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler, Trace* trace);
Trace* EmitGreedyLoop(RegExpCompiler* compiler,
Trace* trace,
AlternativeGenerationList* alt_gens,
PreloadState* preloads,
GreedyLoopState* greedy_loop_state,
int text_length);
void EmitChoices(RegExpCompiler* compiler,
AlternativeGenerationList* alt_gens,
int first_choice,
Trace* trace,
bool is_greedy_loop,
PreloadState* preloads);
DispatchTable* table_;
// If true, this node is never checked at the start of the input.
// Allows a new trace to start with at_start() set to false.
......@@ -1131,7 +1152,9 @@ class NegativeLookaheadChoiceNode: public ChoiceNode {
// starts by loading enough characters for the alternative that takes fewest
// characters, but on a negative lookahead the negative branch did not take
// part in that calculation (EatsAtLeast) so the assumptions don't hold.
virtual bool try_to_emit_quick_check_for_alternative(int i) { return i != 0; }
virtual bool try_to_emit_quick_check_for_alternative(bool is_first) {
return !is_first;
}
virtual RegExpNode* FilterASCII(int depth, bool ignore_case);
};
......@@ -1142,7 +1165,8 @@ class LoopChoiceNode: public ChoiceNode {
: ChoiceNode(2, zone),
loop_node_(NULL),
continue_node_(NULL),
body_can_be_zero_length_(body_can_be_zero_length) { }
body_can_be_zero_length_(body_can_be_zero_length)
{ }
void AddLoopAlternative(GuardedAlternative alt);
void AddContinueAlternative(GuardedAlternative alt);
virtual void Emit(RegExpCompiler* compiler, Trace* trace);
......@@ -1293,7 +1317,7 @@ class BoyerMooreLookahead : public ZoneObject {
void SetRest(int from_map) {
for (int i = from_map; i < length_; i++) SetAll(i);
}
bool EmitSkipInstructions(RegExpMacroAssembler* masm);
void EmitSkipInstructions(RegExpMacroAssembler* masm);
private:
// This is the value obtained by EatsAtLeast. If we do not have at least this
......@@ -1486,6 +1510,31 @@ class Trace {
};
class GreedyLoopState {
public:
explicit GreedyLoopState(bool not_at_start);
Label* label() { return &label_; }
Trace* counter_backtrack_trace() { return &counter_backtrack_trace_; }
private:
Label label_;
Trace counter_backtrack_trace_;
};
struct PreloadState {
static const int kEatsAtLeastNotYetInitialized = -1;
bool preload_is_current_;
bool preload_has_checked_bounds_;
int preload_characters_;
int eats_at_least_;
void init() {
eats_at_least_ = kEatsAtLeastNotYetInitialized;
}
};
class NodeVisitor {
public:
virtual ~NodeVisitor() { }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment