Commit cf1e1b1b authored by lrn@chromium.org

Trace contains information about whether we know that we are at the start of input.

Choice nodes may know that they are never at the start of input.
This can remove start_of_input assertions in cases where they are statically known to fail.
The initial loop is unrolled once if the regexp might check for the start of input. Only the first iteration may be at the start; the following loop knows that it isn't.


git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@1217 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 3835e915
...@@ -250,7 +250,13 @@ bool RegExpAssertion::IsAnchored() { ...@@ -250,7 +250,13 @@ bool RegExpAssertion::IsAnchored() {
bool RegExpAlternative::IsAnchored() { bool RegExpAlternative::IsAnchored() {
return this->nodes()->at(0)->IsAnchored(); ZoneList<RegExpTree*>* nodes = this->nodes();
for (int i = 0; i < nodes->length(); i++) {
RegExpTree* node = nodes->at(i);
if (node->IsAnchored()) { return true; }
if (node->max_match() > 0) { return false; }
}
return false;
} }
......
...@@ -1461,7 +1461,8 @@ class RegExpQuantifier: public RegExpTree { ...@@ -1461,7 +1461,8 @@ class RegExpQuantifier: public RegExpTree {
bool is_greedy, bool is_greedy,
RegExpTree* body, RegExpTree* body,
RegExpCompiler* compiler, RegExpCompiler* compiler,
RegExpNode* on_success); RegExpNode* on_success,
bool not_at_start = false);
virtual RegExpQuantifier* AsQuantifier(); virtual RegExpQuantifier* AsQuantifier();
virtual Interval CaptureRegisters(); virtual Interval CaptureRegisters();
virtual bool IsQuantifier(); virtual bool IsQuantifier();
......
...@@ -438,6 +438,7 @@ int FlagList::SetFlagsFromCommandLine(int* argc, ...@@ -438,6 +438,7 @@ int FlagList::SetFlagsFromCommandLine(int* argc,
if (FLAG_help) { if (FLAG_help) {
PrintHelp(); PrintHelp();
exit(0);
} }
// parsed all flags successfully // parsed all flags successfully
return 0; return 0;
......
This diff is collapsed.
...@@ -454,10 +454,6 @@ class Trace; ...@@ -454,10 +454,6 @@ class Trace;
struct NodeInfo { struct NodeInfo {
enum TriBool {
UNKNOWN = -1, FALSE = 0, TRUE = 1
};
NodeInfo() NodeInfo()
: being_analyzed(false), : being_analyzed(false),
been_analyzed(false), been_analyzed(false),
...@@ -544,17 +540,21 @@ class QuickCheckDetails { ...@@ -544,17 +540,21 @@ class QuickCheckDetails {
QuickCheckDetails() QuickCheckDetails()
: characters_(0), : characters_(0),
mask_(0), mask_(0),
value_(0) { } value_(0),
cannot_match_(false) { }
explicit QuickCheckDetails(int characters) explicit QuickCheckDetails(int characters)
: characters_(characters), : characters_(characters),
mask_(0), mask_(0),
value_(0) { } value_(0),
cannot_match_(false) { }
bool Rationalize(bool ascii); bool Rationalize(bool ascii);
// Merge in the information from another branch of an alternation. // Merge in the information from another branch of an alternation.
void Merge(QuickCheckDetails* other, int from_index); void Merge(QuickCheckDetails* other, int from_index);
// Advance the current position by some amount. // Advance the current position by some amount.
void Advance(int by, bool ascii); void Advance(int by, bool ascii);
void Clear(); void Clear();
bool cannot_match() { return cannot_match_; }
void set_cannot_match() { cannot_match_ = true; }
struct Position { struct Position {
Position() : mask(0), value(0), determines_perfectly(false) { } Position() : mask(0), value(0), determines_perfectly(false) { }
uc16 mask; uc16 mask;
...@@ -579,6 +579,9 @@ class QuickCheckDetails { ...@@ -579,6 +579,9 @@ class QuickCheckDetails {
// These values are the condensate of the above array after Rationalize(). // These values are the condensate of the above array after Rationalize().
uint32_t mask_; uint32_t mask_;
uint32_t value_; uint32_t value_;
// If set to true, there is no way this quick check can match at all.
// E.g., if it requires being at the start of the input, and isn't.
bool cannot_match_;
}; };
...@@ -609,7 +612,8 @@ class RegExpNode: public ZoneObject { ...@@ -609,7 +612,8 @@ class RegExpNode: public ZoneObject {
// A comparison success indicates the node may match. // A comparison success indicates the node may match.
virtual void GetQuickCheckDetails(QuickCheckDetails* details, virtual void GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler, RegExpCompiler* compiler,
int characters_filled_in) = 0; int characters_filled_in,
bool not_at_start) = 0;
static const int kNodeIsTooComplexForGreedyLoops = -1; static const int kNodeIsTooComplexForGreedyLoops = -1;
virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; } virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; }
Label* label() { return &label_; } Label* label() { return &label_; }
...@@ -740,8 +744,10 @@ class ActionNode: public SeqRegExpNode { ...@@ -740,8 +744,10 @@ class ActionNode: public SeqRegExpNode {
virtual int EatsAtLeast(int still_to_find, int recursion_depth); virtual int EatsAtLeast(int still_to_find, int recursion_depth);
virtual void GetQuickCheckDetails(QuickCheckDetails* details, virtual void GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler, RegExpCompiler* compiler,
int filled_in) { int filled_in,
return on_success()->GetQuickCheckDetails(details, compiler, filled_in); bool not_at_start) {
return on_success()->GetQuickCheckDetails(
details, compiler, filled_in, not_at_start);
} }
Type type() { return type_; } Type type() { return type_; }
// TODO(erikcorry): We should allow some action nodes in greedy loops. // TODO(erikcorry): We should allow some action nodes in greedy loops.
...@@ -802,7 +808,8 @@ class TextNode: public SeqRegExpNode { ...@@ -802,7 +808,8 @@ class TextNode: public SeqRegExpNode {
virtual int EatsAtLeast(int still_to_find, int recursion_depth); virtual int EatsAtLeast(int still_to_find, int recursion_depth);
virtual void GetQuickCheckDetails(QuickCheckDetails* details, virtual void GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler, RegExpCompiler* compiler,
int characters_filled_in); int characters_filled_in,
bool not_at_start);
ZoneList<TextElement>* elements() { return elms_; } ZoneList<TextElement>* elements() { return elms_; }
void MakeCaseIndependent(); void MakeCaseIndependent();
virtual int GreedyLoopTextLength(); virtual int GreedyLoopTextLength();
...@@ -860,9 +867,8 @@ class AssertionNode: public SeqRegExpNode { ...@@ -860,9 +867,8 @@ class AssertionNode: public SeqRegExpNode {
virtual int EatsAtLeast(int still_to_find, int recursion_depth); virtual int EatsAtLeast(int still_to_find, int recursion_depth);
virtual void GetQuickCheckDetails(QuickCheckDetails* details, virtual void GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler, RegExpCompiler* compiler,
int filled_in) { int filled_in,
return on_success()->GetQuickCheckDetails(details, compiler, filled_in); bool not_at_start);
}
virtual AssertionNode* Clone() { return new AssertionNode(*this); } virtual AssertionNode* Clone() { return new AssertionNode(*this); }
AssertionNodeType type() { return type_; } AssertionNodeType type() { return type_; }
private: private:
...@@ -887,7 +893,8 @@ class BackReferenceNode: public SeqRegExpNode { ...@@ -887,7 +893,8 @@ class BackReferenceNode: public SeqRegExpNode {
virtual int EatsAtLeast(int still_to_find, int recursion_depth); virtual int EatsAtLeast(int still_to_find, int recursion_depth);
virtual void GetQuickCheckDetails(QuickCheckDetails* details, virtual void GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler, RegExpCompiler* compiler,
int characters_filled_in) { int characters_filled_in,
bool not_at_start) {
return; return;
} }
virtual BackReferenceNode* Clone() { return new BackReferenceNode(*this); } virtual BackReferenceNode* Clone() { return new BackReferenceNode(*this); }
...@@ -907,7 +914,8 @@ class EndNode: public RegExpNode { ...@@ -907,7 +914,8 @@ class EndNode: public RegExpNode {
virtual int EatsAtLeast(int still_to_find, int recursion_depth) { return 0; } virtual int EatsAtLeast(int still_to_find, int recursion_depth) { return 0; }
virtual void GetQuickCheckDetails(QuickCheckDetails* details, virtual void GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler, RegExpCompiler* compiler,
int characters_filled_in) { int characters_filled_in,
bool not_at_start) {
// Returning 0 from EatsAtLeast should ensure we never get here. // Returning 0 from EatsAtLeast should ensure we never get here.
UNREACHABLE(); UNREACHABLE();
} }
...@@ -979,6 +987,7 @@ class ChoiceNode: public RegExpNode { ...@@ -979,6 +987,7 @@ class ChoiceNode: public RegExpNode {
explicit ChoiceNode(int expected_size) explicit ChoiceNode(int expected_size)
: alternatives_(new ZoneList<GuardedAlternative>(expected_size)), : alternatives_(new ZoneList<GuardedAlternative>(expected_size)),
table_(NULL), table_(NULL),
not_at_start_(false),
being_calculated_(false) { } being_calculated_(false) { }
virtual void Accept(NodeVisitor* visitor); virtual void Accept(NodeVisitor* visitor);
void AddAlternative(GuardedAlternative node) { alternatives()->Add(node); } void AddAlternative(GuardedAlternative node) { alternatives()->Add(node); }
...@@ -991,10 +1000,13 @@ class ChoiceNode: public RegExpNode { ...@@ -991,10 +1000,13 @@ class ChoiceNode: public RegExpNode {
RegExpNode* ignore_this_node); RegExpNode* ignore_this_node);
virtual void GetQuickCheckDetails(QuickCheckDetails* details, virtual void GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler, RegExpCompiler* compiler,
int characters_filled_in); int characters_filled_in,
bool not_at_start);
virtual ChoiceNode* Clone() { return new ChoiceNode(*this); } virtual ChoiceNode* Clone() { return new ChoiceNode(*this); }
bool being_calculated() { return being_calculated_; } bool being_calculated() { return being_calculated_; }
bool not_at_start() { return not_at_start_; }
void set_not_at_start() { not_at_start_ = true; }
void set_being_calculated(bool b) { being_calculated_ = b; } void set_being_calculated(bool b) { being_calculated_ = b; }
virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; } virtual bool try_to_emit_quick_check_for_alternative(int i) { return true; }
...@@ -1016,6 +1028,9 @@ class ChoiceNode: public RegExpNode { ...@@ -1016,6 +1028,9 @@ class ChoiceNode: public RegExpNode {
int preload_characters, int preload_characters,
bool next_expects_preload); bool next_expects_preload);
DispatchTable* table_; DispatchTable* table_;
// If true, this node is never checked at the start of the input.
// Allows a new trace to start with at_start() set to false.
bool not_at_start_;
bool being_calculated_; bool being_calculated_;
}; };
...@@ -1031,7 +1046,8 @@ class NegativeLookaheadChoiceNode: public ChoiceNode { ...@@ -1031,7 +1046,8 @@ class NegativeLookaheadChoiceNode: public ChoiceNode {
virtual int EatsAtLeast(int still_to_find, int recursion_depth); virtual int EatsAtLeast(int still_to_find, int recursion_depth);
virtual void GetQuickCheckDetails(QuickCheckDetails* details, virtual void GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler, RegExpCompiler* compiler,
int characters_filled_in); int characters_filled_in,
bool not_at_start);
// For a negative lookahead we don't emit the quick check for the // For a negative lookahead we don't emit the quick check for the
// alternative that is expected to fail. This is because quick check code // alternative that is expected to fail. This is because quick check code
// starts by loading enough characters for the alternative that takes fewest // starts by loading enough characters for the alternative that takes fewest
...@@ -1054,7 +1070,8 @@ class LoopChoiceNode: public ChoiceNode { ...@@ -1054,7 +1070,8 @@ class LoopChoiceNode: public ChoiceNode {
virtual int EatsAtLeast(int still_to_find, int recursion_depth); virtual int EatsAtLeast(int still_to_find, int recursion_depth);
virtual void GetQuickCheckDetails(QuickCheckDetails* details, virtual void GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler, RegExpCompiler* compiler,
int characters_filled_in); int characters_filled_in,
bool not_at_start);
virtual LoopChoiceNode* Clone() { return new LoopChoiceNode(*this); } virtual LoopChoiceNode* Clone() { return new LoopChoiceNode(*this); }
RegExpNode* loop_node() { return loop_node_; } RegExpNode* loop_node() { return loop_node_; }
RegExpNode* continue_node() { return continue_node_; } RegExpNode* continue_node() { return continue_node_; }
...@@ -1088,6 +1105,12 @@ class LoopChoiceNode: public ChoiceNode { ...@@ -1088,6 +1105,12 @@ class LoopChoiceNode: public ChoiceNode {
// where baz has been matched. // where baz has been matched.
class Trace { class Trace {
public: public:
// A value for a property that is either known to be true, known to be false,
// or not known.
enum TriBool {
UNKNOWN = -1, FALSE = 0, TRUE = 1
};
class DeferredAction { class DeferredAction {
public: public:
DeferredAction(ActionNode::Type type, int reg) DeferredAction(ActionNode::Type type, int reg)
...@@ -1150,7 +1173,9 @@ class Trace { ...@@ -1150,7 +1173,9 @@ class Trace {
stop_node_(NULL), stop_node_(NULL),
loop_label_(NULL), loop_label_(NULL),
characters_preloaded_(0), characters_preloaded_(0),
bound_checked_up_to_(0) { } bound_checked_up_to_(0),
at_start_(UNKNOWN) { }
// End the trace. This involves flushing the deferred actions in the trace // End the trace. This involves flushing the deferred actions in the trace
// and pushing a backtrack location onto the backtrack stack. Once this is // and pushing a backtrack location onto the backtrack stack. Once this is
// done we can start a new trace or go to one that has already been // done we can start a new trace or go to one that has already been
...@@ -1174,8 +1199,11 @@ class Trace { ...@@ -1174,8 +1199,11 @@ class Trace {
cp_offset_ == 0 && cp_offset_ == 0 &&
characters_preloaded_ == 0 && characters_preloaded_ == 0 &&
bound_checked_up_to_ == 0 && bound_checked_up_to_ == 0 &&
quick_check_performed_.characters() == 0; quick_check_performed_.characters() == 0 &&
at_start_ == UNKNOWN;
} }
TriBool at_start() { return at_start_; }
void set_at_start(bool at_start) { at_start_ = at_start ? TRUE : FALSE; }
Label* backtrack() { return backtrack_; } Label* backtrack() { return backtrack_; }
Label* loop_label() { return loop_label_; } Label* loop_label() { return loop_label_; }
RegExpNode* stop_node() { return stop_node_; } RegExpNode* stop_node() { return stop_node_; }
...@@ -1223,6 +1251,7 @@ class Trace { ...@@ -1223,6 +1251,7 @@ class Trace {
int characters_preloaded_; int characters_preloaded_;
int bound_checked_up_to_; int bound_checked_up_to_;
QuickCheckDetails quick_check_performed_; QuickCheckDetails quick_check_performed_;
TriBool at_start_;
}; };
...@@ -1305,10 +1334,12 @@ struct RegExpCompileData { ...@@ -1305,10 +1334,12 @@ struct RegExpCompileData {
: tree(NULL), : tree(NULL),
node(NULL), node(NULL),
simple(true), simple(true),
contains_anchor(false),
capture_count(0) { } capture_count(0) { }
RegExpTree* tree; RegExpTree* tree;
RegExpNode* node; RegExpNode* node;
bool simple; bool simple;
bool contains_anchor;
Handle<String> error; Handle<String> error;
int capture_count; int capture_count;
}; };
......
...@@ -532,7 +532,8 @@ class RegExpParser { ...@@ -532,7 +532,8 @@ class RegExpParser {
// Reports whether the pattern might be used as a literal search string. // Reports whether the pattern might be used as a literal search string.
// Only use if the result of the parse is a single atom node. // Only use if the result of the parse is a single atom node.
bool simple(); bool simple();
bool contains_anchor() { return contains_anchor_; }
void set_contains_anchor() { contains_anchor_ = true; }
int captures_started() { return captures_ == NULL ? 0 : captures_->length(); } int captures_started() { return captures_ == NULL ? 0 : captures_->length(); }
int position() { return next_pos_ - 1; } int position() { return next_pos_ - 1; }
bool failed() { return failed_; } bool failed() { return failed_; }
...@@ -555,6 +556,7 @@ class RegExpParser { ...@@ -555,6 +556,7 @@ class RegExpParser {
FlatStringReader* in_; FlatStringReader* in_;
Handle<String>* error_; Handle<String>* error_;
bool simple_; bool simple_;
bool contains_anchor_;
ZoneList<RegExpCapture*>* captures_; ZoneList<RegExpCapture*>* captures_;
bool is_scanned_for_captures_; bool is_scanned_for_captures_;
// The capture count is only valid after we have scanned for captures. // The capture count is only valid after we have scanned for captures.
...@@ -3486,6 +3488,7 @@ RegExpParser::RegExpParser(FlatStringReader* in, ...@@ -3486,6 +3488,7 @@ RegExpParser::RegExpParser(FlatStringReader* in,
in_(in), in_(in),
error_(error), error_(error),
simple_(true), simple_(true),
contains_anchor_(false),
captures_(NULL), captures_(NULL),
is_scanned_for_captures_(false), is_scanned_for_captures_(false),
capture_count_(0), capture_count_(0),
...@@ -3603,10 +3606,14 @@ RegExpTree* RegExpParser::ParseDisjunction() { ...@@ -3603,10 +3606,14 @@ RegExpTree* RegExpParser::ParseDisjunction() {
ReportError(CStrVector("Nothing to repeat") CHECK_FAILED); ReportError(CStrVector("Nothing to repeat") CHECK_FAILED);
case '^': { case '^': {
Advance(); Advance();
RegExpAssertion::Type type = if (multiline_) {
multiline_ ? RegExpAssertion::START_OF_LINE : builder.AddAssertion(
RegExpAssertion::START_OF_INPUT; new RegExpAssertion(RegExpAssertion::START_OF_LINE));
builder.AddAssertion(new RegExpAssertion(type)); } else {
builder.AddAssertion(
new RegExpAssertion(RegExpAssertion::START_OF_INPUT));
set_contains_anchor();
}
continue; continue;
} }
case '$': { case '$': {
...@@ -4312,6 +4319,7 @@ bool ParseRegExp(FlatStringReader* input, ...@@ -4312,6 +4319,7 @@ bool ParseRegExp(FlatStringReader* input,
result->tree = tree; result->tree = tree;
int capture_count = parser.captures_started(); int capture_count = parser.captures_started();
result->simple = tree->IsAtom() && parser.simple() && capture_count == 0; result->simple = tree->IsAtom() && parser.simple() && capture_count == 0;
result->contains_anchor = parser.contains_anchor();
result->capture_count = capture_count; result->capture_count = capture_count;
} }
return !parser.failed(); return !parser.failed();
......
...@@ -636,7 +636,7 @@ Handle<Object> RegExpMacroAssemblerIA32::GetCode(Handle<String> source) { ...@@ -636,7 +636,7 @@ Handle<Object> RegExpMacroAssemblerIA32::GetCode(Handle<String> source) {
__ push(esi); __ push(esi);
__ push(edi); __ push(edi);
__ push(ebx); // Callee-save on MacOS. __ push(ebx); // Callee-save on MacOS.
__ push(Immediate(0)); // Make room for input start minus one __ push(Immediate(0)); // Make room for "input start - 1" constant.
// Check if we have space on the stack for registers. // Check if we have space on the stack for registers.
Label retry_stack_check; Label retry_stack_check;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment