Commit ba09ec5e authored by erik.corry@gmail.com's avatar erik.corry@gmail.com

Irregexp:

* Facility for generating a node several ways.  This allows
  code to be generated for a node knowing where it is trying
  to match relative to the 'current position' and it allows
  code to be generated that knows where to backtrack to.  Both
  allow dramatic reductions in the amount of popping and pushing
  on the stack and the number of indirect jumps.
* Generate special backtracking for greedy quantifiers on
  constant-length atoms.  This allows .* to run in constant
  space relative to input string size.
* When we are checking a long sequence of characters or character
  classes in the input then we do them right to left and only the
  first (rightmost) needs to check for end-of-string.
* Record the pattern in the profile instead of just <CompiledRegExp>
* Nodes no longer contain an on_failure_ node.  This was only used
  for lookaheads and they are now handled with a choice node instead.
Review URL: http://codereview.chromium.org/12900

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@930 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent f306b978
...@@ -1216,8 +1216,7 @@ class RegExpTree: public ZoneObject { ...@@ -1216,8 +1216,7 @@ class RegExpTree: public ZoneObject {
virtual ~RegExpTree() { } virtual ~RegExpTree() { }
virtual void* Accept(RegExpVisitor* visitor, void* data) = 0; virtual void* Accept(RegExpVisitor* visitor, void* data) = 0;
virtual RegExpNode* ToNode(RegExpCompiler* compiler, virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success, RegExpNode* on_success) = 0;
RegExpNode* on_failure) = 0;
virtual bool IsTextElement() { return false; } virtual bool IsTextElement() { return false; }
virtual void AppendToText(RegExpText* text); virtual void AppendToText(RegExpText* text);
SmartPointer<const char> ToString(); SmartPointer<const char> ToString();
...@@ -1235,8 +1234,7 @@ class RegExpDisjunction: public RegExpTree { ...@@ -1235,8 +1234,7 @@ class RegExpDisjunction: public RegExpTree {
: alternatives_(alternatives) { } : alternatives_(alternatives) { }
virtual void* Accept(RegExpVisitor* visitor, void* data); virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler, virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success, RegExpNode* on_success);
RegExpNode* on_failure);
virtual RegExpDisjunction* AsDisjunction(); virtual RegExpDisjunction* AsDisjunction();
virtual bool IsDisjunction(); virtual bool IsDisjunction();
ZoneList<RegExpTree*>* alternatives() { return alternatives_; } ZoneList<RegExpTree*>* alternatives() { return alternatives_; }
...@@ -1250,8 +1248,7 @@ class RegExpAlternative: public RegExpTree { ...@@ -1250,8 +1248,7 @@ class RegExpAlternative: public RegExpTree {
explicit RegExpAlternative(ZoneList<RegExpTree*>* nodes) : nodes_(nodes) { } explicit RegExpAlternative(ZoneList<RegExpTree*>* nodes) : nodes_(nodes) { }
virtual void* Accept(RegExpVisitor* visitor, void* data); virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler, virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success, RegExpNode* on_success);
RegExpNode* on_failure);
virtual RegExpAlternative* AsAlternative(); virtual RegExpAlternative* AsAlternative();
virtual bool IsAlternative(); virtual bool IsAlternative();
ZoneList<RegExpTree*>* nodes() { return nodes_; } ZoneList<RegExpTree*>* nodes() { return nodes_; }
...@@ -1265,8 +1262,7 @@ class RegExpText: public RegExpTree { ...@@ -1265,8 +1262,7 @@ class RegExpText: public RegExpTree {
RegExpText() : elements_(2) { } RegExpText() : elements_(2) { }
virtual void* Accept(RegExpVisitor* visitor, void* data); virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler, virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success, RegExpNode* on_success);
RegExpNode* on_failure);
virtual RegExpText* AsText(); virtual RegExpText* AsText();
virtual bool IsText(); virtual bool IsText();
virtual bool IsTextElement() { return true; } virtual bool IsTextElement() { return true; }
...@@ -1291,8 +1287,7 @@ class RegExpAssertion: public RegExpTree { ...@@ -1291,8 +1287,7 @@ class RegExpAssertion: public RegExpTree {
explicit RegExpAssertion(Type type) : type_(type) { } explicit RegExpAssertion(Type type) : type_(type) { }
virtual void* Accept(RegExpVisitor* visitor, void* data); virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler, virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success, RegExpNode* on_success);
RegExpNode* on_failure);
virtual RegExpAssertion* AsAssertion(); virtual RegExpAssertion* AsAssertion();
virtual bool IsAssertion(); virtual bool IsAssertion();
Type type() { return type_; } Type type() { return type_; }
...@@ -1313,8 +1308,7 @@ class RegExpCharacterClass: public RegExpTree { ...@@ -1313,8 +1308,7 @@ class RegExpCharacterClass: public RegExpTree {
} }
virtual void* Accept(RegExpVisitor* visitor, void* data); virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler, virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success, RegExpNode* on_success);
RegExpNode* on_failure);
virtual RegExpCharacterClass* AsCharacterClass(); virtual RegExpCharacterClass* AsCharacterClass();
virtual bool IsCharacterClass(); virtual bool IsCharacterClass();
virtual bool IsTextElement() { return true; } virtual bool IsTextElement() { return true; }
...@@ -1332,8 +1326,7 @@ class RegExpAtom: public RegExpTree { ...@@ -1332,8 +1326,7 @@ class RegExpAtom: public RegExpTree {
explicit RegExpAtom(Vector<const uc16> data) : data_(data) { } explicit RegExpAtom(Vector<const uc16> data) : data_(data) { }
virtual void* Accept(RegExpVisitor* visitor, void* data); virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler, virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success, RegExpNode* on_success);
RegExpNode* on_failure);
virtual RegExpAtom* AsAtom(); virtual RegExpAtom* AsAtom();
virtual bool IsAtom(); virtual bool IsAtom();
virtual bool IsTextElement() { return true; } virtual bool IsTextElement() { return true; }
...@@ -1353,15 +1346,13 @@ class RegExpQuantifier: public RegExpTree { ...@@ -1353,15 +1346,13 @@ class RegExpQuantifier: public RegExpTree {
body_(body) { } body_(body) { }
virtual void* Accept(RegExpVisitor* visitor, void* data); virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler, virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success, RegExpNode* on_success);
RegExpNode* on_failure);
static RegExpNode* ToNode(int min, static RegExpNode* ToNode(int min,
int max, int max,
bool is_greedy, bool is_greedy,
RegExpTree* body, RegExpTree* body,
RegExpCompiler* compiler, RegExpCompiler* compiler,
RegExpNode* on_success, RegExpNode* on_success);
RegExpNode* on_failure);
virtual RegExpQuantifier* AsQuantifier(); virtual RegExpQuantifier* AsQuantifier();
virtual bool IsQuantifier(); virtual bool IsQuantifier();
int min() { return min_; } int min() { return min_; }
...@@ -1391,13 +1382,11 @@ class RegExpCapture: public RegExpTree { ...@@ -1391,13 +1382,11 @@ class RegExpCapture: public RegExpTree {
: body_(body), index_(index), available_(CAPTURE_AVAILABLE) { } : body_(body), index_(index), available_(CAPTURE_AVAILABLE) { }
virtual void* Accept(RegExpVisitor* visitor, void* data); virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler, virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success, RegExpNode* on_success);
RegExpNode* on_failure);
static RegExpNode* ToNode(RegExpTree* body, static RegExpNode* ToNode(RegExpTree* body,
int index, int index,
RegExpCompiler* compiler, RegExpCompiler* compiler,
RegExpNode* on_success, RegExpNode* on_success);
RegExpNode* on_failure);
virtual RegExpCapture* AsCapture(); virtual RegExpCapture* AsCapture();
virtual bool IsCapture(); virtual bool IsCapture();
RegExpTree* body() { return body_; } RegExpTree* body() { return body_; }
...@@ -1422,8 +1411,7 @@ class RegExpLookahead: public RegExpTree { ...@@ -1422,8 +1411,7 @@ class RegExpLookahead: public RegExpTree {
is_positive_(is_positive) { } is_positive_(is_positive) { }
virtual void* Accept(RegExpVisitor* visitor, void* data); virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler, virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success, RegExpNode* on_success);
RegExpNode* on_failure);
virtual RegExpLookahead* AsLookahead(); virtual RegExpLookahead* AsLookahead();
virtual bool IsLookahead(); virtual bool IsLookahead();
RegExpTree* body() { return body_; } RegExpTree* body() { return body_; }
...@@ -1440,8 +1428,7 @@ class RegExpBackReference: public RegExpTree { ...@@ -1440,8 +1428,7 @@ class RegExpBackReference: public RegExpTree {
: capture_(capture) { } : capture_(capture) { }
virtual void* Accept(RegExpVisitor* visitor, void* data); virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler, virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success, RegExpNode* on_success);
RegExpNode* on_failure);
virtual RegExpBackReference* AsBackReference(); virtual RegExpBackReference* AsBackReference();
virtual bool IsBackReference(); virtual bool IsBackReference();
int index() { return capture_->index(); } int index() { return capture_->index(); }
...@@ -1456,8 +1443,7 @@ class RegExpEmpty: public RegExpTree { ...@@ -1456,8 +1443,7 @@ class RegExpEmpty: public RegExpTree {
RegExpEmpty() { } RegExpEmpty() { }
virtual void* Accept(RegExpVisitor* visitor, void* data); virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler, virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success, RegExpNode* on_success);
RegExpNode* on_failure);
virtual RegExpEmpty* AsEmpty(); virtual RegExpEmpty* AsEmpty();
virtual bool IsEmpty(); virtual bool IsEmpty();
static RegExpEmpty* GetInstance() { return &kInstance; } static RegExpEmpty* GetInstance() { return &kInstance; }
......
...@@ -50,22 +50,24 @@ V(SUCCEED, 14, 1) /* succeed */ \ ...@@ -50,22 +50,24 @@ V(SUCCEED, 14, 1) /* succeed */ \
V(ADVANCE_CP, 15, 5) /* advance_cp offset32 */ \ V(ADVANCE_CP, 15, 5) /* advance_cp offset32 */ \
V(GOTO, 16, 5) /* goto addr32 */ \ V(GOTO, 16, 5) /* goto addr32 */ \
V(LOAD_CURRENT_CHAR, 17, 9) /* load offset32 addr32 */ \ V(LOAD_CURRENT_CHAR, 17, 9) /* load offset32 addr32 */ \
V(CHECK_CHAR, 18, 7) /* check_char uc16 addr32 */ \ V(LOAD_CURRENT_CHAR_UNCHECKED, 18, 5) /* load offset32 */ \
V(CHECK_NOT_CHAR, 19, 7) /* check_not_char uc16 addr32 */ \ V(CHECK_CHAR, 19, 7) /* check_char uc16 addr32 */ \
V(OR_CHECK_NOT_CHAR, 20, 9) /* or_check_not_char uc16 uc16 addr32 */ \ V(CHECK_NOT_CHAR, 20, 7) /* check_not_char uc16 addr32 */ \
V(MINUS_OR_CHECK_NOT_CHAR, 21, 9) /* minus_or_check_not_char uc16 uc16 ad...*/ \ V(OR_CHECK_NOT_CHAR, 21, 9) /* or_check_not_char uc16 uc16 addr32 */ \
V(CHECK_LT, 22, 7) /* check_lt uc16 addr32 */ \ V(MINUS_OR_CHECK_NOT_CHAR, 22, 9) /* minus_or_check_not_char uc16 uc16 ad...*/ \
V(CHECK_GT, 23, 7) /* check_gr uc16 addr32 */ \ V(CHECK_LT, 23, 7) /* check_lt uc16 addr32 */ \
V(CHECK_NOT_BACK_REF, 24, 6) /* check_not_back_ref capture_idx addr32 */ \ V(CHECK_GT, 24, 7) /* check_gr uc16 addr32 */ \
V(CHECK_NOT_BACK_REF_NO_CASE, 25, 6) /* check_not_back_ref_no_case captu... */ \ V(CHECK_NOT_BACK_REF, 25, 6) /* check_not_back_ref capture_idx addr32 */ \
V(CHECK_NOT_REGS_EQUAL, 26, 7) /* check_not_regs_equal reg1 reg2 addr32 */ \ V(CHECK_NOT_BACK_REF_NO_CASE, 26, 6) /* check_not_back_ref_no_case captu... */ \
V(LOOKUP_MAP1, 27, 11) /* l_map1 start16 bit_map_addr32 addr32 */ \ V(CHECK_NOT_REGS_EQUAL, 27, 7) /* check_not_regs_equal reg1 reg2 addr32 */ \
V(LOOKUP_MAP2, 28, 99) /* l_map2 start16 half_nibble_map_addr32* */ \ V(LOOKUP_MAP1, 28, 11) /* l_map1 start16 bit_map_addr32 addr32 */ \
V(LOOKUP_MAP8, 29, 99) /* l_map8 start16 byte_map addr32* */ \ V(LOOKUP_MAP2, 29, 99) /* l_map2 start16 half_nibble_map_addr32* */ \
V(LOOKUP_HI_MAP8, 30, 99) /* l_himap8 start8 byte_map_addr32 addr32* */ \ V(LOOKUP_MAP8, 30, 99) /* l_map8 start16 byte_map addr32* */ \
V(CHECK_REGISTER_LT, 31, 8) /* check_reg_lt register_index value16 addr32 */ \ V(LOOKUP_HI_MAP8, 31, 99) /* l_himap8 start8 byte_map_addr32 addr32* */ \
V(CHECK_REGISTER_GE, 32, 8) /* check_reg_ge register_index value16 addr32 */ \ V(CHECK_REGISTER_LT, 32, 8) /* check_reg_lt register_index value16 addr32 */ \
V(CHECK_NOT_AT_START, 33, 5) /* check_not_at_start addr32 */ V(CHECK_REGISTER_GE, 33, 8) /* check_reg_ge register_index value16 addr32 */ \
V(CHECK_NOT_AT_START, 34, 5) /* check_not_at_start addr32 */ \
V(CHECK_GREEDY, 35, 5) /* check_greedy addr32 */
#define DECLARE_BYTECODES(name, code, length) \ #define DECLARE_BYTECODES(name, code, length) \
static const int BC_##name = code; static const int BC_##name = code;
......
...@@ -191,6 +191,15 @@ static bool RawMatch(const byte* code_base, ...@@ -191,6 +191,15 @@ static bool RawMatch(const byte* code_base,
BYTECODE(GOTO) BYTECODE(GOTO)
pc = code_base + Load32(pc + 1); pc = code_base + Load32(pc + 1);
break; break;
BYTECODE(CHECK_GREEDY)
if (current == backtrack_sp[-1]) {
backtrack_sp--;
backtrack_stack_space++;
pc = code_base + Load32(pc + 1);
} else {
pc += BC_CHECK_GREEDY_LENGTH;
}
break;
BYTECODE(LOAD_CURRENT_CHAR) { BYTECODE(LOAD_CURRENT_CHAR) {
int pos = current + Load32(pc + 1); int pos = current + Load32(pc + 1);
if (pos >= subject.length()) { if (pos >= subject.length()) {
...@@ -201,6 +210,12 @@ static bool RawMatch(const byte* code_base, ...@@ -201,6 +210,12 @@ static bool RawMatch(const byte* code_base,
} }
break; break;
} }
BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) {
int pos = current + Load32(pc + 1);
current_char = subject[pos];
pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH;
break;
}
BYTECODE(CHECK_CHAR) { BYTECODE(CHECK_CHAR) {
int c = Load16(pc + 1); int c = Load16(pc + 1);
if (c == current_char) { if (c == current_char) {
......
...@@ -253,11 +253,6 @@ static void DoForEach(Node* node, Callback* callback) { ...@@ -253,11 +253,6 @@ static void DoForEach(Node* node, Callback* callback) {
} }
void RegExpNode::Bind(RegExpMacroAssembler* macro) {
macro->Bind(&label_);
}
}} // namespace v8::internal }} // namespace v8::internal
......
...@@ -242,7 +242,8 @@ Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re, ...@@ -242,7 +242,8 @@ Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
RegExpEngine::Compile(&parse_result, RegExpEngine::Compile(&parse_result,
&node, &node,
flags.is_ignore_case(), flags.is_ignore_case(),
flags.is_multiline()); flags.is_multiline(),
pattern);
if (irregexp_data.is_null()) { if (irregexp_data.is_null()) {
if (FLAG_disable_jscre) { if (FLAG_disable_jscre) {
UNIMPLEMENTED(); UNIMPLEMENTED();
...@@ -858,6 +859,154 @@ Handle<ByteArray> RegExpImpl::IrregexpCode(Handle<JSRegExp> re) { ...@@ -858,6 +859,154 @@ Handle<ByteArray> RegExpImpl::IrregexpCode(Handle<JSRegExp> re) {
// ------------------------------------------------------------------- // -------------------------------------------------------------------
// Implementation of the Irregexp regular expression engine. // Implementation of the Irregexp regular expression engine.
//
// The Irregexp regular expression engine is intended to be a complete
// implementation of ECMAScript regular expressions. It generates either
// bytecodes or native code.
// The Irregexp regexp engine is structured in three steps.
// 1) The parser generates an abstract syntax tree. See ast.cc.
// 2) From the AST a node network is created. The nodes are all
// subclasses of RegExpNode. The nodes represent states when
// executing a regular expression. Several optimizations are
// performed on the node network.
// 3) From the nodes we generate either byte codes or native code
// that can actually execute the regular expression (perform
// the search). The code generation step is described in more
// detail below.
// Code generation.
//
// The nodes are divided into four main categories.
// * Choice nodes
// These represent places where the regular expression can
// match in more than one way. For example on entry to an
// alternation (foo|bar) or a repetition (*, +, ? or {}).
// * Action nodes
// These represent places where some action should be
// performed. Examples include recording the current position
// in the input string to a register (in order to implement
// captures) or other actions on registers, for example in order
// to implement the counters needed for {} repetitions.
// * Matching nodes
// These attempt to match some element of the input string.
// Examples of elements include character classes, plain strings
// or back references.
// * End nodes
// These are used to implement the actions required on finding
// a successful match or failing to find a match.
//
// The code generated (whether as byte codes or native code) maintains
// some state as it runs. This consists of the following elements:
//
// * The capture registers. Used for string captures.
// * Other registers. Used for counters etc.
// * The current position.
// * The stack of backtracking information. Used when a matching node
// fails to find a match and needs to try an alternative.
//
// Conceptual regular expression execution model:
//
// There is a simple conceptual model of regular expression execution
// which will be presented first. The actual code generated is a more
// efficient simulation of the simple conceptual model:
//
// * Choice nodes are implemented as follows:
// For each choice except the last {
// push current position
// push backtrack code location
// <generate code to test for choice>
// backtrack code location:
// pop current position
// }
// <generate code to test for last choice>
//
// * Action nodes are generated as follows:
// <push affected registers on backtrack stack>
// <generate code to perform action>
// push backtrack code location
// <generate code to test for following nodes>
// backtrack code location:
// <pop affected registers to restore their state>
// <pop backtrack location from stack and go to it>
//
// * Matching nodes are generated as follows:
// if input string matches at current position
// update current position
// <generate code to test for following nodes>
// else
// <pop backtrack location from stack and go to it>
//
// Thus it can be seen that the current position is saved and restored
// by the choice nodes, whereas the registers are saved and restored by
// the action nodes that manipulate them.
//
// The other interesting aspect of this model is that nodes are generated
// at the point where they are needed by a recursive call to Emit(). If
// the node has already been code generated then the Emit() call will
// generate a jump to the previously generated code instead. In order to
// limit recursion it is possible for the Emit() function to put the node
// on a work list for later generation and instead generate a jump. The
// destination of the jump is resolved later when the code is generated.
//
// Actual regular expression code generation.
//
// Code generation is actually more complicated than the above. In order
// to improve the efficiency of the generated code some optimizations are
// performed:
//
// * Choice nodes have 1-character lookahead.
// A choice node looks at the following character and eliminates some of
// the choices immediately based on that character. This is not yet
// implemented.
// * Simple greedy loops store reduced backtracking information.
// A quantifier like /.*foo/m will greedily match the whole input. It will
// then need to backtrack to a point where it can match "foo". The naive
// implementation of this would push each character position onto the
// backtracking stack, then pop them off one by one. This would use space
// proportional to the length of the input string. However since the "."
// can only match in one way and always has a constant length (in this case
// of 1) it suffices to store the current position on the top of the stack
// once. Matching now becomes merely incrementing the current position and
// backtracking becomes decrementing the current position and checking the
// result against the stored current position. This is faster and saves
// space.
// * The current state is virtualized.
// This is used to defer expensive operations until it is clear that they
// are needed and to generate code for a node more than once, allowing
// specialized and efficient versions of the code to be created. This is
// explained in the section below.
//
// Execution state virtualization.
//
// Instead of emitting code, nodes that manipulate the state can record their
// manipulation in an object called the GenerationVariant. The
// GenerationVariant object can record a current position offset, an
// optional backtrack code location on the top of the virtualized backtrack
// stack and some register changes. When a node is to be emitted it can flush
// the GenerationVariant or update it. Flushing the GenerationVariant
// will emit code to bring the actual state into line with the virtual state.
// Avoiding flushing the state can postpone some work (eg updates of capture
// registers). Postponing work can save time when executing the regular
// expression since it may be found that the work never has to be done as a
// failure to match can occur. In addition it is much faster to jump to a
// known backtrack code location than it is to pop an unknown backtrack
// location from the stack and jump there.
//
// The virtual state found in the GenerationVariant affects code generation.
// For example the virtual state contains the difference between the actual
// current position and the virtual current position, and matching code needs
// to use this offset to attempt a match in the correct location of the input
// string. Therefore code generated for a non-trivial GenerationVariant is
// specialized to that GenerationVariant. The code generator therefore
// has the ability to generate code for each node several times. In order to
// limit the size of the generated code there is an arbitrary limit on how
// many specialized sets of code may be generated for a given node. If the
// limit is reached, the GenerationVariant is flushed and a generic version of
// the code for a node is emitted. This is subsequently used for that node.
// The code emitted for non-generic GenerationVariants is not recorded in the
// node and so it cannot currently be reused in the event that code generation
// is requested for an identical GenerationVariant.
void RegExpTree::AppendToText(RegExpText* text) { void RegExpTree::AppendToText(RegExpText* text) {
...@@ -914,7 +1063,8 @@ class RegExpCompiler { ...@@ -914,7 +1063,8 @@ class RegExpCompiler {
Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler, Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler,
RegExpNode* start, RegExpNode* start,
int capture_count); int capture_count,
Handle<String> pattern);
inline void AddWork(RegExpNode* node) { work_list_->Add(node); } inline void AddWork(RegExpNode* node) { work_list_->Add(node); }
...@@ -924,7 +1074,6 @@ class RegExpCompiler { ...@@ -924,7 +1074,6 @@ class RegExpCompiler {
RegExpMacroAssembler* macro_assembler() { return macro_assembler_; } RegExpMacroAssembler* macro_assembler() { return macro_assembler_; }
EndNode* accept() { return accept_; } EndNode* accept() { return accept_; }
EndNode* backtrack() { return backtrack_; }
static const int kMaxRecursion = 100; static const int kMaxRecursion = 100;
inline int recursion_depth() { return recursion_depth_; } inline int recursion_depth() { return recursion_depth_; }
...@@ -935,7 +1084,6 @@ class RegExpCompiler { ...@@ -935,7 +1084,6 @@ class RegExpCompiler {
private: private:
EndNode* accept_; EndNode* accept_;
EndNode* backtrack_;
int next_register_; int next_register_;
List<RegExpNode*>* work_list_; List<RegExpNode*>* work_list_;
int recursion_depth_; int recursion_depth_;
...@@ -944,6 +1092,17 @@ class RegExpCompiler { ...@@ -944,6 +1092,17 @@ class RegExpCompiler {
}; };
class RecursionCheck {
public:
explicit RecursionCheck(RegExpCompiler* compiler) : compiler_(compiler) {
compiler->IncrementRecursionDepth();
}
~RecursionCheck() { compiler_->DecrementRecursionDepth(); }
private:
RegExpCompiler* compiler_;
};
// Attempts to compile the regexp using an Irregexp code generator. Returns // Attempts to compile the regexp using an Irregexp code generator. Returns
// a fixed array or a null handle depending on whether it succeeded. // a fixed array or a null handle depending on whether it succeeded.
RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case) RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case)
...@@ -952,14 +1111,14 @@ RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case) ...@@ -952,14 +1111,14 @@ RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case)
recursion_depth_(0), recursion_depth_(0),
ignore_case_(ignore_case) { ignore_case_(ignore_case) {
accept_ = new EndNode(EndNode::ACCEPT); accept_ = new EndNode(EndNode::ACCEPT);
backtrack_ = new EndNode(EndNode::BACKTRACK);
} }
Handle<FixedArray> RegExpCompiler::Assemble( Handle<FixedArray> RegExpCompiler::Assemble(
RegExpMacroAssembler* macro_assembler, RegExpMacroAssembler* macro_assembler,
RegExpNode* start, RegExpNode* start,
int capture_count) { int capture_count,
Handle<String> pattern) {
#ifdef DEBUG #ifdef DEBUG
if (FLAG_trace_regexp_assembler) if (FLAG_trace_regexp_assembler)
macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler); macro_assembler_ = new RegExpMacroAssemblerTracer(macro_assembler);
...@@ -969,19 +1128,19 @@ Handle<FixedArray> RegExpCompiler::Assemble( ...@@ -969,19 +1128,19 @@ Handle<FixedArray> RegExpCompiler::Assemble(
List <RegExpNode*> work_list(0); List <RegExpNode*> work_list(0);
work_list_ = &work_list; work_list_ = &work_list;
Label fail; Label fail;
macro_assembler_->PushBacktrack(&fail); macro_assembler->PushBacktrack(&fail);
if (!start->GoTo(this)) { GenerationVariant generic_variant;
if (!start->Emit(this, &generic_variant)) {
fail.Unuse(); fail.Unuse();
return Handle<FixedArray>::null(); return Handle<FixedArray>::null();
} }
macro_assembler_->Bind(&fail);
macro_assembler_->Fail();
while (!work_list.is_empty()) { while (!work_list.is_empty()) {
if (!work_list.RemoveLast()->GoTo(this)) { if (!work_list.RemoveLast()->Emit(this, &generic_variant)) {
fail.Unuse();
return Handle<FixedArray>::null(); return Handle<FixedArray>::null();
} }
} }
macro_assembler_->Bind(&fail);
macro_assembler_->Fail();
Handle<FixedArray> array = Handle<FixedArray> array =
Factory::NewFixedArray(RegExpImpl::kIrregexpDataLength); Factory::NewFixedArray(RegExpImpl::kIrregexpDataLength);
array->set(RegExpImpl::kIrregexpImplementationIndex, array->set(RegExpImpl::kIrregexpImplementationIndex,
...@@ -990,7 +1149,7 @@ Handle<FixedArray> RegExpCompiler::Assemble( ...@@ -990,7 +1149,7 @@ Handle<FixedArray> RegExpCompiler::Assemble(
Smi::FromInt(next_register_)); Smi::FromInt(next_register_));
array->set(RegExpImpl::kIrregexpNumberOfCapturesIndex, array->set(RegExpImpl::kIrregexpNumberOfCapturesIndex,
Smi::FromInt(capture_count)); Smi::FromInt(capture_count));
Handle<Object> code = macro_assembler_->GetCode(); Handle<Object> code = macro_assembler_->GetCode(pattern);
array->set(RegExpImpl::kIrregexpCodeIndex, *code); array->set(RegExpImpl::kIrregexpCodeIndex, *code);
work_list_ = NULL; work_list_ = NULL;
#ifdef DEBUG #ifdef DEBUG
...@@ -1002,71 +1161,217 @@ Handle<FixedArray> RegExpCompiler::Assemble( ...@@ -1002,71 +1161,217 @@ Handle<FixedArray> RegExpCompiler::Assemble(
} }
bool RegExpNode::GoTo(RegExpCompiler* compiler) { bool GenerationVariant::mentions_reg(int reg) {
// TODO(erikcorry): Implement support. for (DeferredAction* action = actions_;
if (info_.follows_word_interest || action != NULL;
info_.follows_newline_interest || action = action->next()) {
info_.follows_start_interest) { if (reg == action->reg()) return true;
}
return false; return false;
}
int GenerationVariant::FindAffectedRegisters(OutSet* affected_registers) {
int max_register = -1;
for (DeferredAction* action = actions_;
action != NULL;
action = action->next()) {
affected_registers->Set(action->reg());
if (action->reg() > max_register) max_register = action->reg();
} }
if (label_.is_bound()) { return max_register;
compiler->macro_assembler()->GoTo(&label_); }
return true;
// Emits a PushRegister on |macro| for every register in the range
// [0, max_register] that is marked in |affected_registers|.  Registers are
// visited in ascending order.
void GenerationVariant::PushAffectedRegisters(RegExpMacroAssembler* macro,
                                              int max_register,
                                              OutSet& affected_registers) {
  int current = 0;
  while (current <= max_register) {
    if (affected_registers.Get(current)) {
      macro->PushRegister(current);
    }
    current++;
  }
}
// Emits a PopRegister on |macro| for every register in the range
// [0, max_register] that is marked in |affected_registers|.  Registers are
// visited in descending order, starting at max_register.
void GenerationVariant::RestoreAffectedRegisters(RegExpMacroAssembler* macro,
                                                 int max_register,
                                                 OutSet& affected_registers) {
  int current = max_register;
  while (current >= 0) {
    if (affected_registers.Get(current)) {
      macro->PopRegister(current);
    }
    current--;
  }
}
void GenerationVariant::PerformDeferredActions(RegExpMacroAssembler* macro,
int max_register,
OutSet& affected_registers) {
for (int reg = 0; reg <= max_register; reg++) {
if (!affected_registers.Get(reg)) {
continue;
}
int value = 0;
bool absolute = false;
int store_position = -1;
// This is a little tricky because we are scanning the actions in reverse
// historical order (newest first).
for (DeferredAction* action = actions_;
action != NULL;
action = action->next()) {
if (action->reg() == reg) {
switch (action->type()) {
case ActionNode::SET_REGISTER: {
GenerationVariant::DeferredSetRegister* psr =
static_cast<GenerationVariant::DeferredSetRegister*>(action);
value += psr->value();
absolute = true;
ASSERT_EQ(store_position, -1);
break;
}
case ActionNode::INCREMENT_REGISTER:
if (!absolute) {
value++;
}
ASSERT_EQ(store_position, -1);
break;
case ActionNode::STORE_POSITION: {
GenerationVariant::DeferredCapture* pc =
static_cast<GenerationVariant::DeferredCapture*>(action);
if (store_position == -1) {
store_position = pc->cp_offset();
}
ASSERT(!absolute);
ASSERT_EQ(value, 0);
break;
}
default:
UNREACHABLE();
break;
}
}
}
if (store_position != -1) {
macro->WriteCurrentPositionToRegister(reg, store_position);
} else { } else {
if (compiler->recursion_depth() > RegExpCompiler::kMaxRecursion) { if (absolute) {
compiler->macro_assembler()->GoTo(&label_); macro->SetRegister(reg, value);
compiler->AddWork(this);
return true;
} else { } else {
compiler->IncrementRecursionDepth(); if (value != 0) {
bool how_it_went = Emit(compiler); macro->AdvanceRegister(reg, value);
compiler->DecrementRecursionDepth(); }
return how_it_went; }
} }
} }
} }
// EndNodes are special. Because they can be very common and they are very // This is called as we come into a loop choice node and some other tricky
// short we normally inline them. That is, if we are asked to emit a GoTo // nodes. It normalises the state of the code generator to ensure we can
// we just emit the entire node. Since they don't have successors this // generate generic code.
// works. bool GenerationVariant::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
bool EndNode::GoTo(RegExpCompiler* compiler) { RegExpMacroAssembler* macro = compiler->macro_assembler();
if (info()->follows_word_interest ||
info()->follows_newline_interest || ASSERT(actions_ != NULL || cp_offset_ != 0 || backtrack() != NULL);
info()->follows_start_interest) {
return false; if (actions_ == NULL && backtrack() == NULL) {
// Here we just have some deferred cp advances to fix and we are back to
// a normal situation.
macro->AdvanceCurrentPosition(cp_offset_);
// Create a new trivial state and generate the node with that.
GenerationVariant new_state;
return successor->Emit(compiler, &new_state);
}
// Generate deferred actions here along with code to undo them again.
OutSet affected_registers;
int max_register = FindAffectedRegisters(&affected_registers);
PushAffectedRegisters(macro, max_register, affected_registers);
PerformDeferredActions(macro, max_register, affected_registers);
if (backtrack() != NULL) {
// Here we have a concrete backtrack location. These are set up by choice
// nodes and so they indicate that we have a deferred save of the current
// position which we may need to emit here.
macro->PushCurrentPosition();
}
if (cp_offset_ != 0) {
macro->AdvanceCurrentPosition(cp_offset_);
} }
return Emit(compiler);
}
// Create a new trivial state and generate the node with that.
Label undo;
macro->PushBacktrack(&undo);
GenerationVariant new_state;
bool ok = successor->Emit(compiler, &new_state);
Label* RegExpNode::label() { // On backtrack we need to restore state.
return &label_; macro->Bind(&undo);
if (!ok) return false;
if (backtrack() != NULL) {
macro->PopCurrentPosition();
}
RestoreAffectedRegisters(macro, max_register, affected_registers);
if (backtrack() == NULL) {
macro->Backtrack();
} else {
macro->GoTo(backtrack());
}
return true;
} }
bool EndNode::Emit(RegExpCompiler* compiler) { void EndNode::EmitInfoChecks(RegExpMacroAssembler* macro,
RegExpMacroAssembler* macro = compiler->macro_assembler(); GenerationVariant* variant) {
switch (action_) {
case ACCEPT:
if (!label()->is_bound()) Bind(macro);
if (info()->at_end) { if (info()->at_end) {
Label succeed; Label succeed;
// LoadCurrentCharacter will go to the label if we are at the end of the // LoadCurrentCharacter will go to the label if we are at the end of the
// input string. // input string.
macro->LoadCurrentCharacter(0, &succeed); macro->LoadCurrentCharacter(0, &succeed);
macro->Backtrack(); macro->GoTo(variant->backtrack());
macro->Bind(&succeed); macro->Bind(&succeed);
} }
}
// Generates the code for this node.  A variant that carries deferred state
// is handed to Flush(), whose result is returned.  For a trivial variant the
// node's label is bound (unless already bound), the node-info checks are
// emitted, the current position and the stack pointer are read back from
// their registers, and a backtrack is emitted; the function then returns
// true.
bool NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler,
                                   GenerationVariant* variant) {
  if (variant->is_trivial()) {
    RegExpMacroAssembler* assembler = compiler->macro_assembler();
    // Bind the entry label once so that other code can jump to this node.
    if (!label()->is_bound()) {
      assembler->Bind(label());
    }
    EmitInfoChecks(assembler, variant);
    // Restore the position and the stack pointer from the registers.
    assembler->ReadCurrentPositionFromRegister(current_position_register_);
    assembler->ReadStackPointerFromRegister(stack_pointer_register_);
    // With the stack unwound, the entry on top of it is the backtrack
    // location that the BeginSubmatch node got, so backtracking goes there.
    assembler->Backtrack();
    return true;
  }
  // Non-trivial variant: let Flush() deal with the deferred state.
  return variant->Flush(compiler, this);
}
bool EndNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) {
if (!variant->is_trivial()) {
return variant->Flush(compiler, this);
}
RegExpMacroAssembler* macro = compiler->macro_assembler();
if (!label()->is_bound()) {
macro->Bind(label());
}
switch (action_) {
case ACCEPT:
EmitInfoChecks(macro, variant);
macro->Succeed(); macro->Succeed();
return true; return true;
case BACKTRACK: case BACKTRACK:
if (!label()->is_bound()) Bind(macro);
ASSERT(!info()->at_end); ASSERT(!info()->at_end);
macro->Backtrack(); macro->GoTo(variant->backtrack());
return true; return true;
case NEGATIVE_SUBMATCH_SUCCESS:
// This case is handled in a different virtual method.
UNREACHABLE();
} }
UNIMPLEMENTED();
return false; return false;
} }
...@@ -1078,10 +1383,10 @@ void GuardedAlternative::AddGuard(Guard* guard) { ...@@ -1078,10 +1383,10 @@ void GuardedAlternative::AddGuard(Guard* guard) {
} }
ActionNode* ActionNode::StoreRegister(int reg, ActionNode* ActionNode::SetRegister(int reg,
int val, int val,
RegExpNode* on_success) { RegExpNode* on_success) {
ActionNode* result = new ActionNode(STORE_REGISTER, on_success); ActionNode* result = new ActionNode(SET_REGISTER, on_success);
result->data_.u_store_register.reg = reg; result->data_.u_store_register.reg = reg;
result->data_.u_store_register.value = val; result->data_.u_store_register.value = val;
return result; return result;
...@@ -1102,13 +1407,6 @@ ActionNode* ActionNode::StorePosition(int reg, RegExpNode* on_success) { ...@@ -1102,13 +1407,6 @@ ActionNode* ActionNode::StorePosition(int reg, RegExpNode* on_success) {
} }
ActionNode* ActionNode::RestorePosition(int reg, RegExpNode* on_success) {
ActionNode* result = new ActionNode(RESTORE_POSITION, on_success);
result->data_.u_position_register.reg = reg;
return result;
}
ActionNode* ActionNode::BeginSubmatch(int stack_reg, ActionNode* ActionNode::BeginSubmatch(int stack_reg,
int position_reg, int position_reg,
RegExpNode* on_success) { RegExpNode* on_success) {
...@@ -1119,17 +1417,12 @@ ActionNode* ActionNode::BeginSubmatch(int stack_reg, ...@@ -1119,17 +1417,12 @@ ActionNode* ActionNode::BeginSubmatch(int stack_reg,
} }
ActionNode* ActionNode::EscapeSubmatch(int stack_reg, ActionNode* ActionNode::PositiveSubmatchSuccess(int stack_reg,
bool restore_position,
int position_reg, int position_reg,
RegExpNode* on_success) { RegExpNode* on_success) {
ActionNode* result = new ActionNode(ESCAPE_SUBMATCH, on_success); ActionNode* result = new ActionNode(POSITIVE_SUBMATCH_SUCCESS, on_success);
result->data_.u_submatch.stack_pointer_register = stack_reg; result->data_.u_submatch.stack_pointer_register = stack_reg;
if (restore_position) {
result->data_.u_submatch.current_position_register = position_reg; result->data_.u_submatch.current_position_register = position_reg;
} else {
result->data_.u_submatch.current_position_register = -1;
}
return result; return result;
} }
...@@ -1148,13 +1441,19 @@ FOR_EACH_NODE_TYPE(DEFINE_ACCEPT) ...@@ -1148,13 +1441,19 @@ FOR_EACH_NODE_TYPE(DEFINE_ACCEPT)
void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler, void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,
Guard* guard, Guard* guard,
Label* on_failure) { GenerationVariant* variant) {
switch (guard->op()) { switch (guard->op()) {
case Guard::LT: case Guard::LT:
macro_assembler->IfRegisterGE(guard->reg(), guard->value(), on_failure); ASSERT(!variant->mentions_reg(guard->reg()));
macro_assembler->IfRegisterGE(guard->reg(),
guard->value(),
variant->backtrack());
break; break;
case Guard::GEQ: case Guard::GEQ:
macro_assembler->IfRegisterLT(guard->reg(), guard->value(), on_failure); ASSERT(!variant->mentions_reg(guard->reg()));
macro_assembler->IfRegisterLT(guard->reg(),
guard->value(),
variant->backtrack());
break; break;
} }
} }
...@@ -1169,13 +1468,22 @@ static inline void EmitAtomNonLetters( ...@@ -1169,13 +1468,22 @@ static inline void EmitAtomNonLetters(
TextElement elm, TextElement elm,
Vector<const uc16> quarks, Vector<const uc16> quarks,
Label* on_failure, Label* on_failure,
int cp_offset) { int cp_offset,
bool check_offset) {
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
// It is vital that this loop is backwards due to the unchecked character
// load below.
for (int i = quarks.length() - 1; i >= 0; i--) { for (int i = quarks.length() - 1; i >= 0; i--) {
uc16 c = quarks[i]; uc16 c = quarks[i];
int length = uncanonicalize.get(c, '\0', chars); int length = uncanonicalize.get(c, '\0', chars);
if (length <= 1) { if (length <= 1) {
if (check_offset && i == quarks.length() - 1) {
macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure); macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure);
} else {
// Here we don't need to check against the end of the input string
// since this character lies before a character that matched.
macro_assembler->LoadCurrentCharacterUnchecked(cp_offset + i);
}
macro_assembler->CheckNotCharacter(c, on_failure); macro_assembler->CheckNotCharacter(c, on_failure);
} }
} }
...@@ -1216,13 +1524,22 @@ static inline void EmitAtomLetters( ...@@ -1216,13 +1524,22 @@ static inline void EmitAtomLetters(
TextElement elm, TextElement elm,
Vector<const uc16> quarks, Vector<const uc16> quarks,
Label* on_failure, Label* on_failure,
int cp_offset) { int cp_offset,
bool check_offset) {
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth]; unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
// It is vital that this loop is backwards due to the unchecked character
// load below.
for (int i = quarks.length() - 1; i >= 0; i--) { for (int i = quarks.length() - 1; i >= 0; i--) {
uc16 c = quarks[i]; uc16 c = quarks[i];
int length = uncanonicalize.get(c, '\0', chars); int length = uncanonicalize.get(c, '\0', chars);
if (length <= 1) continue; if (length <= 1) continue;
if (check_offset && i == quarks.length() - 1) {
macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure); macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure);
} else {
// Here we don't need to check against the end of the input string
// since this character lies before a character that matched.
macro_assembler->LoadCurrentCharacterUnchecked(cp_offset + i);
}
Label ok; Label ok;
ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4); ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);
switch (length) { switch (length) {
...@@ -1259,10 +1576,8 @@ static inline void EmitAtomLetters( ...@@ -1259,10 +1576,8 @@ static inline void EmitAtomLetters(
static void EmitCharClass(RegExpMacroAssembler* macro_assembler, static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
RegExpCharacterClass* cc, RegExpCharacterClass* cc,
int cp_offset, int cp_offset,
Label* on_failure) { Label* on_failure,
macro_assembler->LoadCurrentCharacter(cp_offset, on_failure); bool check_offset) {
cp_offset++;
ZoneList<CharacterRange>* ranges = cc->ranges(); ZoneList<CharacterRange>* ranges = cc->ranges();
Label success; Label success;
...@@ -1279,6 +1594,26 @@ static void EmitCharClass(RegExpMacroAssembler* macro_assembler, ...@@ -1279,6 +1594,26 @@ static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
return; return;
} }
if (range_count == 1 &&
!cc->is_negated() &&
ranges->at(0).IsEverything(0xffff)) {
// This is a common case hit by non-anchored expressions.
// TODO(erikcorry): We should have a macro assembler instruction that just
// checks for end of string without loading the character.
if (check_offset) {
macro_assembler->LoadCurrentCharacter(cp_offset, on_failure);
}
return;
}
if (check_offset) {
macro_assembler->LoadCurrentCharacter(cp_offset, on_failure);
} else {
// Here we don't need to check against the end of the input string
// since this character lies before a character that matched.
macro_assembler->LoadCurrentCharacterUnchecked(cp_offset);
}
for (int i = 0; i < range_count - 1; i++) { for (int i = 0; i < range_count - 1; i++) {
CharacterRange& range = ranges->at(i); CharacterRange& range = ranges->at(i);
Label next_range; Label next_range;
...@@ -1333,73 +1668,143 @@ static void EmitCharClass(RegExpMacroAssembler* macro_assembler, ...@@ -1333,73 +1668,143 @@ static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
} }
// Decides whether the caller should go on and generate code for this node
// with the given variant.
//
// Returns:
//   FAIL     - one of the follows_* interest flags is set in the node info,
//              or flushing the variant failed.
//   DONE     - the code needed here has already been emitted by this
//              function (a goto to the generic version, or a flush); the
//              caller has nothing more to generate.
//   CONTINUE - the caller should generate code for the node now.
RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,
GenerationVariant* variant) {
// TODO(erikcorry): Implement support.
if (info_.follows_word_interest ||
info_.follows_newline_interest ||
info_.follows_start_interest) {
return FAIL;
}
// If we are generating a greedy loop then don't stop and don't reuse code.
if (variant->stop_node() != NULL) {
return CONTINUE;
}
// NOTE(review): the next three lines appear to carry text from both columns
// of a side-by-side diff view (a TextNode::Emit signature, a doubled
// declaration, and a Bind call fused with the is_trivial() test) - confirm
// against the real source file.
bool TextNode::Emit(RegExpCompiler* compiler) {
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
Bind(macro_assembler); if (variant->is_trivial()) {
if (label_.is_bound()) {
// We are being asked to generate a generic version, but that's already
// been done so just go to it.
macro_assembler->GoTo(&label_);
return DONE;
}
if (compiler->recursion_depth() >= RegExpCompiler::kMaxRecursion) {
// To avoid too deep recursion we push the node to the work queue and just
// generate a goto here.
compiler->AddWork(this);
macro_assembler->GoTo(&label_);
return DONE;
}
// Generate generic version of the node and bind the label for later use.
macro_assembler->Bind(&label_);
return CONTINUE;
}
// We are being asked to make a non-generic version.  Keep track of how many
// non-generic versions we generate so as not to overdo it.
variants_generated_++;
// Still under the per-node variant limit and not past the recursion limit:
// let the caller generate this specialised version.
if (variants_generated_ < kMaxVariantsGenerated &&
compiler->recursion_depth() <= RegExpCompiler::kMaxRecursion) {
return CONTINUE;
}
// If we get here there have been too many variants generated or recursion
// is too deep.  Time to switch to a generic version.  The code for
// generic versions above can handle deep recursion properly.
bool ok = variant->Flush(compiler, this);
return ok ? DONE : FAIL;
}
// This generates the code to match a text node. A text node can contain
// straight character sequences (possibly to be matched in a case-independent
// way) and character classes. In order to be most efficient we test for the
// simple things first and then move on to the more complicated things. The
// simplest thing is a non-letter or a letter if we are matching case. The
// next-most simple thing is a case-independent letter. The least simple is
// a character class. Another optimization is that we test the last one first.
// If that succeeds we don't need to test for the end of the string when we
// load other characters.
bool TextNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) {
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
Label *backtrack = variant->backtrack();
LimitResult limit_result = LimitVersions(compiler, variant);
if (limit_result == FAIL) return false;
if (limit_result == DONE) return true;
ASSERT(limit_result == CONTINUE);
int element_count = elms_->length(); int element_count = elms_->length();
ASSERT(element_count != 0); ASSERT(element_count != 0);
int cp_offset = 0;
if (info()->at_end) { if (info()->at_end) {
macro_assembler->Backtrack(); macro_assembler->GoTo(backtrack);
return true; return true;
} }
// First, handle straight character matches. // First, handle straight character matches.
for (int i = 0; i < element_count; i++) { int checked_up_to = -1;
for (int i = element_count - 1; i >= 0; i--) {
TextElement elm = elms_->at(i); TextElement elm = elms_->at(i);
ASSERT(elm.cp_offset >= 0);
int cp_offset = variant->cp_offset() + elm.cp_offset;
if (elm.type == TextElement::ATOM) { if (elm.type == TextElement::ATOM) {
Vector<const uc16> quarks = elm.data.u_atom->data(); Vector<const uc16> quarks = elm.data.u_atom->data();
int last_cp_offset = cp_offset + quarks.length();
if (compiler->ignore_case()) { if (compiler->ignore_case()) {
EmitAtomNonLetters(macro_assembler, EmitAtomNonLetters(macro_assembler,
elm, elm,
quarks, quarks,
on_failure_->label(), backtrack,
cp_offset); cp_offset,
checked_up_to < last_cp_offset);
} else { } else {
macro_assembler->CheckCharacters(quarks, macro_assembler->CheckCharacters(quarks,
cp_offset, cp_offset,
on_failure_->label()); backtrack,
checked_up_to < last_cp_offset);
} }
cp_offset += quarks.length(); if (last_cp_offset > checked_up_to) checked_up_to = last_cp_offset - 1;
} else { } else {
ASSERT_EQ(elm.type, TextElement::CHAR_CLASS); ASSERT_EQ(elm.type, TextElement::CHAR_CLASS);
cp_offset++;
} }
} }
// Second, handle case independent letter matches if any. // Second, handle case independent letter matches if any.
if (compiler->ignore_case()) { if (compiler->ignore_case()) {
cp_offset = 0; for (int i = element_count - 1; i >= 0; i--) {
for (int i = 0; i < element_count; i++) {
TextElement elm = elms_->at(i); TextElement elm = elms_->at(i);
int cp_offset = variant->cp_offset() + elm.cp_offset;
if (elm.type == TextElement::ATOM) { if (elm.type == TextElement::ATOM) {
Vector<const uc16> quarks = elm.data.u_atom->data(); Vector<const uc16> quarks = elm.data.u_atom->data();
int last_cp_offset = cp_offset + quarks.length();
EmitAtomLetters(macro_assembler, EmitAtomLetters(macro_assembler,
elm, elm,
quarks, quarks,
on_failure_->label(), backtrack,
cp_offset); cp_offset,
cp_offset += quarks.length(); checked_up_to < last_cp_offset);
} else { if (last_cp_offset > checked_up_to) checked_up_to = last_cp_offset - 1;
cp_offset++;
} }
} }
} }
// If the fast character matches passed then do the character classes. // If the fast character matches passed then do the character classes.
cp_offset = 0; for (int i = element_count - 1; i >= 0; i--) {
for (int i = 0; i < element_count; i++) {
TextElement elm = elms_->at(i); TextElement elm = elms_->at(i);
int cp_offset = variant->cp_offset() + elm.cp_offset;
if (elm.type == TextElement::CHAR_CLASS) { if (elm.type == TextElement::CHAR_CLASS) {
RegExpCharacterClass* cc = elm.data.u_char_class; RegExpCharacterClass* cc = elm.data.u_char_class;
EmitCharClass(macro_assembler, cc, cp_offset, on_failure_->label()); EmitCharClass(macro_assembler,
cp_offset++; cc,
} else { cp_offset,
cp_offset += elm.data.u_atom->data().length(); backtrack,
checked_up_to < cp_offset);
if (cp_offset > checked_up_to) checked_up_to = cp_offset;
} }
} }
compiler->AddWork(on_failure_); GenerationVariant new_variant(*variant);
macro_assembler->AdvanceCurrentPosition(cp_offset); new_variant.set_cp_offset(checked_up_to + 1);
return on_success()->GoTo(compiler); RecursionCheck rc(compiler);
return on_success()->Emit(compiler, &new_variant);
} }
...@@ -1419,141 +1824,257 @@ void TextNode::MakeCaseIndependent() { ...@@ -1419,141 +1824,257 @@ void TextNode::MakeCaseIndependent() {
} }
bool ChoiceNode::Emit(RegExpCompiler* compiler) { int TextNode::GreedyLoopTextLength() {
int choice_count = alternatives_->length(); TextElement elm = elms_->at(elms_->length() - 1);
if (elm.type == TextElement::CHAR_CLASS) {
return elm.cp_offset + 1;
} else {
return elm.cp_offset + elm.data.u_atom->data().length();
}
}
// Adds up the fixed text lengths of the nodes on the path that starts at the
// given alternative and follows on_success() links until it arrives back at
// this choice node.  Returns kNodeIsTooComplexForGreedyLoops, meaning that no
// greedy loop can be built, when the path is longer than the recursion
// limit, when a node on it has a follows_* interest flag set, or when a node
// reports that it is too complex itself.
int ChoiceNode::GreedyLoopTextLength(GuardedAlternative* alternative) {
  int total_length = 0;
  // Code for the nodes on the path is generated recursively later on, so
  // the number of nodes visited here has to stay within the recursion limit.
  int nodes_visited = 0;
  for (RegExpNode* current = alternative->node();
       current != this;
       current = static_cast<SeqRegExpNode*>(current)->on_success()) {
    if (nodes_visited++ > RegExpCompiler::kMaxRecursion) {
      return kNodeIsTooComplexForGreedyLoops;
    }
    NodeInfo* current_info = current->info();
    if (current_info->follows_word_interest ||
        current_info->follows_newline_interest ||
        current_info->follows_start_interest) {
      return kNodeIsTooComplexForGreedyLoops;
    }
    int piece_length = current->GreedyLoopTextLength();
    if (piece_length == kNodeIsTooComplexForGreedyLoops) {
      return kNodeIsTooComplexForGreedyLoops;
    }
    total_length += piece_length;
  }
  return total_length;
}
bool LoopChoiceNode::Emit(RegExpCompiler* compiler,
GenerationVariant* variant) {
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler(); RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
Bind(macro_assembler); if (variant->stop_node() == this) {
// For now we just call all choices one after the other. The idea ultimately int text_length = GreedyLoopTextLength(&(alternatives_->at(0)));
// is to use the Dispatch table to try only the relevant ones. ASSERT(text_length != kNodeIsTooComplexForGreedyLoops);
// Update the counter-based backtracking info on the stack. This is an
// optimization for greedy loops (see below).
ASSERT(variant->cp_offset() == text_length);
macro_assembler->AdvanceCurrentPosition(text_length);
macro_assembler->GoTo(variant->loop_label());
return true;
}
ASSERT(variant->stop_node() == NULL);
if (!variant->is_trivial()) {
return variant->Flush(compiler, this);
}
return ChoiceNode::Emit(compiler, variant);
}
bool ChoiceNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) {
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
int choice_count = alternatives_->length();
#ifdef DEBUG
for (int i = 0; i < choice_count - 1; i++) { for (int i = 0; i < choice_count - 1; i++) {
GuardedAlternative alternative = alternatives_->at(i); GuardedAlternative alternative = alternatives_->at(i);
Label after;
Label after_no_pop_cp;
ZoneList<Guard*>* guards = alternative.guards(); ZoneList<Guard*>* guards = alternative.guards();
if (guards != NULL) { int guard_count = (guards == NULL) ? 0 : guards->length();
int guard_count = guards->length();
for (int j = 0; j < guard_count; j++) { for (int j = 0; j < guard_count; j++) {
GenerateGuard(macro_assembler, guards->at(j), &after_no_pop_cp); ASSERT(!variant->mentions_reg(guards->at(j)->reg()));
} }
} }
#endif
LimitResult limit_result = LimitVersions(compiler, variant);
if (limit_result == DONE) return true;
if (limit_result == FAIL) return false;
ASSERT(limit_result == CONTINUE);
RecursionCheck rc(compiler);
GenerationVariant* current_variant = variant;
int text_length = GreedyLoopTextLength(&(alternatives_->at(0)));
bool greedy_loop = false;
Label greedy_loop_label;
GenerationVariant counter_backtrack_variant(&greedy_loop_label);
if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
// Here we have special handling for greedy loops containing only text nodes
// and other simple nodes. These are handled by pushing the current
// position on the stack and then incrementing the current position each
// time around the switch. On backtrack we decrement the current position
// and check it against the pushed value. This avoids pushing backtrack
// information for each iteration of the loop, which could take up a lot of
// space.
greedy_loop = true;
ASSERT(variant->stop_node() == NULL);
macro_assembler->PushCurrentPosition(); macro_assembler->PushCurrentPosition();
macro_assembler->PushBacktrack(&after); current_variant = &counter_backtrack_variant;
if (!alternative.node()->GoTo(compiler)) { Label greedy_match_failed;
after.Unuse(); GenerationVariant greedy_match_variant(&greedy_match_failed);
after_no_pop_cp.Unuse(); Label loop_label;
macro_assembler->Bind(&loop_label);
greedy_match_variant.set_stop_node(this);
greedy_match_variant.set_loop_label(&loop_label);
bool ok = alternatives_->at(0).node()->Emit(compiler,
&greedy_match_variant);
macro_assembler->Bind(&greedy_match_failed);
if (!ok) {
greedy_loop_label.Unuse();
return false; return false;
} }
macro_assembler->Bind(&after);
macro_assembler->PopCurrentPosition();
macro_assembler->Bind(&after_no_pop_cp);
} }
GuardedAlternative alternative = alternatives_->at(choice_count - 1);
Label second_choice; // For use in greedy matches.
macro_assembler->Bind(&second_choice);
// For now we just call all choices one after the other. The idea ultimately
// is to use the Dispatch table to try only the relevant ones.
for (int i = greedy_loop ? 1 : 0; i < choice_count - 1; i++) {
GuardedAlternative alternative = alternatives_->at(i);
Label after;
ZoneList<Guard*>* guards = alternative.guards(); ZoneList<Guard*>* guards = alternative.guards();
if (guards != NULL) { int guard_count = (guards == NULL) ? 0 : guards->length();
int guard_count = guards->length(); GenerationVariant new_variant(*current_variant);
new_variant.set_backtrack(&after);
for (int j = 0; j < guard_count; j++) { for (int j = 0; j < guard_count; j++) {
GenerateGuard(macro_assembler, guards->at(j), on_failure_->label()); GenerateGuard(macro_assembler, guards->at(j), &new_variant);
} }
if (!alternative.node()->Emit(compiler, &new_variant)) {
after.Unuse();
return false;
} }
if (!on_failure_->IsBacktrack()) { macro_assembler->Bind(&after);
ASSERT_NOT_NULL(on_failure_ -> label());
macro_assembler->PushBacktrack(on_failure_->label());
compiler->AddWork(on_failure_);
} }
if (!alternative.node()->GoTo(compiler)) { GuardedAlternative alternative = alternatives_->at(choice_count - 1);
return false; ZoneList<Guard*>* guards = alternative.guards();
int guard_count = (guards == NULL) ? 0 : guards->length();
for (int j = 0; j < guard_count; j++) {
GenerateGuard(macro_assembler, guards->at(j), current_variant);
}
bool ok = alternative.node()->Emit(compiler, current_variant);
if (!ok) return false;
if (greedy_loop) {
macro_assembler->Bind(&greedy_loop_label);
// If we have unwound to the bottom then backtrack.
macro_assembler->CheckGreedyLoop(variant->backtrack());
// Otherwise try the second priority at an earlier position.
macro_assembler->AdvanceCurrentPosition(-text_length);
macro_assembler->GoTo(&second_choice);
} }
return true; return true;
} }
bool ActionNode::Emit(RegExpCompiler* compiler) { bool ActionNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) {
RegExpMacroAssembler* macro = compiler->macro_assembler(); RegExpMacroAssembler* macro = compiler->macro_assembler();
Bind(macro); LimitResult limit_result = LimitVersions(compiler, variant);
if (limit_result == DONE) return true;
if (limit_result == FAIL) return false;
ASSERT(limit_result == CONTINUE);
RecursionCheck rc(compiler);
switch (type_) { switch (type_) {
case STORE_REGISTER:
macro->SetRegister(data_.u_store_register.reg,
data_.u_store_register.value);
break;
case INCREMENT_REGISTER: {
Label undo;
macro->PushBacktrack(&undo);
macro->AdvanceRegister(data_.u_increment_register.reg, 1);
bool ok = on_success()->GoTo(compiler);
if (!ok) {
undo.Unuse();
return false;
}
macro->Bind(&undo);
macro->AdvanceRegister(data_.u_increment_register.reg, -1);
macro->Backtrack();
break;
}
case STORE_POSITION: { case STORE_POSITION: {
Label undo; GenerationVariant::DeferredCapture
macro->PushRegister(data_.u_position_register.reg); new_capture(data_.u_position_register.reg, variant);
macro->PushBacktrack(&undo); GenerationVariant new_variant = *variant;
macro->WriteCurrentPositionToRegister(data_.u_position_register.reg); new_variant.add_action(&new_capture);
bool ok = on_success()->GoTo(compiler); return on_success()->Emit(compiler, &new_variant);
if (!ok) {
undo.Unuse();
return false;
} }
macro->Bind(&undo); case INCREMENT_REGISTER: {
macro->PopRegister(data_.u_position_register.reg); GenerationVariant::DeferredIncrementRegister
macro->Backtrack(); new_increment(data_.u_increment_register.reg);
break; GenerationVariant new_variant = *variant;
new_variant.add_action(&new_increment);
return on_success()->Emit(compiler, &new_variant);
}
case SET_REGISTER: {
GenerationVariant::DeferredSetRegister
new_set(data_.u_store_register.reg, data_.u_store_register.value);
GenerationVariant new_variant = *variant;
new_variant.add_action(&new_set);
return on_success()->Emit(compiler, &new_variant);
} }
case RESTORE_POSITION:
macro->ReadCurrentPositionFromRegister(
data_.u_position_register.reg);
break;
case BEGIN_SUBMATCH: case BEGIN_SUBMATCH:
if (!variant->is_trivial()) return variant->Flush(compiler, this);
macro->WriteCurrentPositionToRegister( macro->WriteCurrentPositionToRegister(
data_.u_submatch.current_position_register); data_.u_submatch.current_position_register, 0);
macro->WriteStackPointerToRegister( macro->WriteStackPointerToRegister(
data_.u_submatch.stack_pointer_register); data_.u_submatch.stack_pointer_register);
break; return on_success()->Emit(compiler, variant);
case ESCAPE_SUBMATCH: case POSITIVE_SUBMATCH_SUCCESS:
if (!variant->is_trivial()) return variant->Flush(compiler, this);
// TODO(erikcorry): Implement support.
if (info()->follows_word_interest ||
info()->follows_newline_interest ||
info()->follows_start_interest) {
return false;
}
if (info()->at_end) { if (info()->at_end) {
Label at_end; Label at_end;
// Load current character jumps to the label if we are beyond the string // Load current character jumps to the label if we are beyond the string
// end. // end.
macro->LoadCurrentCharacter(0, &at_end); macro->LoadCurrentCharacter(0, &at_end);
macro->Backtrack(); macro->GoTo(variant->backtrack());
macro->Bind(&at_end); macro->Bind(&at_end);
} }
if (data_.u_submatch.current_position_register != -1) {
macro->ReadCurrentPositionFromRegister( macro->ReadCurrentPositionFromRegister(
data_.u_submatch.current_position_register); data_.u_submatch.current_position_register);
}
macro->ReadStackPointerFromRegister( macro->ReadStackPointerFromRegister(
data_.u_submatch.stack_pointer_register); data_.u_submatch.stack_pointer_register);
break; return on_success()->Emit(compiler, variant);
default: default:
UNREACHABLE(); UNREACHABLE();
return false; return false;
} }
return on_success()->GoTo(compiler);
} }
bool BackReferenceNode::Emit(RegExpCompiler* compiler) { bool BackReferenceNode::Emit(RegExpCompiler* compiler,
GenerationVariant* variant) {
RegExpMacroAssembler* macro = compiler->macro_assembler(); RegExpMacroAssembler* macro = compiler->macro_assembler();
Bind(macro); if (!variant->is_trivial()) {
return variant->Flush(compiler, this);
}
LimitResult limit_result = LimitVersions(compiler, variant);
if (limit_result == DONE) return true;
if (limit_result == FAIL) return false;
ASSERT(limit_result == CONTINUE);
RecursionCheck rc(compiler);
ASSERT_EQ(start_reg_ + 1, end_reg_); ASSERT_EQ(start_reg_ + 1, end_reg_);
if (info()->at_end) { if (info()->at_end) {
// If we are constrained to match at the end of the input then succeed // If we are constrained to match at the end of the input then succeed
// iff the back reference is empty. // iff the back reference is empty.
macro->CheckNotRegistersEqual(start_reg_, end_reg_, on_failure_->label()); macro->CheckNotRegistersEqual(start_reg_, end_reg_, variant->backtrack());
} else { } else {
if (compiler->ignore_case()) { if (compiler->ignore_case()) {
macro->CheckNotBackReferenceIgnoreCase(start_reg_, on_failure_->label()); macro->CheckNotBackReferenceIgnoreCase(start_reg_, variant->backtrack());
} else { } else {
macro->CheckNotBackReference(start_reg_, on_failure_->label()); macro->CheckNotBackReference(start_reg_, variant->backtrack());
} }
} }
return on_success()->GoTo(compiler); return on_success()->Emit(compiler, variant);
} }
...@@ -1571,9 +2092,9 @@ class DotPrinter: public NodeVisitor { ...@@ -1571,9 +2092,9 @@ class DotPrinter: public NodeVisitor {
stream_(&alloc_) { } stream_(&alloc_) { }
void PrintNode(const char* label, RegExpNode* node); void PrintNode(const char* label, RegExpNode* node);
void Visit(RegExpNode* node); void Visit(RegExpNode* node);
void PrintOnFailure(RegExpNode* from, RegExpNode* on_failure);
void PrintAttributes(RegExpNode* from); void PrintAttributes(RegExpNode* from);
StringStream* stream() { return &stream_; } StringStream* stream() { return &stream_; }
void PrintOnFailure(RegExpNode* from, RegExpNode* to);
#define DECLARE_VISIT(Type) \ #define DECLARE_VISIT(Type) \
virtual void Visit##Type(Type##Node* that); virtual void Visit##Type(Type##Node* that);
FOR_EACH_NODE_TYPE(DECLARE_VISIT) FOR_EACH_NODE_TYPE(DECLARE_VISIT)
...@@ -1615,7 +2136,6 @@ void DotPrinter::Visit(RegExpNode* node) { ...@@ -1615,7 +2136,6 @@ void DotPrinter::Visit(RegExpNode* node) {
void DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) { void DotPrinter::PrintOnFailure(RegExpNode* from, RegExpNode* on_failure) {
if (on_failure->IsBacktrack()) return;
stream()->Add(" n%p -> n%p [style=dotted];\n", from, on_failure); stream()->Add(" n%p -> n%p [style=dotted];\n", from, on_failure);
Visit(on_failure); Visit(on_failure);
} }
...@@ -1740,7 +2260,6 @@ void DotPrinter::VisitChoice(ChoiceNode* that) { ...@@ -1740,7 +2260,6 @@ void DotPrinter::VisitChoice(ChoiceNode* that) {
PrintAttributes(that); PrintAttributes(that);
TableEntryBodyPrinter body_printer(stream(), that); TableEntryBodyPrinter body_printer(stream(), that);
that->GetTable(ignore_case_)->ForEach(&body_printer); that->GetTable(ignore_case_)->ForEach(&body_printer);
PrintOnFailure(that, that->on_failure());
} else { } else {
stream()->Add(" n%p [shape=Mrecord, label=\"?\"];\n", that); stream()->Add(" n%p [shape=Mrecord, label=\"?\"];\n", that);
for (int i = 0; i < that->alternatives()->length(); i++) { for (int i = 0; i < that->alternatives()->length(); i++) {
...@@ -1785,7 +2304,6 @@ void DotPrinter::VisitText(TextNode* that) { ...@@ -1785,7 +2304,6 @@ void DotPrinter::VisitText(TextNode* that) {
PrintAttributes(that); PrintAttributes(that);
stream()->Add(" n%p -> n%p;\n", that, that->on_success()); stream()->Add(" n%p -> n%p;\n", that, that->on_success());
Visit(that->on_success()); Visit(that->on_success());
PrintOnFailure(that, that->on_failure());
} }
...@@ -1797,7 +2315,6 @@ void DotPrinter::VisitBackReference(BackReferenceNode* that) { ...@@ -1797,7 +2315,6 @@ void DotPrinter::VisitBackReference(BackReferenceNode* that) {
PrintAttributes(that); PrintAttributes(that);
stream()->Add(" n%p -> n%p;\n", that, that->on_success()); stream()->Add(" n%p -> n%p;\n", that, that->on_success());
Visit(that->on_success()); Visit(that->on_success());
PrintOnFailure(that, that->on_failure());
} }
...@@ -1810,7 +2327,7 @@ void DotPrinter::VisitEnd(EndNode* that) { ...@@ -1810,7 +2327,7 @@ void DotPrinter::VisitEnd(EndNode* that) {
void DotPrinter::VisitAction(ActionNode* that) { void DotPrinter::VisitAction(ActionNode* that) {
stream()->Add(" n%p [", that); stream()->Add(" n%p [", that);
switch (that->type_) { switch (that->type_) {
case ActionNode::STORE_REGISTER: case ActionNode::SET_REGISTER:
stream()->Add("label=\"$%i:=%i\", shape=octagon", stream()->Add("label=\"$%i:=%i\", shape=octagon",
that->data_.u_store_register.reg, that->data_.u_store_register.reg,
that->data_.u_store_register.value); that->data_.u_store_register.value);
...@@ -1823,22 +2340,19 @@ void DotPrinter::VisitAction(ActionNode* that) { ...@@ -1823,22 +2340,19 @@ void DotPrinter::VisitAction(ActionNode* that) {
stream()->Add("label=\"$%i:=$pos\", shape=octagon", stream()->Add("label=\"$%i:=$pos\", shape=octagon",
that->data_.u_position_register.reg); that->data_.u_position_register.reg);
break; break;
case ActionNode::RESTORE_POSITION:
stream()->Add("label=\"$pos:=$%i\", shape=octagon",
that->data_.u_position_register.reg);
break;
case ActionNode::BEGIN_SUBMATCH: case ActionNode::BEGIN_SUBMATCH:
stream()->Add("label=\"$%i:=$pos,begin\", shape=septagon", stream()->Add("label=\"$%i:=$pos,begin\", shape=septagon",
that->data_.u_submatch.current_position_register); that->data_.u_submatch.current_position_register);
break; break;
case ActionNode::ESCAPE_SUBMATCH: case ActionNode::POSITIVE_SUBMATCH_SUCCESS:
stream()->Add("label=\"escape\", shape=septagon"); stream()->Add("label=\"escape\", shape=septagon");
break; break;
} }
stream()->Add("];\n"); stream()->Add("];\n");
PrintAttributes(that); PrintAttributes(that);
stream()->Add(" n%p -> n%p;\n", that, that->on_success()); RegExpNode* successor = that->on_success();
Visit(that->on_success()); stream()->Add(" n%p -> n%p;\n", that, successor);
Visit(successor);
} }
...@@ -1895,40 +2409,35 @@ void RegExpEngine::DotPrint(const char* label, ...@@ -1895,40 +2409,35 @@ void RegExpEngine::DotPrint(const char* label,
// Wraps this atom in a single-element text list and returns a TextNode that
// continues at |on_success|.
RegExpNode* RegExpAtom::ToNode(RegExpCompiler* compiler,
                               RegExpNode* on_success) {
  ZoneList<TextElement>* atom_elements = new ZoneList<TextElement>(1);
  atom_elements->Add(TextElement::Atom(this));
  RegExpNode* text_node = new TextNode(atom_elements, on_success);
  return text_node;
}
// Builds a TextNode directly over this text's element list; matching
// continues at |on_success|.
RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,
                               RegExpNode* on_success) {
  ZoneList<TextElement>* text_elements = elements();
  return new TextNode(text_elements, on_success);
}
// Returns a TextNode holding this character class as its only element.
// The TextNode(RegExpCharacterClass*, RegExpNode*) constructor creates the
// one-element list and adds TextElement::CharClass(this) itself.
RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
                                         RegExpNode* on_success) {
  return new TextNode(this, on_success);
}
RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success, RegExpNode* on_success) {
RegExpNode* on_failure) {
ZoneList<RegExpTree*>* alternatives = this->alternatives(); ZoneList<RegExpTree*>* alternatives = this->alternatives();
int length = alternatives->length(); int length = alternatives->length();
ChoiceNode* result = new ChoiceNode(length, on_failure); ChoiceNode* result = new ChoiceNode(length);
for (int i = 0; i < length; i++) { for (int i = 0; i < length; i++) {
GuardedAlternative alternative(alternatives->at(i)->ToNode(compiler, GuardedAlternative alternative(alternatives->at(i)->ToNode(compiler,
on_success, on_success));
on_failure));
result->AddAlternative(alternative); result->AddAlternative(alternative);
} }
return result; return result;
...@@ -1936,15 +2445,13 @@ RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler, ...@@ -1936,15 +2445,13 @@ RegExpNode* RegExpDisjunction::ToNode(RegExpCompiler* compiler,
// Instance entry point: forwards this quantifier's bounds, greediness and
// body to the static ToNode overload that builds the loop.
RegExpNode* RegExpQuantifier::ToNode(RegExpCompiler* compiler,
                                     RegExpNode* on_success) {
  return ToNode(min(), max(), is_greedy(), body(), compiler, on_success);
}
...@@ -1953,8 +2460,7 @@ RegExpNode* RegExpQuantifier::ToNode(int min, ...@@ -1953,8 +2460,7 @@ RegExpNode* RegExpQuantifier::ToNode(int min,
bool is_greedy, bool is_greedy,
RegExpTree* body, RegExpTree* body,
RegExpCompiler* compiler, RegExpCompiler* compiler,
RegExpNode* on_success, RegExpNode* on_success) {
RegExpNode* on_failure) {
// x{f, t} becomes this: // x{f, t} becomes this:
// //
// (r++)<-. // (r++)<-.
...@@ -1972,11 +2478,11 @@ RegExpNode* RegExpQuantifier::ToNode(int min, ...@@ -1972,11 +2478,11 @@ RegExpNode* RegExpQuantifier::ToNode(int min,
bool has_max = max < RegExpQuantifier::kInfinity; bool has_max = max < RegExpQuantifier::kInfinity;
bool needs_counter = has_min || has_max; bool needs_counter = has_min || has_max;
int reg_ctr = needs_counter ? compiler->AllocateRegister() : -1; int reg_ctr = needs_counter ? compiler->AllocateRegister() : -1;
ChoiceNode* center = new ChoiceNode(2, on_failure); ChoiceNode* center = new LoopChoiceNode(2);
RegExpNode* loop_return = needs_counter RegExpNode* loop_return = needs_counter
? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center)) ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center))
: static_cast<RegExpNode*>(center); : static_cast<RegExpNode*>(center);
RegExpNode* body_node = body->ToNode(compiler, loop_return, on_failure); RegExpNode* body_node = body->ToNode(compiler, loop_return);
GuardedAlternative body_alt(body_node); GuardedAlternative body_alt(body_node);
if (has_max) { if (has_max) {
Guard* body_guard = new Guard(reg_ctr, Guard::LT, max); Guard* body_guard = new Guard(reg_ctr, Guard::LT, max);
...@@ -1995,7 +2501,7 @@ RegExpNode* RegExpQuantifier::ToNode(int min, ...@@ -1995,7 +2501,7 @@ RegExpNode* RegExpQuantifier::ToNode(int min,
center->AddAlternative(body_alt); center->AddAlternative(body_alt);
} }
if (needs_counter) { if (needs_counter) {
return ActionNode::StoreRegister(reg_ctr, 0, center); return ActionNode::SetRegister(reg_ctr, 0, center);
} else { } else {
return center; return center;
} }
...@@ -2003,8 +2509,7 @@ RegExpNode* RegExpQuantifier::ToNode(int min, ...@@ -2003,8 +2509,7 @@ RegExpNode* RegExpQuantifier::ToNode(int min,
RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler, RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success, RegExpNode* on_success) {
RegExpNode* on_failure) {
NodeInfo info; NodeInfo info;
switch (type()) { switch (type()) {
case START_OF_LINE: case START_OF_LINE:
...@@ -2028,108 +2533,85 @@ RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler, ...@@ -2028,108 +2533,85 @@ RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
// Creates a BackReferenceNode over the start/end registers of the capture
// this back reference refers to; matching continues at |on_success|.
RegExpNode* RegExpBackReference::ToNode(RegExpCompiler* compiler,
                                        RegExpNode* on_success) {
  int capture_start = RegExpCapture::StartRegister(index());
  int capture_end = RegExpCapture::EndRegister(index());
  return new BackReferenceNode(capture_start, capture_end, on_success);
}
// The empty expression adds no node of its own: the continuation is
// returned unchanged.
RegExpNode* RegExpEmpty::ToNode(RegExpCompiler* compiler,
                                RegExpNode* on_success) {
  RegExpNode* continuation = on_success;
  return continuation;
}
// Builds the node graph for a lookahead.
//
// Two registers are allocated up front (stack pointer, then position) and
// handed to ActionNode::BeginSubmatch, which heads the returned graph in
// both the positive and the negative case.
RegExpNode* RegExpLookahead::ToNode(RegExpCompiler* compiler,
                                    RegExpNode* on_success) {
  int stack_pointer_register = compiler->AllocateRegister();
  int position_register = compiler->AllocateRegister();

  if (is_positive()) {
    // Positive lookahead: the body continues into a
    // PositiveSubmatchSuccess action, which in turn continues at on_success.
    RegExpNode* body_success =
        ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
                                            position_register,
                                            on_success);
    return ActionNode::BeginSubmatch(stack_pointer_register,
                                     position_register,
                                     body()->ToNode(compiler, body_success));
  }

  // We use a ChoiceNode for a negative lookahead because it has most of
  // the characteristics we need.  It has the body of the lookahead as its
  // first alternative and the expression after the lookahead as its second
  // alternative.  If the first alternative succeeds then the
  // NegativeSubmatchSuccess will unwind the stack including everything the
  // choice node set up and backtrack.  If the first alternative fails then
  // the second alternative is tried, which is exactly the desired result
  // for a negative lookahead.  In the case where the dispatch table
  // determines that the first alternative cannot match we will save time
  // by not trying it.  Things are not quite so well-optimized if the
  // dispatch table determines that the second alternative cannot match.
  // In this case we could optimize by immediately backtracking.
  ChoiceNode* choice_node = new ChoiceNode(2);
  RegExpNode* negative_success =
      new NegativeSubmatchSuccess(stack_pointer_register, position_register);
  GuardedAlternative body_alt(body()->ToNode(compiler, negative_success));
  choice_node->AddAlternative(body_alt);
  choice_node->AddAlternative(GuardedAlternative(on_success));
  return ActionNode::BeginSubmatch(stack_pointer_register,
                                   position_register,
                                   choice_node);
}
// Instance entry point: forwards this capture's body and index to the
// static ToNode overload.
RegExpNode* RegExpCapture::ToNode(RegExpCompiler* compiler,
                                  RegExpNode* on_success) {
  RegExpNode* capture_node = ToNode(body(), index(), compiler, on_success);
  return capture_node;
}
// Builds the graph for capture number |index| around |body|:
//   StorePosition(start register) -> body -> StorePosition(end register)
//   -> on_success.
// The graph is constructed back to front because each node needs its
// successor at creation time.
RegExpNode* RegExpCapture::ToNode(RegExpTree* body,
                                  int index,
                                  RegExpCompiler* compiler,
                                  RegExpNode* on_success) {
  int capture_start = RegExpCapture::StartRegister(index);
  int capture_end = RegExpCapture::EndRegister(index);
  RegExpNode* record_end = ActionNode::StorePosition(capture_end, on_success);
  RegExpNode* captured = body->ToNode(compiler, record_end);
  return ActionNode::StorePosition(capture_start, captured);
}
// Chains the children of this alternative into a sequence.  The children
// are converted from last to first so that each child's continuation is the
// node built for the child that follows it; the last child continues at
// |on_success|.
RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler,
                                      RegExpNode* on_success) {
  ZoneList<RegExpTree*>* children = nodes();
  RegExpNode* next = on_success;
  int remaining = children->length();
  while (remaining > 0) {
    remaining--;
    next = children->at(remaining)->ToNode(compiler, next);
  }
  return next;
}
...@@ -2400,9 +2882,7 @@ RegExpNode* ActionNode::PropagateForward(NodeInfo* info) { ...@@ -2400,9 +2882,7 @@ RegExpNode* ActionNode::PropagateForward(NodeInfo* info) {
full_info.AddFromPreceding(info); full_info.AddFromPreceding(info);
bool cloned = false; bool cloned = false;
ActionNode* action = EnsureSibling(this, &full_info, &cloned); ActionNode* action = EnsureSibling(this, &full_info, &cloned);
if (cloned && type_ != ESCAPE_SUBMATCH) {
action->set_on_success(action->on_success()->PropagateForward(info)); action->set_on_success(action->on_success()->PropagateForward(info));
}
return action; return action;
} }
...@@ -2421,9 +2901,6 @@ RegExpNode* ChoiceNode::PropagateForward(NodeInfo* info) { ...@@ -2421,9 +2901,6 @@ RegExpNode* ChoiceNode::PropagateForward(NodeInfo* info) {
alternative.set_node(alternative.node()->PropagateForward(info)); alternative.set_node(alternative.node()->PropagateForward(info));
choice->alternatives()->Add(alternative); choice->alternatives()->Add(alternative);
} }
if (!choice->on_failure_->IsBacktrack()) {
choice->on_failure_ = choice->on_failure_->PropagateForward(info);
}
} }
return choice; return choice;
} }
...@@ -2624,12 +3101,29 @@ void Analysis::VisitEnd(EndNode* that) { ...@@ -2624,12 +3101,29 @@ void Analysis::VisitEnd(EndNode* that) {
} }
// Records in each element's cp_offset field its offset from the start of
// this text node.  This is a fixed quantity since a TextNode can only
// contain fixed-width things: an ATOM element advances the offset by the
// length of its data, any other element advances it by one.
void TextNode::CalculateOffsets() {
  int element_count = elements()->length();
  int cp_offset = 0;
  for (int i = 0; i < element_count; i++) {
    TextElement& elm = elements()->at(i);
    elm.cp_offset = cp_offset;
    if (elm.type == TextElement::ATOM) {
      cp_offset += elm.data.u_atom->data().length();
    } else {
      // Do not touch elm.data.u_atom here: for a non-atom element that
      // union member is not the one that was stored.
      cp_offset++;
    }
  }
}
void Analysis::VisitText(TextNode* that) { void Analysis::VisitText(TextNode* that) {
if (ignore_case_) { if (ignore_case_) {
that->MakeCaseIndependent(); that->MakeCaseIndependent();
} }
EnsureAnalyzed(that->on_success()); EnsureAnalyzed(that->on_success());
EnsureAnalyzed(that->on_failure());
NodeInfo* info = that->info(); NodeInfo* info = that->info();
NodeInfo* next_info = that->on_success()->info(); NodeInfo* next_info = that->on_success()->info();
// If the following node is interested in what it follows then this // If the following node is interested in what it follows then this
...@@ -2637,14 +3131,16 @@ void Analysis::VisitText(TextNode* that) { ...@@ -2637,14 +3131,16 @@ void Analysis::VisitText(TextNode* that) {
info->determine_newline = next_info->follows_newline_interest; info->determine_newline = next_info->follows_newline_interest;
info->determine_word = next_info->follows_word_interest; info->determine_word = next_info->follows_word_interest;
info->determine_start = next_info->follows_start_interest; info->determine_start = next_info->follows_start_interest;
that->CalculateOffsets();
} }
// Analyzes the action's successor and then merges the successor's info into
// this node's info.
void Analysis::VisitAction(ActionNode* that) {
  RegExpNode* successor = that->on_success();
  EnsureAnalyzed(successor);
  // If the next node is interested in what it follows then this node
  // has to be interested too so it can pass the information on.
  NodeInfo* own_info = that->info();
  own_info->AddFromFollowing(successor->info());
}
...@@ -2657,13 +3153,11 @@ void Analysis::VisitChoice(ChoiceNode* that) { ...@@ -2657,13 +3153,11 @@ void Analysis::VisitChoice(ChoiceNode* that) {
// this node also, so it can pass it on. // this node also, so it can pass it on.
info->AddFromFollowing(node->info()); info->AddFromFollowing(node->info());
} }
EnsureAnalyzed(that->on_failure());
} }
// A back reference only needs its success continuation analyzed.
void Analysis::VisitBackReference(BackReferenceNode* that) {
  RegExpNode* successor = that->on_success();
  EnsureAnalyzed(successor);
}
...@@ -2746,7 +3240,7 @@ RegExpNode* TextNode::ExpandLocal(NodeInfo* info) { ...@@ -2746,7 +3240,7 @@ RegExpNode* TextNode::ExpandLocal(NodeInfo* info) {
} else { } else {
// If this character class contains both word and non-word // If this character class contains both word and non-word
// characters we need to split it into two. // characters we need to split it into two.
ChoiceNode* result = new ChoiceNode(2, on_failure()); ChoiceNode* result = new ChoiceNode(2);
// Welcome to the family, son! // Welcome to the family, son!
result->set_siblings(this->siblings()); result->set_siblings(this->siblings());
*result->info() = *this->info(); *result->info() = *this->info();
...@@ -2754,16 +3248,14 @@ RegExpNode* TextNode::ExpandLocal(NodeInfo* info) { ...@@ -2754,16 +3248,14 @@ RegExpNode* TextNode::ExpandLocal(NodeInfo* info) {
result->info()->AddAssumptions(info); result->info()->AddAssumptions(info);
RegExpNode* word_node RegExpNode* word_node
= new TextNode(new RegExpCharacterClass(word, false), = new TextNode(new RegExpCharacterClass(word, false),
on_success(), on_success());
on_failure());
word_node->info()->determine_word = true; word_node->info()->determine_word = true;
word_node->info()->does_determine_word = true; word_node->info()->does_determine_word = true;
word_node->info()->is_word = NodeInfo::TRUE; word_node->info()->is_word = NodeInfo::TRUE;
result->alternatives()->Add(GuardedAlternative(word_node)); result->alternatives()->Add(GuardedAlternative(word_node));
RegExpNode* non_word_node RegExpNode* non_word_node
= new TextNode(new RegExpCharacterClass(non_word, false), = new TextNode(new RegExpCharacterClass(non_word, false),
on_success(), on_success());
on_failure());
non_word_node->info()->determine_word = true; non_word_node->info()->determine_word = true;
non_word_node->info()->does_determine_word = true; non_word_node->info()->does_determine_word = true;
non_word_node->info()->is_word = NodeInfo::FALSE; non_word_node->info()->is_word = NodeInfo::FALSE;
...@@ -2974,21 +3466,22 @@ void DispatchTableConstructor::VisitText(TextNode* that) { ...@@ -2974,21 +3466,22 @@ void DispatchTableConstructor::VisitText(TextNode* that) {
// An action node is passed straight through: the visitor is forwarded to
// the action's successor.
void DispatchTableConstructor::VisitAction(ActionNode* that) {
  that->on_success()->Accept(this);
}
Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input, Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input,
RegExpNode** node_return, RegExpNode** node_return,
bool ignore_case, bool ignore_case,
bool is_multiline) { bool is_multiline,
Handle<String> pattern) {
RegExpCompiler compiler(input->capture_count, ignore_case); RegExpCompiler compiler(input->capture_count, ignore_case);
// Wrap the body of the regexp in capture #0. // Wrap the body of the regexp in capture #0.
RegExpNode* captured_body = RegExpCapture::ToNode(input->tree, RegExpNode* captured_body = RegExpCapture::ToNode(input->tree,
0, 0,
&compiler, &compiler,
compiler.accept(), compiler.accept());
compiler.backtrack());
// Add a .*? at the beginning, outside the body capture. // Add a .*? at the beginning, outside the body capture.
// Note: We could choose to not add this if the regexp is anchored at // Note: We could choose to not add this if the regexp is anchored at
// the start of the input but I'm not sure how best to do that and // the start of the input but I'm not sure how best to do that and
...@@ -2999,8 +3492,7 @@ Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input, ...@@ -2999,8 +3492,7 @@ Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input,
false, false,
new RegExpCharacterClass('*'), new RegExpCharacterClass('*'),
&compiler, &compiler,
captured_body, captured_body);
compiler.backtrack());
if (node_return != NULL) *node_return = node; if (node_return != NULL) *node_return = node;
Analysis analysis(ignore_case); Analysis analysis(ignore_case);
analysis.EnsureAnalyzed(node); analysis.EnsureAnalyzed(node);
...@@ -3024,14 +3516,16 @@ Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input, ...@@ -3024,14 +3516,16 @@ Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input,
(input->capture_count + 1) * 2); (input->capture_count + 1) * 2);
return compiler.Assemble(&macro_assembler, return compiler.Assemble(&macro_assembler,
node, node,
input->capture_count); input->capture_count,
pattern);
#endif #endif
} }
EmbeddedVector<byte, 1024> codes; EmbeddedVector<byte, 1024> codes;
RegExpMacroAssemblerIrregexp macro_assembler(codes); RegExpMacroAssemblerIrregexp macro_assembler(codes);
return compiler.Assemble(&macro_assembler, return compiler.Assemble(&macro_assembler,
node, node,
input->capture_count); input->capture_count,
pattern);
} }
......
...@@ -202,6 +202,7 @@ class CharacterRange { ...@@ -202,6 +202,7 @@ class CharacterRange {
uc16 to() const { return to_; } uc16 to() const { return to_; }
void set_to(uc16 value) { to_ = value; } void set_to(uc16 value) { to_ = value; }
bool is_valid() { return from_ <= to_; } bool is_valid() { return from_ <= to_; }
bool IsEverything(uc16 max) { return from_ == 0 && to_ >= max; }
bool IsSingleton() { return (from_ == to_); } bool IsSingleton() { return (from_ == to_); }
void AddCaseEquivalents(ZoneList<CharacterRange>* ranges); void AddCaseEquivalents(ZoneList<CharacterRange>* ranges);
static void Split(ZoneList<CharacterRange>* base, static void Split(ZoneList<CharacterRange>* base,
...@@ -346,6 +347,7 @@ class OutSet: public ZoneObject { ...@@ -346,6 +347,7 @@ class OutSet: public ZoneObject {
uint32_t first_; uint32_t first_;
ZoneList<unsigned>* remaining_; ZoneList<unsigned>* remaining_;
ZoneList<OutSet*>* successors_; ZoneList<OutSet*>* successors_;
friend class GenerationVariant;
}; };
...@@ -432,7 +434,7 @@ class TextElement { ...@@ -432,7 +434,7 @@ class TextElement {
public: public:
enum Type {UNINITIALIZED, ATOM, CHAR_CLASS}; enum Type {UNINITIALIZED, ATOM, CHAR_CLASS};
TextElement() : type(UNINITIALIZED) { } TextElement() : type(UNINITIALIZED) { }
explicit TextElement(Type t) : type(t) { } explicit TextElement(Type t) : type(t), cp_offset(-1) { }
static TextElement Atom(RegExpAtom* atom); static TextElement Atom(RegExpAtom* atom);
static TextElement CharClass(RegExpCharacterClass* char_class); static TextElement CharClass(RegExpCharacterClass* char_class);
Type type; Type type;
...@@ -440,9 +442,13 @@ class TextElement { ...@@ -440,9 +442,13 @@ class TextElement {
RegExpAtom* u_atom; RegExpAtom* u_atom;
RegExpCharacterClass* u_char_class; RegExpCharacterClass* u_char_class;
} data; } data;
int cp_offset;
}; };
class GenerationVariant;
struct NodeInfo { struct NodeInfo {
enum TriBool { enum TriBool {
UNKNOWN = -1, FALSE = 0, TRUE = 1 UNKNOWN = -1, FALSE = 0, TRUE = 1
...@@ -607,17 +613,17 @@ class SiblingList { ...@@ -607,17 +613,17 @@ class SiblingList {
class RegExpNode: public ZoneObject { class RegExpNode: public ZoneObject {
public: public:
RegExpNode() : variants_generated_(0) { }
virtual ~RegExpNode() { } virtual ~RegExpNode() { }
virtual void Accept(NodeVisitor* visitor) = 0; virtual void Accept(NodeVisitor* visitor) = 0;
// Generates a goto to this node or actually generates the code at this point. // Generates a goto to this node or actually generates the code at this point.
// Until the implementation is complete we will return true for success and // Until the implementation is complete we will return true for success and
// false for failure. // false for failure.
virtual bool GoTo(RegExpCompiler* compiler); virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant) = 0;
Label* label(); static const int kNodeIsTooComplexForGreedyLoops = -1;
virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; }
// Until the implementation is complete we will return true for success and Label* label() { return &label_; }
// false for failure. static const int kMaxVariantsGenerated = 10;
virtual bool Emit(RegExpCompiler* compiler) = 0;
RegExpNode* EnsureExpanded(NodeInfo* info); RegExpNode* EnsureExpanded(NodeInfo* info);
virtual RegExpNode* ExpandLocal(NodeInfo* info) = 0; virtual RegExpNode* ExpandLocal(NodeInfo* info) = 0;
...@@ -630,7 +636,6 @@ class RegExpNode: public ZoneObject { ...@@ -630,7 +636,6 @@ class RegExpNode: public ZoneObject {
virtual RegExpNode* PropagateForward(NodeInfo* info) = 0; virtual RegExpNode* PropagateForward(NodeInfo* info) = 0;
NodeInfo* info() { return &info_; } NodeInfo* info() { return &info_; }
virtual bool IsBacktrack() { return false; }
void AddSibling(RegExpNode* node) { siblings_.Add(node); } void AddSibling(RegExpNode* node) { siblings_.Add(node); }
...@@ -645,6 +650,9 @@ class RegExpNode: public ZoneObject { ...@@ -645,6 +650,9 @@ class RegExpNode: public ZoneObject {
void set_siblings(SiblingList* other) { siblings_ = *other; } void set_siblings(SiblingList* other) { siblings_ = *other; }
protected: protected:
enum LimitResult { DONE, FAIL, CONTINUE };
LimitResult LimitVersions(RegExpCompiler* compiler,
GenerationVariant* variant);
// Returns a sibling of this node whose interests and assumptions // Returns a sibling of this node whose interests and assumptions
// match the ones in the given node info. If no sibling exists NULL // match the ones in the given node info. If no sibling exists NULL
...@@ -663,12 +671,11 @@ class RegExpNode: public ZoneObject { ...@@ -663,12 +671,11 @@ class RegExpNode: public ZoneObject {
// processed before it is on a useable state. // processed before it is on a useable state.
virtual RegExpNode* Clone() = 0; virtual RegExpNode* Clone() = 0;
inline void Bind(RegExpMacroAssembler* macro);
private: private:
Label label_; Label label_;
NodeInfo info_; NodeInfo info_;
SiblingList siblings_; SiblingList siblings_;
int variants_generated_;
}; };
...@@ -678,7 +685,6 @@ class SeqRegExpNode: public RegExpNode { ...@@ -678,7 +685,6 @@ class SeqRegExpNode: public RegExpNode {
: on_success_(on_success) { } : on_success_(on_success) { }
RegExpNode* on_success() { return on_success_; } RegExpNode* on_success() { return on_success_; }
void set_on_success(RegExpNode* node) { on_success_ = node; } void set_on_success(RegExpNode* node) { on_success_ = node; }
virtual bool Emit(RegExpCompiler* compiler) { return false; }
private: private:
RegExpNode* on_success_; RegExpNode* on_success_;
}; };
...@@ -687,29 +693,31 @@ class SeqRegExpNode: public RegExpNode { ...@@ -687,29 +693,31 @@ class SeqRegExpNode: public RegExpNode {
class ActionNode: public SeqRegExpNode { class ActionNode: public SeqRegExpNode {
public: public:
enum Type { enum Type {
STORE_REGISTER, SET_REGISTER,
INCREMENT_REGISTER, INCREMENT_REGISTER,
STORE_POSITION, STORE_POSITION,
RESTORE_POSITION,
BEGIN_SUBMATCH, BEGIN_SUBMATCH,
ESCAPE_SUBMATCH POSITIVE_SUBMATCH_SUCCESS
}; };
static ActionNode* StoreRegister(int reg, int val, RegExpNode* on_success); static ActionNode* SetRegister(int reg, int val, RegExpNode* on_success);
static ActionNode* IncrementRegister(int reg, RegExpNode* on_success); static ActionNode* IncrementRegister(int reg, RegExpNode* on_success);
static ActionNode* StorePosition(int reg, RegExpNode* on_success); static ActionNode* StorePosition(int reg, RegExpNode* on_success);
static ActionNode* RestorePosition(int reg, RegExpNode* on_success); static ActionNode* BeginSubmatch(
static ActionNode* BeginSubmatch(int stack_pointer_reg, int stack_pointer_reg,
int position_reg, int position_reg,
RegExpNode* on_success); RegExpNode* on_success);
static ActionNode* EscapeSubmatch(int stack_pointer_reg, static ActionNode* PositiveSubmatchSuccess(
bool and_restore_position, int stack_pointer_reg,
int restore_reg, int restore_reg,
RegExpNode* on_success); RegExpNode* on_success);
virtual void Accept(NodeVisitor* visitor); virtual void Accept(NodeVisitor* visitor);
virtual bool Emit(RegExpCompiler* compiler); virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant);
virtual RegExpNode* ExpandLocal(NodeInfo* info); virtual RegExpNode* ExpandLocal(NodeInfo* info);
virtual void ExpandChildren(); virtual void ExpandChildren();
virtual RegExpNode* PropagateForward(NodeInfo* info); virtual RegExpNode* PropagateForward(NodeInfo* info);
Type type() { return type_; }
// TODO(erikcorry): We should allow some action nodes in greedy loops.
virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; }
virtual ActionNode* Clone() { return new ActionNode(*this); } virtual ActionNode* Clone() { return new ActionNode(*this); }
private: private:
...@@ -740,16 +748,12 @@ class ActionNode: public SeqRegExpNode { ...@@ -740,16 +748,12 @@ class ActionNode: public SeqRegExpNode {
class TextNode: public SeqRegExpNode { class TextNode: public SeqRegExpNode {
public: public:
TextNode(ZoneList<TextElement>* elms, TextNode(ZoneList<TextElement>* elms,
RegExpNode* on_success, RegExpNode* on_success)
RegExpNode* on_failure)
: SeqRegExpNode(on_success), : SeqRegExpNode(on_success),
on_failure_(on_failure),
elms_(elms) { } elms_(elms) { }
TextNode(RegExpCharacterClass* that, TextNode(RegExpCharacterClass* that,
RegExpNode* on_success, RegExpNode* on_success)
RegExpNode* on_failure)
: SeqRegExpNode(on_success), : SeqRegExpNode(on_success),
on_failure_(on_failure),
elms_(new ZoneList<TextElement>(1)) { elms_(new ZoneList<TextElement>(1)) {
elms_->Add(TextElement::CharClass(that)); elms_->Add(TextElement::CharClass(that));
} }
...@@ -757,17 +761,20 @@ class TextNode: public SeqRegExpNode { ...@@ -757,17 +761,20 @@ class TextNode: public SeqRegExpNode {
virtual RegExpNode* PropagateForward(NodeInfo* info); virtual RegExpNode* PropagateForward(NodeInfo* info);
virtual RegExpNode* ExpandLocal(NodeInfo* info); virtual RegExpNode* ExpandLocal(NodeInfo* info);
virtual void ExpandChildren(); virtual void ExpandChildren();
RegExpNode* on_failure() { return on_failure_; } virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant);
virtual bool Emit(RegExpCompiler* compiler);
ZoneList<TextElement>* elements() { return elms_; } ZoneList<TextElement>* elements() { return elms_; }
void MakeCaseIndependent(); void MakeCaseIndependent();
virtual TextNode* Clone() { return new TextNode(*this); } virtual int GreedyLoopTextLength();
virtual TextNode* Clone() {
TextNode* result = new TextNode(*this);
result->CalculateOffsets();
return result;
}
void CalculateOffsets();
private: private:
void ExpandAtomChildren(RegExpAtom* that); void ExpandAtomChildren(RegExpAtom* that);
void ExpandCharClassChildren(RegExpCharacterClass* that); void ExpandCharClassChildren(RegExpCharacterClass* that);
RegExpNode* on_failure_;
ZoneList<TextElement>* elms_; ZoneList<TextElement>* elms_;
}; };
...@@ -776,24 +783,20 @@ class BackReferenceNode: public SeqRegExpNode { ...@@ -776,24 +783,20 @@ class BackReferenceNode: public SeqRegExpNode {
public: public:
BackReferenceNode(int start_reg, BackReferenceNode(int start_reg,
int end_reg, int end_reg,
RegExpNode* on_success, RegExpNode* on_success)
RegExpNode* on_failure)
: SeqRegExpNode(on_success), : SeqRegExpNode(on_success),
on_failure_(on_failure),
start_reg_(start_reg), start_reg_(start_reg),
end_reg_(end_reg) { } end_reg_(end_reg) { }
virtual void Accept(NodeVisitor* visitor); virtual void Accept(NodeVisitor* visitor);
RegExpNode* on_failure() { return on_failure_; }
int start_register() { return start_reg_; } int start_register() { return start_reg_; }
int end_register() { return end_reg_; } int end_register() { return end_reg_; }
virtual bool Emit(RegExpCompiler* compiler); virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant);
virtual RegExpNode* PropagateForward(NodeInfo* info); virtual RegExpNode* PropagateForward(NodeInfo* info);
virtual RegExpNode* ExpandLocal(NodeInfo* info); virtual RegExpNode* ExpandLocal(NodeInfo* info);
virtual void ExpandChildren(); virtual void ExpandChildren();
virtual BackReferenceNode* Clone() { return new BackReferenceNode(*this); } virtual BackReferenceNode* Clone() { return new BackReferenceNode(*this); }
private: private:
RegExpNode* on_failure_;
int start_reg_; int start_reg_;
int end_reg_; int end_reg_;
}; };
...@@ -801,22 +804,37 @@ class BackReferenceNode: public SeqRegExpNode { ...@@ -801,22 +804,37 @@ class BackReferenceNode: public SeqRegExpNode {
class EndNode: public RegExpNode { class EndNode: public RegExpNode {
public: public:
enum Action { ACCEPT, BACKTRACK }; enum Action { ACCEPT, BACKTRACK, NEGATIVE_SUBMATCH_SUCCESS };
explicit EndNode(Action action) : action_(action) { } explicit EndNode(Action action) : action_(action) { }
virtual void Accept(NodeVisitor* visitor); virtual void Accept(NodeVisitor* visitor);
virtual bool Emit(RegExpCompiler* compiler); virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant);
virtual RegExpNode* PropagateForward(NodeInfo* info); virtual RegExpNode* PropagateForward(NodeInfo* info);
virtual RegExpNode* ExpandLocal(NodeInfo* info); virtual RegExpNode* ExpandLocal(NodeInfo* info);
virtual void ExpandChildren(); virtual void ExpandChildren();
virtual bool IsBacktrack() { return action_ == BACKTRACK; }
virtual bool GoTo(RegExpCompiler* compiler);
virtual EndNode* Clone() { return new EndNode(*this); } virtual EndNode* Clone() { return new EndNode(*this); }
protected:
void EmitInfoChecks(RegExpMacroAssembler* macro, GenerationVariant* variant);
private: private:
Action action_; Action action_;
}; };
// End node constructed with the NEGATIVE_SUBMATCH_SUCCESS action.  In addition
// to what EndNode stores, it remembers the indices of two registers: one
// passed as the stack pointer register and one passed as the current position
// register.  Emit is overridden; its definition is not in this header.
class NegativeSubmatchSuccess: public EndNode {
 public:
  // stack_pointer_reg and position_reg are register indices; they are stored
  // as given, without validation.
  NegativeSubmatchSuccess(int stack_pointer_reg, int position_reg)
      : EndNode(NEGATIVE_SUBMATCH_SUCCESS),
        stack_pointer_register_(stack_pointer_reg),
        current_position_register_(position_reg) { }
  virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant);
  // EndNode::Clone() copy-constructs a plain EndNode, which would slice off
  // both register fields and the Emit override above.  Clone with the dynamic
  // type instead, as LoopChoiceNode does for ChoiceNode.
  virtual NegativeSubmatchSuccess* Clone() {
    return new NegativeSubmatchSuccess(*this);
  }

 private:
  int stack_pointer_register_;
  int current_position_register_;
};
class Guard: public ZoneObject { class Guard: public ZoneObject {
public: public:
enum Relation { LT, GEQ }; enum Relation { LT, GEQ };
...@@ -851,17 +869,15 @@ class GuardedAlternative { ...@@ -851,17 +869,15 @@ class GuardedAlternative {
class ChoiceNode: public RegExpNode { class ChoiceNode: public RegExpNode {
public: public:
explicit ChoiceNode(int expected_size, RegExpNode* on_failure) explicit ChoiceNode(int expected_size)
: on_failure_(on_failure), : alternatives_(new ZoneList<GuardedAlternative>(expected_size)),
alternatives_(new ZoneList<GuardedAlternative>(expected_size)),
table_(NULL), table_(NULL),
being_calculated_(false) { } being_calculated_(false) { }
virtual void Accept(NodeVisitor* visitor); virtual void Accept(NodeVisitor* visitor);
void AddAlternative(GuardedAlternative node) { alternatives()->Add(node); } void AddAlternative(GuardedAlternative node) { alternatives()->Add(node); }
ZoneList<GuardedAlternative>* alternatives() { return alternatives_; } ZoneList<GuardedAlternative>* alternatives() { return alternatives_; }
DispatchTable* GetTable(bool ignore_case); DispatchTable* GetTable(bool ignore_case);
RegExpNode* on_failure() { return on_failure_; } virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant);
virtual bool Emit(RegExpCompiler* compiler);
virtual RegExpNode* PropagateForward(NodeInfo* info); virtual RegExpNode* PropagateForward(NodeInfo* info);
virtual RegExpNode* ExpandLocal(NodeInfo* info); virtual RegExpNode* ExpandLocal(NodeInfo* info);
virtual void ExpandChildren(); virtual void ExpandChildren();
...@@ -870,19 +886,128 @@ class ChoiceNode: public RegExpNode { ...@@ -870,19 +886,128 @@ class ChoiceNode: public RegExpNode {
bool being_calculated() { return being_calculated_; } bool being_calculated() { return being_calculated_; }
void set_being_calculated(bool b) { being_calculated_ = b; } void set_being_calculated(bool b) { being_calculated_ = b; }
protected:
int GreedyLoopTextLength(GuardedAlternative *alternative);
ZoneList<GuardedAlternative>* alternatives_;
private: private:
friend class DispatchTableConstructor; friend class DispatchTableConstructor;
friend class Analysis; friend class Analysis;
void GenerateGuard(RegExpMacroAssembler* macro_assembler, void GenerateGuard(RegExpMacroAssembler* macro_assembler,
Guard *guard, Guard *guard,
Label* on_failure); GenerationVariant* variant);
RegExpNode* on_failure_;
ZoneList<GuardedAlternative>* alternatives_;
DispatchTable* table_; DispatchTable* table_;
bool being_calculated_; bool being_calculated_;
}; };
// A ChoiceNode subclass that supplies its own Emit (defined out of line) and
// a Clone that keeps the LoopChoiceNode type.  It adds no data members.
class LoopChoiceNode: public ChoiceNode {
 public:
  // expected_size is forwarded unchanged to the ChoiceNode constructor.
  explicit LoopChoiceNode(int expected_size)
      : ChoiceNode(expected_size) { }

  virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant);

  // Copy-constructs a new LoopChoiceNode from this one.
  virtual LoopChoiceNode* Clone() {
    return new LoopChoiceNode(*this);
  }
};
// Code for a node can be generated in many different ways.  A
// GenerationVariant captures the way that is currently in force, i.e. it is
// the current state of the code generator.
class GenerationVariant {
 public:
  // One link in a singly linked chain of actions, each tagged with an
  // ActionNode::Type and a register index.  The chain is threaded through
  // next_, which only GenerationVariant (a friend) writes after construction.
  class DeferredAction {
   public:
    DeferredAction(ActionNode::Type type, int reg)
        : type_(type),
          reg_(reg),
          next_(NULL) { }

    ActionNode::Type type() { return type_; }
    int reg() { return reg_; }
    DeferredAction* next() { return next_; }

   private:
    ActionNode::Type type_;
    int reg_;
    DeferredAction* next_;
    friend class GenerationVariant;
  };

  // STORE_POSITION action.  The cp_offset of the variant passed to the
  // constructor is copied at construction time.
  class DeferredCapture: public DeferredAction {
   public:
    DeferredCapture(int reg, GenerationVariant* variant)
        : DeferredAction(ActionNode::STORE_POSITION, reg),
          cp_offset_(variant->cp_offset()) { }

    int cp_offset() { return cp_offset_; }

   private:
    int cp_offset_;
    void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; }
  };

  // SET_REGISTER action carrying the value associated with the register.
  class DeferredSetRegister: public DeferredAction {
   public:
    DeferredSetRegister(int reg, int value)
        : DeferredAction(ActionNode::SET_REGISTER, reg),
          value_(value) { }

    int value() { return value_; }

   private:
    int value_;
  };

  // INCREMENT_REGISTER action; it needs nothing beyond the register index.
  class DeferredIncrementRegister: public DeferredAction {
   public:
    explicit DeferredIncrementRegister(int reg)
        : DeferredAction(ActionNode::INCREMENT_REGISTER, reg) { }
  };

  // Creates a variant with the given backtrack label, a zero cp_offset, no
  // actions, no stop node and no loop label.
  explicit GenerationVariant(Label* backtrack)
      : cp_offset_(0),
        actions_(NULL),
        backtrack_(backtrack),
        stop_node_(NULL),
        loop_label_(NULL) { }

  // Creates a variant with every field zero or NULL; is_trivial() holds for
  // it.
  GenerationVariant()
      : cp_offset_(0),
        actions_(NULL),
        backtrack_(NULL),
        stop_node_(NULL),
        loop_label_(NULL) { }

  // Defined out of line.
  bool Flush(RegExpCompiler* compiler, RegExpNode* successor);

  int cp_offset() { return cp_offset_; }

  // Head of the action chain; the most recently added action comes first.
  DeferredAction* actions() { return actions_; }

  // True when there is no backtrack label, no action and a zero cp_offset.
  // The stop node and the loop label are not consulted.
  bool is_trivial() {
    if (backtrack_ != NULL) return false;
    if (actions_ != NULL) return false;
    return cp_offset_ == 0;
  }

  Label* backtrack() { return backtrack_; }
  Label* loop_label() { return loop_label_; }
  RegExpNode* stop_node() { return stop_node_; }

  // The mutators below are meant to be called on freshly created
  // GenerationVariants only; once set up, a GenerationVariant is intended to
  // be treated as immutable.

  // Pushes new_action onto the front of the action chain.  The action must
  // not already be linked into a chain.
  void add_action(DeferredAction* new_action) {
    ASSERT(new_action->next_ == NULL);
    new_action->next_ = actions_;
    actions_ = new_action;
  }

  // Replaces cp_offset; the assertion requires that it never decreases.
  void set_cp_offset(int new_cp_offset) {
    ASSERT(new_cp_offset >= cp_offset_);
    cp_offset_ = new_cp_offset;
  }

  void set_backtrack(Label* backtrack) { backtrack_ = backtrack; }
  void set_stop_node(RegExpNode* node) { stop_node_ = node; }
  void set_loop_label(Label* label) { loop_label_ = label; }

  // Defined out of line.
  bool mentions_reg(int reg);

 private:
  // Helpers defined out of line.
  int FindAffectedRegisters(OutSet* affected_registers);
  void PerformDeferredActions(RegExpMacroAssembler* macro,
                              int max_register,
                              OutSet& affected_registers);
  void RestoreAffectedRegisters(RegExpMacroAssembler* macro,
                                int max_register,
                                OutSet& affected_registers);
  void PushAffectedRegisters(RegExpMacroAssembler* macro,
                             int max_register,
                             OutSet& affected_registers);

  int cp_offset_;
  DeferredAction* actions_;
  Label* backtrack_;
  RegExpNode* stop_node_;
  Label* loop_label_;
};
class NodeVisitor { class NodeVisitor {
public: public:
virtual ~NodeVisitor() { } virtual ~NodeVisitor() { }
...@@ -956,7 +1081,8 @@ class RegExpEngine: public AllStatic { ...@@ -956,7 +1081,8 @@ class RegExpEngine: public AllStatic {
static Handle<FixedArray> Compile(RegExpParseResult* input, static Handle<FixedArray> Compile(RegExpParseResult* input,
RegExpNode** node_return, RegExpNode** node_return,
bool ignore_case, bool ignore_case,
bool multiline); bool multiline,
Handle<String> pattern);
static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); static void DotPrint(const char* label, RegExpNode* node, bool ignore_case);
}; };
......
...@@ -184,11 +184,14 @@ void RegExpMacroAssemblerIA32::CheckCharacterLT(uc16 limit, Label* on_less) { ...@@ -184,11 +184,14 @@ void RegExpMacroAssemblerIA32::CheckCharacterLT(uc16 limit, Label* on_less) {
void RegExpMacroAssemblerIA32::CheckCharacters(Vector<const uc16> str, void RegExpMacroAssemblerIA32::CheckCharacters(Vector<const uc16> str,
int cp_offset, int cp_offset,
Label* on_failure) { Label* on_failure,
bool check_end_of_string) {
int byte_length = str.length() * char_size(); int byte_length = str.length() * char_size();
int byte_offset = cp_offset * char_size(); int byte_offset = cp_offset * char_size();
if (check_end_of_string) {
__ cmp(Operand(edi), Immediate(-(byte_offset + byte_length))); __ cmp(Operand(edi), Immediate(-(byte_offset + byte_length)));
BranchOrBacktrack(greater, on_failure); BranchOrBacktrack(greater, on_failure);
}
if (str.length() <= kMaxInlineStringTests) { if (str.length() <= kMaxInlineStringTests) {
for (int i = 0; i < str.length(); i++) { for (int i = 0; i < str.length(); i++) {
...@@ -233,10 +236,13 @@ void RegExpMacroAssemblerIA32::CheckCharacters(Vector<const uc16> str, ...@@ -233,10 +236,13 @@ void RegExpMacroAssemblerIA32::CheckCharacters(Vector<const uc16> str,
} }
void RegExpMacroAssemblerIA32::CheckCurrentPosition(int register_index, void RegExpMacroAssemblerIA32::CheckGreedyLoop(Label* on_equal) {
Label* on_equal) { Label fallthrough;
__ cmp(edi, register_location(register_index)); __ cmp(edi, Operand(esp, 0));
BranchOrBacktrack(equal, on_equal); __ j(not_equal, &fallthrough);
__ add(Operand(esp), Immediate(4)); // Pop.
BranchOrBacktrack(no_condition, on_equal);
__ bind(&fallthrough);
} }
...@@ -482,7 +488,7 @@ void RegExpMacroAssemblerIA32::Fail() { ...@@ -482,7 +488,7 @@ void RegExpMacroAssemblerIA32::Fail() {
} }
Handle<Object> RegExpMacroAssemblerIA32::GetCode() { Handle<Object> RegExpMacroAssemblerIA32::GetCode(Handle<String> source) {
// Finalize code - write the entry point code now we know how many // Finalize code - write the entry point code now we know how many
// registers we need. // registers we need.
...@@ -521,7 +527,7 @@ Handle<Object> RegExpMacroAssemblerIA32::GetCode() { ...@@ -521,7 +527,7 @@ Handle<Object> RegExpMacroAssemblerIA32::GetCode() {
Label at_start; Label at_start;
__ cmp(Operand(ebp, kAtStart), Immediate(0)); __ cmp(Operand(ebp, kAtStart), Immediate(0));
__ j(not_equal, &at_start); __ j(not_equal, &at_start);
LoadCurrentCharToRegister(-1); // Load previous char. LoadCurrentCharacterUnchecked(-1); // Load previous char.
__ jmp(&start_label_); __ jmp(&start_label_);
__ bind(&at_start); __ bind(&at_start);
__ mov(current_character(), '\n'); __ mov(current_character(), '\n');
...@@ -562,7 +568,7 @@ Handle<Object> RegExpMacroAssemblerIA32::GetCode() { ...@@ -562,7 +568,7 @@ Handle<Object> RegExpMacroAssemblerIA32::GetCode() {
NULL, NULL,
Code::ComputeFlags(Code::REGEXP), Code::ComputeFlags(Code::REGEXP),
self_); self_);
LOG(CodeCreateEvent("RegExp", *code, "(Compiled RegExp)")); LOG(CodeCreateEvent("RegExp", *code, *(source->ToCString())));
return Handle<Object>::cast(code); return Handle<Object>::cast(code);
} }
...@@ -600,7 +606,7 @@ void RegExpMacroAssemblerIA32::LoadCurrentCharacter(int cp_offset, ...@@ -600,7 +606,7 @@ void RegExpMacroAssemblerIA32::LoadCurrentCharacter(int cp_offset,
ASSERT(cp_offset < (1<<30)); // Be sane! (And ensure negation works) ASSERT(cp_offset < (1<<30)); // Be sane! (And ensure negation works)
__ cmp(edi, -cp_offset * char_size()); __ cmp(edi, -cp_offset * char_size());
BranchOrBacktrack(greater_equal, on_end_of_input); BranchOrBacktrack(greater_equal, on_end_of_input);
LoadCurrentCharToRegister(cp_offset); LoadCurrentCharacterUnchecked(cp_offset);
} }
...@@ -651,10 +657,17 @@ void RegExpMacroAssemblerIA32::Succeed() { ...@@ -651,10 +657,17 @@ void RegExpMacroAssemblerIA32::Succeed() {
} }
void RegExpMacroAssemblerIA32::WriteCurrentPositionToRegister(int reg) { void RegExpMacroAssemblerIA32::WriteCurrentPositionToRegister(int reg,
int cp_offset) {
if (cp_offset == 0) {
__ mov(register_location(reg), edi); __ mov(register_location(reg), edi);
} else {
__ lea(eax, Operand(edi, cp_offset));
__ mov(register_location(reg), eax);
}
} }
void RegExpMacroAssemblerIA32::WriteStackPointerToRegister(int reg) { void RegExpMacroAssemblerIA32::WriteStackPointerToRegister(int reg) {
__ mov(register_location(reg), esp); __ mov(register_location(reg), esp);
} }
...@@ -770,7 +783,7 @@ void RegExpMacroAssemblerIA32::CheckStackLimit() { ...@@ -770,7 +783,7 @@ void RegExpMacroAssemblerIA32::CheckStackLimit() {
} }
void RegExpMacroAssemblerIA32::LoadCurrentCharToRegister(int cp_offset) { void RegExpMacroAssemblerIA32::LoadCurrentCharacterUnchecked(int cp_offset) {
if (mode_ == ASCII) { if (mode_ == ASCII) {
__ movzx_b(current_character(), Operand(esi, edi, times_1, cp_offset)); __ movzx_b(current_character(), Operand(esi, edi, times_1, cp_offset));
return; return;
......
...@@ -47,8 +47,9 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler { ...@@ -47,8 +47,9 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
virtual void CheckCharacterLT(uc16 limit, Label* on_less); virtual void CheckCharacterLT(uc16 limit, Label* on_less);
virtual void CheckCharacters(Vector<const uc16> str, virtual void CheckCharacters(Vector<const uc16> str,
int cp_offset, int cp_offset,
Label* on_failure); Label* on_failure,
virtual void CheckCurrentPosition(int register_index, Label* on_equal); bool check_end_of_string);
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
virtual void CheckNotAtStart(Label* on_not_at_start); virtual void CheckNotAtStart(Label* on_not_at_start);
virtual void CheckNotBackReference(int start_reg, Label* on_no_match); virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg, virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
...@@ -70,12 +71,14 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler { ...@@ -70,12 +71,14 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
const Vector<Label*>& destinations); const Vector<Label*>& destinations);
virtual void EmitOrLink(Label* label); virtual void EmitOrLink(Label* label);
virtual void Fail(); virtual void Fail();
virtual Handle<Object> GetCode(); virtual Handle<Object> GetCode(Handle<String> source);
virtual void GoTo(Label* label); virtual void GoTo(Label* label);
virtual void IfRegisterGE(int reg, int comparand, Label* if_ge); virtual void IfRegisterGE(int reg, int comparand, Label* if_ge);
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt); virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
virtual IrregexpImplementation Implementation(); virtual IrregexpImplementation Implementation();
virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input); virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input);
virtual void LoadCurrentCharacterUnchecked(int cp_offset);
virtual void PopCurrentPosition(); virtual void PopCurrentPosition();
virtual void PopRegister(int register_index); virtual void PopRegister(int register_index);
virtual void PushBacktrack(Label* label); virtual void PushBacktrack(Label* label);
...@@ -85,7 +88,7 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler { ...@@ -85,7 +88,7 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
virtual void ReadStackPointerFromRegister(int reg); virtual void ReadStackPointerFromRegister(int reg);
virtual void SetRegister(int register_index, int to); virtual void SetRegister(int register_index, int to);
virtual void Succeed(); virtual void Succeed();
virtual void WriteCurrentPositionToRegister(int reg); virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
virtual void WriteStackPointerToRegister(int reg); virtual void WriteStackPointerToRegister(int reg);
template <typename T> template <typename T>
...@@ -139,10 +142,6 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler { ...@@ -139,10 +142,6 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
// is NULL, in which case it is a conditional Backtrack. // is NULL, in which case it is a conditional Backtrack.
void BranchOrBacktrack(Condition condition, Label* to); void BranchOrBacktrack(Condition condition, Label* to);
// Read a character from input at the given offset from the current
// position.
void LoadCurrentCharToRegister(int cp_offset);
// Load the address of a "constant buffer" (a slice of a byte array) // Load the address of a "constant buffer" (a slice of a byte array)
// into a register. The address is computed from the ByteArray* address // into a register. The address is computed from the ByteArray* address
// and an offset. Uses no extra registers. // and an offset. Uses no extra registers.
......
...@@ -68,6 +68,7 @@ void RegExpMacroAssemblerIrregexp::Bind(Label* l) { ...@@ -68,6 +68,7 @@ void RegExpMacroAssemblerIrregexp::Bind(Label* l) {
void RegExpMacroAssemblerIrregexp::EmitOrLink(Label* l) { void RegExpMacroAssemblerIrregexp::EmitOrLink(Label* l) {
if (l == NULL) l = &backtrack_;
if (l->is_bound()) { if (l->is_bound()) {
Emit32(l->pos()); Emit32(l->pos());
} else { } else {
...@@ -95,11 +96,11 @@ void RegExpMacroAssemblerIrregexp::PushRegister(int register_index) { ...@@ -95,11 +96,11 @@ void RegExpMacroAssemblerIrregexp::PushRegister(int register_index) {
void RegExpMacroAssemblerIrregexp::WriteCurrentPositionToRegister( void RegExpMacroAssemblerIrregexp::WriteCurrentPositionToRegister(
int register_index) { int register_index, int cp_offset) {
ASSERT(register_index >= 0); ASSERT(register_index >= 0);
Emit(BC_SET_REGISTER_TO_CP); Emit(BC_SET_REGISTER_TO_CP);
Emit(register_index); Emit(register_index);
Emit32(0); // Current position offset. Emit32(cp_offset); // Current position offset.
} }
...@@ -187,11 +188,10 @@ void RegExpMacroAssemblerIrregexp::AdvanceCurrentPosition(int by) { ...@@ -187,11 +188,10 @@ void RegExpMacroAssemblerIrregexp::AdvanceCurrentPosition(int by) {
} }
void RegExpMacroAssemblerIrregexp::CheckCurrentPosition( void RegExpMacroAssemblerIrregexp::CheckGreedyLoop(
int register_index, Label* on_tos_equals_current_position) {
Label* on_equal) { Emit(BC_CHECK_GREEDY);
// TODO(erikcorry): Implement. EmitOrLink(on_tos_equals_current_position);
UNIMPLEMENTED();
} }
...@@ -203,6 +203,13 @@ void RegExpMacroAssemblerIrregexp::LoadCurrentCharacter(int cp_offset, ...@@ -203,6 +203,13 @@ void RegExpMacroAssemblerIrregexp::LoadCurrentCharacter(int cp_offset,
} }
// Emits a BC_LOAD_CURRENT_CHAR_UNCHECKED bytecode followed by cp_offset as a
// 32-bit operand.  No label operand is emitted for this bytecode.
void RegExpMacroAssemblerIrregexp::LoadCurrentCharacterUnchecked(
    int cp_offset) {
  // Opcode first, then its single operand.
  Emit(BC_LOAD_CURRENT_CHAR_UNCHECKED);
  Emit32(cp_offset);
}
void RegExpMacroAssemblerIrregexp::CheckCharacterLT(uc16 limit, void RegExpMacroAssemblerIrregexp::CheckCharacterLT(uc16 limit,
Label* on_less) { Label* on_less) {
Emit(BC_CHECK_LT); Emit(BC_CHECK_LT);
...@@ -323,11 +330,19 @@ void RegExpMacroAssemblerIrregexp::DispatchHighByteMap( ...@@ -323,11 +330,19 @@ void RegExpMacroAssemblerIrregexp::DispatchHighByteMap(
void RegExpMacroAssemblerIrregexp::CheckCharacters( void RegExpMacroAssemblerIrregexp::CheckCharacters(
Vector<const uc16> str, Vector<const uc16> str,
int cp_offset, int cp_offset,
Label* on_failure) { Label* on_failure,
bool check_end_of_string) {
// It is vital that this loop is backwards due to the unchecked character
// load below.
for (int i = str.length() - 1; i >= 0; i--) { for (int i = str.length() - 1; i >= 0; i--) {
if (check_end_of_string && i == str.length() - 1) {
Emit(BC_LOAD_CURRENT_CHAR); Emit(BC_LOAD_CURRENT_CHAR);
Emit32(cp_offset + i); Emit32(cp_offset + i);
EmitOrLink(on_failure); EmitOrLink(on_failure);
} else {
Emit(BC_LOAD_CURRENT_CHAR_UNCHECKED);
Emit32(cp_offset + i);
}
Emit(BC_CHECK_NOT_CHAR); Emit(BC_CHECK_NOT_CHAR);
Emit16(str[i]); Emit16(str[i]);
EmitOrLink(on_failure); EmitOrLink(on_failure);
...@@ -357,7 +372,9 @@ void RegExpMacroAssemblerIrregexp::IfRegisterGE(int register_index, ...@@ -357,7 +372,9 @@ void RegExpMacroAssemblerIrregexp::IfRegisterGE(int register_index,
} }
Handle<Object> RegExpMacroAssemblerIrregexp::GetCode() { Handle<Object> RegExpMacroAssemblerIrregexp::GetCode(Handle<String> source) {
Bind(&backtrack_);
Emit(BC_POP_BT);
Handle<ByteArray> array = Factory::NewByteArray(length()); Handle<ByteArray> array = Factory::NewByteArray(length());
Copy(array->GetDataStartAddress()); Copy(array->GetDataStartAddress());
return array; return array;
......
...@@ -62,14 +62,16 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler { ...@@ -62,14 +62,16 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
virtual void PushRegister(int register_index); virtual void PushRegister(int register_index);
virtual void AdvanceRegister(int reg, int by); // r[reg] += by. virtual void AdvanceRegister(int reg, int by); // r[reg] += by.
virtual void SetRegister(int register_index, int to); virtual void SetRegister(int register_index, int to);
virtual void WriteCurrentPositionToRegister(int reg); virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
virtual void ReadCurrentPositionFromRegister(int reg); virtual void ReadCurrentPositionFromRegister(int reg);
virtual void WriteStackPointerToRegister(int reg); virtual void WriteStackPointerToRegister(int reg);
virtual void ReadStackPointerFromRegister(int reg); virtual void ReadStackPointerFromRegister(int reg);
virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input); virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input);
virtual void LoadCurrentCharacterUnchecked(int cp_offset);
virtual void CheckCharacterLT(uc16 limit, Label* on_less); virtual void CheckCharacterLT(uc16 limit, Label* on_less);
virtual void CheckCharacterGT(uc16 limit, Label* on_greater); virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
virtual void CheckCharacter(uc16 c, Label* on_equal); virtual void CheckCharacter(uc16 c, Label* on_equal);
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
virtual void CheckNotAtStart(Label* on_not_at_start); virtual void CheckNotAtStart(Label* on_not_at_start);
virtual void CheckNotCharacter(uc16 c, Label* on_not_equal); virtual void CheckNotCharacter(uc16 c, Label* on_not_equal);
virtual void CheckNotCharacterAfterOr(uc16 c, uc16 mask, Label* on_not_equal); virtual void CheckNotCharacterAfterOr(uc16 c, uc16 mask, Label* on_not_equal);
...@@ -82,8 +84,8 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler { ...@@ -82,8 +84,8 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
virtual void CheckNotRegistersEqual(int reg1, int reg2, Label* on_not_equal); virtual void CheckNotRegistersEqual(int reg1, int reg2, Label* on_not_equal);
virtual void CheckCharacters(Vector<const uc16> str, virtual void CheckCharacters(Vector<const uc16> str,
int cp_offset, int cp_offset,
Label* on_failure); Label* on_failure,
virtual void CheckCurrentPosition(int register_index, Label* on_equal); bool check_end_of_string);
virtual void CheckBitmap(uc16 start, Label* bitmap, Label* on_zero); virtual void CheckBitmap(uc16 start, Label* bitmap, Label* on_zero);
virtual void DispatchHalfNibbleMap(uc16 start, virtual void DispatchHalfNibbleMap(uc16 start,
Label* half_nibble_map, Label* half_nibble_map,
...@@ -98,7 +100,7 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler { ...@@ -98,7 +100,7 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
virtual void IfRegisterGE(int register_index, int comparand, Label* if_ge); virtual void IfRegisterGE(int register_index, int comparand, Label* if_ge);
virtual IrregexpImplementation Implementation(); virtual IrregexpImplementation Implementation();
virtual Handle<Object> GetCode(); virtual Handle<Object> GetCode(Handle<String> source);
private: private:
void Expand(); void Expand();
// Code and bitmap emission. // Code and bitmap emission.
...@@ -109,14 +111,13 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler { ...@@ -109,14 +111,13 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
int length(); int length();
void Copy(Address a); void Copy(Address a);
// The buffer into which code and relocation info are generated. // The buffer into which code and relocation info are generated.
Vector<byte> buffer_; Vector<byte> buffer_;
// The program counter. // The program counter.
int pc_; int pc_;
// True if the assembler owns the buffer, false if buffer is external. // True if the assembler owns the buffer, false if buffer is external.
bool own_buffer_; bool own_buffer_;
Label backtrack_;
DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpMacroAssemblerIrregexp); DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpMacroAssemblerIrregexp);
}; };
......
...@@ -64,6 +64,12 @@ void RegExpMacroAssemblerTracer::AdvanceCurrentPosition(int by) { ...@@ -64,6 +64,12 @@ void RegExpMacroAssemblerTracer::AdvanceCurrentPosition(int by) {
} }
// Prints a trace line for the call, then forwards it unchanged to the wrapped
// assembler.
void RegExpMacroAssemblerTracer::CheckGreedyLoop(Label* label) {
  // Trace first so the line is printed before the wrapped assembler runs.
  PrintF(" CheckGreedyLoop(label[%08x]);\n\n",
         label);
  assembler_->CheckGreedyLoop(label);
}
void RegExpMacroAssemblerTracer::PopCurrentPosition() { void RegExpMacroAssemblerTracer::PopCurrentPosition() {
PrintF(" PopCurrentPosition();\n"); PrintF(" PopCurrentPosition();\n");
assembler_->PopCurrentPosition(); assembler_->PopCurrentPosition();
...@@ -130,9 +136,12 @@ void RegExpMacroAssemblerTracer::SetRegister(int register_index, int to) { ...@@ -130,9 +136,12 @@ void RegExpMacroAssemblerTracer::SetRegister(int register_index, int to) {
} }
void RegExpMacroAssemblerTracer::WriteCurrentPositionToRegister(int reg) { void RegExpMacroAssemblerTracer::WriteCurrentPositionToRegister(int reg,
PrintF(" WriteCurrentPositionToRegister(register=%d);\n", reg); int cp_offset) {
assembler_->WriteCurrentPositionToRegister(reg); PrintF(" WriteCurrentPositionToRegister(register=%d,cp_offset=%d);\n",
reg,
cp_offset);
assembler_->WriteCurrentPositionToRegister(reg, cp_offset);
} }
...@@ -156,12 +165,20 @@ void RegExpMacroAssemblerTracer::ReadStackPointerFromRegister(int reg) { ...@@ -156,12 +165,20 @@ void RegExpMacroAssemblerTracer::ReadStackPointerFromRegister(int reg) {
void RegExpMacroAssemblerTracer::LoadCurrentCharacter(int cp_offset, void RegExpMacroAssemblerTracer::LoadCurrentCharacter(int cp_offset,
Label* on_end_of_input) { Label* on_end_of_input) {
PrintF(" LoadCurrentCharacter(cp_offset=%d, label[%08x]);\n", cp_offset, PrintF(" LoadCurrentCharacter(cp_offset=%d, label[%08x]);\n",
cp_offset,
on_end_of_input); on_end_of_input);
assembler_->LoadCurrentCharacter(cp_offset, on_end_of_input); assembler_->LoadCurrentCharacter(cp_offset, on_end_of_input);
} }
// Prints a trace line showing cp_offset, then forwards the call unchanged to
// the wrapped assembler.
void RegExpMacroAssemblerTracer::LoadCurrentCharacterUnchecked(int cp_offset) {
  PrintF(" LoadCurrentCharacterUnchecked(cp_offset=%d);\n", cp_offset);
  assembler_->LoadCurrentCharacterUnchecked(cp_offset);
}
void RegExpMacroAssemblerTracer::CheckCharacterLT(uc16 limit, Label* on_less) { void RegExpMacroAssemblerTracer::CheckCharacterLT(uc16 limit, Label* on_less) {
PrintF(" CheckCharacterLT(c='u%04x', label[%08x]);\n", limit, on_less); PrintF(" CheckCharacterLT(c='u%04x', label[%08x]);\n", limit, on_less);
assembler_->CheckCharacterLT(limit, on_less); assembler_->CheckCharacterLT(limit, on_less);
...@@ -242,21 +259,15 @@ void RegExpMacroAssemblerTracer::CheckNotRegistersEqual(int reg1, ...@@ -242,21 +259,15 @@ void RegExpMacroAssemblerTracer::CheckNotRegistersEqual(int reg1,
void RegExpMacroAssemblerTracer::CheckCharacters(Vector<const uc16> str, void RegExpMacroAssemblerTracer::CheckCharacters(Vector<const uc16> str,
int cp_offset, int cp_offset,
Label* on_failure) { Label* on_failure,
PrintF(" CheckCharacters(str=\""); bool check_end_of_string) {
PrintF(" %s(str=\"",
check_end_of_string ? "CheckCharacters" : "CheckCharactersUnchecked");
for (int i = 0; i < str.length(); i++) { for (int i = 0; i < str.length(); i++) {
PrintF("u%04x", str[i]); PrintF("u%04x", str[i]);
} }
PrintF("\", cp_offset=%d, label[%08x])\n", cp_offset, on_failure); PrintF("\", cp_offset=%d, label[%08x])\n", cp_offset, on_failure);
assembler_->CheckCharacters(str, cp_offset, on_failure); assembler_->CheckCharacters(str, cp_offset, on_failure, check_end_of_string);
}
void RegExpMacroAssemblerTracer::CheckCurrentPosition(int register_index,
Label* on_equal) {
PrintF(" CheckCurrentPosition(register=%d, label[%08x]);\n", register_index,
on_equal);
assembler_->CheckCurrentPosition(register_index, on_equal);
} }
...@@ -334,9 +345,9 @@ RegExpMacroAssembler::IrregexpImplementation ...@@ -334,9 +345,9 @@ RegExpMacroAssembler::IrregexpImplementation
} }
Handle<Object> RegExpMacroAssemblerTracer::GetCode() { Handle<Object> RegExpMacroAssemblerTracer::GetCode(Handle<String> source) {
PrintF(" GetCode();\n"); PrintF(" GetCode(%s);\n", *(source->ToCString()));
return assembler_->GetCode(); return assembler_->GetCode(source);
} }
}} // namespace v8::internal }} // namespace v8::internal
...@@ -47,10 +47,9 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler { ...@@ -47,10 +47,9 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
virtual void CheckCharacters( virtual void CheckCharacters(
Vector<const uc16> str, Vector<const uc16> str,
int cp_offset, int cp_offset,
Label* on_failure); Label* on_failure,
virtual void CheckCurrentPosition( bool check_end_of_string);
int register_index, virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
Label* on_equal);
virtual void CheckNotAtStart(Label* on_not_at_start); virtual void CheckNotAtStart(Label* on_not_at_start);
virtual void CheckNotBackReference(int start_reg, Label* on_no_match); virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg, virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
...@@ -77,12 +76,13 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler { ...@@ -77,12 +76,13 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
const Vector<Label*>& destinations); const Vector<Label*>& destinations);
virtual void EmitOrLink(Label* label); virtual void EmitOrLink(Label* label);
virtual void Fail(); virtual void Fail();
virtual Handle<Object> GetCode(); virtual Handle<Object> GetCode(Handle<String> source);
virtual void GoTo(Label* label); virtual void GoTo(Label* label);
virtual void IfRegisterGE(int reg, int comparand, Label* if_ge); virtual void IfRegisterGE(int reg, int comparand, Label* if_ge);
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt); virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
virtual IrregexpImplementation Implementation(); virtual IrregexpImplementation Implementation();
virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input); virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input);
virtual void LoadCurrentCharacterUnchecked(int cp_offset);
virtual void PopCurrentPosition(); virtual void PopCurrentPosition();
virtual void PopRegister(int register_index); virtual void PopRegister(int register_index);
virtual void PushBacktrack(Label* label); virtual void PushBacktrack(Label* label);
...@@ -92,7 +92,7 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler { ...@@ -92,7 +92,7 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
virtual void ReadStackPointerFromRegister(int reg); virtual void ReadStackPointerFromRegister(int reg);
virtual void SetRegister(int register_index, int to); virtual void SetRegister(int register_index, int to);
virtual void Succeed(); virtual void Succeed();
virtual void WriteCurrentPositionToRegister(int reg); virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
virtual void WriteStackPointerToRegister(int reg); virtual void WriteStackPointerToRegister(int reg);
private: private:
RegExpMacroAssembler* assembler_; RegExpMacroAssembler* assembler_;
......
...@@ -62,19 +62,17 @@ class RegExpMacroAssembler { ...@@ -62,19 +62,17 @@ class RegExpMacroAssembler {
virtual void CheckCharacterGT(uc16 limit, Label* on_greater) = 0; virtual void CheckCharacterGT(uc16 limit, Label* on_greater) = 0;
virtual void CheckCharacterLT(uc16 limit, Label* on_less) = 0; virtual void CheckCharacterLT(uc16 limit, Label* on_less) = 0;
// Check the current character for a match with a literal string. If we // Check the current character for a match with a literal string. If we
// fail to match then goto the on_failure label. End of input always // fail to match then goto the on_failure label. If check_eos is set then
// matches. If the label is NULL then we should pop a backtrack address off // the end of input always fails. If check_eos is clear then it is the
// the stack abnd go to that. // caller's responsibility to ensure that the end of string is not hit.
// If the label is NULL then we should pop a backtrack address off
// the stack and go to that.
virtual void CheckCharacters( virtual void CheckCharacters(
Vector<const uc16> str, Vector<const uc16> str,
int cp_offset, int cp_offset,
Label* on_failure) = 0; Label* on_failure,
// Check the current input position against a register. If the register is bool check_eos) = 0;
// equal to the current position then go to the label. If the label is NULL virtual void CheckGreedyLoop(Label* on_tos_equals_current_position) = 0;
// then backtrack instead.
virtual void CheckCurrentPosition(
int register_index,
Label* on_equal) = 0;
virtual void CheckNotAtStart(Label* on_not_at_start) = 0; virtual void CheckNotAtStart(Label* on_not_at_start) = 0;
virtual void CheckNotBackReference(int start_reg, Label* on_no_match) = 0; virtual void CheckNotBackReference(int start_reg, Label* on_no_match) = 0;
virtual void CheckNotBackReferenceIgnoreCase(int start_reg, virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
...@@ -115,7 +113,7 @@ class RegExpMacroAssembler { ...@@ -115,7 +113,7 @@ class RegExpMacroAssembler {
const Vector<Label*>& destinations) = 0; const Vector<Label*>& destinations) = 0;
virtual void EmitOrLink(Label* label) = 0; virtual void EmitOrLink(Label* label) = 0;
virtual void Fail() = 0; virtual void Fail() = 0;
virtual Handle<Object> GetCode() = 0; virtual Handle<Object> GetCode(Handle<String> source) = 0;
virtual void GoTo(Label* label) = 0; virtual void GoTo(Label* label) = 0;
// Check whether a register is >= a given constant and go to a label if it // Check whether a register is >= a given constant and go to a label if it
// is. Backtracks instead if the label is NULL. // is. Backtracks instead if the label is NULL.
...@@ -125,6 +123,7 @@ class RegExpMacroAssembler { ...@@ -125,6 +123,7 @@ class RegExpMacroAssembler {
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt) = 0; virtual void IfRegisterLT(int reg, int comparand, Label* if_lt) = 0;
virtual IrregexpImplementation Implementation() = 0; virtual IrregexpImplementation Implementation() = 0;
virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input) = 0; virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input) = 0;
virtual void LoadCurrentCharacterUnchecked(int cp_offset) = 0;
virtual void PopCurrentPosition() = 0; virtual void PopCurrentPosition() = 0;
virtual void PopRegister(int register_index) = 0; virtual void PopRegister(int register_index) = 0;
virtual void PushBacktrack(Label* label) = 0; virtual void PushBacktrack(Label* label) = 0;
...@@ -134,7 +133,7 @@ class RegExpMacroAssembler { ...@@ -134,7 +133,7 @@ class RegExpMacroAssembler {
virtual void ReadStackPointerFromRegister(int reg) = 0; virtual void ReadStackPointerFromRegister(int reg) = 0;
virtual void SetRegister(int register_index, int to) = 0; virtual void SetRegister(int register_index, int to) = 0;
virtual void Succeed() = 0; virtual void Succeed() = 0;
virtual void WriteCurrentPositionToRegister(int reg) = 0; virtual void WriteCurrentPositionToRegister(int reg, int cp_offset) = 0;
virtual void WriteStackPointerToRegister(int reg) = 0; virtual void WriteStackPointerToRegister(int reg) = 0;
private: private:
......
...@@ -362,7 +362,8 @@ static RegExpNode* Compile(const char* input, bool multiline) { ...@@ -362,7 +362,8 @@ static RegExpNode* Compile(const char* input, bool multiline) {
if (!v8::internal::ParseRegExp(&reader, multiline, &result)) if (!v8::internal::ParseRegExp(&reader, multiline, &result))
return NULL; return NULL;
RegExpNode* node = NULL; RegExpNode* node = NULL;
RegExpEngine::Compile(&result, &node, false, multiline); Handle<String> pattern = Factory::NewStringFromUtf8(CStrVector(input));
RegExpEngine::Compile(&result, &node, false, multiline, pattern);
return node; return node;
} }
...@@ -520,16 +521,16 @@ TEST(MacroAssembler) { ...@@ -520,16 +521,16 @@ TEST(MacroAssembler) {
m.Fail(); m.Fail();
m.Bind(&start); m.Bind(&start);
m.PushBacktrack(&fail2); m.PushBacktrack(&fail2);
m.CheckCharacters(foo, 0, &fail); m.CheckCharacters(foo, 0, &fail, true);
m.WriteCurrentPositionToRegister(0); m.WriteCurrentPositionToRegister(0, 0);
m.PushCurrentPosition(); m.PushCurrentPosition();
m.AdvanceCurrentPosition(3); m.AdvanceCurrentPosition(3);
m.WriteCurrentPositionToRegister(1); m.WriteCurrentPositionToRegister(1, 0);
m.PopCurrentPosition(); m.PopCurrentPosition();
m.AdvanceCurrentPosition(1); m.AdvanceCurrentPosition(1);
m.WriteCurrentPositionToRegister(2); m.WriteCurrentPositionToRegister(2, 0);
m.AdvanceCurrentPosition(1); m.AdvanceCurrentPosition(1);
m.WriteCurrentPositionToRegister(3); m.WriteCurrentPositionToRegister(3, 0);
m.Succeed(); m.Succeed();
m.Bind(&fail); m.Bind(&fail);
...@@ -542,7 +543,8 @@ TEST(MacroAssembler) { ...@@ -542,7 +543,8 @@ TEST(MacroAssembler) {
v8::HandleScope scope; v8::HandleScope scope;
Handle<ByteArray> array = Handle<ByteArray>::cast(m.GetCode()); Handle<String> source = Factory::NewStringFromAscii(CStrVector("^f(o)o"));
Handle<ByteArray> array = Handle<ByteArray>::cast(m.GetCode(source));
int captures[5]; int captures[5];
Handle<String> f1 = Handle<String> f1 =
...@@ -576,7 +578,8 @@ TEST(MacroAssemblerIA32Success) { ...@@ -576,7 +578,8 @@ TEST(MacroAssemblerIA32Success) {
m.Succeed(); m.Succeed();
Handle<Object> code_object = m.GetCode(); Handle<String> source = Factory::NewStringFromAscii(CStrVector(""));
Handle<Object> code_object = m.GetCode(source);
Handle<Code> code = Handle<Code>::cast(code_object); Handle<Code> code = Handle<Code>::cast(code_object);
int captures[4] = {42, 37, 87, 117}; int captures[4] = {42, 37, 87, 117};
...@@ -614,15 +617,16 @@ TEST(MacroAssemblerIA32Simple) { ...@@ -614,15 +617,16 @@ TEST(MacroAssemblerIA32Simple) {
Vector<const uc16> foo(foo_chars, 3); Vector<const uc16> foo(foo_chars, 3);
Label fail; Label fail;
m.CheckCharacters(foo, 0, &fail); m.CheckCharacters(foo, 0, &fail, true);
m.WriteCurrentPositionToRegister(0); m.WriteCurrentPositionToRegister(0, 0);
m.AdvanceCurrentPosition(3); m.AdvanceCurrentPosition(3);
m.WriteCurrentPositionToRegister(1); m.WriteCurrentPositionToRegister(1, 0);
m.Succeed(); m.Succeed();
m.Bind(&fail); m.Bind(&fail);
m.Fail(); m.Fail();
Handle<Object> code_object = m.GetCode(); Handle<String> source = Factory::NewStringFromAscii(CStrVector("^foo"));
Handle<Object> code_object = m.GetCode(source);
Handle<Code> code = Handle<Code>::cast(code_object); Handle<Code> code = Handle<Code>::cast(code_object);
int captures[4] = {42, 37, 87, 117}; int captures[4] = {42, 37, 87, 117};
...@@ -675,15 +679,16 @@ TEST(MacroAssemblerIA32SimpleUC16) { ...@@ -675,15 +679,16 @@ TEST(MacroAssemblerIA32SimpleUC16) {
Vector<const uc16> foo(foo_chars, 3); Vector<const uc16> foo(foo_chars, 3);
Label fail; Label fail;
m.CheckCharacters(foo, 0, &fail); m.CheckCharacters(foo, 0, &fail, true);
m.WriteCurrentPositionToRegister(0); m.WriteCurrentPositionToRegister(0, 0);
m.AdvanceCurrentPosition(3); m.AdvanceCurrentPosition(3);
m.WriteCurrentPositionToRegister(1); m.WriteCurrentPositionToRegister(1, 0);
m.Succeed(); m.Succeed();
m.Bind(&fail); m.Bind(&fail);
m.Fail(); m.Fail();
Handle<Object> code_object = m.GetCode(); Handle<String> source = Factory::NewStringFromAscii(CStrVector("^foo"));
Handle<Object> code_object = m.GetCode(source);
Handle<Code> code = Handle<Code>::cast(code_object); Handle<Code> code = Handle<Code>::cast(code_object);
int captures[4] = {42, 37, 87, 117}; int captures[4] = {42, 37, 87, 117};
...@@ -735,9 +740,6 @@ TEST(MacroAssemblerIA32Backtrack) { ...@@ -735,9 +740,6 @@ TEST(MacroAssemblerIA32Backtrack) {
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 0); RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 0);
uc16 foo_chars[3] = {'f', 'o', 'o'};
Vector<const uc16> foo(foo_chars, 3);
Label fail; Label fail;
Label backtrack; Label backtrack;
m.LoadCurrentCharacter(10, &fail); m.LoadCurrentCharacter(10, &fail);
...@@ -749,7 +751,8 @@ TEST(MacroAssemblerIA32Backtrack) { ...@@ -749,7 +751,8 @@ TEST(MacroAssemblerIA32Backtrack) {
m.Bind(&backtrack); m.Bind(&backtrack);
m.Fail(); m.Fail();
Handle<Object> code_object = m.GetCode(); Handle<String> source = Factory::NewStringFromAscii(CStrVector(".........."));
Handle<Object> code_object = m.GetCode(source);
Handle<Code> code = Handle<Code>::cast(code_object); Handle<Code> code = Handle<Code>::cast(code_object);
Handle<String> input = Factory::NewStringFromAscii(CStrVector("foofoo")); Handle<String> input = Factory::NewStringFromAscii(CStrVector("foofoo"));
...@@ -778,9 +781,9 @@ TEST(MacroAssemblerIA32BackReference) { ...@@ -778,9 +781,9 @@ TEST(MacroAssemblerIA32BackReference) {
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 3); RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 3);
m.WriteCurrentPositionToRegister(0); m.WriteCurrentPositionToRegister(0, 0);
m.AdvanceCurrentPosition(2); m.AdvanceCurrentPosition(2);
m.WriteCurrentPositionToRegister(1); m.WriteCurrentPositionToRegister(1, 0);
Label nomatch; Label nomatch;
m.CheckNotBackReference(0, &nomatch); m.CheckNotBackReference(0, &nomatch);
m.Fail(); m.Fail();
...@@ -788,12 +791,13 @@ TEST(MacroAssemblerIA32BackReference) { ...@@ -788,12 +791,13 @@ TEST(MacroAssemblerIA32BackReference) {
m.AdvanceCurrentPosition(2); m.AdvanceCurrentPosition(2);
Label missing_match; Label missing_match;
m.CheckNotBackReference(0, &missing_match); m.CheckNotBackReference(0, &missing_match);
m.WriteCurrentPositionToRegister(2); m.WriteCurrentPositionToRegister(2, 0);
m.Succeed(); m.Succeed();
m.Bind(&missing_match); m.Bind(&missing_match);
m.Fail(); m.Fail();
Handle<Object> code_object = m.GetCode(); Handle<String> source = Factory::NewStringFromAscii(CStrVector("^(..)..\1"));
Handle<Object> code_object = m.GetCode(source);
Handle<Code> code = Handle<Code>::cast(code_object); Handle<Code> code = Handle<Code>::cast(code_object);
Handle<String> input = Factory::NewStringFromAscii(CStrVector("fooofo")); Handle<String> input = Factory::NewStringFromAscii(CStrVector("fooofo"));
...@@ -826,9 +830,6 @@ TEST(MacroAssemblerIA32AtStart) { ...@@ -826,9 +830,6 @@ TEST(MacroAssemblerIA32AtStart) {
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 0); RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 0);
uc16 foo_chars[3] = {'f', 'o', 'o'};
Vector<const uc16> foo(foo_chars, 3);
Label not_at_start, newline, fail; Label not_at_start, newline, fail;
m.CheckNotAtStart(&not_at_start); m.CheckNotAtStart(&not_at_start);
// Check that prevchar = '\n' and current = 'f'. // Check that prevchar = '\n' and current = 'f'.
...@@ -850,7 +851,8 @@ TEST(MacroAssemblerIA32AtStart) { ...@@ -850,7 +851,8 @@ TEST(MacroAssemblerIA32AtStart) {
m.CheckNotCharacter('b', &fail); m.CheckNotCharacter('b', &fail);
m.Succeed(); m.Succeed();
Handle<Object> code_object = m.GetCode(); Handle<String> source = Factory::NewStringFromAscii(CStrVector("(^f|ob)"));
Handle<Object> code_object = m.GetCode(source);
Handle<Code> code = Handle<Code>::cast(code_object); Handle<Code> code = Handle<Code>::cast(code_object);
Handle<String> input = Factory::NewStringFromAscii(CStrVector("foobar")); Handle<String> input = Factory::NewStringFromAscii(CStrVector("foobar"));
...@@ -893,10 +895,10 @@ TEST(MacroAssemblerIA32BackRefNoCase) { ...@@ -893,10 +895,10 @@ TEST(MacroAssemblerIA32BackRefNoCase) {
Label fail, succ; Label fail, succ;
m.WriteCurrentPositionToRegister(0); m.WriteCurrentPositionToRegister(0, 0);
m.WriteCurrentPositionToRegister(2); m.WriteCurrentPositionToRegister(2, 0);
m.AdvanceCurrentPosition(3); m.AdvanceCurrentPosition(3);
m.WriteCurrentPositionToRegister(3); m.WriteCurrentPositionToRegister(3, 0);
m.CheckNotBackReferenceIgnoreCase(2, &fail); // Match "AbC". m.CheckNotBackReferenceIgnoreCase(2, &fail); // Match "AbC".
m.CheckNotBackReferenceIgnoreCase(2, &fail); // Match "ABC". m.CheckNotBackReferenceIgnoreCase(2, &fail); // Match "ABC".
Label expected_fail; Label expected_fail;
...@@ -910,10 +912,12 @@ TEST(MacroAssemblerIA32BackRefNoCase) { ...@@ -910,10 +912,12 @@ TEST(MacroAssemblerIA32BackRefNoCase) {
m.Fail(); m.Fail();
m.Bind(&succ); m.Bind(&succ);
m.WriteCurrentPositionToRegister(1); m.WriteCurrentPositionToRegister(1, 0);
m.Succeed(); m.Succeed();
Handle<Object> code_object = m.GetCode(); Handle<String> source =
Factory::NewStringFromAscii(CStrVector("^(abc)\1\1(?!\1)...(?!\1)"));
Handle<Object> code_object = m.GetCode(source);
Handle<Code> code = Handle<Code>::cast(code_object); Handle<Code> code = Handle<Code>::cast(code_object);
Handle<String> input = Handle<String> input =
...@@ -955,13 +959,13 @@ TEST(MacroAssemblerIA32Registers) { ...@@ -955,13 +959,13 @@ TEST(MacroAssemblerIA32Registers) {
enum registers { out1, out2, out3, out4, out5, sp, loop_cnt }; enum registers { out1, out2, out3, out4, out5, sp, loop_cnt };
Label fail; Label fail;
Label backtrack; Label backtrack;
m.WriteCurrentPositionToRegister(out1); // Output: [0] m.WriteCurrentPositionToRegister(out1, 0); // Output: [0]
m.PushRegister(out1); m.PushRegister(out1);
m.PushBacktrack(&backtrack); m.PushBacktrack(&backtrack);
m.WriteStackPointerToRegister(sp); m.WriteStackPointerToRegister(sp);
// Fill stack and registers // Fill stack and registers
m.AdvanceCurrentPosition(2); m.AdvanceCurrentPosition(2);
m.WriteCurrentPositionToRegister(out1); m.WriteCurrentPositionToRegister(out1, 0);
m.PushRegister(out1); m.PushRegister(out1);
m.PushBacktrack(&fail); m.PushBacktrack(&fail);
// Drop backtrack stack frames. // Drop backtrack stack frames.
...@@ -977,7 +981,7 @@ TEST(MacroAssemblerIA32Registers) { ...@@ -977,7 +981,7 @@ TEST(MacroAssemblerIA32Registers) {
m.PopRegister(out1); m.PopRegister(out1);
m.ReadCurrentPositionFromRegister(out1); m.ReadCurrentPositionFromRegister(out1);
m.AdvanceCurrentPosition(3); m.AdvanceCurrentPosition(3);
m.WriteCurrentPositionToRegister(out2); // [0,3] m.WriteCurrentPositionToRegister(out2, 0); // [0,3]
Label loop; Label loop;
m.SetRegister(loop_cnt, 0); // loop counter m.SetRegister(loop_cnt, 0); // loop counter
...@@ -985,7 +989,7 @@ TEST(MacroAssemblerIA32Registers) { ...@@ -985,7 +989,7 @@ TEST(MacroAssemblerIA32Registers) {
m.AdvanceRegister(loop_cnt, 1); m.AdvanceRegister(loop_cnt, 1);
m.AdvanceCurrentPosition(1); m.AdvanceCurrentPosition(1);
m.IfRegisterLT(loop_cnt, 3, &loop); m.IfRegisterLT(loop_cnt, 3, &loop);
m.WriteCurrentPositionToRegister(out3); // [0,3,6] m.WriteCurrentPositionToRegister(out3, 0); // [0,3,6]
Label loop2; Label loop2;
m.SetRegister(loop_cnt, 2); // loop counter m.SetRegister(loop_cnt, 2); // loop counter
...@@ -993,24 +997,29 @@ TEST(MacroAssemblerIA32Registers) { ...@@ -993,24 +997,29 @@ TEST(MacroAssemblerIA32Registers) {
m.AdvanceRegister(loop_cnt, -1); m.AdvanceRegister(loop_cnt, -1);
m.AdvanceCurrentPosition(1); m.AdvanceCurrentPosition(1);
m.IfRegisterGE(loop_cnt, 0, &loop2); m.IfRegisterGE(loop_cnt, 0, &loop2);
m.WriteCurrentPositionToRegister(out4); // [0,3,6,9] m.WriteCurrentPositionToRegister(out4, 0); // [0,3,6,9]
Label loop3; Label loop3;
Label exit_loop3; Label exit_loop3;
m.PushRegister(out4);
m.PushRegister(out4);
m.ReadCurrentPositionFromRegister(out3); m.ReadCurrentPositionFromRegister(out3);
m.Bind(&loop3); m.Bind(&loop3);
m.AdvanceCurrentPosition(1); m.AdvanceCurrentPosition(1);
m.CheckCurrentPosition(out4, &exit_loop3); m.CheckGreedyLoop(&exit_loop3);
m.GoTo(&loop3); m.GoTo(&loop3);
m.Bind(&exit_loop3); m.Bind(&exit_loop3);
m.WriteCurrentPositionToRegister(out5); // [0,3,6,9,9] m.PopCurrentPosition();
m.WriteCurrentPositionToRegister(out5, 0); // [0,3,6,9,9]
m.Succeed(); m.Succeed();
m.Bind(&fail); m.Bind(&fail);
m.Fail(); m.Fail();
Handle<Object> code_object = m.GetCode(); Handle<String> source =
Factory::NewStringFromAscii(CStrVector("<loop test>"));
Handle<Object> code_object = m.GetCode(source);
Handle<Code> code = Handle<Code>::cast(code_object); Handle<Code> code = Handle<Code>::cast(code_object);
// String long enough for test (content doesn't matter). // String long enough for test (content doesn't matter).
...@@ -1291,5 +1300,5 @@ TEST(CharClassDifference) { ...@@ -1291,5 +1300,5 @@ TEST(CharClassDifference) {
TEST(Graph) { TEST(Graph) {
V8::Initialize(NULL); V8::Initialize(NULL);
Execute("\\b\\w", false, true); Execute("(?=[d#.])", false, true);
} }
...@@ -286,3 +286,23 @@ for (var i = 0; i < 128; i++) { ...@@ -286,3 +286,23 @@ for (var i = 0; i < 128; i++) {
} }
assertFalse(/f(o)$\1/.test('foo'), "backref detects at_end"); assertFalse(/f(o)$\1/.test('foo'), "backref detects at_end");
// Check that we don't read past the end of the string.
assertFalse(/f/.test('b'));
assertFalse(/[abc]f/.test('x'));
assertFalse(/[abc]f/.test('xa'));
assertFalse(/[abc]</.test('x'));
assertFalse(/[abc]</.test('xa'));
assertFalse(/f/i.test('b'));
assertFalse(/[abc]f/i.test('x'));
assertFalse(/[abc]f/i.test('xa'));
assertFalse(/[abc]</i.test('x'));
assertFalse(/[abc]</i.test('xa'));
assertFalse(/f[abc]/.test('x'));
assertFalse(/f[abc]/.test('xa'));
assertFalse(/<[abc]/.test('x'));
assertFalse(/<[abc]/.test('xa'));
assertFalse(/f[abc]/i.test('x'));
assertFalse(/f[abc]/i.test('xa'));
assertFalse(/<[abc]/i.test('x'));
assertFalse(/<[abc]/i.test('xa'));
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment