Commit ba09ec5e authored by erik.corry@gmail.com

Irregexp:

* Facility for generating a node several ways.  This allows
  code to be generated for a node knowing where it is trying
  to match relative to the 'current position' and it allows
  code to be generated that knows where to backtrack to.  Both
  allow dramatic reductions in the amount of popping and pushing
  on the stack and the number of indirect jumps.
* Generate special backtracking for greedy quantifiers on
  constant-length atoms.  This allows .* to run in constant
  space relative to input string size.
* When we are checking a long sequence of characters or character
  classes in the input then we do them right to left and only the
  first (rightmost) needs to check for end-of-string.
* Record the regexp source pattern in the profile log instead of just the generic "(Compiled RegExp)" label.
* Nodes no longer contain an on_failure_ node.  This was only used
  for lookaheads and they are now handled with a choice node instead.
Review URL: http://codereview.chromium.org/12900

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@930 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent f306b978
...@@ -1216,8 +1216,7 @@ class RegExpTree: public ZoneObject { ...@@ -1216,8 +1216,7 @@ class RegExpTree: public ZoneObject {
virtual ~RegExpTree() { } virtual ~RegExpTree() { }
virtual void* Accept(RegExpVisitor* visitor, void* data) = 0; virtual void* Accept(RegExpVisitor* visitor, void* data) = 0;
virtual RegExpNode* ToNode(RegExpCompiler* compiler, virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success, RegExpNode* on_success) = 0;
RegExpNode* on_failure) = 0;
virtual bool IsTextElement() { return false; } virtual bool IsTextElement() { return false; }
virtual void AppendToText(RegExpText* text); virtual void AppendToText(RegExpText* text);
SmartPointer<const char> ToString(); SmartPointer<const char> ToString();
...@@ -1235,8 +1234,7 @@ class RegExpDisjunction: public RegExpTree { ...@@ -1235,8 +1234,7 @@ class RegExpDisjunction: public RegExpTree {
: alternatives_(alternatives) { } : alternatives_(alternatives) { }
virtual void* Accept(RegExpVisitor* visitor, void* data); virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler, virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success, RegExpNode* on_success);
RegExpNode* on_failure);
virtual RegExpDisjunction* AsDisjunction(); virtual RegExpDisjunction* AsDisjunction();
virtual bool IsDisjunction(); virtual bool IsDisjunction();
ZoneList<RegExpTree*>* alternatives() { return alternatives_; } ZoneList<RegExpTree*>* alternatives() { return alternatives_; }
...@@ -1250,8 +1248,7 @@ class RegExpAlternative: public RegExpTree { ...@@ -1250,8 +1248,7 @@ class RegExpAlternative: public RegExpTree {
explicit RegExpAlternative(ZoneList<RegExpTree*>* nodes) : nodes_(nodes) { } explicit RegExpAlternative(ZoneList<RegExpTree*>* nodes) : nodes_(nodes) { }
virtual void* Accept(RegExpVisitor* visitor, void* data); virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler, virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success, RegExpNode* on_success);
RegExpNode* on_failure);
virtual RegExpAlternative* AsAlternative(); virtual RegExpAlternative* AsAlternative();
virtual bool IsAlternative(); virtual bool IsAlternative();
ZoneList<RegExpTree*>* nodes() { return nodes_; } ZoneList<RegExpTree*>* nodes() { return nodes_; }
...@@ -1265,8 +1262,7 @@ class RegExpText: public RegExpTree { ...@@ -1265,8 +1262,7 @@ class RegExpText: public RegExpTree {
RegExpText() : elements_(2) { } RegExpText() : elements_(2) { }
virtual void* Accept(RegExpVisitor* visitor, void* data); virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler, virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success, RegExpNode* on_success);
RegExpNode* on_failure);
virtual RegExpText* AsText(); virtual RegExpText* AsText();
virtual bool IsText(); virtual bool IsText();
virtual bool IsTextElement() { return true; } virtual bool IsTextElement() { return true; }
...@@ -1291,8 +1287,7 @@ class RegExpAssertion: public RegExpTree { ...@@ -1291,8 +1287,7 @@ class RegExpAssertion: public RegExpTree {
explicit RegExpAssertion(Type type) : type_(type) { } explicit RegExpAssertion(Type type) : type_(type) { }
virtual void* Accept(RegExpVisitor* visitor, void* data); virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler, virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success, RegExpNode* on_success);
RegExpNode* on_failure);
virtual RegExpAssertion* AsAssertion(); virtual RegExpAssertion* AsAssertion();
virtual bool IsAssertion(); virtual bool IsAssertion();
Type type() { return type_; } Type type() { return type_; }
...@@ -1313,8 +1308,7 @@ class RegExpCharacterClass: public RegExpTree { ...@@ -1313,8 +1308,7 @@ class RegExpCharacterClass: public RegExpTree {
} }
virtual void* Accept(RegExpVisitor* visitor, void* data); virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler, virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success, RegExpNode* on_success);
RegExpNode* on_failure);
virtual RegExpCharacterClass* AsCharacterClass(); virtual RegExpCharacterClass* AsCharacterClass();
virtual bool IsCharacterClass(); virtual bool IsCharacterClass();
virtual bool IsTextElement() { return true; } virtual bool IsTextElement() { return true; }
...@@ -1332,8 +1326,7 @@ class RegExpAtom: public RegExpTree { ...@@ -1332,8 +1326,7 @@ class RegExpAtom: public RegExpTree {
explicit RegExpAtom(Vector<const uc16> data) : data_(data) { } explicit RegExpAtom(Vector<const uc16> data) : data_(data) { }
virtual void* Accept(RegExpVisitor* visitor, void* data); virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler, virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success, RegExpNode* on_success);
RegExpNode* on_failure);
virtual RegExpAtom* AsAtom(); virtual RegExpAtom* AsAtom();
virtual bool IsAtom(); virtual bool IsAtom();
virtual bool IsTextElement() { return true; } virtual bool IsTextElement() { return true; }
...@@ -1353,15 +1346,13 @@ class RegExpQuantifier: public RegExpTree { ...@@ -1353,15 +1346,13 @@ class RegExpQuantifier: public RegExpTree {
body_(body) { } body_(body) { }
virtual void* Accept(RegExpVisitor* visitor, void* data); virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler, virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success, RegExpNode* on_success);
RegExpNode* on_failure);
static RegExpNode* ToNode(int min, static RegExpNode* ToNode(int min,
int max, int max,
bool is_greedy, bool is_greedy,
RegExpTree* body, RegExpTree* body,
RegExpCompiler* compiler, RegExpCompiler* compiler,
RegExpNode* on_success, RegExpNode* on_success);
RegExpNode* on_failure);
virtual RegExpQuantifier* AsQuantifier(); virtual RegExpQuantifier* AsQuantifier();
virtual bool IsQuantifier(); virtual bool IsQuantifier();
int min() { return min_; } int min() { return min_; }
...@@ -1391,13 +1382,11 @@ class RegExpCapture: public RegExpTree { ...@@ -1391,13 +1382,11 @@ class RegExpCapture: public RegExpTree {
: body_(body), index_(index), available_(CAPTURE_AVAILABLE) { } : body_(body), index_(index), available_(CAPTURE_AVAILABLE) { }
virtual void* Accept(RegExpVisitor* visitor, void* data); virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler, virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success, RegExpNode* on_success);
RegExpNode* on_failure);
static RegExpNode* ToNode(RegExpTree* body, static RegExpNode* ToNode(RegExpTree* body,
int index, int index,
RegExpCompiler* compiler, RegExpCompiler* compiler,
RegExpNode* on_success, RegExpNode* on_success);
RegExpNode* on_failure);
virtual RegExpCapture* AsCapture(); virtual RegExpCapture* AsCapture();
virtual bool IsCapture(); virtual bool IsCapture();
RegExpTree* body() { return body_; } RegExpTree* body() { return body_; }
...@@ -1422,8 +1411,7 @@ class RegExpLookahead: public RegExpTree { ...@@ -1422,8 +1411,7 @@ class RegExpLookahead: public RegExpTree {
is_positive_(is_positive) { } is_positive_(is_positive) { }
virtual void* Accept(RegExpVisitor* visitor, void* data); virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler, virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success, RegExpNode* on_success);
RegExpNode* on_failure);
virtual RegExpLookahead* AsLookahead(); virtual RegExpLookahead* AsLookahead();
virtual bool IsLookahead(); virtual bool IsLookahead();
RegExpTree* body() { return body_; } RegExpTree* body() { return body_; }
...@@ -1440,8 +1428,7 @@ class RegExpBackReference: public RegExpTree { ...@@ -1440,8 +1428,7 @@ class RegExpBackReference: public RegExpTree {
: capture_(capture) { } : capture_(capture) { }
virtual void* Accept(RegExpVisitor* visitor, void* data); virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler, virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success, RegExpNode* on_success);
RegExpNode* on_failure);
virtual RegExpBackReference* AsBackReference(); virtual RegExpBackReference* AsBackReference();
virtual bool IsBackReference(); virtual bool IsBackReference();
int index() { return capture_->index(); } int index() { return capture_->index(); }
...@@ -1456,8 +1443,7 @@ class RegExpEmpty: public RegExpTree { ...@@ -1456,8 +1443,7 @@ class RegExpEmpty: public RegExpTree {
RegExpEmpty() { } RegExpEmpty() { }
virtual void* Accept(RegExpVisitor* visitor, void* data); virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler, virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success, RegExpNode* on_success);
RegExpNode* on_failure);
virtual RegExpEmpty* AsEmpty(); virtual RegExpEmpty* AsEmpty();
virtual bool IsEmpty(); virtual bool IsEmpty();
static RegExpEmpty* GetInstance() { return &kInstance; } static RegExpEmpty* GetInstance() { return &kInstance; }
......
...@@ -50,22 +50,24 @@ V(SUCCEED, 14, 1) /* succeed */ \ ...@@ -50,22 +50,24 @@ V(SUCCEED, 14, 1) /* succeed */ \
V(ADVANCE_CP, 15, 5) /* advance_cp offset32 */ \ V(ADVANCE_CP, 15, 5) /* advance_cp offset32 */ \
V(GOTO, 16, 5) /* goto addr32 */ \ V(GOTO, 16, 5) /* goto addr32 */ \
V(LOAD_CURRENT_CHAR, 17, 9) /* load offset32 addr32 */ \ V(LOAD_CURRENT_CHAR, 17, 9) /* load offset32 addr32 */ \
V(CHECK_CHAR, 18, 7) /* check_char uc16 addr32 */ \ V(LOAD_CURRENT_CHAR_UNCHECKED, 18, 5) /* load offset32 */ \
V(CHECK_NOT_CHAR, 19, 7) /* check_not_char uc16 addr32 */ \ V(CHECK_CHAR, 19, 7) /* check_char uc16 addr32 */ \
V(OR_CHECK_NOT_CHAR, 20, 9) /* or_check_not_char uc16 uc16 addr32 */ \ V(CHECK_NOT_CHAR, 20, 7) /* check_not_char uc16 addr32 */ \
V(MINUS_OR_CHECK_NOT_CHAR, 21, 9) /* minus_or_check_not_char uc16 uc16 ad...*/ \ V(OR_CHECK_NOT_CHAR, 21, 9) /* or_check_not_char uc16 uc16 addr32 */ \
V(CHECK_LT, 22, 7) /* check_lt uc16 addr32 */ \ V(MINUS_OR_CHECK_NOT_CHAR, 22, 9) /* minus_or_check_not_char uc16 uc16 ad...*/ \
V(CHECK_GT, 23, 7) /* check_gr uc16 addr32 */ \ V(CHECK_LT, 23, 7) /* check_lt uc16 addr32 */ \
V(CHECK_NOT_BACK_REF, 24, 6) /* check_not_back_ref capture_idx addr32 */ \ V(CHECK_GT, 24, 7) /* check_gr uc16 addr32 */ \
V(CHECK_NOT_BACK_REF_NO_CASE, 25, 6) /* check_not_back_ref_no_case captu... */ \ V(CHECK_NOT_BACK_REF, 25, 6) /* check_not_back_ref capture_idx addr32 */ \
V(CHECK_NOT_REGS_EQUAL, 26, 7) /* check_not_regs_equal reg1 reg2 addr32 */ \ V(CHECK_NOT_BACK_REF_NO_CASE, 26, 6) /* check_not_back_ref_no_case captu... */ \
V(LOOKUP_MAP1, 27, 11) /* l_map1 start16 bit_map_addr32 addr32 */ \ V(CHECK_NOT_REGS_EQUAL, 27, 7) /* check_not_regs_equal reg1 reg2 addr32 */ \
V(LOOKUP_MAP2, 28, 99) /* l_map2 start16 half_nibble_map_addr32* */ \ V(LOOKUP_MAP1, 28, 11) /* l_map1 start16 bit_map_addr32 addr32 */ \
V(LOOKUP_MAP8, 29, 99) /* l_map8 start16 byte_map addr32* */ \ V(LOOKUP_MAP2, 29, 99) /* l_map2 start16 half_nibble_map_addr32* */ \
V(LOOKUP_HI_MAP8, 30, 99) /* l_himap8 start8 byte_map_addr32 addr32* */ \ V(LOOKUP_MAP8, 30, 99) /* l_map8 start16 byte_map addr32* */ \
V(CHECK_REGISTER_LT, 31, 8) /* check_reg_lt register_index value16 addr32 */ \ V(LOOKUP_HI_MAP8, 31, 99) /* l_himap8 start8 byte_map_addr32 addr32* */ \
V(CHECK_REGISTER_GE, 32, 8) /* check_reg_ge register_index value16 addr32 */ \ V(CHECK_REGISTER_LT, 32, 8) /* check_reg_lt register_index value16 addr32 */ \
V(CHECK_NOT_AT_START, 33, 5) /* check_not_at_start addr32 */ V(CHECK_REGISTER_GE, 33, 8) /* check_reg_ge register_index value16 addr32 */ \
V(CHECK_NOT_AT_START, 34, 5) /* check_not_at_start addr32 */ \
V(CHECK_GREEDY, 35, 5) /* check_greedy addr32 */
#define DECLARE_BYTECODES(name, code, length) \ #define DECLARE_BYTECODES(name, code, length) \
static const int BC_##name = code; static const int BC_##name = code;
......
...@@ -191,6 +191,15 @@ static bool RawMatch(const byte* code_base, ...@@ -191,6 +191,15 @@ static bool RawMatch(const byte* code_base,
BYTECODE(GOTO) BYTECODE(GOTO)
pc = code_base + Load32(pc + 1); pc = code_base + Load32(pc + 1);
break; break;
BYTECODE(CHECK_GREEDY)
if (current == backtrack_sp[-1]) {
backtrack_sp--;
backtrack_stack_space++;
pc = code_base + Load32(pc + 1);
} else {
pc += BC_CHECK_GREEDY_LENGTH;
}
break;
BYTECODE(LOAD_CURRENT_CHAR) { BYTECODE(LOAD_CURRENT_CHAR) {
int pos = current + Load32(pc + 1); int pos = current + Load32(pc + 1);
if (pos >= subject.length()) { if (pos >= subject.length()) {
...@@ -201,6 +210,12 @@ static bool RawMatch(const byte* code_base, ...@@ -201,6 +210,12 @@ static bool RawMatch(const byte* code_base,
} }
break; break;
} }
BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) {
int pos = current + Load32(pc + 1);
current_char = subject[pos];
pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH;
break;
}
BYTECODE(CHECK_CHAR) { BYTECODE(CHECK_CHAR) {
int c = Load16(pc + 1); int c = Load16(pc + 1);
if (c == current_char) { if (c == current_char) {
......
...@@ -253,11 +253,6 @@ static void DoForEach(Node* node, Callback* callback) { ...@@ -253,11 +253,6 @@ static void DoForEach(Node* node, Callback* callback) {
} }
// Hands this node's label_ to the given macro assembler's Bind().
// (This definition appears only on the removed side of the diff.)
void RegExpNode::Bind(RegExpMacroAssembler* macro) {
macro->Bind(&label_);
}
}} // namespace v8::internal }} // namespace v8::internal
......
This diff is collapsed.
This diff is collapsed.
...@@ -184,11 +184,14 @@ void RegExpMacroAssemblerIA32::CheckCharacterLT(uc16 limit, Label* on_less) { ...@@ -184,11 +184,14 @@ void RegExpMacroAssemblerIA32::CheckCharacterLT(uc16 limit, Label* on_less) {
void RegExpMacroAssemblerIA32::CheckCharacters(Vector<const uc16> str, void RegExpMacroAssemblerIA32::CheckCharacters(Vector<const uc16> str,
int cp_offset, int cp_offset,
Label* on_failure) { Label* on_failure,
bool check_end_of_string) {
int byte_length = str.length() * char_size(); int byte_length = str.length() * char_size();
int byte_offset = cp_offset * char_size(); int byte_offset = cp_offset * char_size();
__ cmp(Operand(edi), Immediate(-(byte_offset + byte_length))); if (check_end_of_string) {
BranchOrBacktrack(greater, on_failure); __ cmp(Operand(edi), Immediate(-(byte_offset + byte_length)));
BranchOrBacktrack(greater, on_failure);
}
if (str.length() <= kMaxInlineStringTests) { if (str.length() <= kMaxInlineStringTests) {
for (int i = 0; i < str.length(); i++) { for (int i = 0; i < str.length(); i++) {
...@@ -233,10 +236,13 @@ void RegExpMacroAssemblerIA32::CheckCharacters(Vector<const uc16> str, ...@@ -233,10 +236,13 @@ void RegExpMacroAssemblerIA32::CheckCharacters(Vector<const uc16> str,
} }
void RegExpMacroAssemblerIA32::CheckCurrentPosition(int register_index, void RegExpMacroAssemblerIA32::CheckGreedyLoop(Label* on_equal) {
Label* on_equal) { Label fallthrough;
__ cmp(edi, register_location(register_index)); __ cmp(edi, Operand(esp, 0));
BranchOrBacktrack(equal, on_equal); __ j(not_equal, &fallthrough);
__ add(Operand(esp), Immediate(4)); // Pop.
BranchOrBacktrack(no_condition, on_equal);
__ bind(&fallthrough);
} }
...@@ -482,7 +488,7 @@ void RegExpMacroAssemblerIA32::Fail() { ...@@ -482,7 +488,7 @@ void RegExpMacroAssemblerIA32::Fail() {
} }
Handle<Object> RegExpMacroAssemblerIA32::GetCode() { Handle<Object> RegExpMacroAssemblerIA32::GetCode(Handle<String> source) {
// Finalize code - write the entry point code now we know how many // Finalize code - write the entry point code now we know how many
// registers we need. // registers we need.
...@@ -521,7 +527,7 @@ Handle<Object> RegExpMacroAssemblerIA32::GetCode() { ...@@ -521,7 +527,7 @@ Handle<Object> RegExpMacroAssemblerIA32::GetCode() {
Label at_start; Label at_start;
__ cmp(Operand(ebp, kAtStart), Immediate(0)); __ cmp(Operand(ebp, kAtStart), Immediate(0));
__ j(not_equal, &at_start); __ j(not_equal, &at_start);
LoadCurrentCharToRegister(-1); // Load previous char. LoadCurrentCharacterUnchecked(-1); // Load previous char.
__ jmp(&start_label_); __ jmp(&start_label_);
__ bind(&at_start); __ bind(&at_start);
__ mov(current_character(), '\n'); __ mov(current_character(), '\n');
...@@ -562,7 +568,7 @@ Handle<Object> RegExpMacroAssemblerIA32::GetCode() { ...@@ -562,7 +568,7 @@ Handle<Object> RegExpMacroAssemblerIA32::GetCode() {
NULL, NULL,
Code::ComputeFlags(Code::REGEXP), Code::ComputeFlags(Code::REGEXP),
self_); self_);
LOG(CodeCreateEvent("RegExp", *code, "(Compiled RegExp)")); LOG(CodeCreateEvent("RegExp", *code, *(source->ToCString())));
return Handle<Object>::cast(code); return Handle<Object>::cast(code);
} }
...@@ -600,7 +606,7 @@ void RegExpMacroAssemblerIA32::LoadCurrentCharacter(int cp_offset, ...@@ -600,7 +606,7 @@ void RegExpMacroAssemblerIA32::LoadCurrentCharacter(int cp_offset,
ASSERT(cp_offset < (1<<30)); // Be sane! (And ensure negation works) ASSERT(cp_offset < (1<<30)); // Be sane! (And ensure negation works)
__ cmp(edi, -cp_offset * char_size()); __ cmp(edi, -cp_offset * char_size());
BranchOrBacktrack(greater_equal, on_end_of_input); BranchOrBacktrack(greater_equal, on_end_of_input);
LoadCurrentCharToRegister(cp_offset); LoadCurrentCharacterUnchecked(cp_offset);
} }
...@@ -651,10 +657,17 @@ void RegExpMacroAssemblerIA32::Succeed() { ...@@ -651,10 +657,17 @@ void RegExpMacroAssemblerIA32::Succeed() {
} }
void RegExpMacroAssemblerIA32::WriteCurrentPositionToRegister(int reg) { void RegExpMacroAssemblerIA32::WriteCurrentPositionToRegister(int reg,
__ mov(register_location(reg), edi); int cp_offset) {
if (cp_offset == 0) {
__ mov(register_location(reg), edi);
} else {
__ lea(eax, Operand(edi, cp_offset));
__ mov(register_location(reg), eax);
}
} }
void RegExpMacroAssemblerIA32::WriteStackPointerToRegister(int reg) { void RegExpMacroAssemblerIA32::WriteStackPointerToRegister(int reg) {
__ mov(register_location(reg), esp); __ mov(register_location(reg), esp);
} }
...@@ -770,7 +783,7 @@ void RegExpMacroAssemblerIA32::CheckStackLimit() { ...@@ -770,7 +783,7 @@ void RegExpMacroAssemblerIA32::CheckStackLimit() {
} }
void RegExpMacroAssemblerIA32::LoadCurrentCharToRegister(int cp_offset) { void RegExpMacroAssemblerIA32::LoadCurrentCharacterUnchecked(int cp_offset) {
if (mode_ == ASCII) { if (mode_ == ASCII) {
__ movzx_b(current_character(), Operand(esi, edi, times_1, cp_offset)); __ movzx_b(current_character(), Operand(esi, edi, times_1, cp_offset));
return; return;
......
...@@ -47,8 +47,9 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler { ...@@ -47,8 +47,9 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
virtual void CheckCharacterLT(uc16 limit, Label* on_less); virtual void CheckCharacterLT(uc16 limit, Label* on_less);
virtual void CheckCharacters(Vector<const uc16> str, virtual void CheckCharacters(Vector<const uc16> str,
int cp_offset, int cp_offset,
Label* on_failure); Label* on_failure,
virtual void CheckCurrentPosition(int register_index, Label* on_equal); bool check_end_of_string);
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
virtual void CheckNotAtStart(Label* on_not_at_start); virtual void CheckNotAtStart(Label* on_not_at_start);
virtual void CheckNotBackReference(int start_reg, Label* on_no_match); virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg, virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
...@@ -70,12 +71,14 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler { ...@@ -70,12 +71,14 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
const Vector<Label*>& destinations); const Vector<Label*>& destinations);
virtual void EmitOrLink(Label* label); virtual void EmitOrLink(Label* label);
virtual void Fail(); virtual void Fail();
virtual Handle<Object> GetCode(); virtual Handle<Object> GetCode(Handle<String> source);
virtual void GoTo(Label* label); virtual void GoTo(Label* label);
virtual void IfRegisterGE(int reg, int comparand, Label* if_ge); virtual void IfRegisterGE(int reg, int comparand, Label* if_ge);
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt); virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
virtual IrregexpImplementation Implementation(); virtual IrregexpImplementation Implementation();
virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input); virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input);
virtual void LoadCurrentCharacterUnchecked(int cp_offset);
virtual void PopCurrentPosition(); virtual void PopCurrentPosition();
virtual void PopRegister(int register_index); virtual void PopRegister(int register_index);
virtual void PushBacktrack(Label* label); virtual void PushBacktrack(Label* label);
...@@ -85,7 +88,7 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler { ...@@ -85,7 +88,7 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
virtual void ReadStackPointerFromRegister(int reg); virtual void ReadStackPointerFromRegister(int reg);
virtual void SetRegister(int register_index, int to); virtual void SetRegister(int register_index, int to);
virtual void Succeed(); virtual void Succeed();
virtual void WriteCurrentPositionToRegister(int reg); virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
virtual void WriteStackPointerToRegister(int reg); virtual void WriteStackPointerToRegister(int reg);
template <typename T> template <typename T>
...@@ -139,10 +142,6 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler { ...@@ -139,10 +142,6 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
// is NULL, in which case it is a conditional Backtrack. // is NULL, in which case it is a conditional Backtrack.
void BranchOrBacktrack(Condition condition, Label* to); void BranchOrBacktrack(Condition condition, Label* to);
// Read a character from input at the given offset from the current
// position.
void LoadCurrentCharToRegister(int cp_offset);
// Load the address of a "constant buffer" (a slice of a byte array) // Load the address of a "constant buffer" (a slice of a byte array)
// into a register. The address is computed from the ByteArray* address // into a register. The address is computed from the ByteArray* address
// and an offset. Uses no extra registers. // and an offset. Uses no extra registers.
......
...@@ -68,6 +68,7 @@ void RegExpMacroAssemblerIrregexp::Bind(Label* l) { ...@@ -68,6 +68,7 @@ void RegExpMacroAssemblerIrregexp::Bind(Label* l) {
void RegExpMacroAssemblerIrregexp::EmitOrLink(Label* l) { void RegExpMacroAssemblerIrregexp::EmitOrLink(Label* l) {
if (l == NULL) l = &backtrack_;
if (l->is_bound()) { if (l->is_bound()) {
Emit32(l->pos()); Emit32(l->pos());
} else { } else {
...@@ -95,11 +96,11 @@ void RegExpMacroAssemblerIrregexp::PushRegister(int register_index) { ...@@ -95,11 +96,11 @@ void RegExpMacroAssemblerIrregexp::PushRegister(int register_index) {
void RegExpMacroAssemblerIrregexp::WriteCurrentPositionToRegister( void RegExpMacroAssemblerIrregexp::WriteCurrentPositionToRegister(
int register_index) { int register_index, int cp_offset) {
ASSERT(register_index >= 0); ASSERT(register_index >= 0);
Emit(BC_SET_REGISTER_TO_CP); Emit(BC_SET_REGISTER_TO_CP);
Emit(register_index); Emit(register_index);
Emit32(0); // Current position offset. Emit32(cp_offset); // Current position offset.
} }
...@@ -187,11 +188,10 @@ void RegExpMacroAssemblerIrregexp::AdvanceCurrentPosition(int by) { ...@@ -187,11 +188,10 @@ void RegExpMacroAssemblerIrregexp::AdvanceCurrentPosition(int by) {
} }
void RegExpMacroAssemblerIrregexp::CheckCurrentPosition( void RegExpMacroAssemblerIrregexp::CheckGreedyLoop(
int register_index, Label* on_tos_equals_current_position) {
Label* on_equal) { Emit(BC_CHECK_GREEDY);
// TODO(erikcorry): Implement. EmitOrLink(on_tos_equals_current_position);
UNIMPLEMENTED();
} }
...@@ -203,6 +203,13 @@ void RegExpMacroAssemblerIrregexp::LoadCurrentCharacter(int cp_offset, ...@@ -203,6 +203,13 @@ void RegExpMacroAssemblerIrregexp::LoadCurrentCharacter(int cp_offset,
} }
// Emits the LOAD_CURRENT_CHAR_UNCHECKED bytecode followed by its single
// 32-bit operand, cp_offset.  No branch target is emitted, so the generated
// instruction carries no end-of-input handling of its own.
void RegExpMacroAssemblerIrregexp::LoadCurrentCharacterUnchecked(
int cp_offset) {
Emit(BC_LOAD_CURRENT_CHAR_UNCHECKED);
Emit32(cp_offset);
}
void RegExpMacroAssemblerIrregexp::CheckCharacterLT(uc16 limit, void RegExpMacroAssemblerIrregexp::CheckCharacterLT(uc16 limit,
Label* on_less) { Label* on_less) {
Emit(BC_CHECK_LT); Emit(BC_CHECK_LT);
...@@ -263,7 +270,7 @@ void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterMinusOr( ...@@ -263,7 +270,7 @@ void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterMinusOr(
void RegExpMacroAssemblerIrregexp::CheckNotBackReference(int start_reg, void RegExpMacroAssemblerIrregexp::CheckNotBackReference(int start_reg,
Label* on_not_equal) { Label* on_not_equal) {
Emit(BC_CHECK_NOT_BACK_REF); Emit(BC_CHECK_NOT_BACK_REF);
Emit(start_reg); Emit(start_reg);
EmitOrLink(on_not_equal); EmitOrLink(on_not_equal);
...@@ -323,11 +330,19 @@ void RegExpMacroAssemblerIrregexp::DispatchHighByteMap( ...@@ -323,11 +330,19 @@ void RegExpMacroAssemblerIrregexp::DispatchHighByteMap(
void RegExpMacroAssemblerIrregexp::CheckCharacters( void RegExpMacroAssemblerIrregexp::CheckCharacters(
Vector<const uc16> str, Vector<const uc16> str,
int cp_offset, int cp_offset,
Label* on_failure) { Label* on_failure,
bool check_end_of_string) {
// It is vital that this loop is backwards due to the unchecked character
// load below.
for (int i = str.length() - 1; i >= 0; i--) { for (int i = str.length() - 1; i >= 0; i--) {
Emit(BC_LOAD_CURRENT_CHAR); if (check_end_of_string && i == str.length() - 1) {
Emit32(cp_offset + i); Emit(BC_LOAD_CURRENT_CHAR);
EmitOrLink(on_failure); Emit32(cp_offset + i);
EmitOrLink(on_failure);
} else {
Emit(BC_LOAD_CURRENT_CHAR_UNCHECKED);
Emit32(cp_offset + i);
}
Emit(BC_CHECK_NOT_CHAR); Emit(BC_CHECK_NOT_CHAR);
Emit16(str[i]); Emit16(str[i]);
EmitOrLink(on_failure); EmitOrLink(on_failure);
...@@ -357,7 +372,9 @@ void RegExpMacroAssemblerIrregexp::IfRegisterGE(int register_index, ...@@ -357,7 +372,9 @@ void RegExpMacroAssemblerIrregexp::IfRegisterGE(int register_index,
} }
Handle<Object> RegExpMacroAssemblerIrregexp::GetCode() { Handle<Object> RegExpMacroAssemblerIrregexp::GetCode(Handle<String> source) {
Bind(&backtrack_);
Emit(BC_POP_BT);
Handle<ByteArray> array = Factory::NewByteArray(length()); Handle<ByteArray> array = Factory::NewByteArray(length());
Copy(array->GetDataStartAddress()); Copy(array->GetDataStartAddress());
return array; return array;
......
...@@ -62,14 +62,16 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler { ...@@ -62,14 +62,16 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
virtual void PushRegister(int register_index); virtual void PushRegister(int register_index);
virtual void AdvanceRegister(int reg, int by); // r[reg] += by. virtual void AdvanceRegister(int reg, int by); // r[reg] += by.
virtual void SetRegister(int register_index, int to); virtual void SetRegister(int register_index, int to);
virtual void WriteCurrentPositionToRegister(int reg); virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
virtual void ReadCurrentPositionFromRegister(int reg); virtual void ReadCurrentPositionFromRegister(int reg);
virtual void WriteStackPointerToRegister(int reg); virtual void WriteStackPointerToRegister(int reg);
virtual void ReadStackPointerFromRegister(int reg); virtual void ReadStackPointerFromRegister(int reg);
virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input); virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input);
virtual void LoadCurrentCharacterUnchecked(int cp_offset);
virtual void CheckCharacterLT(uc16 limit, Label* on_less); virtual void CheckCharacterLT(uc16 limit, Label* on_less);
virtual void CheckCharacterGT(uc16 limit, Label* on_greater); virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
virtual void CheckCharacter(uc16 c, Label* on_equal); virtual void CheckCharacter(uc16 c, Label* on_equal);
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
virtual void CheckNotAtStart(Label* on_not_at_start); virtual void CheckNotAtStart(Label* on_not_at_start);
virtual void CheckNotCharacter(uc16 c, Label* on_not_equal); virtual void CheckNotCharacter(uc16 c, Label* on_not_equal);
virtual void CheckNotCharacterAfterOr(uc16 c, uc16 mask, Label* on_not_equal); virtual void CheckNotCharacterAfterOr(uc16 c, uc16 mask, Label* on_not_equal);
...@@ -82,8 +84,8 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler { ...@@ -82,8 +84,8 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
virtual void CheckNotRegistersEqual(int reg1, int reg2, Label* on_not_equal); virtual void CheckNotRegistersEqual(int reg1, int reg2, Label* on_not_equal);
virtual void CheckCharacters(Vector<const uc16> str, virtual void CheckCharacters(Vector<const uc16> str,
int cp_offset, int cp_offset,
Label* on_failure); Label* on_failure,
virtual void CheckCurrentPosition(int register_index, Label* on_equal); bool check_end_of_string);
virtual void CheckBitmap(uc16 start, Label* bitmap, Label* on_zero); virtual void CheckBitmap(uc16 start, Label* bitmap, Label* on_zero);
virtual void DispatchHalfNibbleMap(uc16 start, virtual void DispatchHalfNibbleMap(uc16 start,
Label* half_nibble_map, Label* half_nibble_map,
...@@ -98,7 +100,7 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler { ...@@ -98,7 +100,7 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
virtual void IfRegisterGE(int register_index, int comparand, Label* if_ge); virtual void IfRegisterGE(int register_index, int comparand, Label* if_ge);
virtual IrregexpImplementation Implementation(); virtual IrregexpImplementation Implementation();
virtual Handle<Object> GetCode(); virtual Handle<Object> GetCode(Handle<String> source);
private: private:
void Expand(); void Expand();
// Code and bitmap emission. // Code and bitmap emission.
...@@ -109,14 +111,13 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler { ...@@ -109,14 +111,13 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
int length(); int length();
void Copy(Address a); void Copy(Address a);
// The buffer into which code and relocation info are generated. // The buffer into which code and relocation info are generated.
Vector<byte> buffer_; Vector<byte> buffer_;
// The program counter. // The program counter.
int pc_; int pc_;
// True if the assembler owns the buffer, false if buffer is external. // True if the assembler owns the buffer, false if buffer is external.
bool own_buffer_; bool own_buffer_;
Label backtrack_;
DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpMacroAssemblerIrregexp); DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpMacroAssemblerIrregexp);
}; };
......
...@@ -64,6 +64,12 @@ void RegExpMacroAssemblerTracer::AdvanceCurrentPosition(int by) { ...@@ -64,6 +64,12 @@ void RegExpMacroAssemblerTracer::AdvanceCurrentPosition(int by) {
} }
void RegExpMacroAssemblerTracer::CheckGreedyLoop(Label* label) {
PrintF(" CheckGreedyLoop(label[%08x]);\n\n", label);
assembler_->CheckGreedyLoop(label);
}
void RegExpMacroAssemblerTracer::PopCurrentPosition() { void RegExpMacroAssemblerTracer::PopCurrentPosition() {
PrintF(" PopCurrentPosition();\n"); PrintF(" PopCurrentPosition();\n");
assembler_->PopCurrentPosition(); assembler_->PopCurrentPosition();
...@@ -130,9 +136,12 @@ void RegExpMacroAssemblerTracer::SetRegister(int register_index, int to) { ...@@ -130,9 +136,12 @@ void RegExpMacroAssemblerTracer::SetRegister(int register_index, int to) {
} }
void RegExpMacroAssemblerTracer::WriteCurrentPositionToRegister(int reg) { void RegExpMacroAssemblerTracer::WriteCurrentPositionToRegister(int reg,
PrintF(" WriteCurrentPositionToRegister(register=%d);\n", reg); int cp_offset) {
assembler_->WriteCurrentPositionToRegister(reg); PrintF(" WriteCurrentPositionToRegister(register=%d,cp_offset=%d);\n",
reg,
cp_offset);
assembler_->WriteCurrentPositionToRegister(reg, cp_offset);
} }
...@@ -156,12 +165,20 @@ void RegExpMacroAssemblerTracer::ReadStackPointerFromRegister(int reg) { ...@@ -156,12 +165,20 @@ void RegExpMacroAssemblerTracer::ReadStackPointerFromRegister(int reg) {
void RegExpMacroAssemblerTracer::LoadCurrentCharacter(int cp_offset, void RegExpMacroAssemblerTracer::LoadCurrentCharacter(int cp_offset,
Label* on_end_of_input) { Label* on_end_of_input) {
PrintF(" LoadCurrentCharacter(cp_offset=%d, label[%08x]);\n", cp_offset, PrintF(" LoadCurrentCharacter(cp_offset=%d, label[%08x]);\n",
cp_offset,
on_end_of_input); on_end_of_input);
assembler_->LoadCurrentCharacter(cp_offset, on_end_of_input); assembler_->LoadCurrentCharacter(cp_offset, on_end_of_input);
} }
void RegExpMacroAssemblerTracer::LoadCurrentCharacterUnchecked(int cp_offset) {
PrintF(" LoadCurrentCharacterUnchecked(cp_offset=%d);\n",
cp_offset);
assembler_->LoadCurrentCharacterUnchecked(cp_offset);
}
void RegExpMacroAssemblerTracer::CheckCharacterLT(uc16 limit, Label* on_less) { void RegExpMacroAssemblerTracer::CheckCharacterLT(uc16 limit, Label* on_less) {
PrintF(" CheckCharacterLT(c='u%04x', label[%08x]);\n", limit, on_less); PrintF(" CheckCharacterLT(c='u%04x', label[%08x]);\n", limit, on_less);
assembler_->CheckCharacterLT(limit, on_less); assembler_->CheckCharacterLT(limit, on_less);
...@@ -242,21 +259,15 @@ void RegExpMacroAssemblerTracer::CheckNotRegistersEqual(int reg1, ...@@ -242,21 +259,15 @@ void RegExpMacroAssemblerTracer::CheckNotRegistersEqual(int reg1,
void RegExpMacroAssemblerTracer::CheckCharacters(Vector<const uc16> str, void RegExpMacroAssemblerTracer::CheckCharacters(Vector<const uc16> str,
int cp_offset, int cp_offset,
Label* on_failure) { Label* on_failure,
PrintF(" CheckCharacters(str=\""); bool check_end_of_string) {
PrintF(" %s(str=\"",
check_end_of_string ? "CheckCharacters" : "CheckCharactersUnchecked");
for (int i = 0; i < str.length(); i++) { for (int i = 0; i < str.length(); i++) {
PrintF("u%04x", str[i]); PrintF("u%04x", str[i]);
} }
PrintF("\", cp_offset=%d, label[%08x])\n", cp_offset, on_failure); PrintF("\", cp_offset=%d, label[%08x])\n", cp_offset, on_failure);
assembler_->CheckCharacters(str, cp_offset, on_failure); assembler_->CheckCharacters(str, cp_offset, on_failure, check_end_of_string);
}
void RegExpMacroAssemblerTracer::CheckCurrentPosition(int register_index,
Label* on_equal) {
PrintF(" CheckCurrentPosition(register=%d, label[%08x]);\n", register_index,
on_equal);
assembler_->CheckCurrentPosition(register_index, on_equal);
} }
...@@ -334,9 +345,9 @@ RegExpMacroAssembler::IrregexpImplementation ...@@ -334,9 +345,9 @@ RegExpMacroAssembler::IrregexpImplementation
} }
Handle<Object> RegExpMacroAssemblerTracer::GetCode() { Handle<Object> RegExpMacroAssemblerTracer::GetCode(Handle<String> source) {
PrintF(" GetCode();\n"); PrintF(" GetCode(%s);\n", *(source->ToCString()));
return assembler_->GetCode(); return assembler_->GetCode(source);
} }
}} // namespace v8::internal }} // namespace v8::internal
...@@ -47,10 +47,9 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler { ...@@ -47,10 +47,9 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
virtual void CheckCharacters( virtual void CheckCharacters(
Vector<const uc16> str, Vector<const uc16> str,
int cp_offset, int cp_offset,
Label* on_failure); Label* on_failure,
virtual void CheckCurrentPosition( bool check_end_of_string);
int register_index, virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
Label* on_equal);
virtual void CheckNotAtStart(Label* on_not_at_start); virtual void CheckNotAtStart(Label* on_not_at_start);
virtual void CheckNotBackReference(int start_reg, Label* on_no_match); virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg, virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
...@@ -77,12 +76,13 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler { ...@@ -77,12 +76,13 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
const Vector<Label*>& destinations); const Vector<Label*>& destinations);
virtual void EmitOrLink(Label* label); virtual void EmitOrLink(Label* label);
virtual void Fail(); virtual void Fail();
virtual Handle<Object> GetCode(); virtual Handle<Object> GetCode(Handle<String> source);
virtual void GoTo(Label* label); virtual void GoTo(Label* label);
virtual void IfRegisterGE(int reg, int comparand, Label* if_ge); virtual void IfRegisterGE(int reg, int comparand, Label* if_ge);
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt); virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
virtual IrregexpImplementation Implementation(); virtual IrregexpImplementation Implementation();
virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input); virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input);
virtual void LoadCurrentCharacterUnchecked(int cp_offset);
virtual void PopCurrentPosition(); virtual void PopCurrentPosition();
virtual void PopRegister(int register_index); virtual void PopRegister(int register_index);
virtual void PushBacktrack(Label* label); virtual void PushBacktrack(Label* label);
...@@ -92,7 +92,7 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler { ...@@ -92,7 +92,7 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
virtual void ReadStackPointerFromRegister(int reg); virtual void ReadStackPointerFromRegister(int reg);
virtual void SetRegister(int register_index, int to); virtual void SetRegister(int register_index, int to);
virtual void Succeed(); virtual void Succeed();
virtual void WriteCurrentPositionToRegister(int reg); virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
virtual void WriteStackPointerToRegister(int reg); virtual void WriteStackPointerToRegister(int reg);
private: private:
RegExpMacroAssembler* assembler_; RegExpMacroAssembler* assembler_;
......
...@@ -62,19 +62,17 @@ class RegExpMacroAssembler { ...@@ -62,19 +62,17 @@ class RegExpMacroAssembler {
virtual void CheckCharacterGT(uc16 limit, Label* on_greater) = 0; virtual void CheckCharacterGT(uc16 limit, Label* on_greater) = 0;
virtual void CheckCharacterLT(uc16 limit, Label* on_less) = 0; virtual void CheckCharacterLT(uc16 limit, Label* on_less) = 0;
// Check the current character for a match with a literal string. If we // Check the current character for a match with a literal string. If we
// fail to match then goto the on_failure label. End of input always // fail to match then goto the on_failure label. If check_eos is set then
// matches. If the label is NULL then we should pop a backtrack address off // the end of input always fails. If check_eos is clear then it is the
// the stack abnd go to that. // caller's responsibility to ensure that the end of string is not hit.
// If the label is NULL then we should pop a backtrack address off
// the stack and go to that.
virtual void CheckCharacters( virtual void CheckCharacters(
Vector<const uc16> str, Vector<const uc16> str,
int cp_offset, int cp_offset,
Label* on_failure) = 0; Label* on_failure,
// Check the current input position against a register. If the register is bool check_eos) = 0;
// equal to the current position then go to the label. If the label is NULL virtual void CheckGreedyLoop(Label* on_tos_equals_current_position) = 0;
// then backtrack instead.
virtual void CheckCurrentPosition(
int register_index,
Label* on_equal) = 0;
virtual void CheckNotAtStart(Label* on_not_at_start) = 0; virtual void CheckNotAtStart(Label* on_not_at_start) = 0;
virtual void CheckNotBackReference(int start_reg, Label* on_no_match) = 0; virtual void CheckNotBackReference(int start_reg, Label* on_no_match) = 0;
virtual void CheckNotBackReferenceIgnoreCase(int start_reg, virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
...@@ -115,7 +113,7 @@ class RegExpMacroAssembler { ...@@ -115,7 +113,7 @@ class RegExpMacroAssembler {
const Vector<Label*>& destinations) = 0; const Vector<Label*>& destinations) = 0;
virtual void EmitOrLink(Label* label) = 0; virtual void EmitOrLink(Label* label) = 0;
virtual void Fail() = 0; virtual void Fail() = 0;
virtual Handle<Object> GetCode() = 0; virtual Handle<Object> GetCode(Handle<String> source) = 0;
virtual void GoTo(Label* label) = 0; virtual void GoTo(Label* label) = 0;
// Check whether a register is >= a given constant and go to a label if it // Check whether a register is >= a given constant and go to a label if it
// is. Backtracks instead if the label is NULL. // is. Backtracks instead if the label is NULL.
...@@ -125,6 +123,7 @@ class RegExpMacroAssembler { ...@@ -125,6 +123,7 @@ class RegExpMacroAssembler {
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt) = 0; virtual void IfRegisterLT(int reg, int comparand, Label* if_lt) = 0;
virtual IrregexpImplementation Implementation() = 0; virtual IrregexpImplementation Implementation() = 0;
virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input) = 0; virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input) = 0;
virtual void LoadCurrentCharacterUnchecked(int cp_offset) = 0;
virtual void PopCurrentPosition() = 0; virtual void PopCurrentPosition() = 0;
virtual void PopRegister(int register_index) = 0; virtual void PopRegister(int register_index) = 0;
virtual void PushBacktrack(Label* label) = 0; virtual void PushBacktrack(Label* label) = 0;
...@@ -134,7 +133,7 @@ class RegExpMacroAssembler { ...@@ -134,7 +133,7 @@ class RegExpMacroAssembler {
virtual void ReadStackPointerFromRegister(int reg) = 0; virtual void ReadStackPointerFromRegister(int reg) = 0;
virtual void SetRegister(int register_index, int to) = 0; virtual void SetRegister(int register_index, int to) = 0;
virtual void Succeed() = 0; virtual void Succeed() = 0;
virtual void WriteCurrentPositionToRegister(int reg) = 0; virtual void WriteCurrentPositionToRegister(int reg, int cp_offset) = 0;
virtual void WriteStackPointerToRegister(int reg) = 0; virtual void WriteStackPointerToRegister(int reg) = 0;
private: private:
......
...@@ -362,7 +362,8 @@ static RegExpNode* Compile(const char* input, bool multiline) { ...@@ -362,7 +362,8 @@ static RegExpNode* Compile(const char* input, bool multiline) {
if (!v8::internal::ParseRegExp(&reader, multiline, &result)) if (!v8::internal::ParseRegExp(&reader, multiline, &result))
return NULL; return NULL;
RegExpNode* node = NULL; RegExpNode* node = NULL;
RegExpEngine::Compile(&result, &node, false, multiline); Handle<String> pattern = Factory::NewStringFromUtf8(CStrVector(input));
RegExpEngine::Compile(&result, &node, false, multiline, pattern);
return node; return node;
} }
...@@ -520,16 +521,16 @@ TEST(MacroAssembler) { ...@@ -520,16 +521,16 @@ TEST(MacroAssembler) {
m.Fail(); m.Fail();
m.Bind(&start); m.Bind(&start);
m.PushBacktrack(&fail2); m.PushBacktrack(&fail2);
m.CheckCharacters(foo, 0, &fail); m.CheckCharacters(foo, 0, &fail, true);
m.WriteCurrentPositionToRegister(0); m.WriteCurrentPositionToRegister(0, 0);
m.PushCurrentPosition(); m.PushCurrentPosition();
m.AdvanceCurrentPosition(3); m.AdvanceCurrentPosition(3);
m.WriteCurrentPositionToRegister(1); m.WriteCurrentPositionToRegister(1, 0);
m.PopCurrentPosition(); m.PopCurrentPosition();
m.AdvanceCurrentPosition(1); m.AdvanceCurrentPosition(1);
m.WriteCurrentPositionToRegister(2); m.WriteCurrentPositionToRegister(2, 0);
m.AdvanceCurrentPosition(1); m.AdvanceCurrentPosition(1);
m.WriteCurrentPositionToRegister(3); m.WriteCurrentPositionToRegister(3, 0);
m.Succeed(); m.Succeed();
m.Bind(&fail); m.Bind(&fail);
...@@ -542,7 +543,8 @@ TEST(MacroAssembler) { ...@@ -542,7 +543,8 @@ TEST(MacroAssembler) {
v8::HandleScope scope; v8::HandleScope scope;
Handle<ByteArray> array = Handle<ByteArray>::cast(m.GetCode()); Handle<String> source = Factory::NewStringFromAscii(CStrVector("^f(o)o"));
Handle<ByteArray> array = Handle<ByteArray>::cast(m.GetCode(source));
int captures[5]; int captures[5];
Handle<String> f1 = Handle<String> f1 =
...@@ -576,7 +578,8 @@ TEST(MacroAssemblerIA32Success) { ...@@ -576,7 +578,8 @@ TEST(MacroAssemblerIA32Success) {
m.Succeed(); m.Succeed();
Handle<Object> code_object = m.GetCode(); Handle<String> source = Factory::NewStringFromAscii(CStrVector(""));
Handle<Object> code_object = m.GetCode(source);
Handle<Code> code = Handle<Code>::cast(code_object); Handle<Code> code = Handle<Code>::cast(code_object);
int captures[4] = {42, 37, 87, 117}; int captures[4] = {42, 37, 87, 117};
...@@ -614,15 +617,16 @@ TEST(MacroAssemblerIA32Simple) { ...@@ -614,15 +617,16 @@ TEST(MacroAssemblerIA32Simple) {
Vector<const uc16> foo(foo_chars, 3); Vector<const uc16> foo(foo_chars, 3);
Label fail; Label fail;
m.CheckCharacters(foo, 0, &fail); m.CheckCharacters(foo, 0, &fail, true);
m.WriteCurrentPositionToRegister(0); m.WriteCurrentPositionToRegister(0, 0);
m.AdvanceCurrentPosition(3); m.AdvanceCurrentPosition(3);
m.WriteCurrentPositionToRegister(1); m.WriteCurrentPositionToRegister(1, 0);
m.Succeed(); m.Succeed();
m.Bind(&fail); m.Bind(&fail);
m.Fail(); m.Fail();
Handle<Object> code_object = m.GetCode(); Handle<String> source = Factory::NewStringFromAscii(CStrVector("^foo"));
Handle<Object> code_object = m.GetCode(source);
Handle<Code> code = Handle<Code>::cast(code_object); Handle<Code> code = Handle<Code>::cast(code_object);
int captures[4] = {42, 37, 87, 117}; int captures[4] = {42, 37, 87, 117};
...@@ -675,15 +679,16 @@ TEST(MacroAssemblerIA32SimpleUC16) { ...@@ -675,15 +679,16 @@ TEST(MacroAssemblerIA32SimpleUC16) {
Vector<const uc16> foo(foo_chars, 3); Vector<const uc16> foo(foo_chars, 3);
Label fail; Label fail;
m.CheckCharacters(foo, 0, &fail); m.CheckCharacters(foo, 0, &fail, true);
m.WriteCurrentPositionToRegister(0); m.WriteCurrentPositionToRegister(0, 0);
m.AdvanceCurrentPosition(3); m.AdvanceCurrentPosition(3);
m.WriteCurrentPositionToRegister(1); m.WriteCurrentPositionToRegister(1, 0);
m.Succeed(); m.Succeed();
m.Bind(&fail); m.Bind(&fail);
m.Fail(); m.Fail();
Handle<Object> code_object = m.GetCode(); Handle<String> source = Factory::NewStringFromAscii(CStrVector("^foo"));
Handle<Object> code_object = m.GetCode(source);
Handle<Code> code = Handle<Code>::cast(code_object); Handle<Code> code = Handle<Code>::cast(code_object);
int captures[4] = {42, 37, 87, 117}; int captures[4] = {42, 37, 87, 117};
...@@ -735,9 +740,6 @@ TEST(MacroAssemblerIA32Backtrack) { ...@@ -735,9 +740,6 @@ TEST(MacroAssemblerIA32Backtrack) {
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 0); RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 0);
uc16 foo_chars[3] = {'f', 'o', 'o'};
Vector<const uc16> foo(foo_chars, 3);
Label fail; Label fail;
Label backtrack; Label backtrack;
m.LoadCurrentCharacter(10, &fail); m.LoadCurrentCharacter(10, &fail);
...@@ -749,7 +751,8 @@ TEST(MacroAssemblerIA32Backtrack) { ...@@ -749,7 +751,8 @@ TEST(MacroAssemblerIA32Backtrack) {
m.Bind(&backtrack); m.Bind(&backtrack);
m.Fail(); m.Fail();
Handle<Object> code_object = m.GetCode(); Handle<String> source = Factory::NewStringFromAscii(CStrVector(".........."));
Handle<Object> code_object = m.GetCode(source);
Handle<Code> code = Handle<Code>::cast(code_object); Handle<Code> code = Handle<Code>::cast(code_object);
Handle<String> input = Factory::NewStringFromAscii(CStrVector("foofoo")); Handle<String> input = Factory::NewStringFromAscii(CStrVector("foofoo"));
...@@ -778,9 +781,9 @@ TEST(MacroAssemblerIA32BackReference) { ...@@ -778,9 +781,9 @@ TEST(MacroAssemblerIA32BackReference) {
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 3); RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 3);
m.WriteCurrentPositionToRegister(0); m.WriteCurrentPositionToRegister(0, 0);
m.AdvanceCurrentPosition(2); m.AdvanceCurrentPosition(2);
m.WriteCurrentPositionToRegister(1); m.WriteCurrentPositionToRegister(1, 0);
Label nomatch; Label nomatch;
m.CheckNotBackReference(0, &nomatch); m.CheckNotBackReference(0, &nomatch);
m.Fail(); m.Fail();
...@@ -788,12 +791,13 @@ TEST(MacroAssemblerIA32BackReference) { ...@@ -788,12 +791,13 @@ TEST(MacroAssemblerIA32BackReference) {
m.AdvanceCurrentPosition(2); m.AdvanceCurrentPosition(2);
Label missing_match; Label missing_match;
m.CheckNotBackReference(0, &missing_match); m.CheckNotBackReference(0, &missing_match);
m.WriteCurrentPositionToRegister(2); m.WriteCurrentPositionToRegister(2, 0);
m.Succeed(); m.Succeed();
m.Bind(&missing_match); m.Bind(&missing_match);
m.Fail(); m.Fail();
Handle<Object> code_object = m.GetCode(); Handle<String> source = Factory::NewStringFromAscii(CStrVector("^(..)..\1"));
Handle<Object> code_object = m.GetCode(source);
Handle<Code> code = Handle<Code>::cast(code_object); Handle<Code> code = Handle<Code>::cast(code_object);
Handle<String> input = Factory::NewStringFromAscii(CStrVector("fooofo")); Handle<String> input = Factory::NewStringFromAscii(CStrVector("fooofo"));
...@@ -826,9 +830,6 @@ TEST(MacroAssemblerIA32AtStart) { ...@@ -826,9 +830,6 @@ TEST(MacroAssemblerIA32AtStart) {
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 0); RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 0);
uc16 foo_chars[3] = {'f', 'o', 'o'};
Vector<const uc16> foo(foo_chars, 3);
Label not_at_start, newline, fail; Label not_at_start, newline, fail;
m.CheckNotAtStart(&not_at_start); m.CheckNotAtStart(&not_at_start);
// Check that prevchar = '\n' and current = 'f'. // Check that prevchar = '\n' and current = 'f'.
...@@ -850,7 +851,8 @@ TEST(MacroAssemblerIA32AtStart) { ...@@ -850,7 +851,8 @@ TEST(MacroAssemblerIA32AtStart) {
m.CheckNotCharacter('b', &fail); m.CheckNotCharacter('b', &fail);
m.Succeed(); m.Succeed();
Handle<Object> code_object = m.GetCode(); Handle<String> source = Factory::NewStringFromAscii(CStrVector("(^f|ob)"));
Handle<Object> code_object = m.GetCode(source);
Handle<Code> code = Handle<Code>::cast(code_object); Handle<Code> code = Handle<Code>::cast(code_object);
Handle<String> input = Factory::NewStringFromAscii(CStrVector("foobar")); Handle<String> input = Factory::NewStringFromAscii(CStrVector("foobar"));
...@@ -893,10 +895,10 @@ TEST(MacroAssemblerIA32BackRefNoCase) { ...@@ -893,10 +895,10 @@ TEST(MacroAssemblerIA32BackRefNoCase) {
Label fail, succ; Label fail, succ;
m.WriteCurrentPositionToRegister(0); m.WriteCurrentPositionToRegister(0, 0);
m.WriteCurrentPositionToRegister(2); m.WriteCurrentPositionToRegister(2, 0);
m.AdvanceCurrentPosition(3); m.AdvanceCurrentPosition(3);
m.WriteCurrentPositionToRegister(3); m.WriteCurrentPositionToRegister(3, 0);
m.CheckNotBackReferenceIgnoreCase(2, &fail); // Match "AbC". m.CheckNotBackReferenceIgnoreCase(2, &fail); // Match "AbC".
m.CheckNotBackReferenceIgnoreCase(2, &fail); // Match "ABC". m.CheckNotBackReferenceIgnoreCase(2, &fail); // Match "ABC".
Label expected_fail; Label expected_fail;
...@@ -910,10 +912,12 @@ TEST(MacroAssemblerIA32BackRefNoCase) { ...@@ -910,10 +912,12 @@ TEST(MacroAssemblerIA32BackRefNoCase) {
m.Fail(); m.Fail();
m.Bind(&succ); m.Bind(&succ);
m.WriteCurrentPositionToRegister(1); m.WriteCurrentPositionToRegister(1, 0);
m.Succeed(); m.Succeed();
Handle<Object> code_object = m.GetCode(); Handle<String> source =
Factory::NewStringFromAscii(CStrVector("^(abc)\1\1(?!\1)...(?!\1)"));
Handle<Object> code_object = m.GetCode(source);
Handle<Code> code = Handle<Code>::cast(code_object); Handle<Code> code = Handle<Code>::cast(code_object);
Handle<String> input = Handle<String> input =
...@@ -955,13 +959,13 @@ TEST(MacroAssemblerIA32Registers) { ...@@ -955,13 +959,13 @@ TEST(MacroAssemblerIA32Registers) {
enum registers { out1, out2, out3, out4, out5, sp, loop_cnt }; enum registers { out1, out2, out3, out4, out5, sp, loop_cnt };
Label fail; Label fail;
Label backtrack; Label backtrack;
m.WriteCurrentPositionToRegister(out1); // Output: [0] m.WriteCurrentPositionToRegister(out1, 0); // Output: [0]
m.PushRegister(out1); m.PushRegister(out1);
m.PushBacktrack(&backtrack); m.PushBacktrack(&backtrack);
m.WriteStackPointerToRegister(sp); m.WriteStackPointerToRegister(sp);
// Fill stack and registers // Fill stack and registers
m.AdvanceCurrentPosition(2); m.AdvanceCurrentPosition(2);
m.WriteCurrentPositionToRegister(out1); m.WriteCurrentPositionToRegister(out1, 0);
m.PushRegister(out1); m.PushRegister(out1);
m.PushBacktrack(&fail); m.PushBacktrack(&fail);
// Drop backtrack stack frames. // Drop backtrack stack frames.
...@@ -977,7 +981,7 @@ TEST(MacroAssemblerIA32Registers) { ...@@ -977,7 +981,7 @@ TEST(MacroAssemblerIA32Registers) {
m.PopRegister(out1); m.PopRegister(out1);
m.ReadCurrentPositionFromRegister(out1); m.ReadCurrentPositionFromRegister(out1);
m.AdvanceCurrentPosition(3); m.AdvanceCurrentPosition(3);
m.WriteCurrentPositionToRegister(out2); // [0,3] m.WriteCurrentPositionToRegister(out2, 0); // [0,3]
Label loop; Label loop;
m.SetRegister(loop_cnt, 0); // loop counter m.SetRegister(loop_cnt, 0); // loop counter
...@@ -985,7 +989,7 @@ TEST(MacroAssemblerIA32Registers) { ...@@ -985,7 +989,7 @@ TEST(MacroAssemblerIA32Registers) {
m.AdvanceRegister(loop_cnt, 1); m.AdvanceRegister(loop_cnt, 1);
m.AdvanceCurrentPosition(1); m.AdvanceCurrentPosition(1);
m.IfRegisterLT(loop_cnt, 3, &loop); m.IfRegisterLT(loop_cnt, 3, &loop);
m.WriteCurrentPositionToRegister(out3); // [0,3,6] m.WriteCurrentPositionToRegister(out3, 0); // [0,3,6]
Label loop2; Label loop2;
m.SetRegister(loop_cnt, 2); // loop counter m.SetRegister(loop_cnt, 2); // loop counter
...@@ -993,24 +997,29 @@ TEST(MacroAssemblerIA32Registers) { ...@@ -993,24 +997,29 @@ TEST(MacroAssemblerIA32Registers) {
m.AdvanceRegister(loop_cnt, -1); m.AdvanceRegister(loop_cnt, -1);
m.AdvanceCurrentPosition(1); m.AdvanceCurrentPosition(1);
m.IfRegisterGE(loop_cnt, 0, &loop2); m.IfRegisterGE(loop_cnt, 0, &loop2);
m.WriteCurrentPositionToRegister(out4); // [0,3,6,9] m.WriteCurrentPositionToRegister(out4, 0); // [0,3,6,9]
Label loop3; Label loop3;
Label exit_loop3; Label exit_loop3;
m.PushRegister(out4);
m.PushRegister(out4);
m.ReadCurrentPositionFromRegister(out3); m.ReadCurrentPositionFromRegister(out3);
m.Bind(&loop3); m.Bind(&loop3);
m.AdvanceCurrentPosition(1); m.AdvanceCurrentPosition(1);
m.CheckCurrentPosition(out4, &exit_loop3); m.CheckGreedyLoop(&exit_loop3);
m.GoTo(&loop3); m.GoTo(&loop3);
m.Bind(&exit_loop3); m.Bind(&exit_loop3);
m.WriteCurrentPositionToRegister(out5); // [0,3,6,9,9] m.PopCurrentPosition();
m.WriteCurrentPositionToRegister(out5, 0); // [0,3,6,9,9]
m.Succeed(); m.Succeed();
m.Bind(&fail); m.Bind(&fail);
m.Fail(); m.Fail();
Handle<Object> code_object = m.GetCode(); Handle<String> source =
Factory::NewStringFromAscii(CStrVector("<loop test>"));
Handle<Object> code_object = m.GetCode(source);
Handle<Code> code = Handle<Code>::cast(code_object); Handle<Code> code = Handle<Code>::cast(code_object);
// String long enough for test (content doesn't matter). // String long enough for test (content doesn't matter).
...@@ -1291,5 +1300,5 @@ TEST(CharClassDifference) { ...@@ -1291,5 +1300,5 @@ TEST(CharClassDifference) {
TEST(Graph) { TEST(Graph) {
V8::Initialize(NULL); V8::Initialize(NULL);
Execute("\\b\\w", false, true); Execute("(?=[d#.])", false, true);
} }
...@@ -286,3 +286,23 @@ for (var i = 0; i < 128; i++) { ...@@ -286,3 +286,23 @@ for (var i = 0; i < 128; i++) {
} }
assertFalse(/f(o)$\1/.test('foo'), "backref detects at_end"); assertFalse(/f(o)$\1/.test('foo'), "backref detects at_end");
// Check that we don't read past the end of the string.
assertFalse(/f/.test('b'));
assertFalse(/[abc]f/.test('x'));
assertFalse(/[abc]f/.test('xa'));
assertFalse(/[abc]</.test('x'));
assertFalse(/[abc]</.test('xa'));
assertFalse(/f/i.test('b'));
assertFalse(/[abc]f/i.test('x'));
assertFalse(/[abc]f/i.test('xa'));
assertFalse(/[abc]</i.test('x'));
assertFalse(/[abc]</i.test('xa'));
assertFalse(/f[abc]/.test('x'));
assertFalse(/f[abc]/.test('xa'));
assertFalse(/<[abc]/.test('x'));
assertFalse(/<[abc]/.test('xa'));
assertFalse(/f[abc]/i.test('x'));
assertFalse(/f[abc]/i.test('xa'));
assertFalse(/<[abc]/i.test('x'));
assertFalse(/<[abc]/i.test('xa'));
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment