Commit ba09ec5e authored by erik.corry@gmail.com

Irregexp:

* Facility for generating a node several ways.  This allows
  code to be generated for a node knowing where it is trying
  to match relative to the 'current position' and it allows
  code to be generated that knows where to backtrack to.  Both
  allow dramatic reductions in the amount of popping and pushing
  on the stack and the number of indirect jumps.
* Generate special backtracking for greedy quantifiers on
  constant-length atoms.  This allows .* to run in constant
  space relative to input string size.
* When we are checking a long sequence of characters or character
  classes in the input then we do them right to left and only the
  first (rightmost) needs to check for end-of-string.
* Record the pattern in the profile instead of just <CompiledRegExp>.
* Nodes no longer contain an on_failure_ node.  This was only used
  for lookaheads and they are now handled with a choice node instead.
Review URL: http://codereview.chromium.org/12900

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@930 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent f306b978
......@@ -1216,8 +1216,7 @@ class RegExpTree: public ZoneObject {
virtual ~RegExpTree() { }
virtual void* Accept(RegExpVisitor* visitor, void* data) = 0;
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure) = 0;
RegExpNode* on_success) = 0;
virtual bool IsTextElement() { return false; }
virtual void AppendToText(RegExpText* text);
SmartPointer<const char> ToString();
......@@ -1235,8 +1234,7 @@ class RegExpDisjunction: public RegExpTree {
: alternatives_(alternatives) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
RegExpNode* on_success);
virtual RegExpDisjunction* AsDisjunction();
virtual bool IsDisjunction();
ZoneList<RegExpTree*>* alternatives() { return alternatives_; }
......@@ -1250,8 +1248,7 @@ class RegExpAlternative: public RegExpTree {
explicit RegExpAlternative(ZoneList<RegExpTree*>* nodes) : nodes_(nodes) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
RegExpNode* on_success);
virtual RegExpAlternative* AsAlternative();
virtual bool IsAlternative();
ZoneList<RegExpTree*>* nodes() { return nodes_; }
......@@ -1265,8 +1262,7 @@ class RegExpText: public RegExpTree {
RegExpText() : elements_(2) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
RegExpNode* on_success);
virtual RegExpText* AsText();
virtual bool IsText();
virtual bool IsTextElement() { return true; }
......@@ -1291,8 +1287,7 @@ class RegExpAssertion: public RegExpTree {
explicit RegExpAssertion(Type type) : type_(type) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
RegExpNode* on_success);
virtual RegExpAssertion* AsAssertion();
virtual bool IsAssertion();
Type type() { return type_; }
......@@ -1313,8 +1308,7 @@ class RegExpCharacterClass: public RegExpTree {
}
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
RegExpNode* on_success);
virtual RegExpCharacterClass* AsCharacterClass();
virtual bool IsCharacterClass();
virtual bool IsTextElement() { return true; }
......@@ -1332,8 +1326,7 @@ class RegExpAtom: public RegExpTree {
explicit RegExpAtom(Vector<const uc16> data) : data_(data) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
RegExpNode* on_success);
virtual RegExpAtom* AsAtom();
virtual bool IsAtom();
virtual bool IsTextElement() { return true; }
......@@ -1353,15 +1346,13 @@ class RegExpQuantifier: public RegExpTree {
body_(body) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
RegExpNode* on_success);
static RegExpNode* ToNode(int min,
int max,
bool is_greedy,
RegExpTree* body,
RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
RegExpNode* on_success);
virtual RegExpQuantifier* AsQuantifier();
virtual bool IsQuantifier();
int min() { return min_; }
......@@ -1391,13 +1382,11 @@ class RegExpCapture: public RegExpTree {
: body_(body), index_(index), available_(CAPTURE_AVAILABLE) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
RegExpNode* on_success);
static RegExpNode* ToNode(RegExpTree* body,
int index,
RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
RegExpNode* on_success);
virtual RegExpCapture* AsCapture();
virtual bool IsCapture();
RegExpTree* body() { return body_; }
......@@ -1422,8 +1411,7 @@ class RegExpLookahead: public RegExpTree {
is_positive_(is_positive) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
RegExpNode* on_success);
virtual RegExpLookahead* AsLookahead();
virtual bool IsLookahead();
RegExpTree* body() { return body_; }
......@@ -1440,8 +1428,7 @@ class RegExpBackReference: public RegExpTree {
: capture_(capture) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
RegExpNode* on_success);
virtual RegExpBackReference* AsBackReference();
virtual bool IsBackReference();
int index() { return capture_->index(); }
......@@ -1456,8 +1443,7 @@ class RegExpEmpty: public RegExpTree {
RegExpEmpty() { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success,
RegExpNode* on_failure);
RegExpNode* on_success);
virtual RegExpEmpty* AsEmpty();
virtual bool IsEmpty();
static RegExpEmpty* GetInstance() { return &kInstance; }
......
......@@ -50,22 +50,24 @@ V(SUCCEED, 14, 1) /* succeed */ \
V(ADVANCE_CP, 15, 5) /* advance_cp offset32 */ \
V(GOTO, 16, 5) /* goto addr32 */ \
V(LOAD_CURRENT_CHAR, 17, 9) /* load offset32 addr32 */ \
V(CHECK_CHAR, 18, 7) /* check_char uc16 addr32 */ \
V(CHECK_NOT_CHAR, 19, 7) /* check_not_char uc16 addr32 */ \
V(OR_CHECK_NOT_CHAR, 20, 9) /* or_check_not_char uc16 uc16 addr32 */ \
V(MINUS_OR_CHECK_NOT_CHAR, 21, 9) /* minus_or_check_not_char uc16 uc16 ad...*/ \
V(CHECK_LT, 22, 7) /* check_lt uc16 addr32 */ \
V(CHECK_GT, 23, 7) /* check_gr uc16 addr32 */ \
V(CHECK_NOT_BACK_REF, 24, 6) /* check_not_back_ref capture_idx addr32 */ \
V(CHECK_NOT_BACK_REF_NO_CASE, 25, 6) /* check_not_back_ref_no_case captu... */ \
V(CHECK_NOT_REGS_EQUAL, 26, 7) /* check_not_regs_equal reg1 reg2 addr32 */ \
V(LOOKUP_MAP1, 27, 11) /* l_map1 start16 bit_map_addr32 addr32 */ \
V(LOOKUP_MAP2, 28, 99) /* l_map2 start16 half_nibble_map_addr32* */ \
V(LOOKUP_MAP8, 29, 99) /* l_map8 start16 byte_map addr32* */ \
V(LOOKUP_HI_MAP8, 30, 99) /* l_himap8 start8 byte_map_addr32 addr32* */ \
V(CHECK_REGISTER_LT, 31, 8) /* check_reg_lt register_index value16 addr32 */ \
V(CHECK_REGISTER_GE, 32, 8) /* check_reg_ge register_index value16 addr32 */ \
V(CHECK_NOT_AT_START, 33, 5) /* check_not_at_start addr32 */
V(LOAD_CURRENT_CHAR_UNCHECKED, 18, 5) /* load offset32 */ \
V(CHECK_CHAR, 19, 7) /* check_char uc16 addr32 */ \
V(CHECK_NOT_CHAR, 20, 7) /* check_not_char uc16 addr32 */ \
V(OR_CHECK_NOT_CHAR, 21, 9) /* or_check_not_char uc16 uc16 addr32 */ \
V(MINUS_OR_CHECK_NOT_CHAR, 22, 9) /* minus_or_check_not_char uc16 uc16 ad...*/ \
V(CHECK_LT, 23, 7) /* check_lt uc16 addr32 */ \
V(CHECK_GT, 24, 7) /* check_gr uc16 addr32 */ \
V(CHECK_NOT_BACK_REF, 25, 6) /* check_not_back_ref capture_idx addr32 */ \
V(CHECK_NOT_BACK_REF_NO_CASE, 26, 6) /* check_not_back_ref_no_case captu... */ \
V(CHECK_NOT_REGS_EQUAL, 27, 7) /* check_not_regs_equal reg1 reg2 addr32 */ \
V(LOOKUP_MAP1, 28, 11) /* l_map1 start16 bit_map_addr32 addr32 */ \
V(LOOKUP_MAP2, 29, 99) /* l_map2 start16 half_nibble_map_addr32* */ \
V(LOOKUP_MAP8, 30, 99) /* l_map8 start16 byte_map addr32* */ \
V(LOOKUP_HI_MAP8, 31, 99) /* l_himap8 start8 byte_map_addr32 addr32* */ \
V(CHECK_REGISTER_LT, 32, 8) /* check_reg_lt register_index value16 addr32 */ \
V(CHECK_REGISTER_GE, 33, 8) /* check_reg_ge register_index value16 addr32 */ \
V(CHECK_NOT_AT_START, 34, 5) /* check_not_at_start addr32 */ \
V(CHECK_GREEDY, 35, 5) /* check_greedy addr32 */
#define DECLARE_BYTECODES(name, code, length) \
static const int BC_##name = code;
......
......@@ -191,6 +191,15 @@ static bool RawMatch(const byte* code_base,
BYTECODE(GOTO)
pc = code_base + Load32(pc + 1);
break;
BYTECODE(CHECK_GREEDY)
if (current == backtrack_sp[-1]) {
backtrack_sp--;
backtrack_stack_space++;
pc = code_base + Load32(pc + 1);
} else {
pc += BC_CHECK_GREEDY_LENGTH;
}
break;
BYTECODE(LOAD_CURRENT_CHAR) {
int pos = current + Load32(pc + 1);
if (pos >= subject.length()) {
......@@ -201,6 +210,12 @@ static bool RawMatch(const byte* code_base,
}
break;
}
BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) {
int pos = current + Load32(pc + 1);
current_char = subject[pos];
pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH;
break;
}
BYTECODE(CHECK_CHAR) {
int c = Load16(pc + 1);
if (c == current_char) {
......
......@@ -253,11 +253,6 @@ static void DoForEach(Node* node, Callback* callback) {
}
// Binds this node's label_ in the supplied macro assembler by forwarding
// to macro->Bind().
void RegExpNode::Bind(RegExpMacroAssembler* macro) {
macro->Bind(&label_);
}
}} // namespace v8::internal
......
This diff is collapsed.
This diff is collapsed.
......@@ -184,11 +184,14 @@ void RegExpMacroAssemblerIA32::CheckCharacterLT(uc16 limit, Label* on_less) {
void RegExpMacroAssemblerIA32::CheckCharacters(Vector<const uc16> str,
int cp_offset,
Label* on_failure) {
Label* on_failure,
bool check_end_of_string) {
int byte_length = str.length() * char_size();
int byte_offset = cp_offset * char_size();
__ cmp(Operand(edi), Immediate(-(byte_offset + byte_length)));
BranchOrBacktrack(greater, on_failure);
if (check_end_of_string) {
__ cmp(Operand(edi), Immediate(-(byte_offset + byte_length)));
BranchOrBacktrack(greater, on_failure);
}
if (str.length() <= kMaxInlineStringTests) {
for (int i = 0; i < str.length(); i++) {
......@@ -233,10 +236,13 @@ void RegExpMacroAssemblerIA32::CheckCharacters(Vector<const uc16> str,
}
void RegExpMacroAssemblerIA32::CheckCurrentPosition(int register_index,
Label* on_equal) {
__ cmp(edi, register_location(register_index));
BranchOrBacktrack(equal, on_equal);
// Emits the greedy-loop check.  The generated code compares edi with the
// word on top of the machine stack (esp + 0).  If they differ, execution
// falls through to whatever is emitted after this call.  If they are equal,
// the top stack entry is popped and control is transferred through
// BranchOrBacktrack(no_condition, on_equal).
// NOTE(review): edi presumably holds the current input position (the class
// declaration names this label on_tos_equals_current_position) - confirm.
void RegExpMacroAssemblerIA32::CheckGreedyLoop(Label* on_equal) {
Label fallthrough;
// Not equal: skip both the pop and the jump.
__ cmp(edi, Operand(esp, 0));
__ j(not_equal, &fallthrough);
__ add(Operand(esp), Immediate(4)); // Pop.
BranchOrBacktrack(no_condition, on_equal);
__ bind(&fallthrough);
}
......@@ -482,7 +488,7 @@ void RegExpMacroAssemblerIA32::Fail() {
}
Handle<Object> RegExpMacroAssemblerIA32::GetCode() {
Handle<Object> RegExpMacroAssemblerIA32::GetCode(Handle<String> source) {
// Finalize code - write the entry point code now we know how many
// registers we need.
......@@ -521,7 +527,7 @@ Handle<Object> RegExpMacroAssemblerIA32::GetCode() {
Label at_start;
__ cmp(Operand(ebp, kAtStart), Immediate(0));
__ j(not_equal, &at_start);
LoadCurrentCharToRegister(-1); // Load previous char.
LoadCurrentCharacterUnchecked(-1); // Load previous char.
__ jmp(&start_label_);
__ bind(&at_start);
__ mov(current_character(), '\n');
......@@ -562,7 +568,7 @@ Handle<Object> RegExpMacroAssemblerIA32::GetCode() {
NULL,
Code::ComputeFlags(Code::REGEXP),
self_);
LOG(CodeCreateEvent("RegExp", *code, "(Compiled RegExp)"));
LOG(CodeCreateEvent("RegExp", *code, *(source->ToCString())));
return Handle<Object>::cast(code);
}
......@@ -600,7 +606,7 @@ void RegExpMacroAssemblerIA32::LoadCurrentCharacter(int cp_offset,
ASSERT(cp_offset < (1<<30)); // Be sane! (And ensure negation works)
__ cmp(edi, -cp_offset * char_size());
BranchOrBacktrack(greater_equal, on_end_of_input);
LoadCurrentCharToRegister(cp_offset);
LoadCurrentCharacterUnchecked(cp_offset);
}
......@@ -651,10 +657,17 @@ void RegExpMacroAssemblerIA32::Succeed() {
}
void RegExpMacroAssemblerIA32::WriteCurrentPositionToRegister(int reg) {
__ mov(register_location(reg), edi);
// Stores the current input position, displaced by cp_offset characters, in
// the regexp register with the given index.
//   reg        index of the regexp register to write (addressed through
//              register_location()).
//   cp_offset  displacement from the current position, in characters.
void RegExpMacroAssemblerIA32::WriteCurrentPositionToRegister(int reg,
                                                              int cp_offset) {
  if (cp_offset == 0) {
    // No displacement: store edi directly.
    __ mov(register_location(reg), edi);
  } else {
    // edi is a byte offset (CheckCharacters and LoadCurrentCharacter compare
    // it against cp_offset * char_size()), so the character displacement
    // must be scaled by the character size before it is added.  Without the
    // scaling the stored position is wrong whenever char_size() != 1.
    __ lea(eax, Operand(edi, cp_offset * char_size()));
    __ mov(register_location(reg), eax);
  }
}
// Emits a move of the machine stack pointer (esp) into the regexp register
// with the given index, addressed through register_location().
void RegExpMacroAssemblerIA32::WriteStackPointerToRegister(int reg) {
__ mov(register_location(reg), esp);
}
......@@ -770,7 +783,7 @@ void RegExpMacroAssemblerIA32::CheckStackLimit() {
}
void RegExpMacroAssemblerIA32::LoadCurrentCharToRegister(int cp_offset) {
void RegExpMacroAssemblerIA32::LoadCurrentCharacterUnchecked(int cp_offset) {
if (mode_ == ASCII) {
__ movzx_b(current_character(), Operand(esi, edi, times_1, cp_offset));
return;
......
......@@ -47,8 +47,9 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
virtual void CheckCharacterLT(uc16 limit, Label* on_less);
virtual void CheckCharacters(Vector<const uc16> str,
int cp_offset,
Label* on_failure);
virtual void CheckCurrentPosition(int register_index, Label* on_equal);
Label* on_failure,
bool check_end_of_string);
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
virtual void CheckNotAtStart(Label* on_not_at_start);
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
......@@ -70,12 +71,14 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
const Vector<Label*>& destinations);
virtual void EmitOrLink(Label* label);
virtual void Fail();
virtual Handle<Object> GetCode();
virtual Handle<Object> GetCode(Handle<String> source);
virtual void GoTo(Label* label);
virtual void IfRegisterGE(int reg, int comparand, Label* if_ge);
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
virtual IrregexpImplementation Implementation();
virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input);
virtual void LoadCurrentCharacterUnchecked(int cp_offset);
virtual void PopCurrentPosition();
virtual void PopRegister(int register_index);
virtual void PushBacktrack(Label* label);
......@@ -85,7 +88,7 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
virtual void ReadStackPointerFromRegister(int reg);
virtual void SetRegister(int register_index, int to);
virtual void Succeed();
virtual void WriteCurrentPositionToRegister(int reg);
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
virtual void WriteStackPointerToRegister(int reg);
template <typename T>
......@@ -139,10 +142,6 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
// is NULL, in which case it is a conditional Backtrack.
void BranchOrBacktrack(Condition condition, Label* to);
// Read a character from input at the given offset from the current
// position.
void LoadCurrentCharToRegister(int cp_offset);
// Load the address of a "constant buffer" (a slice of a byte array)
// into a register. The address is computed from the ByteArray* address
// and an offset. Uses no extra registers.
......
......@@ -68,6 +68,7 @@ void RegExpMacroAssemblerIrregexp::Bind(Label* l) {
void RegExpMacroAssemblerIrregexp::EmitOrLink(Label* l) {
if (l == NULL) l = &backtrack_;
if (l->is_bound()) {
Emit32(l->pos());
} else {
......@@ -95,11 +96,11 @@ void RegExpMacroAssemblerIrregexp::PushRegister(int register_index) {
void RegExpMacroAssemblerIrregexp::WriteCurrentPositionToRegister(
int register_index) {
int register_index, int cp_offset) {
ASSERT(register_index >= 0);
Emit(BC_SET_REGISTER_TO_CP);
Emit(register_index);
Emit32(0); // Current position offset.
Emit32(cp_offset); // Current position offset.
}
......@@ -187,11 +188,10 @@ void RegExpMacroAssemblerIrregexp::AdvanceCurrentPosition(int by) {
}
void RegExpMacroAssemblerIrregexp::CheckCurrentPosition(
int register_index,
Label* on_equal) {
// TODO(erikcorry): Implement.
UNIMPLEMENTED();
// Emits a BC_CHECK_GREEDY instruction: the opcode followed by the label
// operand written by EmitOrLink().  The bytecode interpreter handles this
// opcode by comparing the current position with the top backtrack-stack
// entry; on equality it pops that entry and jumps to the label address,
// otherwise it continues with the next instruction.
void RegExpMacroAssemblerIrregexp::CheckGreedyLoop(
Label* on_tos_equals_current_position) {
Emit(BC_CHECK_GREEDY);
EmitOrLink(on_tos_equals_current_position);
}
......@@ -203,6 +203,13 @@ void RegExpMacroAssemblerIrregexp::LoadCurrentCharacter(int cp_offset,
}
// Emits a BC_LOAD_CURRENT_CHAR_UNCHECKED instruction: the opcode followed by
// cp_offset as a 32-bit operand.  Unlike BC_LOAD_CURRENT_CHAR, no
// end-of-input label is emitted; the interpreter reads the subject at
// current + cp_offset without comparing against the subject length.
// NOTE(review): callers presumably must guarantee the position is in range -
// confirm against the code generator.
void RegExpMacroAssemblerIrregexp::LoadCurrentCharacterUnchecked(
int cp_offset) {
Emit(BC_LOAD_CURRENT_CHAR_UNCHECKED);
Emit32(cp_offset);
}
void RegExpMacroAssemblerIrregexp::CheckCharacterLT(uc16 limit,
Label* on_less) {
Emit(BC_CHECK_LT);
......@@ -263,7 +270,7 @@ void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterMinusOr(
void RegExpMacroAssemblerIrregexp::CheckNotBackReference(int start_reg,
Label* on_not_equal) {
Label* on_not_equal) {
Emit(BC_CHECK_NOT_BACK_REF);
Emit(start_reg);
EmitOrLink(on_not_equal);
......@@ -323,11 +330,19 @@ void RegExpMacroAssemblerIrregexp::DispatchHighByteMap(
void RegExpMacroAssemblerIrregexp::CheckCharacters(
Vector<const uc16> str,
int cp_offset,
Label* on_failure) {
Label* on_failure,
bool check_end_of_string) {
// It is vital that this loop is backwards due to the unchecked character
// load below.
for (int i = str.length() - 1; i >= 0; i--) {
Emit(BC_LOAD_CURRENT_CHAR);
Emit32(cp_offset + i);
EmitOrLink(on_failure);
if (check_end_of_string && i == str.length() - 1) {
Emit(BC_LOAD_CURRENT_CHAR);
Emit32(cp_offset + i);
EmitOrLink(on_failure);
} else {
Emit(BC_LOAD_CURRENT_CHAR_UNCHECKED);
Emit32(cp_offset + i);
}
Emit(BC_CHECK_NOT_CHAR);
Emit16(str[i]);
EmitOrLink(on_failure);
......@@ -357,7 +372,9 @@ void RegExpMacroAssemblerIrregexp::IfRegisterGE(int register_index,
}
Handle<Object> RegExpMacroAssemblerIrregexp::GetCode() {
Handle<Object> RegExpMacroAssemblerIrregexp::GetCode(Handle<String> source) {
Bind(&backtrack_);
Emit(BC_POP_BT);
Handle<ByteArray> array = Factory::NewByteArray(length());
Copy(array->GetDataStartAddress());
return array;
......
......@@ -62,14 +62,16 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
virtual void PushRegister(int register_index);
virtual void AdvanceRegister(int reg, int by); // r[reg] += by.
virtual void SetRegister(int register_index, int to);
virtual void WriteCurrentPositionToRegister(int reg);
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
virtual void ReadCurrentPositionFromRegister(int reg);
virtual void WriteStackPointerToRegister(int reg);
virtual void ReadStackPointerFromRegister(int reg);
virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input);
virtual void LoadCurrentCharacterUnchecked(int cp_offset);
virtual void CheckCharacterLT(uc16 limit, Label* on_less);
virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
virtual void CheckCharacter(uc16 c, Label* on_equal);
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
virtual void CheckNotAtStart(Label* on_not_at_start);
virtual void CheckNotCharacter(uc16 c, Label* on_not_equal);
virtual void CheckNotCharacterAfterOr(uc16 c, uc16 mask, Label* on_not_equal);
......@@ -82,8 +84,8 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
virtual void CheckNotRegistersEqual(int reg1, int reg2, Label* on_not_equal);
virtual void CheckCharacters(Vector<const uc16> str,
int cp_offset,
Label* on_failure);
virtual void CheckCurrentPosition(int register_index, Label* on_equal);
Label* on_failure,
bool check_end_of_string);
virtual void CheckBitmap(uc16 start, Label* bitmap, Label* on_zero);
virtual void DispatchHalfNibbleMap(uc16 start,
Label* half_nibble_map,
......@@ -98,7 +100,7 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
virtual void IfRegisterGE(int register_index, int comparand, Label* if_ge);
virtual IrregexpImplementation Implementation();
virtual Handle<Object> GetCode();
virtual Handle<Object> GetCode(Handle<String> source);
private:
void Expand();
// Code and bitmap emission.
......@@ -109,14 +111,13 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
int length();
void Copy(Address a);
// The buffer into which code and relocation info are generated.
Vector<byte> buffer_;
// The program counter.
int pc_;
// True if the assembler owns the buffer, false if buffer is external.
bool own_buffer_;
Label backtrack_;
DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpMacroAssemblerIrregexp);
};
......
......@@ -64,6 +64,12 @@ void RegExpMacroAssemblerTracer::AdvanceCurrentPosition(int by) {
}
// Prints a trace line for the CheckGreedyLoop call and then forwards the
// call unchanged to the wrapped assembler_.
// NOTE(review): the Label* is printed with %08x, as in the other tracer
// methods in this file; that is a pointer/int format mismatch - consider %p.
void RegExpMacroAssemblerTracer::CheckGreedyLoop(Label* label) {
PrintF(" CheckGreedyLoop(label[%08x]);\n\n", label);
assembler_->CheckGreedyLoop(label);
}
void RegExpMacroAssemblerTracer::PopCurrentPosition() {
PrintF(" PopCurrentPosition();\n");
assembler_->PopCurrentPosition();
......@@ -130,9 +136,12 @@ void RegExpMacroAssemblerTracer::SetRegister(int register_index, int to) {
}
void RegExpMacroAssemblerTracer::WriteCurrentPositionToRegister(int reg) {
PrintF(" WriteCurrentPositionToRegister(register=%d);\n", reg);
assembler_->WriteCurrentPositionToRegister(reg);
// Prints a trace line showing the register index and the cp_offset, then
// forwards both arguments unchanged to the wrapped assembler_.
void RegExpMacroAssemblerTracer::WriteCurrentPositionToRegister(int reg,
int cp_offset) {
PrintF(" WriteCurrentPositionToRegister(register=%d,cp_offset=%d);\n",
reg,
cp_offset);
assembler_->WriteCurrentPositionToRegister(reg, cp_offset);
}
......@@ -156,12 +165,20 @@ void RegExpMacroAssemblerTracer::ReadStackPointerFromRegister(int reg) {
void RegExpMacroAssemblerTracer::LoadCurrentCharacter(int cp_offset,
Label* on_end_of_input) {
PrintF(" LoadCurrentCharacter(cp_offset=%d, label[%08x]);\n", cp_offset,
PrintF(" LoadCurrentCharacter(cp_offset=%d, label[%08x]);\n",
cp_offset,
on_end_of_input);
assembler_->LoadCurrentCharacter(cp_offset, on_end_of_input);
}
// Prints a trace line showing cp_offset, then forwards the call unchanged to
// the wrapped assembler_.
void RegExpMacroAssemblerTracer::LoadCurrentCharacterUnchecked(int cp_offset) {
PrintF(" LoadCurrentCharacterUnchecked(cp_offset=%d);\n",
cp_offset);
assembler_->LoadCurrentCharacterUnchecked(cp_offset);
}
void RegExpMacroAssemblerTracer::CheckCharacterLT(uc16 limit, Label* on_less) {
PrintF(" CheckCharacterLT(c='u%04x', label[%08x]);\n", limit, on_less);
assembler_->CheckCharacterLT(limit, on_less);
......@@ -242,21 +259,15 @@ void RegExpMacroAssemblerTracer::CheckNotRegistersEqual(int reg1,
void RegExpMacroAssemblerTracer::CheckCharacters(Vector<const uc16> str,
int cp_offset,
Label* on_failure) {
PrintF(" CheckCharacters(str=\"");
Label* on_failure,
bool check_end_of_string) {
PrintF(" %s(str=\"",
check_end_of_string ? "CheckCharacters" : "CheckCharactersUnchecked");
for (int i = 0; i < str.length(); i++) {
PrintF("u%04x", str[i]);
}
PrintF("\", cp_offset=%d, label[%08x])\n", cp_offset, on_failure);
assembler_->CheckCharacters(str, cp_offset, on_failure);
}
void RegExpMacroAssemblerTracer::CheckCurrentPosition(int register_index,
Label* on_equal) {
PrintF(" CheckCurrentPosition(register=%d, label[%08x]);\n", register_index,
on_equal);
assembler_->CheckCurrentPosition(register_index, on_equal);
assembler_->CheckCharacters(str, cp_offset, on_failure, check_end_of_string);
}
......@@ -334,9 +345,9 @@ RegExpMacroAssembler::IrregexpImplementation
}
Handle<Object> RegExpMacroAssemblerTracer::GetCode() {
PrintF(" GetCode();\n");
return assembler_->GetCode();
// Prints a trace line containing the regexp source (converted with
// ToCString()), then returns whatever the wrapped assembler_ produces for
// the same source string.
Handle<Object> RegExpMacroAssemblerTracer::GetCode(Handle<String> source) {
PrintF(" GetCode(%s);\n", *(source->ToCString()));
return assembler_->GetCode(source);
}
}} // namespace v8::internal
......@@ -47,10 +47,9 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
virtual void CheckCharacters(
Vector<const uc16> str,
int cp_offset,
Label* on_failure);
virtual void CheckCurrentPosition(
int register_index,
Label* on_equal);
Label* on_failure,
bool check_end_of_string);
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
virtual void CheckNotAtStart(Label* on_not_at_start);
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
......@@ -77,12 +76,13 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
const Vector<Label*>& destinations);
virtual void EmitOrLink(Label* label);
virtual void Fail();
virtual Handle<Object> GetCode();
virtual Handle<Object> GetCode(Handle<String> source);
virtual void GoTo(Label* label);
virtual void IfRegisterGE(int reg, int comparand, Label* if_ge);
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
virtual IrregexpImplementation Implementation();
virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input);
virtual void LoadCurrentCharacterUnchecked(int cp_offset);
virtual void PopCurrentPosition();
virtual void PopRegister(int register_index);
virtual void PushBacktrack(Label* label);
......@@ -92,7 +92,7 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
virtual void ReadStackPointerFromRegister(int reg);
virtual void SetRegister(int register_index, int to);
virtual void Succeed();
virtual void WriteCurrentPositionToRegister(int reg);
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
virtual void WriteStackPointerToRegister(int reg);
private:
RegExpMacroAssembler* assembler_;
......
......@@ -62,19 +62,17 @@ class RegExpMacroAssembler {
virtual void CheckCharacterGT(uc16 limit, Label* on_greater) = 0;
virtual void CheckCharacterLT(uc16 limit, Label* on_less) = 0;
// Check the current character for a match with a literal string. If we
// fail to match then goto the on_failure label. End of input always
// matches. If the label is NULL then we should pop a backtrack address off
// the stack abnd go to that.
// fail to match then goto the on_failure label. If check_eos is set then
// the end of input always fails. If check_eos is clear then it is the
// caller's responsibility to ensure that the end of string is not hit.
// If the label is NULL then we should pop a backtrack address off
// the stack and go to that.
virtual void CheckCharacters(
Vector<const uc16> str,
int cp_offset,
Label* on_failure) = 0;
// Check the current input position against a register. If the register is
// equal to the current position then go to the label. If the label is NULL
// then backtrack instead.
virtual void CheckCurrentPosition(
int register_index,
Label* on_equal) = 0;
Label* on_failure,
bool check_eos) = 0;
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position) = 0;
virtual void CheckNotAtStart(Label* on_not_at_start) = 0;
virtual void CheckNotBackReference(int start_reg, Label* on_no_match) = 0;
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
......@@ -115,7 +113,7 @@ class RegExpMacroAssembler {
const Vector<Label*>& destinations) = 0;
virtual void EmitOrLink(Label* label) = 0;
virtual void Fail() = 0;
virtual Handle<Object> GetCode() = 0;
virtual Handle<Object> GetCode(Handle<String> source) = 0;
virtual void GoTo(Label* label) = 0;
// Check whether a register is >= a given constant and go to a label if it
// is. Backtracks instead if the label is NULL.
......@@ -125,6 +123,7 @@ class RegExpMacroAssembler {
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt) = 0;
virtual IrregexpImplementation Implementation() = 0;
virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input) = 0;
virtual void LoadCurrentCharacterUnchecked(int cp_offset) = 0;
virtual void PopCurrentPosition() = 0;
virtual void PopRegister(int register_index) = 0;
virtual void PushBacktrack(Label* label) = 0;
......@@ -134,7 +133,7 @@ class RegExpMacroAssembler {
virtual void ReadStackPointerFromRegister(int reg) = 0;
virtual void SetRegister(int register_index, int to) = 0;
virtual void Succeed() = 0;
virtual void WriteCurrentPositionToRegister(int reg) = 0;
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset) = 0;
virtual void WriteStackPointerToRegister(int reg) = 0;
private:
......
......@@ -362,7 +362,8 @@ static RegExpNode* Compile(const char* input, bool multiline) {
if (!v8::internal::ParseRegExp(&reader, multiline, &result))
return NULL;
RegExpNode* node = NULL;
RegExpEngine::Compile(&result, &node, false, multiline);
Handle<String> pattern = Factory::NewStringFromUtf8(CStrVector(input));
RegExpEngine::Compile(&result, &node, false, multiline, pattern);
return node;
}
......@@ -520,16 +521,16 @@ TEST(MacroAssembler) {
m.Fail();
m.Bind(&start);
m.PushBacktrack(&fail2);
m.CheckCharacters(foo, 0, &fail);
m.WriteCurrentPositionToRegister(0);
m.CheckCharacters(foo, 0, &fail, true);
m.WriteCurrentPositionToRegister(0, 0);
m.PushCurrentPosition();
m.AdvanceCurrentPosition(3);
m.WriteCurrentPositionToRegister(1);
m.WriteCurrentPositionToRegister(1, 0);
m.PopCurrentPosition();
m.AdvanceCurrentPosition(1);
m.WriteCurrentPositionToRegister(2);
m.WriteCurrentPositionToRegister(2, 0);
m.AdvanceCurrentPosition(1);
m.WriteCurrentPositionToRegister(3);
m.WriteCurrentPositionToRegister(3, 0);
m.Succeed();
m.Bind(&fail);
......@@ -542,7 +543,8 @@ TEST(MacroAssembler) {
v8::HandleScope scope;
Handle<ByteArray> array = Handle<ByteArray>::cast(m.GetCode());
Handle<String> source = Factory::NewStringFromAscii(CStrVector("^f(o)o"));
Handle<ByteArray> array = Handle<ByteArray>::cast(m.GetCode(source));
int captures[5];
Handle<String> f1 =
......@@ -576,7 +578,8 @@ TEST(MacroAssemblerIA32Success) {
m.Succeed();
Handle<Object> code_object = m.GetCode();
Handle<String> source = Factory::NewStringFromAscii(CStrVector(""));
Handle<Object> code_object = m.GetCode(source);
Handle<Code> code = Handle<Code>::cast(code_object);
int captures[4] = {42, 37, 87, 117};
......@@ -614,15 +617,16 @@ TEST(MacroAssemblerIA32Simple) {
Vector<const uc16> foo(foo_chars, 3);
Label fail;
m.CheckCharacters(foo, 0, &fail);
m.WriteCurrentPositionToRegister(0);
m.CheckCharacters(foo, 0, &fail, true);
m.WriteCurrentPositionToRegister(0, 0);
m.AdvanceCurrentPosition(3);
m.WriteCurrentPositionToRegister(1);
m.WriteCurrentPositionToRegister(1, 0);
m.Succeed();
m.Bind(&fail);
m.Fail();
Handle<Object> code_object = m.GetCode();
Handle<String> source = Factory::NewStringFromAscii(CStrVector("^foo"));
Handle<Object> code_object = m.GetCode(source);
Handle<Code> code = Handle<Code>::cast(code_object);
int captures[4] = {42, 37, 87, 117};
......@@ -675,15 +679,16 @@ TEST(MacroAssemblerIA32SimpleUC16) {
Vector<const uc16> foo(foo_chars, 3);
Label fail;
m.CheckCharacters(foo, 0, &fail);
m.WriteCurrentPositionToRegister(0);
m.CheckCharacters(foo, 0, &fail, true);
m.WriteCurrentPositionToRegister(0, 0);
m.AdvanceCurrentPosition(3);
m.WriteCurrentPositionToRegister(1);
m.WriteCurrentPositionToRegister(1, 0);
m.Succeed();
m.Bind(&fail);
m.Fail();
Handle<Object> code_object = m.GetCode();
Handle<String> source = Factory::NewStringFromAscii(CStrVector("^foo"));
Handle<Object> code_object = m.GetCode(source);
Handle<Code> code = Handle<Code>::cast(code_object);
int captures[4] = {42, 37, 87, 117};
......@@ -735,9 +740,6 @@ TEST(MacroAssemblerIA32Backtrack) {
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 0);
uc16 foo_chars[3] = {'f', 'o', 'o'};
Vector<const uc16> foo(foo_chars, 3);
Label fail;
Label backtrack;
m.LoadCurrentCharacter(10, &fail);
......@@ -749,7 +751,8 @@ TEST(MacroAssemblerIA32Backtrack) {
m.Bind(&backtrack);
m.Fail();
Handle<Object> code_object = m.GetCode();
Handle<String> source = Factory::NewStringFromAscii(CStrVector(".........."));
Handle<Object> code_object = m.GetCode(source);
Handle<Code> code = Handle<Code>::cast(code_object);
Handle<String> input = Factory::NewStringFromAscii(CStrVector("foofoo"));
......@@ -778,9 +781,9 @@ TEST(MacroAssemblerIA32BackReference) {
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 3);
m.WriteCurrentPositionToRegister(0);
m.WriteCurrentPositionToRegister(0, 0);
m.AdvanceCurrentPosition(2);
m.WriteCurrentPositionToRegister(1);
m.WriteCurrentPositionToRegister(1, 0);
Label nomatch;
m.CheckNotBackReference(0, &nomatch);
m.Fail();
......@@ -788,12 +791,13 @@ TEST(MacroAssemblerIA32BackReference) {
m.AdvanceCurrentPosition(2);
Label missing_match;
m.CheckNotBackReference(0, &missing_match);
m.WriteCurrentPositionToRegister(2);
m.WriteCurrentPositionToRegister(2, 0);
m.Succeed();
m.Bind(&missing_match);
m.Fail();
Handle<Object> code_object = m.GetCode();
Handle<String> source = Factory::NewStringFromAscii(CStrVector("^(..)..\1"));
Handle<Object> code_object = m.GetCode(source);
Handle<Code> code = Handle<Code>::cast(code_object);
Handle<String> input = Factory::NewStringFromAscii(CStrVector("fooofo"));
......@@ -826,9 +830,6 @@ TEST(MacroAssemblerIA32AtStart) {
RegExpMacroAssemblerIA32 m(RegExpMacroAssemblerIA32::ASCII, 0);
uc16 foo_chars[3] = {'f', 'o', 'o'};
Vector<const uc16> foo(foo_chars, 3);
Label not_at_start, newline, fail;
m.CheckNotAtStart(&not_at_start);
// Check that prevchar = '\n' and current = 'f'.
......@@ -850,7 +851,8 @@ TEST(MacroAssemblerIA32AtStart) {
m.CheckNotCharacter('b', &fail);
m.Succeed();
Handle<Object> code_object = m.GetCode();
Handle<String> source = Factory::NewStringFromAscii(CStrVector("(^f|ob)"));
Handle<Object> code_object = m.GetCode(source);
Handle<Code> code = Handle<Code>::cast(code_object);
Handle<String> input = Factory::NewStringFromAscii(CStrVector("foobar"));
......@@ -893,10 +895,10 @@ TEST(MacroAssemblerIA32BackRefNoCase) {
Label fail, succ;
m.WriteCurrentPositionToRegister(0);
m.WriteCurrentPositionToRegister(2);
m.WriteCurrentPositionToRegister(0, 0);
m.WriteCurrentPositionToRegister(2, 0);
m.AdvanceCurrentPosition(3);
m.WriteCurrentPositionToRegister(3);
m.WriteCurrentPositionToRegister(3, 0);
m.CheckNotBackReferenceIgnoreCase(2, &fail); // Match "AbC".
m.CheckNotBackReferenceIgnoreCase(2, &fail); // Match "ABC".
Label expected_fail;
......@@ -910,10 +912,12 @@ TEST(MacroAssemblerIA32BackRefNoCase) {
m.Fail();
m.Bind(&succ);
m.WriteCurrentPositionToRegister(1);
m.WriteCurrentPositionToRegister(1, 0);
m.Succeed();
Handle<Object> code_object = m.GetCode();
Handle<String> source =
Factory::NewStringFromAscii(CStrVector("^(abc)\1\1(?!\1)...(?!\1)"));
Handle<Object> code_object = m.GetCode(source);
Handle<Code> code = Handle<Code>::cast(code_object);
Handle<String> input =
......@@ -955,13 +959,13 @@ TEST(MacroAssemblerIA32Registers) {
enum registers { out1, out2, out3, out4, out5, sp, loop_cnt };
Label fail;
Label backtrack;
m.WriteCurrentPositionToRegister(out1); // Output: [0]
m.WriteCurrentPositionToRegister(out1, 0); // Output: [0]
m.PushRegister(out1);
m.PushBacktrack(&backtrack);
m.WriteStackPointerToRegister(sp);
// Fill stack and registers
m.AdvanceCurrentPosition(2);
m.WriteCurrentPositionToRegister(out1);
m.WriteCurrentPositionToRegister(out1, 0);
m.PushRegister(out1);
m.PushBacktrack(&fail);
// Drop backtrack stack frames.
......@@ -977,7 +981,7 @@ TEST(MacroAssemblerIA32Registers) {
m.PopRegister(out1);
m.ReadCurrentPositionFromRegister(out1);
m.AdvanceCurrentPosition(3);
m.WriteCurrentPositionToRegister(out2); // [0,3]
m.WriteCurrentPositionToRegister(out2, 0); // [0,3]
Label loop;
m.SetRegister(loop_cnt, 0); // loop counter
......@@ -985,7 +989,7 @@ TEST(MacroAssemblerIA32Registers) {
m.AdvanceRegister(loop_cnt, 1);
m.AdvanceCurrentPosition(1);
m.IfRegisterLT(loop_cnt, 3, &loop);
m.WriteCurrentPositionToRegister(out3); // [0,3,6]
m.WriteCurrentPositionToRegister(out3, 0); // [0,3,6]
Label loop2;
m.SetRegister(loop_cnt, 2); // loop counter
......@@ -993,24 +997,29 @@ TEST(MacroAssemblerIA32Registers) {
m.AdvanceRegister(loop_cnt, -1);
m.AdvanceCurrentPosition(1);
m.IfRegisterGE(loop_cnt, 0, &loop2);
m.WriteCurrentPositionToRegister(out4); // [0,3,6,9]
m.WriteCurrentPositionToRegister(out4, 0); // [0,3,6,9]
Label loop3;
Label exit_loop3;
m.PushRegister(out4);
m.PushRegister(out4);
m.ReadCurrentPositionFromRegister(out3);
m.Bind(&loop3);
m.AdvanceCurrentPosition(1);
m.CheckCurrentPosition(out4, &exit_loop3);
m.CheckGreedyLoop(&exit_loop3);
m.GoTo(&loop3);
m.Bind(&exit_loop3);
m.WriteCurrentPositionToRegister(out5); // [0,3,6,9,9]
m.PopCurrentPosition();
m.WriteCurrentPositionToRegister(out5, 0); // [0,3,6,9,9]
m.Succeed();
m.Bind(&fail);
m.Fail();
Handle<Object> code_object = m.GetCode();
Handle<String> source =
Factory::NewStringFromAscii(CStrVector("<loop test>"));
Handle<Object> code_object = m.GetCode(source);
Handle<Code> code = Handle<Code>::cast(code_object);
// String long enough for test (content doesn't matter).
......@@ -1291,5 +1300,5 @@ TEST(CharClassDifference) {
// Smoke test: initializes V8 and runs the Execute helper on regexp patterns.
// The meaning of Execute's two boolean arguments is defined elsewhere and is
// not visible here.
// NOTE(review): this listing appears to be a diff whose +/- markers were lost;
// the two Execute calls are presumably the old and new form of a single line
// rather than two independent checks -- confirm against the real file.
TEST(Graph) {
V8::Initialize(NULL);
// Pattern \b\w.
Execute("\\b\\w", false, true);
// Pattern (?=[d#.]), a lookahead on a character class.
Execute("(?=[d#.])", false, true);
}
......@@ -286,3 +286,23 @@ for (var i = 0; i < 128; i++) {
}
assertFalse(/f(o)$\1/.test('foo'), "backref detects at_end");
// Check that we don't read past the end of the string.
// Every pattern below must fail to match. The inputs are one or two
// characters long, so a two-element pattern either runs out of input ('x')
// or can only start matching at the last character ('xa').

// Single literal, then character class followed by a literal.
// NOTE(review): 'f' vs '<' presumably covers a literal with and without
// case variants -- confirm.
assertFalse(/f/.test('b'));
assertFalse(/[abc]f/.test('x'));
assertFalse(/[abc]f/.test('xa'));
assertFalse(/[abc]</.test('x'));
assertFalse(/[abc]</.test('xa'));
// Same patterns with the ignore-case flag.
assertFalse(/f/i.test('b'));
assertFalse(/[abc]f/i.test('x'));
assertFalse(/[abc]f/i.test('xa'));
assertFalse(/[abc]</i.test('x'));
assertFalse(/[abc]</i.test('xa'));
// Literal followed by a character class.
assertFalse(/f[abc]/.test('x'));
assertFalse(/f[abc]/.test('xa'));
assertFalse(/<[abc]/.test('x'));
assertFalse(/<[abc]/.test('xa'));
// Literal followed by a character class, with the ignore-case flag.
assertFalse(/f[abc]/i.test('x'));
assertFalse(/f[abc]/i.test('xa'));
assertFalse(/<[abc]/i.test('x'));
assertFalse(/<[abc]/i.test('xa'));
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment