Commit 2b77e718 authored by erik.corry@gmail.com

Add support for \b and ^ and $ in multiline mode, completing Irregexp features.  Switch on Irregexp by default.
Review URL: http://codereview.chromium.org/18193

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@1104 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent d6e33a94
......@@ -1382,7 +1382,7 @@ class RegExpCharacterClass: public RegExpTree {
// W : non-ASCII word character
// d : ASCII digit
// D : non-ASCII digit
// . : non-unicode newline
// . : non-unicode non-newline
// * : All characters
uc16 standard_type() { return set_.standard_set_type(); }
ZoneList<CharacterRange>* ranges() { return set_.ranges(); }
......
......@@ -72,8 +72,9 @@ V(LOOKUP_HI_MAP8, 36, 99) /* l_himap8 start8 byte_map_addr32 addr32* */ \
V(CHECK_REGISTER_LT, 37, 8) /* check_reg_lt register_index value16 addr32 */ \
V(CHECK_REGISTER_GE, 38, 8) /* check_reg_ge register_index value16 addr32 */ \
V(CHECK_REGISTER_EQ_POS, 39, 6) /* check_register_eq_pos index addr32 */ \
V(CHECK_NOT_AT_START, 40, 5) /* check_not_at_start addr32 */ \
V(CHECK_GREEDY, 41, 5) /* check_greedy addr32 */
V(CHECK_AT_START, 40, 5) /* check_at_start addr32 */ \
V(CHECK_NOT_AT_START, 41, 5) /* check_not_at_start addr32 */ \
V(CHECK_GREEDY, 42, 5) /* check_greedy addr32 */
#define DECLARE_BYTECODES(name, code, length) \
static const int BC_##name = code;
......
......@@ -199,12 +199,11 @@ DEFINE_bool(usage_computation, true, "compute variable usage counts")
DEFINE_bool(preemption, false,
"activate a 100ms timer that switches between V8 threads")
// irregexp
// Irregexp
DEFINE_bool(irregexp, false, "new regular expression code")
DEFINE_bool(trace_regexps, false, "trace Irregexp execution")
DEFINE_bool(irregexp_native, false, "use native code Irregexp implementation (IA32 only)")
DEFINE_bool(disable_jscre, false, "abort if JSCRE is used. Only useful with --irregexp")
DEFINE_bool(attempt_multiline_irregexp, false, "attempt to use Irregexp for multiline regexps")
// Testing flags test/cctest/test-{flags,api,serialization}.cc
DEFINE_bool(testing_bool_flag, true, "testing_bool_flag")
......
......@@ -490,6 +490,13 @@ static bool RawMatch(const byte* code_base,
}
break;
}
BYTECODE(CHECK_AT_START)
if (current == 0) {
pc = code_base + Load32(pc + 1);
} else {
pc += BC_CHECK_AT_START_LENGTH;
}
break;
BYTECODE(CHECK_NOT_AT_START)
if (current == 0) {
pc += BC_CHECK_NOT_AT_START_LENGTH;
......
......@@ -1522,18 +1522,6 @@ bool Trace::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
}
// Emits the run-time check required by this node's NodeInfo.  The only
// piece of info handled here is at_end: when it is set, the generated code
// backtracks unless the current position is at the end of the input.
void EndNode::EmitInfoChecks(RegExpMacroAssembler* assembler, Trace* trace) {
  // Nothing to emit when the node is not constrained to the end of input.
  if (!info()->at_end) return;
  // LoadCurrentCharacter jumps to the label when we are at the end of the
  // input string; if it falls through there is still input left, so the
  // match must backtrack.
  Label at_end_of_input;
  assembler->LoadCurrentCharacter(0, &at_end_of_input);
  assembler->GoTo(trace->backtrack());
  assembler->Bind(&at_end_of_input);
}
bool NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) {
if (!trace->is_trivial()) {
return trace->Flush(compiler, this);
......@@ -1542,7 +1530,6 @@ bool NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler, Trace* trace) {
if (!label()->is_bound()) {
assembler->Bind(label());
}
EmitInfoChecks(assembler, trace);
assembler->ReadCurrentPositionFromRegister(current_position_register_);
assembler->ReadStackPointerFromRegister(stack_pointer_register_);
// Now that we have unwound the stack we find at the top of the stack the
......@@ -1562,11 +1549,9 @@ bool EndNode::Emit(RegExpCompiler* compiler, Trace* trace) {
}
switch (action_) {
case ACCEPT:
EmitInfoChecks(assembler, trace);
assembler->Succeed();
return true;
case BACKTRACK:
ASSERT(!info()->at_end);
assembler->GoTo(trace->backtrack());
return true;
case NEGATIVE_SUBMATCH_SUCCESS:
......@@ -1935,13 +1920,6 @@ RegExpNode::~RegExpNode() {
RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,
Trace* trace) {
// TODO(erikcorry): Implement support.
if (info_.follows_word_interest ||
info_.follows_newline_interest ||
info_.follows_start_interest) {
return FAIL;
}
// If we are generating a greedy loop then don't stop and don't reuse code.
if (trace->stop_node() != NULL) {
return CONTINUE;
......@@ -1990,6 +1968,19 @@ int ActionNode::EatsAtLeast(int recursion_depth) {
}
int AssertionNode::EatsAtLeast(int recursion_depth) {
if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
return on_success()->EatsAtLeast(recursion_depth + 1);
}
int BackReferenceNode::EatsAtLeast(int recursion_depth) {
if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
return on_success()->EatsAtLeast(recursion_depth + 1);
}
int TextNode::EatsAtLeast(int recursion_depth) {
int answer = Length();
if (answer >= 4) return answer;
......@@ -2257,7 +2248,7 @@ void QuickCheckDetails::Clear() {
void QuickCheckDetails::Advance(int by, bool ascii) {
ASSERT(by > 0);
ASSERT(by >= 0);
if (by >= characters_) {
Clear();
return;
......@@ -2342,6 +2333,148 @@ void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
}
// Check for [0-9A-Z_a-z].
// Emits a sequence of range tests on the current character that classifies
// it as a word character (jump to |word|) or not (jump to |non_word|).
// Exactly one outcome is left to fall through into whatever code is emitted
// next: the word case when |fall_through_on_word| is true, the non-word
// case otherwise.
static void EmitWordCheck(RegExpMacroAssembler* assembler,
Label* word,
Label* non_word,
bool fall_through_on_word) {
// Anything above 'z' or below '0' lies outside every word range.
assembler->CheckCharacterGT('z', non_word);
assembler->CheckCharacterLT('0', non_word);
// Now within ['0', 'z']: 'a'..'z' and '0'..'9' are word characters.
assembler->CheckCharacterGT('a' - 1, word);
assembler->CheckCharacterLT('9' + 1, word);
// Now strictly between '9' and 'a': below 'A' is non-word, 'A'..'Z' is word.
assembler->CheckCharacterLT('A', non_word);
assembler->CheckCharacterLT('Z' + 1, word);
// Only the characters strictly between 'Z' and 'a' remain, and of those
// just '_' is a word character.  Branch on the outcome the caller does not
// want to fall through on.
if (fall_through_on_word) {
assembler->CheckNotCharacter('_', non_word);
} else {
assembler->CheckCharacter('_', word);
}
}
// Emit the code to check for a ^ in multiline mode (1-character lookbehind
// that matches newline or the start of input).
// The generated code backtracks (via the trace's backtrack label) when the
// previous character is not a line terminator; otherwise it continues with
// the code for |on_success|.  Returns the result of emitting |on_success|.
static bool EmitHat(RegExpCompiler* compiler,
RegExpNode* on_success,
Trace* trace) {
RegExpMacroAssembler* assembler = compiler->macro_assembler();
// We will be loading the previous character into the current character
// register.
// Work on a copy of the trace so the caller's trace still records its
// preloaded characters.
Trace new_trace(*trace);
new_trace.InvalidateCurrentCharacter();
Label ok;
if (new_trace.cp_offset() == 0) {
// The start of input counts as a newline in this context, so skip to
// ok if we are at the start.
assembler->CheckAtStart(&ok);
}
// We already checked that we are not at the start of input so it must be
// OK to load the previous character.
// NOTE(review): the trailing 'false' presumably disables the bounds check,
// which would leave the label argument unused here - confirm against
// RegExpMacroAssembler::LoadCurrentCharacter.
assembler->LoadCurrentCharacter(new_trace.cp_offset() -1,
new_trace.backtrack(),
false);
// Newline means \n, \r, 0x2028 or 0x2029.
if (!compiler->ascii()) {
// 0x2028 and 0x2029 differ only in the lowest bit, which the 0xfffe mask
// drops, so one masked comparison covers both.
assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok);
}
assembler->CheckCharacter('\n', &ok);
// Last candidate: anything other than '\r' fails the assertion, while
// '\r' falls through to ok.
assembler->CheckNotCharacter('\r', new_trace.backtrack());
assembler->Bind(&ok);
return on_success->Emit(compiler, &new_trace);
}
// Emit the code to handle \b and \B (word-boundary or non-word-boundary).
// |type| selects which of the two assertions is being compiled.  The
// generated code classifies the next character and then the previous
// character as word or non-word; a boundary exists when the two differ.
// The failing outcome jumps to the trace's backtrack label and the
// succeeding one continues with the code for |on_success|.  Returns the
// result of emitting |on_success|.
static bool EmitBoundaryCheck(AssertionNode::AssertionNodeType type,
RegExpCompiler* compiler,
RegExpNode* on_success,
Trace* trace) {
RegExpMacroAssembler* assembler = compiler->macro_assembler();
Label before_non_word;
Label before_word;
if (trace->characters_preloaded() != 1) {
// The next character is not already loaded.  LoadCurrentCharacter jumps
// to the label at the end of input, so the end of input is treated as
// being before a non-word character.
assembler->LoadCurrentCharacter(trace->cp_offset(), &before_non_word);
}
// Fall through on non-word.
EmitWordCheck(assembler, &before_word, &before_non_word, false);

// We will be loading the previous character into the current character
// register.
Trace new_trace(*trace);
new_trace.InvalidateCurrentCharacter();

// Map the two possible outcomes onto "continue matching" (ok) and
// "backtrack", depending on whether we are compiling \b or \B.
Label ok;
Label* boundary;
Label* not_boundary;
if (type == AssertionNode::AT_BOUNDARY) {
boundary = &ok;
not_boundary = new_trace.backtrack();
} else {
not_boundary = &ok;
boundary = new_trace.backtrack();
}

// Next character is not a word character.
assembler->Bind(&before_non_word);
if (new_trace.cp_offset() == 0) {
// The start of input counts as a non-word character, so the question is
// decided if we are at the start.
assembler->CheckAtStart(not_boundary);
}
// We already checked that we are not at the start of input so it must be
// OK to load the previous character.
assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1,
&ok, // Unused dummy label in this call.
false);
// Fall through on non-word.
EmitWordCheck(assembler, boundary, not_boundary, false);
// Non-word followed by non-word: explicit jump, because the code that
// follows handles the other case.
assembler->GoTo(not_boundary);

// Next character is a word character.
assembler->Bind(&before_word);
if (new_trace.cp_offset() == 0) {
// The start of input counts as a non-word character, so the question is
// decided if we are at the start.
assembler->CheckAtStart(boundary);
}
// We already checked that we are not at the start of input so it must be
// OK to load the previous character.
assembler->LoadCurrentCharacter(new_trace.cp_offset() - 1,
&ok, // Unused dummy label in this call.
false);
// Here a previous word character means "not a boundary".  Choose the
// fall-through case so that falling through always reaches ok, which is
// bound immediately below: for \B that is the word case, for \b the
// non-word case.
bool fall_through_on_word = (type == AssertionNode::AT_NON_BOUNDARY);
EmitWordCheck(assembler, not_boundary, boundary, fall_through_on_word);

assembler->Bind(&ok);

return on_success->Emit(compiler, &new_trace);
}
// Generates the code for this assertion followed by the code for its
// successor.  The lookbehind-style assertions (^ in multiline mode, \b and
// \B) are delegated to helpers that emit the successor themselves; the
// remaining kinds emit their check here and then continue with the
// successor on the unchanged trace.  Returns the successor's Emit result.
bool AssertionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
  RegExpMacroAssembler* masm = compiler->macro_assembler();
  switch (type_) {
    case AFTER_NEWLINE:
      return EmitHat(compiler, on_success(), trace);
    case AT_BOUNDARY:
    case AT_NON_BOUNDARY:
      return EmitBoundaryCheck(type_, compiler, on_success(), trace);
    case AT_START:
      // Fail the match unless we are at the start of the input.
      masm->CheckNotAtStart(trace->backtrack());
      break;
    case AT_END: {
      // LoadCurrentCharacter jumps to the label when there is no character
      // to load, i.e. at the end of input; otherwise we backtrack.
      Label at_end_of_input;
      masm->LoadCurrentCharacter(trace->cp_offset(), &at_end_of_input);
      masm->GoTo(trace->backtrack());
      masm->Bind(&at_end_of_input);
      break;
    }
  }
  return on_success()->Emit(compiler, trace);
}
// We call this repeatedly to generate code for each pass over the text node.
// The passes are in increasing order of difficulty because we hope one
// of the first passes will fail in which case we are saved the work of the
......@@ -2487,17 +2620,6 @@ bool TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {
if (limit_result == DONE) return true;
ASSERT(limit_result == CONTINUE);
if (info()->follows_word_interest ||
info()->follows_newline_interest ||
info()->follows_start_interest) {
return false;
}
if (info()->at_end) {
compiler->macro_assembler()->GoTo(trace->backtrack());
return true;
}
if (compiler->ascii()) {
int dummy = 0;
TextEmitPass(compiler, NON_ASCII_MATCH, false, trace, false, &dummy);
......@@ -2561,6 +2683,11 @@ bool TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {
}
void Trace::InvalidateCurrentCharacter() {
characters_preloaded_ = 0;
}
void Trace::AdvanceCurrentPositionInTrace(int by, bool ascii) {
ASSERT(by > 0);
// We don't have an instruction for shifting the current character register
......@@ -2616,12 +2743,6 @@ int ChoiceNode::GreedyLoopTextLength(GuardedAlternative* alternative) {
if (recursion_depth++ > RegExpCompiler::kMaxRecursion) {
return kNodeIsTooComplexForGreedyLoops;
}
NodeInfo* info = node->info();
if (info->follows_word_interest ||
info->follows_newline_interest ||
info->follows_start_interest) {
return kNodeIsTooComplexForGreedyLoops;
}
int node_length = node->GreedyLoopTextLength();
if (node_length == kNodeIsTooComplexForGreedyLoops) {
return kNodeIsTooComplexForGreedyLoops;
......@@ -3096,20 +3217,6 @@ bool ActionNode::Emit(RegExpCompiler* compiler, Trace* trace) {
}
case POSITIVE_SUBMATCH_SUCCESS:
if (!trace->is_trivial()) return trace->Flush(compiler, this);
// TODO(erikcorry): Implement support.
if (info()->follows_word_interest ||
info()->follows_newline_interest ||
info()->follows_start_interest) {
return false;
}
if (info()->at_end) {
Label at_end;
// Load current character jumps to the label if we are beyond the string
// end.
assembler->LoadCurrentCharacter(0, &at_end);
assembler->GoTo(trace->backtrack());
assembler->Bind(&at_end);
}
assembler->ReadCurrentPositionFromRegister(
data_.u_submatch.current_position_register);
assembler->ReadStackPointerFromRegister(
......@@ -3136,19 +3243,11 @@ bool BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
RecursionCheck rc(compiler);
ASSERT_EQ(start_reg_ + 1, end_reg_);
if (info()->at_end) {
// If we are constrained to match at the end of the input then succeed
// iff the back reference is empty.
assembler->CheckNotRegistersEqual(start_reg_,
end_reg_,
trace->backtrack());
if (compiler->ignore_case()) {
assembler->CheckNotBackReferenceIgnoreCase(start_reg_,
trace->backtrack());
} else {
if (compiler->ignore_case()) {
assembler->CheckNotBackReferenceIgnoreCase(start_reg_,
trace->backtrack());
} else {
assembler->CheckNotBackReference(start_reg_, trace->backtrack());
}
assembler->CheckNotBackReference(start_reg_, trace->backtrack());
}
return on_success()->Emit(compiler, trace);
}
......@@ -3389,6 +3488,33 @@ void DotPrinter::VisitEnd(EndNode* that) {
}
// Prints the dot-graph node for an assertion (labelled with the regexp
// syntax it stands for), its attributes, and the edge to its successor,
// then continues the traversal with the successor.
void DotPrinter::VisitAssertion(AssertionNode* that) {
  stream()->Add(" n%p [", that);
  // Select the label/shape attributes for this kind of assertion.
  const char* attributes = NULL;
  switch (that->type()) {
    case AssertionNode::AT_END:
      attributes = "label=\"$\", shape=septagon";
      break;
    case AssertionNode::AT_START:
      attributes = "label=\"^\", shape=septagon";
      break;
    case AssertionNode::AT_BOUNDARY:
      attributes = "label=\"\\b\", shape=septagon";
      break;
    case AssertionNode::AT_NON_BOUNDARY:
      attributes = "label=\"\\B\", shape=septagon";
      break;
    case AssertionNode::AFTER_NEWLINE:
      attributes = "label=\"(?<=\\n)\", shape=septagon";
      break;
  }
  if (attributes != NULL) {
    stream()->Add(attributes);
  }
  stream()->Add("];\n");
  PrintAttributes(that);
  // Draw the edge to the successor and keep walking.
  RegExpNode* next = that->on_success();
  stream()->Add(" n%p -> n%p;\n", that, next);
  Visit(next);
}
void DotPrinter::VisitAction(ActionNode* that) {
stream()->Add(" n%p [", that);
switch (that->type_) {
......@@ -3749,22 +3875,49 @@ RegExpNode* RegExpAssertion::ToNode(RegExpCompiler* compiler,
NodeInfo info;
switch (type()) {
case START_OF_LINE:
info.follows_newline_interest = true;
break;
return AssertionNode::AfterNewline(on_success);
case START_OF_INPUT:
info.follows_start_interest = true;
break;
case BOUNDARY: case NON_BOUNDARY:
info.follows_word_interest = true;
break;
return AssertionNode::AtStart(on_success);
case BOUNDARY:
return AssertionNode::AtBoundary(on_success);
case NON_BOUNDARY:
return AssertionNode::AtNonBoundary(on_success);
case END_OF_INPUT:
info.at_end = true;
break;
case END_OF_LINE:
// This is wrong but has the effect of making the compiler abort.
info.at_end = true;
return AssertionNode::AtEnd(on_success);
case END_OF_LINE: {
// Compile $ in multiline regexps as an alternation with a positive
// lookahead in one side and an end-of-input on the other side.
// We need two registers for the lookahead.
int stack_pointer_register = compiler->AllocateRegister();
int position_register = compiler->AllocateRegister();
// The ChoiceNode to distinguish between a newline and end-of-input.
ChoiceNode* result = new ChoiceNode(2);
// Create a newline atom.
ZoneList<CharacterRange>* newline_ranges =
new ZoneList<CharacterRange>(3);
CharacterRange::AddClassEscape('n', newline_ranges);
RegExpCharacterClass* newline_atom = new RegExpCharacterClass('n');
TextNode* newline_matcher = new TextNode(
newline_atom,
ActionNode::PositiveSubmatchSuccess(stack_pointer_register,
position_register,
on_success));
// Create an end-of-input matcher.
RegExpNode* end_of_line = ActionNode::BeginSubmatch(
stack_pointer_register,
position_register,
newline_matcher);
// Add the two alternatives to the ChoiceNode.
GuardedAlternative eol_alternative(end_of_line);
result->AddAlternative(eol_alternative);
GuardedAlternative end_alternative(AssertionNode::AtEnd(on_success));
result->AddAlternative(end_alternative);
return result;
}
default:
UNREACHABLE();
}
return on_success->PropagateForward(&info);
return on_success;
}
......@@ -3911,6 +4064,13 @@ void CharacterRange::AddClassEscape(uc16 type,
case '*':
ranges->Add(CharacterRange::Everything());
break;
// This is the set of characters matched by the $ and ^ symbols
// in multiline mode.
case 'n':
AddClass(kLineTerminatorRanges,
kLineTerminatorRangeCount,
ranges);
break;
default:
UNREACHABLE();
}
......@@ -4096,62 +4256,6 @@ static RegExpNode* PropagateToEndpoint(C* node, NodeInfo* info) {
}
// Propagates |info| forward through this action node.  The node's own info
// is merged with |info|, a sibling carrying the merged info is obtained via
// EnsureSibling, and that sibling's successor is replaced by the result of
// propagating |info| further.  Returns the sibling.
RegExpNode* ActionNode::PropagateForward(NodeInfo* info) {
  NodeInfo merged_info(*this->info());
  merged_info.AddFromPreceding(info);
  bool is_fresh_clone = false;
  ActionNode* sibling = EnsureSibling(this, &merged_info, &is_fresh_clone);
  // The successor is re-propagated whether or not the sibling is new.
  RegExpNode* propagated = sibling->on_success()->PropagateForward(info);
  sibling->set_on_success(propagated);
  return sibling;
}
// Propagates |info| forward through this choice node.  The node's own info
// is merged with |info| and a sibling carrying the merged info is obtained
// via EnsureSibling.  Only when that sibling was newly cloned does it get
// its own alternatives list, each entry pointing at the propagated version
// of the corresponding original alternative.  Returns the sibling.
RegExpNode* ChoiceNode::PropagateForward(NodeInfo* info) {
  NodeInfo merged_info(*this->info());
  merged_info.AddFromPreceding(info);
  bool is_fresh_clone = false;
  ChoiceNode* sibling = EnsureSibling(this, &merged_info, &is_fresh_clone);
  // A sibling that already existed is returned untouched.
  if (!is_fresh_clone) return sibling;
  ZoneList<GuardedAlternative>* originals = alternatives();
  int alternative_count = originals->length();
  sibling->alternatives_ = new ZoneList<GuardedAlternative>(alternative_count);
  for (int index = 0; index < alternative_count; index++) {
    GuardedAlternative propagated = originals->at(index);
    propagated.set_node(propagated.node()->PropagateForward(info));
    sibling->alternatives()->Add(propagated);
  }
  return sibling;
}
// An end node is an endpoint for propagated info: hand the work to the
// shared PropagateToEndpoint helper and return its result.
RegExpNode* EndNode::PropagateForward(NodeInfo* info) {
  RegExpNode* endpoint = PropagateToEndpoint(this, info);
  return endpoint;
}
// Propagates |info| forward through this back reference.  The node's own
// info is merged with |info| and a sibling carrying the merged info is
// obtained via EnsureSibling; only a newly cloned sibling has its successor
// replaced by the propagated successor.  Returns the sibling.
RegExpNode* BackReferenceNode::PropagateForward(NodeInfo* info) {
  NodeInfo merged_info(*this->info());
  merged_info.AddFromPreceding(info);
  bool is_fresh_clone = false;
  BackReferenceNode* sibling =
      EnsureSibling(this, &merged_info, &is_fresh_clone);
  // A sibling that already existed is returned untouched.
  if (!is_fresh_clone) return sibling;
  // TODO(erikcorry): A back reference has to have two successors (by default
  // the same node).  The first is used if the back reference matches a non-
  // empty back reference, the second if it matches an empty one.  This
  // doesn't matter for at_end, which is the only one implemented right now,
  // but it will matter for other pieces of info.
  RegExpNode* propagated = sibling->on_success()->PropagateForward(info);
  sibling->set_on_success(propagated);
  return sibling;
}
// A text node is an endpoint for propagated info: hand the work to the
// shared PropagateToEndpoint helper and return its result.
RegExpNode* TextNode::PropagateForward(NodeInfo* info) {
  RegExpNode* endpoint = PropagateToEndpoint(this, info);
  return endpoint;
}
// -------------------------------------------------------------------
// Splay tree
......@@ -4389,6 +4493,11 @@ void Analysis::VisitBackReference(BackReferenceNode* that) {
}
// Analysing an assertion node amounts to making sure its successor has
// been analysed.
void Analysis::VisitAssertion(AssertionNode* that) {
  RegExpNode* successor = that->on_success();
  EnsureAnalyzed(successor);
}
// -------------------------------------------------------------------
// Dispatch table construction
......@@ -4441,6 +4550,12 @@ void DispatchTableConstructor::VisitBackReference(BackReferenceNode* that) {
}
// An assertion node adds nothing to the dispatch table itself; the visitor
// simply continues with the node's successor.
void DispatchTableConstructor::VisitAssertion(AssertionNode* that) {
  that->on_success()->Accept(this);
}
static int CompareRangeByFrom(const CharacterRange* a,
const CharacterRange* b) {
......@@ -4527,10 +4642,6 @@ Handle<FixedArray> RegExpEngine::Compile(RegExpCompileData* data,
NodeInfo info = *node->info();
if (is_multiline && !FLAG_attempt_multiline_irregexp) {
return Handle<FixedArray>::null();
}
if (FLAG_irregexp_native) {
#ifdef ARM
// Unimplemented, fall-through to bytecode implementation.
......
......@@ -410,6 +410,7 @@ class DispatchTable : public ZoneObject {
VISIT(Action) \
VISIT(Choice) \
VISIT(BackReference) \
VISIT(Assertion) \
VISIT(Text)
......@@ -619,12 +620,6 @@ class RegExpNode: public ZoneObject {
// the deferred actions in the current trace and generating a goto.
static const int kMaxCopiesCodeGenerated = 10;
// Propagates the given interest information forward. When seeing
// \bfoo for instance, the \b is implemented by propagating forward
// to the 'foo' string that it should only succeed if its first
// character is a letter xor the previous character was a letter.
virtual RegExpNode* PropagateForward(NodeInfo* info) = 0;
NodeInfo* info() { return &info_; }
void AddSibling(RegExpNode* node) { siblings_.Add(node); }
......@@ -744,7 +739,6 @@ class ActionNode: public SeqRegExpNode {
int filled_in) {
return on_success()->GetQuickCheckDetails(details, compiler, filled_in);
}
virtual RegExpNode* PropagateForward(NodeInfo* info);
Type type() { return type_; }
// TODO(erikcorry): We should allow some action nodes in greedy loops.
virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; }
......@@ -797,7 +791,6 @@ class TextNode: public SeqRegExpNode {
elms_->Add(TextElement::CharClass(that));
}
virtual void Accept(NodeVisitor* visitor);
virtual RegExpNode* PropagateForward(NodeInfo* info);
virtual bool Emit(RegExpCompiler* compiler, Trace* trace);
virtual int EatsAtLeast(int recursion_depth);
virtual void GetQuickCheckDetails(QuickCheckDetails* details,
......@@ -831,6 +824,47 @@ class TextNode: public SeqRegExpNode {
};
// A node in the regexp graph that checks a condition on the current
// position (see AssertionNodeType) and then continues with its successor.
// It adds no characters of its own to the quick-check details: that query
// is forwarded unchanged to the successor.  Instances are created through
// the static factory functions; the constructor is private.
class AssertionNode: public SeqRegExpNode {
 public:
  // The condition checked by the node.
  enum AssertionNodeType {
    AT_END,
    AT_START,
    AT_BOUNDARY,
    AT_NON_BOUNDARY,
    AFTER_NEWLINE
  };

  // Factory functions, one per assertion type.  Each returns a new node of
  // that type which continues with |on_success|.
  static AssertionNode* AtEnd(RegExpNode* on_success) {
    return new AssertionNode(AT_END, on_success);
  }
  static AssertionNode* AtStart(RegExpNode* on_success) {
    return new AssertionNode(AT_START, on_success);
  }
  static AssertionNode* AtBoundary(RegExpNode* on_success) {
    return new AssertionNode(AT_BOUNDARY, on_success);
  }
  static AssertionNode* AtNonBoundary(RegExpNode* on_success) {
    return new AssertionNode(AT_NON_BOUNDARY, on_success);
  }
  static AssertionNode* AfterNewline(RegExpNode* on_success) {
    return new AssertionNode(AFTER_NEWLINE, on_success);
  }

  virtual void Accept(NodeVisitor* visitor);
  virtual bool Emit(RegExpCompiler* compiler, Trace* trace);
  virtual int EatsAtLeast(int recursion_depth);
  // Forwards the query to the successor with |filled_in| unchanged.
  virtual void GetQuickCheckDetails(QuickCheckDetails* details,
                                    RegExpCompiler* compiler,
                                    int filled_in) {
    on_success()->GetQuickCheckDetails(details, compiler, filled_in);
  }
  // Returns a copy made with the copy constructor.
  virtual AssertionNode* Clone() { return new AssertionNode(*this); }
  AssertionNodeType type() { return type_; }

 private:
  AssertionNode(AssertionNodeType assertion_type, RegExpNode* on_success)
      : SeqRegExpNode(on_success), type_(assertion_type) { }

  AssertionNodeType type_;
};
class BackReferenceNode: public SeqRegExpNode {
public:
BackReferenceNode(int start_reg,
......@@ -843,13 +877,12 @@ class BackReferenceNode: public SeqRegExpNode {
int start_register() { return start_reg_; }
int end_register() { return end_reg_; }
virtual bool Emit(RegExpCompiler* compiler, Trace* trace);
virtual int EatsAtLeast(int recursion_depth) { return 0; }
virtual int EatsAtLeast(int recursion_depth);
virtual void GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler,
int characters_filled_in) {
return;
}
virtual RegExpNode* PropagateForward(NodeInfo* info);
virtual BackReferenceNode* Clone() { return new BackReferenceNode(*this); }
private:
......@@ -871,12 +904,8 @@ class EndNode: public RegExpNode {
// Returning 0 from EatsAtLeast should ensure we never get here.
UNREACHABLE();
}
virtual RegExpNode* PropagateForward(NodeInfo* info);
virtual EndNode* Clone() { return new EndNode(*this); }
protected:
void EmitInfoChecks(RegExpMacroAssembler* macro, Trace* trace);
private:
Action action_;
};
......@@ -947,7 +976,6 @@ class ChoiceNode: public RegExpNode {
virtual void GetQuickCheckDetails(QuickCheckDetails* details,
RegExpCompiler* compiler,
int characters_filled_in);
virtual RegExpNode* PropagateForward(NodeInfo* info);
virtual ChoiceNode* Clone() { return new ChoiceNode(*this); }
bool being_calculated() { return being_calculated_; }
......@@ -1133,8 +1161,7 @@ class Trace {
void set_quick_check_performed(QuickCheckDetails* d) {
quick_check_performed_ = *d;
}
void clear_quick_check_performed() {
}
void InvalidateCurrentCharacter();
void AdvanceCurrentPositionInTrace(int by, bool ascii);
private:
int FindAffectedRegisters(OutSet* affected_registers);
......
......@@ -174,6 +174,20 @@ void RegExpMacroAssemblerIA32::CheckCharacterGT(uc16 limit, Label* on_greater) {
}
// Emits IA32 code that branches to |on_at_start| when the current position
// is the start of the input, and falls through otherwise.  Two conditions
// must both hold for the branch to be taken: the frame's kAtStart slot is
// non-zero, and the computed current address equals the frame's
// kInputStartOffset slot.
void RegExpMacroAssemblerIA32::CheckAtStart(Label* on_at_start) {
Label ok;
// Did we start the match at the start of the string at all?
__ cmp(Operand(ebp, kAtStart), Immediate(0));
// If not, we cannot be at the start: skip the second test.
BranchOrBacktrack(equal, &ok);
// If we did, are we still at the start of the input?
// NOTE(review): edi presumably holds the current position as an offset
// relative to the input end, so input end + edi is the current address -
// confirm against the register conventions of this assembler.
__ mov(eax, Operand(ebp, kInputEndOffset));
__ add(eax, Operand(edi));
__ cmp(eax, Operand(ebp, kInputStartOffset));
BranchOrBacktrack(equal, on_at_start);
__ bind(&ok);
}
void RegExpMacroAssemblerIA32::CheckNotAtStart(Label* on_not_at_start) {
// Did we start the match at the start of the string at all?
__ cmp(Operand(ebp, kAtStart), Immediate(0));
......
......@@ -43,6 +43,7 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
virtual void AdvanceRegister(int reg, int by);
virtual void Backtrack();
virtual void Bind(Label* label);
virtual void CheckAtStart(Label* on_at_start);
virtual void CheckBitmap(uc16 start, Label* bitmap, Label* on_zero);
virtual void CheckCharacter(uint32_t c, Label* on_equal);
virtual void CheckCharacterAfterAnd(uint32_t c,
......
......@@ -256,6 +256,12 @@ void RegExpMacroAssemblerIrregexp::CheckCharacter(uint32_t c, Label* on_equal) {
}
// Emits the CHECK_AT_START bytecode followed by its branch target, which
// is either the address of |on_at_start| or a link to be patched once the
// label is bound (handled by EmitOrLink).
void RegExpMacroAssemblerIrregexp::CheckAtStart(Label* on_at_start) {
// The opcode must be emitted before its operand.
Emit(BC_CHECK_AT_START);
EmitOrLink(on_at_start);
}
void RegExpMacroAssemblerIrregexp::CheckNotAtStart(Label* on_not_at_start) {
Emit(BC_CHECK_NOT_AT_START);
EmitOrLink(on_not_at_start);
......
......@@ -81,6 +81,7 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
virtual void CheckCharacterLT(uc16 limit, Label* on_less);
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
virtual void CheckAtStart(Label* on_at_start);
virtual void CheckNotAtStart(Label* on_not_at_start);
virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(uint32_t c,
......
......@@ -210,6 +210,12 @@ void RegExpMacroAssemblerTracer::CheckCharacter(uint32_t c, Label* on_equal) {
}
// Logs the CheckAtStart call together with its label and then forwards the
// call to the wrapped assembler.
void RegExpMacroAssemblerTracer::CheckAtStart(Label* on_at_start) {
// NOTE(review): %08x is given a pointer argument, which assumes pointers
// are the size of an unsigned int - confirm this is only built for 32-bit
// targets.
PrintF(" CheckAtStart(label[%08x]);\n", on_at_start);
assembler_->CheckAtStart(on_at_start);
}
void RegExpMacroAssemblerTracer::CheckNotAtStart(Label* on_not_at_start) {
PrintF(" CheckNotAtStart(label[%08x]);\n", on_not_at_start);
assembler_->CheckNotAtStart(on_not_at_start);
......
......@@ -41,6 +41,7 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
virtual void AdvanceRegister(int reg, int by); // r[reg] += by.
virtual void Backtrack();
virtual void Bind(Label* label);
virtual void CheckAtStart(Label* on_at_start);
virtual void CheckBitmap(uc16 start, Label* bitmap, Label* on_zero);
virtual void CheckCharacter(uint32_t c, Label* on_equal);
virtual void CheckCharacterAfterAnd(uint32_t c,
......
......@@ -61,6 +61,7 @@ class RegExpMacroAssembler {
// stack by an earlier PushBacktrack(Label*).
virtual void Backtrack() = 0;
virtual void Bind(Label* label) = 0;
virtual void CheckAtStart(Label* on_at_start) = 0;
// Check the current character against a bitmap. The range of the current
// character must be from start to start + length_of_bitmap_in_bits.
virtual void CheckBitmap(
......
......@@ -572,6 +572,9 @@ function splitMatch(separator, subject, current_index, start_index) {
if (ovector == null) return null;
var nof_results = ovector.length >> 1;
var result = new $Array(nof_results + 1);
// Section 15.5.4.14 paragraph two says that we do not allow zero length
// matches at the end of the string.
if (ovector[0] === subject.length) return null;
result[0] = ovector[1];
result[1] = subject.slice(current_index, ovector[0]);
for (var i = 1; i < nof_results; i++) {
......
......@@ -240,11 +240,8 @@ ecma_3/RegExp/regress-119909: PASS || FAIL_OK
# 'minimum repeat count' is reached, the empty string must not match.
# In this case, we are similar but not identical to JSC. Hard to
# support the JS behavior with PCRE, so maybe emulate JSC?
#
# Note: We do not support toSource currently so we cannot run this
# test. We should make an isolated test case for the regexp issue.
ecma_3/RegExp/regress-209919: FAIL_OK
js1_5/extensions/regress-459606: FAIL_OK
ecma_3/RegExp/regress-209919: PASS || FAIL_OK
js1_5/extensions/regress-459606: PASS || FAIL_OK
# PCRE's match limit is reached. SpiderMonkey hangs on the first one,
......@@ -265,11 +262,6 @@ ecma_3/RegExp/regress-307456: PASS || FAIL_OK
js1_5/Regress/regress-230216-2: FAIL_OK
# According to ECMA-262, \b is a 'word' boundary, where words are only
# ASCII characters. PCRE supports non-ASCII word characters.
js1_5/Regress/regress-247179: FAIL_OK
# Regexp too long for PCRE.
js1_5/Regress/regress-280769: PASS || FAIL
js1_5/Regress/regress-280769-1: PASS || FAIL
......@@ -471,7 +463,7 @@ ecma_3/Unicode/uc-001: FAIL_OK
# A non-breaking space doesn't match \s in a regular expression. This behaviour
# matches JSC. All the VMs have different behaviours in which characters match
# \s so we do the same as JSC until they change.
ecma_3/Unicode/uc-002: FAIL_OK
ecma_3/Unicode/uc-002: PASS || FAIL_OK
# String.prototype.split on empty strings always returns an array
......@@ -521,10 +513,12 @@ js1_5/Regress/regress-336100: FAIL_OK
# Regular expression test failures due to PCRE. We match JSC (ie, perl)
# behavior and not the ECMA spec.
ecma_3/RegExp/15.10.2-1: FAIL_OK
ecma_3/RegExp/perlstress-001: FAIL_OK
ecma_3/RegExp/perlstress-001: PASS || FAIL_OK
ecma_3/RegExp/regress-334158: PASS || FAIL
# This test fails due to http://code.google.com/p/v8/issues/detail?id=187
# Failure to clear captures when a lookahead is unwound.
ecma_3/RegExp/15.10.2-1: PASS || FAIL_OK
# This test requires a failure if we try to compile a function with more
# than 65536 arguments. This seems to be a Mozilla restriction.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment