Added check that bails out of a repetition when the body is empty.

git-svn-id: ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 66c2603a
......@@ -71,8 +71,9 @@ V(LOOKUP_MAP8, 35, 99) /* l_map8 start16 byte_map addr32* */ \
V(LOOKUP_HI_MAP8, 36, 99) /* l_himap8 start8 byte_map_addr32 addr32* */ \
V(CHECK_REGISTER_LT, 37, 8) /* check_reg_lt register_index value16 addr32 */ \
V(CHECK_REGISTER_GE, 38, 8) /* check_reg_ge register_index value16 addr32 */ \
V(CHECK_NOT_AT_START, 39, 5) /* check_not_at_start addr32 */ \
V(CHECK_GREEDY, 40, 5) /* check_greedy addr32 */
V(CHECK_REGISTER_EQ_POS, 39, 6) /* check_register_eq_pos index addr32 */ \
V(CHECK_NOT_AT_START, 40, 5) /* check_not_at_start addr32 */ \
V(CHECK_GREEDY, 41, 5) /* check_greedy addr32 */
// Expands one (name, code, length) bytecode entry into an integer constant
// named BC_<name> whose value is the entry's code. The length argument is
// accepted but not used by this expansion.
#define DECLARE_BYTECODES(name, code, length) \
static const int BC_##name = code;
......@@ -379,6 +379,13 @@ static bool RawMatch(const byte* code_base,
if (registers[pc[1]] == current) {
pc = code_base + Load32(pc + 2);
} else {
// Look up character in a bitmap. If we find a 0, then jump to the
// location at pc + 7. Otherwise fall through!
......@@ -1222,6 +1222,7 @@ class RegExpCompiler {
inline bool ignore_case() { return ignore_case_; }
inline bool ascii() { return ascii_; }
static const int kNoRegister = -1;
EndNode* accept_;
int next_register_;
......@@ -1313,8 +1314,26 @@ bool GenerationVariant::mentions_reg(int reg) {
// Walks the deferred actions starting at actions_ and stops at the first one
// that targets |reg|. Returns true and writes that action's cp_offset to
// |*cp_offset| when it is a STORE_POSITION action; returns false when the
// first action on |reg| has any other type, or when no action refers to
// |reg|.
bool GenerationVariant::GetStoredPosition(int reg, int* cp_offset) {
// The out-parameter is expected to still be zero on entry.
ASSERT_EQ(0, *cp_offset);
for (DeferredAction* action = actions_;
action != NULL;
action = action->next()) {
if (reg == action->reg()) {
if (action->type() == ActionNode::STORE_POSITION) {
*cp_offset = static_cast<DeferredCapture*>(action)->cp_offset();
return true;
} else {
// A different kind of action on this register: no stored position known.
return false;
// No deferred action mentions |reg|.
return false;
int GenerationVariant::FindAffectedRegisters(OutSet* affected_registers) {
int max_register = -1;
int max_register = RegExpCompiler::kNoRegister;
for (DeferredAction* action = actions_;
action != NULL;
action = action->next()) {
......@@ -1576,6 +1595,18 @@ ActionNode* ActionNode::PositiveSubmatchSuccess(int stack_reg,
// Builds an EMPTY_MATCH_CHECK action node that continues with |on_success|.
// The three integer arguments are copied unchanged into the node's
// u_empty_match_check data. |repetition_register| may be
// RegExpCompiler::kNoRegister, which the emitter reads as "no minimum
// repetition count".
ActionNode* ActionNode::EmptyMatchCheck(int start_register,
int repetition_register,
int repetition_limit,
RegExpNode* on_success) {
ActionNode* result = new ActionNode(EMPTY_MATCH_CHECK, on_success);
result->data_.u_empty_match_check.start_register = start_register;
result->data_.u_empty_match_check.repetition_register = repetition_register;
result->data_.u_empty_match_check.repetition_limit = repetition_limit;
return result;
#define DEFINE_ACCEPT(Type) \
void Type##Node::Accept(NodeVisitor* visitor) { \
visitor->Visit##Type(this); \
......@@ -2967,6 +2998,37 @@ bool ActionNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) {
return on_success()->Emit(compiler, variant);
int start_pos_reg = data_.u_empty_match_check.start_register;
int stored_pos = 0;
int rep_reg = data_.u_empty_match_check.repetition_register;
bool has_minimum = (rep_reg != RegExpCompiler::kNoRegister);
bool know_dist = variant->GetStoredPosition(start_pos_reg, &stored_pos);
if (know_dist && !has_minimum && stored_pos == variant->cp_offset()) {
// If we know we haven't advanced and there is no minimum we
// can just backtrack immediately.
return true;
} else if (know_dist && stored_pos < variant->cp_offset()) {
// If we know we've advanced we can generate the continuation
// immediately.
return on_success()->Emit(compiler, variant);
if (!variant->is_trivial()) return variant->Flush(compiler, this);
Label skip_empty_check;
// If we have a minimum number of repetitions we check the current
// number first and skip the empty check if it's not enough.
if (has_minimum) {
int limit = data_.u_empty_match_check.repetition_limit;
assembler->IfRegisterLT(rep_reg, limit, &skip_empty_check);
// If the match is empty we bail out, otherwise we fall through
// to the on-success continuation.
return on_success()->Emit(compiler, variant);
if (!variant->is_trivial()) return variant->Flush(compiler, this);
// TODO(erikcorry): Implement support.
......@@ -3286,6 +3348,12 @@ void DotPrinter::VisitAction(ActionNode* that) {
stream()->Add("label=\"escape\", shape=septagon");
case ActionNode::EMPTY_MATCH_CHECK:
stream()->Add("label=\"$%i=$pos?,$%i<%i?\", shape=septagon",
......@@ -3511,7 +3579,11 @@ RegExpNode* RegExpQuantifier::ToNode(int min,
static const int kMaxUnrolledMinMatches = 3; // Unroll (foo)+ and (foo){3,}
static const int kMaxUnrolledMaxMatches = 3; // Unroll (foo)? and (foo){x,3}
if (max == 0) return on_success; // This can happen due to recursion.
if (body->min_match() > 0) {
bool body_can_be_empty = (body->min_match() == 0);
int body_start_reg = RegExpCompiler::kNoRegister;
if (body_can_be_empty) {
body_start_reg = compiler->AllocateRegister();
} else {
if (min > 0 && min <= kMaxUnrolledMinMatches) {
int new_max = (max == kInfinity) ? max : max - min;
// Recurse once to get the loop or optional matches after the fixed ones.
......@@ -3548,12 +3620,27 @@ RegExpNode* RegExpQuantifier::ToNode(int min,
bool has_min = min > 0;
bool has_max = max < RegExpTree::kInfinity;
bool needs_counter = has_min || has_max;
int reg_ctr = needs_counter ? compiler->AllocateRegister() : -1;
int reg_ctr = needs_counter
? compiler->AllocateRegister()
: RegExpCompiler::kNoRegister;
LoopChoiceNode* center = new LoopChoiceNode(body->min_match() == 0);
RegExpNode* loop_return = needs_counter
? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center))
: static_cast<RegExpNode*>(center);
if (body_can_be_empty) {
// If the body can be empty we need to check if it was and then
// backtrack.
loop_return = ActionNode::EmptyMatchCheck(body_start_reg,
RegExpNode* body_node = body->ToNode(compiler, loop_return);
if (body_can_be_empty) {
// If the body can be empty we need to store the start position
// so we can bail out if it was empty.
body_node = ActionNode::StorePosition(body_start_reg, body_node);
GuardedAlternative body_alt(body_node);
if (has_max) {
Guard* body_guard = new Guard(reg_ctr, Guard::LT, max);
......@@ -682,7 +682,8 @@ class ActionNode: public SeqRegExpNode {
static ActionNode* SetRegister(int reg, int val, RegExpNode* on_success);
static ActionNode* IncrementRegister(int reg, RegExpNode* on_success);
......@@ -695,6 +696,11 @@ class ActionNode: public SeqRegExpNode {
int stack_pointer_reg,
int restore_reg,
RegExpNode* on_success);
// Creates an EMPTY_MATCH_CHECK action that continues with |on_success|.
// It is inserted when compiling a repetition whose body can match the
// empty string, so that an empty iteration can be detected and abandoned.
static ActionNode* EmptyMatchCheck(
int start_register,
int repetition_register,
int repetition_limit,
RegExpNode* on_success);
virtual void Accept(NodeVisitor* visitor);
virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant);
virtual int EatsAtLeast(int recursion_depth);
......@@ -725,6 +731,11 @@ class ActionNode: public SeqRegExpNode {
int stack_pointer_register;
int current_position_register;
} u_submatch;
// Payload of an EMPTY_MATCH_CHECK action; filled in by
// ActionNode::EmptyMatchCheck.
struct {
int start_register;  // Compared against the current position on emit.
int repetition_register;  // kNoRegister when there is no minimum count.
int repetition_limit;  // Compared against repetition_register on emit.
} u_empty_match_check;
} data_;
ActionNode(Type type, RegExpNode* on_success)
: SeqRegExpNode(on_success),
......@@ -1031,6 +1042,10 @@ class GenerationVariant {
int bound_checked_up_to() { return bound_checked_up_to_; }
QuickCheckDetails* quick_check_performed() { return &quick_check_performed_; }
bool mentions_reg(int reg);
// Returns true if a deferred position store exists to the specified
// register and stores the offset in the out-parameter. Otherwise
// returns false.
bool GetStoredPosition(int reg, int* cp_offset);
// These set methods and AdvanceVariant should be used only on new
// GenerationVariants - the intention is that GenerationVariants are
// immutable after creation.
......@@ -744,6 +744,13 @@ void RegExpMacroAssemblerIA32::IfRegisterLT(int reg,
// Compares edi with the stored value of register |reg| and, when the two are
// equal, hands |if_eq| to BranchOrBacktrack.
// NOTE(review): edi presumably holds the current input position in this
// assembler - confirm against its register assignments.
void RegExpMacroAssemblerIA32::IfRegisterEqPos(int reg,
Label* if_eq) {
__ cmp(edi, register_location(reg));
BranchOrBacktrack(equal, if_eq);
RegExpMacroAssemblerIA32::Implementation() {
return kIA32Implementation;
......@@ -86,6 +86,7 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
virtual void GoTo(Label* label);
virtual void IfRegisterGE(int reg, int comparand, Label* if_ge);
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
virtual void IfRegisterEqPos(int reg, Label* if_eq);
virtual IrregexpImplementation Implementation();
virtual void LoadCurrentCharacter(int cp_offset,
Label* on_end_of_input,
......@@ -401,6 +401,14 @@ void RegExpMacroAssemblerIrregexp::IfRegisterGE(int register_index,
void RegExpMacroAssemblerIrregexp::IfRegisterEqPos(int register_index,
Label* on_eq) {
Handle<Object> RegExpMacroAssemblerIrregexp::GetCode(Handle<String> source) {
......@@ -106,6 +106,7 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
const Vector<Label*>& destinations);
virtual void IfRegisterLT(int register_index, int comparand, Label* if_lt);
virtual void IfRegisterGE(int register_index, int comparand, Label* if_ge);
virtual void IfRegisterEqPos(int register_index, Label* if_eq);
virtual IrregexpImplementation Implementation();
virtual Handle<Object> GetCode(Handle<String> source);
......@@ -373,6 +373,14 @@ void RegExpMacroAssemblerTracer::IfRegisterLT(int register_index,
// Prints a trace line for the call and then forwards it unchanged to
// assembler_.
// NOTE(review): |if_eq| is a pointer but is printed with %08x - confirm this
// is acceptable on every supported target.
void RegExpMacroAssemblerTracer::IfRegisterEqPos(int register_index,
Label* if_eq) {
PrintF(" IfRegisterEqPos(register=%d, label[%08x]);\n",
register_index, if_eq);
assembler_->IfRegisterEqPos(register_index, if_eq);
void RegExpMacroAssemblerTracer::IfRegisterGE(int register_index,
int comparand, Label* if_ge) {
PrintF(" IfRegisterGE(register=%d, number=%d, label[%08x]);\n",
......@@ -88,6 +88,7 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
virtual void GoTo(Label* label);
virtual void IfRegisterGE(int reg, int comparand, Label* if_ge);
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
virtual void IfRegisterEqPos(int reg, Label* if_eq);
virtual IrregexpImplementation Implementation();
virtual void LoadCurrentCharacter(int cp_offset,
Label* on_end_of_input,
......@@ -135,6 +135,9 @@ class RegExpMacroAssembler {
// Check whether a register is < a given constant and go to a label if it is.
// Backtracks instead if the label is NULL.
virtual void IfRegisterLT(int reg, int comparand, Label* if_lt) = 0;
// Check whether the value stored in a register is equal to the current
// position and go to a label if it is.
// NOTE(review): the IA32 implementation goes through BranchOrBacktrack, so a
// NULL label presumably backtracks as it does for IfRegisterLT - confirm.
virtual void IfRegisterEqPos(int reg, Label* if_eq) = 0;
virtual IrregexpImplementation Implementation() = 0;
virtual void LoadCurrentCharacter(int cp_offset,
Label* on_end_of_input,
......@@ -1466,5 +1466,5 @@ TEST(CharClassDifference) {
TEST(Graph) {
Execute("\\b\\w+\\b", false, true, true);
Execute("(?:a|)*", false, true, true);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment