Commit 2de5de49 authored by lrn@chromium.org's avatar lrn@chromium.org

Irregexp: Backtrack past look-aheads works correctly.

Allows backtracking to clear registers instead of pushing and popping
them to restore state.
Redo of 1135 with bug fixed.


git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@1156 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent b7ca939e
......@@ -1521,9 +1521,15 @@ class RegExpCapture: public RegExpTree {
class RegExpLookahead: public RegExpTree {
public:
RegExpLookahead(RegExpTree* body, bool is_positive)
RegExpLookahead(RegExpTree* body,
bool is_positive,
int capture_count,
int capture_from)
: body_(body),
is_positive_(is_positive) { }
is_positive_(is_positive),
capture_count_(capture_count),
capture_from_(capture_from) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
virtual RegExpNode* ToNode(RegExpCompiler* compiler,
RegExpNode* on_success);
......@@ -1535,9 +1541,13 @@ class RegExpLookahead: public RegExpTree {
virtual int max_match() { return 0; }
RegExpTree* body() { return body_; }
bool is_positive() { return is_positive_; }
int capture_count() { return capture_count_; }
int capture_from() { return capture_from_; }
private:
RegExpTree* body_;
bool is_positive_;
int capture_count_;
int capture_from_;
};
......
This diff is collapsed.
......@@ -719,13 +719,17 @@ class ActionNode: public SeqRegExpNode {
};
static ActionNode* SetRegister(int reg, int val, RegExpNode* on_success);
static ActionNode* IncrementRegister(int reg, RegExpNode* on_success);
static ActionNode* StorePosition(int reg, RegExpNode* on_success);
static ActionNode* StorePosition(int reg,
bool is_capture,
RegExpNode* on_success);
static ActionNode* ClearCaptures(Interval range, RegExpNode* on_success);
static ActionNode* BeginSubmatch(int stack_pointer_reg,
int position_reg,
RegExpNode* on_success);
static ActionNode* PositiveSubmatchSuccess(int stack_pointer_reg,
int restore_reg,
int clear_capture_count,
int clear_capture_from,
RegExpNode* on_success);
static ActionNode* EmptyMatchCheck(int start_register,
int repetition_register,
......@@ -755,10 +759,13 @@ class ActionNode: public SeqRegExpNode {
} u_increment_register;
struct {
int reg;
bool is_capture;
} u_position_register;
struct {
int stack_pointer_register;
int current_position_register;
int clear_register_count;
int clear_register_from;
} u_submatch;
struct {
int start_register;
......@@ -913,15 +920,22 @@ class EndNode: public RegExpNode {
class NegativeSubmatchSuccess: public EndNode {
public:
NegativeSubmatchSuccess(int stack_pointer_reg, int position_reg)
NegativeSubmatchSuccess(int stack_pointer_reg,
int position_reg,
int clear_capture_count,
int clear_capture_start)
: EndNode(NEGATIVE_SUBMATCH_SUCCESS),
stack_pointer_register_(stack_pointer_reg),
current_position_register_(position_reg) { }
current_position_register_(position_reg),
clear_capture_count_(clear_capture_count),
clear_capture_start_(clear_capture_start) { }
virtual bool Emit(RegExpCompiler* compiler, Trace* trace);
private:
int stack_pointer_register_;
int current_position_register_;
int clear_capture_count_;
int clear_capture_start_;
};
......@@ -1087,18 +1101,20 @@ class Trace {
friend class Trace;
};
class DeferredCapture: public DeferredAction {
class DeferredCapture : public DeferredAction {
public:
DeferredCapture(int reg, Trace* trace)
DeferredCapture(int reg, bool is_capture, Trace* trace)
: DeferredAction(ActionNode::STORE_POSITION, reg),
cp_offset_(trace->cp_offset()) { }
int cp_offset() { return cp_offset_; }
bool is_capture() { return is_capture_; }
private:
int cp_offset_;
bool is_capture_;
void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; }
};
class DeferredSetRegister :public DeferredAction {
class DeferredSetRegister : public DeferredAction {
public:
DeferredSetRegister(int reg, int value)
: DeferredAction(ActionNode::SET_REGISTER, reg),
......@@ -1118,7 +1134,7 @@ class Trace {
Interval range_;
};
class DeferredIncrementRegister: public DeferredAction {
class DeferredIncrementRegister : public DeferredAction {
public:
explicit DeferredIncrementRegister(int reg)
: DeferredAction(ActionNode::INCREMENT_REGISTER, reg) { }
......@@ -1189,13 +1205,13 @@ class Trace {
int FindAffectedRegisters(OutSet* affected_registers);
void PerformDeferredActions(RegExpMacroAssembler* macro,
int max_register,
OutSet& affected_registers);
OutSet& affected_registers,
OutSet* registers_to_pop,
OutSet* registers_to_clear);
void RestoreAffectedRegisters(RegExpMacroAssembler* macro,
int max_register,
OutSet& affected_registers);
void PushAffectedRegisters(RegExpMacroAssembler* macro,
int max_register,
OutSet& affected_registers);
OutSet& registers_to_pop,
OutSet& registers_to_clear);
int cp_offset_;
DeferredAction* actions_;
Label* backtrack_;
......
......@@ -4149,7 +4149,10 @@ RegExpTree* RegExpParser::ParseGroup() {
} else {
ASSERT(type == '=' || type == '!');
bool is_positive = (type == '=');
return new RegExpLookahead(body, is_positive);
return new RegExpLookahead(body,
is_positive,
end_capture_index - capture_index,
capture_index);
}
}
......
......@@ -332,15 +332,29 @@ void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase(
__ push(ebx);
const int four_arguments = 4;
FrameAlign(four_arguments, ecx);
// Put arguments into allocated stack area.
// Put arguments into allocated stack area, last argument highest on stack.
// Parameters are
// UC16** buffer - really the String** of the input string
// int byte_offset1 - byte offset from *buffer of start of capture
// int byte_offset2 - byte offset from *buffer of current position
// size_t byte_length - length of capture in bytes(!)
// Set byte_length.
__ mov(Operand(esp, 3 * kPointerSize), ebx);
// Set byte_offset2.
// Found by adding negative string-end offset of current position (edi)
// to String** offset of end of string.
__ mov(ecx, Operand(ebp, kInputEndOffset));
__ add(edi, Operand(ecx));
__ mov(Operand(esp, 2 * kPointerSize), edi);
// Set byte_offset1.
// Start of capture, where eax already holds string-end negative offset.
__ add(eax, Operand(ecx));
__ mov(Operand(esp, 1 * kPointerSize), eax);
// Set buffer. Original String** parameter to regexp code.
__ mov(eax, Operand(ebp, kInputBuffer));
__ mov(Operand(esp, 0 * kPointerSize), eax);
Address function_address = FUNCTION_ADDR(&CaseInsensitiveCompareUC16);
CallCFunction(function_address, four_arguments);
// Pop original values before reacting on result value.
......@@ -946,9 +960,12 @@ void RegExpMacroAssemblerIA32::WriteCurrentPositionToRegister(int reg,
}
void RegExpMacroAssemblerIA32::ClearRegister(int reg) {
void RegExpMacroAssemblerIA32::ClearRegisters(int reg_from, int reg_to) {
ASSERT(reg_from <= reg_to);
__ mov(eax, Operand(ebp, kInputStartMinusOne));
__ mov(register_location(reg), eax);
for (int reg = reg_from; reg <= reg_to; reg++) {
__ mov(register_location(reg), eax);
}
}
......@@ -987,8 +1004,8 @@ RegExpMacroAssemblerIA32::Result RegExpMacroAssemblerIA32::Execute(
stack_top);
if (result < 0 && !Top::has_pending_exception()) {
// We detected a stack overflow in RegExp code, but haven't created
// the exception yet.
// We detected a stack overflow (on the backtrack stack) in RegExp code,
// but haven't created the exception yet.
Top::StackOverflow();
}
return (result < 0) ? EXCEPTION : (result ? SUCCESS : FAILURE);
......@@ -1170,6 +1187,9 @@ void RegExpMacroAssemblerIA32::CheckStackLimit() {
void RegExpMacroAssemblerIA32::FrameAlign(int num_arguments, Register scratch) {
// TODO(lrn): Since we no longer use the system stack arbitrarily, we
// know the current stack alignment - esp points to the last regexp register.
// We can do this simpler then.
int frameAlignment = OS::ActivationFrameAlignment();
if (frameAlignment != 0) {
// Make stack end at alignment and make room for num_arguments words
......
......@@ -110,7 +110,7 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
virtual void SetRegister(int register_index, int to);
virtual void Succeed();
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
virtual void ClearRegister(int reg);
virtual void ClearRegisters(int reg_from, int reg_to);
virtual void WriteStackPointerToRegister(int reg);
static Result Execute(Code* code,
......
......@@ -104,8 +104,11 @@ void RegExpMacroAssemblerIrregexp::WriteCurrentPositionToRegister(
}
void RegExpMacroAssemblerIrregexp::ClearRegister(int reg) {
SetRegister(reg, -1);
void RegExpMacroAssemblerIrregexp::ClearRegisters(int reg_from, int reg_to) {
ASSERT(reg_from <= reg_to);
for (int reg = reg_from; reg <= reg_to; reg++) {
SetRegister(reg, -1);
}
}
......
......@@ -66,7 +66,7 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
virtual void AdvanceRegister(int reg, int by); // r[reg] += by.
virtual void SetRegister(int register_index, int to);
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
virtual void ClearRegister(int reg);
virtual void ClearRegisters(int reg_from, int reg_to);
virtual void ReadCurrentPositionFromRegister(int reg);
virtual void WriteStackPointerToRegister(int reg);
virtual void ReadStackPointerFromRegister(int reg);
......
......@@ -150,9 +150,9 @@ void RegExpMacroAssemblerTracer::WriteCurrentPositionToRegister(int reg,
}
void RegExpMacroAssemblerTracer::ClearRegister(int reg) {
PrintF(" ClearRegister(register=%d);\n", reg);
assembler_->ClearRegister(reg);
void RegExpMacroAssemblerTracer::ClearRegisters(int reg_from, int reg_to) {
PrintF(" ClearRegister(from=%d, to=%d);\n", reg_from, reg_to);
assembler_->ClearRegisters(reg_from, reg_to);
}
......
......@@ -107,7 +107,7 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
virtual void SetRegister(int register_index, int to);
virtual void Succeed();
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
virtual void ClearRegister(int reg);
virtual void ClearRegisters(int reg_from, int reg_to);
virtual void WriteStackPointerToRegister(int reg);
private:
RegExpMacroAssembler* assembler_;
......
......@@ -174,7 +174,7 @@ class RegExpMacroAssembler {
virtual void SetRegister(int register_index, int to) = 0;
virtual void Succeed() = 0;
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset) = 0;
virtual void ClearRegister(int reg) = 0;
virtual void ClearRegisters(int reg_from, int reg_to) = 0;
virtual void WriteStackPointerToRegister(int reg) = 0;
private:
......
// Copyright 2009 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Tests from http://blog.stevenlevithan.com/archives/npcg-javascript
assertEquals(true, /(x)?\1y/.test("y"));
assertEquals(["y", undefined], /(x)?\1y/.exec("y"));
assertEquals(["y", undefined], /(x)?y/.exec("y"));
assertEquals(["y", undefined], "y".match(/(x)?\1y/));
assertEquals(["y", undefined], "y".match(/(x)?y/));
assertEquals(["y"], "y".match(/(x)?\1y/g));
assertEquals(["", undefined, ""], "y".split(/(x)?\1y/));
assertEquals(["", undefined, ""], "y".split(/(x)?y/));
assertEquals(0, "y".search(/(x)?\1y/));
assertEquals("z", "y".replace(/(x)?\1y/, "z"));
assertEquals("", "y".replace(/(x)?y/, "$1"));
assertEquals("undefined", "y".replace(/(x)?\1y/,
function($0, $1){
return String($1);
}));
assertEquals("undefined", "y".replace(/(x)?y/,
function($0, $1){
return String($1);
}));
assertEquals("undefined", "y".replace(/(x)?y/,
function($0, $1){
return $1;
}));
// Copyright 2009 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// Tests captures in positive and negative look-ahead in regular expressions.
function stringEscape(string) {
// Converts string to source literal.
return '"' + string.replace(/["\\]/g, "\\$1") + '"';
}
function testRE(re, input, expected_result) {
var testName = re + ".test(" + stringEscape(input) +")";
if (expected_result) {
assertTrue(re.test(input), testName);
} else {
assertFalse(re.test(input), testName);
}
}
function execRE(re, input, expected_result) {
var testName = re + ".exec('" + stringEscape(input) +"')";
assertEquals(expected_result, re.exec(input), testName);
}
// Test of simple positive lookahead.
var re = /^(?=a)/;
testRE(re, "a", true);
testRE(re, "b", false);
execRE(re, "a", [""]);
re = /^(?=\woo)f\w/;
testRE(re, "foo", true);
testRE(re, "boo", false);
testRE(re, "fao", false);
testRE(re, "foa", false);
execRE(re, "foo", ["fo"]);
re = /(?=\w).(?=\W)/;
testRE(re, ".a! ", true);
testRE(re, ".! ", false);
testRE(re, ".ab! ", true);
execRE(re, ".ab! ", ["b"]);
re = /(?=f(?=[^f]o))../;
testRE(re, ", foo!", true);
testRE(re, ", fo!", false);
testRE(re, ", ffo", false);
execRE(re, ", foo!", ["fo"]);
// Positive lookahead with captures.
re = /^[^\'\"]*(?=([\'\"])).*\1(\w+)\1/;
testRE(re, " 'foo' ", true);
testRE(re, ' "foo" ', true);
testRE(re, " \" 'foo' ", false);
testRE(re, " ' \"foo\" ", false);
testRE(re, " 'foo\" ", false);
testRE(re, " \"foo' ", false);
execRE(re, " 'foo' ", [" 'foo'", "'", "foo"]);
execRE(re, ' "foo" ', [' "foo"', '"', 'foo']);
// Captures are cleared on backtrack past the look-ahead.
re = /^(?:(?=(.))a|b)\1$/;
testRE(re, "aa", true);
testRE(re, "b", true);
testRE(re, "bb", false);
testRE(re, "a", false);
execRE(re, "aa", ["aa", "a"]);
execRE(re, "b", ["b", undefined]);
re = /^(?=(.)(?=(.)\1\2)\2\1)\1\2/;
testRE(re, "abab", true);
testRE(re, "ababxxxxxxxx", true);
testRE(re, "aba", false);
execRE(re, "abab", ["ab", "a", "b"]);
re = /^(?:(?=(.))a|b|c)$/;
testRE(re, "a", true);
testRE(re, "b", true);
testRE(re, "c", true);
testRE(re, "d", false);
execRE(re, "a", ["a", "a"]);
execRE(re, "b", ["b", undefined]);
execRE(re, "c", ["c", undefined]);
execRE(/^(?=(b))b/, "b", ["b", "b"]);
execRE(/^(?:(?=(b))|a)b/, "ab", ["ab", undefined]);
execRE(/^(?:(?=(b)(?:(?=(c))|d))|)bd/, "bd", ["bd", "b", undefined]);
// Test of Negative Look-Ahead.
re = /(?!x)./;
testRE(re, "y", true);
testRE(re, "x", false);
execRE(re, "y", ["y"]);
re = /(?!(\d))|\d/;
testRE(re, "4", true);
execRE(re, "4", ["4", undefined]);
execRE(re, "x", ["", undefined]);
// Test mixed nested look-ahead with captures.
re = /^(?=(x)(?=(y)))/;
testRE(re, "xy", true);
testRE(re, "xz", false);
execRE(re, "xy", ["", "x", "y"]);
re = /^(?!(x)(?!(y)))/;
testRE(re, "xy", true);
testRE(re, "xz", false);
execRE(re, "xy", ["", undefined, undefined]);
re = /^(?=(x)(?!(y)))/;
testRE(re, "xz", true);
testRE(re, "xy", false)
execRE(re, "xz", ["", "x", undefined]);
re = /^(?!(x)(?=(y)))/;
testRE(re, "xz", true);
testRE(re, "xy", false);
execRE(re, "xz", ["", undefined, undefined]);
re = /^(?=(x)(?!(y)(?=(z))))/;
testRE(re, "xaz", true);
testRE(re, "xya", true);
testRE(re, "xyz", false);
testRE(re, "a", false);
execRE(re, "xaz", ["", "x", undefined, undefined]);
execRE(re, "xya", ["", "x", undefined, undefined]);
re = /^(?!(x)(?=(y)(?!(z))))/;
testRE(re, "a", true);
testRE(re, "xa", true);
testRE(re, "xyz", true);
testRE(re, "xya", false);
execRE(re, "a", ["", undefined, undefined, undefined]);
execRE(re, "xa", ["", undefined, undefined, undefined]);
execRE(re, "xyz", ["", undefined, undefined, undefined]);
// Copyright 2008 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
// See http://code.google.com/p/v8/issues/detail?id=187
assertEquals("f,", "foo".match(/(?:(?=(f)o)fx|)./));
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment