Commit 906903ac authored by yangguo's avatar yangguo Committed by Commit bot

Experimental support for RegExp lookbehind.

R=erikcorry@chromium.org, littledan@chromium.org
BUG=v8:4545
LOG=N

Committed: https://crrev.com/37632606bbce1418238b13fd90cb6ef6705871cd
Cr-Commit-Position: refs/heads/master@{#32029}

Review URL: https://codereview.chromium.org/1418963009

Cr-Commit-Position: refs/heads/master@{#32043}
parent ed357f96
......@@ -839,7 +839,7 @@ Interval RegExpDisjunction::CaptureRegisters() {
}
Interval RegExpLookahead::CaptureRegisters() {
Interval RegExpLookaround::CaptureRegisters() {
return body()->CaptureRegisters();
}
......@@ -907,8 +907,8 @@ bool RegExpDisjunction::IsAnchoredAtEnd() {
}
bool RegExpLookahead::IsAnchoredAtStart() {
return is_positive() && body()->IsAnchoredAtStart();
bool RegExpLookaround::IsAnchoredAtStart() {
return is_positive() && type() == LOOKAHEAD && body()->IsAnchoredAtStart();
}
......@@ -1057,8 +1057,10 @@ void* RegExpUnparser::VisitCapture(RegExpCapture* that, void* data) {
}
void* RegExpUnparser::VisitLookahead(RegExpLookahead* that, void* data) {
os_ << "(-> " << (that->is_positive() ? "+ " : "- ");
void* RegExpUnparser::VisitLookaround(RegExpLookaround* that, void* data) {
os_ << "(";
os_ << (that->type() == RegExpLookaround::LOOKAHEAD ? "->" : "<-");
os_ << (that->is_positive() ? " + " : " - ");
that->body()->Accept(this, data);
os_ << ")";
return NULL;
......
......@@ -119,7 +119,7 @@ class RegExpCharacterClass;
class RegExpCompiler;
class RegExpDisjunction;
class RegExpEmpty;
class RegExpLookahead;
class RegExpLookaround;
class RegExpQuantifier;
class RegExpText;
......@@ -3075,8 +3075,7 @@ class RegExpQuantifier final : public RegExpTree {
class RegExpCapture final : public RegExpTree {
public:
explicit RegExpCapture(RegExpTree* body, int index)
: body_(body), index_(index) { }
explicit RegExpCapture(int index) : body_(NULL), index_(index) {}
void* Accept(RegExpVisitor* visitor, void* data) override;
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
static RegExpNode* ToNode(RegExpTree* body,
......@@ -3091,6 +3090,7 @@ class RegExpCapture final : public RegExpTree {
int min_match() override { return body_->min_match(); }
int max_match() override { return body_->max_match(); }
RegExpTree* body() { return body_; }
void set_body(RegExpTree* body) { body_ = body; }
int index() { return index_; }
static int StartRegister(int index) { return index * 2; }
static int EndRegister(int index) { return index * 2 + 1; }
......@@ -3101,22 +3101,23 @@ class RegExpCapture final : public RegExpTree {
};
class RegExpLookahead final : public RegExpTree {
class RegExpLookaround final : public RegExpTree {
public:
RegExpLookahead(RegExpTree* body,
bool is_positive,
int capture_count,
int capture_from)
enum Type { LOOKAHEAD, LOOKBEHIND };
RegExpLookaround(RegExpTree* body, bool is_positive, int capture_count,
int capture_from, Type type)
: body_(body),
is_positive_(is_positive),
capture_count_(capture_count),
capture_from_(capture_from) { }
capture_from_(capture_from),
type_(type) {}
void* Accept(RegExpVisitor* visitor, void* data) override;
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
RegExpLookahead* AsLookahead() override;
RegExpLookaround* AsLookaround() override;
Interval CaptureRegisters() override;
bool IsLookahead() override;
bool IsLookaround() override;
bool IsAnchoredAtStart() override;
int min_match() override { return 0; }
int max_match() override { return 0; }
......@@ -3124,12 +3125,14 @@ class RegExpLookahead final : public RegExpTree {
bool is_positive() { return is_positive_; }
int capture_count() { return capture_count_; }
int capture_from() { return capture_from_; }
Type type() { return type_; }
private:
RegExpTree* body_;
bool is_positive_;
int capture_count_;
int capture_from_;
Type type_;
};
......@@ -3142,7 +3145,14 @@ class RegExpBackReference final : public RegExpTree {
RegExpBackReference* AsBackReference() override;
bool IsBackReference() override;
int min_match() override { return 0; }
int max_match() override { return capture_->max_match(); }
// The capture may not be completely parsed yet, if the reference occurs
// before the capture. In the ordinary case, nothing has been captured yet,
// so the back reference must have the length 0. If the back reference is
// inside a lookbehind, effectively making it a forward reference, we return
// 0 since lookbehinds have a length of 0.
int max_match() override {
return capture_->body() ? capture_->max_match() : 0;
}
int index() { return capture_->index(); }
RegExpCapture* capture() { return capture_; }
private:
......
......@@ -2049,6 +2049,7 @@ EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_unicode_regexps)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_completion)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_tolength)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_do_expressions)
EMPTY_INITIALIZE_GLOBAL_FOR_FEATURE(harmony_regexp_lookbehind)
static void SimpleInstallFunction(Handle<JSObject>& base, const char* name,
......@@ -2561,6 +2562,7 @@ bool Genesis::InstallExperimentalNatives() {
static const char* harmony_completion_natives[] = {nullptr};
static const char* harmony_do_expressions_natives[] = {nullptr};
static const char* harmony_regexp_subclass_natives[] = {nullptr};
static const char* harmony_regexp_lookbehind_natives[] = {nullptr};
for (int i = ExperimentalNatives::GetDebuggerCount();
i < ExperimentalNatives::GetBuiltinsCount(); i++) {
......
......@@ -202,7 +202,8 @@ DEFINE_IMPLICATION(es_staging, harmony_destructuring)
V(harmony_sharedarraybuffer, "harmony sharedarraybuffer") \
V(harmony_simd, "harmony simd") \
V(harmony_do_expressions, "harmony do-expressions") \
V(harmony_regexp_subclass, "harmony regexp subclassing")
V(harmony_regexp_subclass, "harmony regexp subclassing") \
V(harmony_regexp_lookbehind, "harmony regexp lookbehind")
// Features that are complete (but still behind --harmony/es-staging flag).
#define HARMONY_STAGED(V) \
......
......@@ -5182,6 +5182,7 @@ RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error,
in_(in),
current_(kEndMarker),
next_pos_(0),
captures_started_(0),
capture_count_(0),
has_more_(true),
multiline_(multiline),
......@@ -5285,25 +5286,26 @@ RegExpTree* RegExpParser::ParsePattern() {
// Atom Quantifier
RegExpTree* RegExpParser::ParseDisjunction() {
// Used to store current state while parsing subexpressions.
RegExpParserState initial_state(NULL, INITIAL, 0, zone());
RegExpParserState* stored_state = &initial_state;
RegExpParserState initial_state(NULL, INITIAL, RegExpLookaround::LOOKAHEAD, 0,
zone());
RegExpParserState* state = &initial_state;
// Cache the builder in a local variable for quick access.
RegExpBuilder* builder = initial_state.builder();
while (true) {
switch (current()) {
case kEndMarker:
if (stored_state->IsSubexpression()) {
if (state->IsSubexpression()) {
// Inside a parenthesized group when hitting end of input.
ReportError(CStrVector("Unterminated group") CHECK_FAILED);
}
DCHECK_EQ(INITIAL, stored_state->group_type());
DCHECK_EQ(INITIAL, state->group_type());
// Parsing completed successfully.
return builder->ToRegExp();
case ')': {
if (!stored_state->IsSubexpression()) {
if (!state->IsSubexpression()) {
ReportError(CStrVector("Unmatched ')'") CHECK_FAILED);
}
DCHECK_NE(INITIAL, stored_state->group_type());
DCHECK_NE(INITIAL, state->group_type());
Advance();
// End disjunction parsing and convert builder content to new single
......@@ -5312,27 +5314,27 @@ RegExpTree* RegExpParser::ParseDisjunction() {
int end_capture_index = captures_started();
int capture_index = stored_state->capture_index();
SubexpressionType group_type = stored_state->group_type();
// Restore previous state.
stored_state = stored_state->previous_state();
builder = stored_state->builder();
int capture_index = state->capture_index();
SubexpressionType group_type = state->group_type();
// Build result of subexpression.
if (group_type == CAPTURE) {
RegExpCapture* capture = new(zone()) RegExpCapture(body, capture_index);
captures_->at(capture_index - 1) = capture;
RegExpCapture* capture = GetCapture(capture_index);
capture->set_body(body);
body = capture;
} else if (group_type != GROUPING) {
DCHECK(group_type == POSITIVE_LOOKAHEAD ||
group_type == NEGATIVE_LOOKAHEAD);
bool is_positive = (group_type == POSITIVE_LOOKAHEAD);
body = new(zone()) RegExpLookahead(body,
is_positive,
end_capture_index - capture_index,
capture_index);
DCHECK(group_type == POSITIVE_LOOKAROUND ||
group_type == NEGATIVE_LOOKAROUND);
bool is_positive = (group_type == POSITIVE_LOOKAROUND);
body = new (zone()) RegExpLookaround(
body, is_positive, end_capture_index - capture_index, capture_index,
state->lookaround_type());
}
// Restore previous state.
state = state->previous_state();
builder = state->builder();
builder->AddAtom(body);
// For compatability with JSC and ES3, we allow quantifiers after
// lookaheads, and break in all cases.
......@@ -5379,6 +5381,7 @@ RegExpTree* RegExpParser::ParseDisjunction() {
}
case '(': {
SubexpressionType subexpr_type = CAPTURE;
RegExpLookaround::Type lookaround_type = state->lookaround_type();
Advance();
if (current() == '?') {
switch (Next()) {
......@@ -5386,29 +5389,41 @@ RegExpTree* RegExpParser::ParseDisjunction() {
subexpr_type = GROUPING;
break;
case '=':
subexpr_type = POSITIVE_LOOKAHEAD;
lookaround_type = RegExpLookaround::LOOKAHEAD;
subexpr_type = POSITIVE_LOOKAROUND;
break;
case '!':
subexpr_type = NEGATIVE_LOOKAHEAD;
lookaround_type = RegExpLookaround::LOOKAHEAD;
subexpr_type = NEGATIVE_LOOKAROUND;
break;
case '<':
if (FLAG_harmony_regexp_lookbehind) {
Advance();
lookaround_type = RegExpLookaround::LOOKBEHIND;
if (Next() == '=') {
subexpr_type = POSITIVE_LOOKAROUND;
break;
} else if (Next() == '!') {
subexpr_type = NEGATIVE_LOOKAROUND;
break;
}
}
// Fall through.
default:
ReportError(CStrVector("Invalid group") CHECK_FAILED);
break;
}
Advance(2);
} else {
if (captures_ == NULL) {
captures_ = new(zone()) ZoneList<RegExpCapture*>(2, zone());
}
if (captures_started() >= kMaxCaptures) {
if (captures_started_ >= kMaxCaptures) {
ReportError(CStrVector("Too many captures") CHECK_FAILED);
}
captures_->Add(NULL, zone());
captures_started_++;
}
// Store current state and begin new disjunction parsing.
stored_state = new(zone()) RegExpParserState(stored_state, subexpr_type,
captures_started(), zone());
builder = stored_state->builder();
state = new (zone()) RegExpParserState(
state, subexpr_type, lookaround_type, captures_started_, zone());
builder = state->builder();
continue;
}
case '[': {
......@@ -5451,16 +5466,15 @@ RegExpTree* RegExpParser::ParseDisjunction() {
case '7': case '8': case '9': {
int index = 0;
if (ParseBackReferenceIndex(&index)) {
RegExpCapture* capture = NULL;
if (captures_ != NULL && index <= captures_->length()) {
capture = captures_->at(index - 1);
}
if (capture == NULL) {
if (state->IsInsideCaptureGroup(index)) {
// The backreference is inside the capture group it refers to.
// Nothing can possibly have been captured yet.
builder->AddEmpty();
break;
} else {
RegExpCapture* capture = GetCapture(index);
RegExpTree* atom = new (zone()) RegExpBackReference(capture);
builder->AddAtom(atom);
}
RegExpTree* atom = new(zone()) RegExpBackReference(capture);
builder->AddAtom(atom);
break;
}
uc32 first_digit = Next();
......@@ -5721,6 +5735,34 @@ bool RegExpParser::ParseBackReferenceIndex(int* index_out) {
}
RegExpCapture* RegExpParser::GetCapture(int index) {
// The index for the capture groups are one-based. Its index in the list is
// zero-based.
int know_captures =
is_scanned_for_captures_ ? capture_count_ : captures_started_;
DCHECK(index <= know_captures);
if (captures_ == NULL) {
captures_ = new (zone()) ZoneList<RegExpCapture*>(know_captures, zone());
}
while (captures_->length() < know_captures) {
captures_->Add(new (zone()) RegExpCapture(captures_->length() + 1), zone());
}
return captures_->at(index - 1);
}
bool RegExpParser::RegExpParserState::IsInsideCaptureGroup(int index) {
for (RegExpParserState* s = this; s != NULL; s = s->previous_state()) {
if (s->group_type() != CAPTURE) continue;
// Return true if we found the matching capture index.
if (index == s->capture_index()) return true;
// Abort if index is larger than what has been parsed up till this state.
if (index > s->capture_index()) return false;
}
return false;
}
// QuantifierPrefix ::
// { DecimalDigits }
// { DecimalDigits , }
......@@ -6052,7 +6094,7 @@ RegExpTree* RegExpParser::ParseCharacterClass() {
ranges->Add(CharacterRange::Everything(), zone());
is_negated = !is_negated;
}
return new(zone()) RegExpCharacterClass(ranges, is_negated);
return new (zone()) RegExpCharacterClass(ranges, is_negated);
}
......
......@@ -450,7 +450,7 @@ class RegExpParser BASE_EMBEDDED {
bool simple();
bool contains_anchor() { return contains_anchor_; }
void set_contains_anchor() { contains_anchor_ = true; }
int captures_started() { return captures_ == NULL ? 0 : captures_->length(); }
int captures_started() { return captures_started_; }
int position() { return next_pos_ - 1; }
bool failed() { return failed_; }
......@@ -463,8 +463,8 @@ class RegExpParser BASE_EMBEDDED {
enum SubexpressionType {
INITIAL,
CAPTURE, // All positive values represent captures.
POSITIVE_LOOKAHEAD,
NEGATIVE_LOOKAHEAD,
POSITIVE_LOOKAROUND,
NEGATIVE_LOOKAROUND,
GROUPING
};
......@@ -472,11 +472,12 @@ class RegExpParser BASE_EMBEDDED {
public:
RegExpParserState(RegExpParserState* previous_state,
SubexpressionType group_type,
int disjunction_capture_index,
Zone* zone)
RegExpLookaround::Type lookaround_type,
int disjunction_capture_index, Zone* zone)
: previous_state_(previous_state),
builder_(new(zone) RegExpBuilder(zone)),
builder_(new (zone) RegExpBuilder(zone)),
group_type_(group_type),
lookaround_type_(lookaround_type),
disjunction_capture_index_(disjunction_capture_index) {}
// Parser state of containing expression, if any.
RegExpParserState* previous_state() { return previous_state_; }
......@@ -485,11 +486,16 @@ class RegExpParser BASE_EMBEDDED {
RegExpBuilder* builder() { return builder_; }
// Type of regexp being parsed (parenthesized group or entire regexp).
SubexpressionType group_type() { return group_type_; }
// Lookahead or Lookbehind.
RegExpLookaround::Type lookaround_type() { return lookaround_type_; }
// Index in captures array of first capture in this sub-expression, if any.
// Also the capture index of this sub-expression itself, if group_type
// is CAPTURE.
int capture_index() { return disjunction_capture_index_; }
// Check whether the parser is inside a capture group with the given index.
bool IsInsideCaptureGroup(int index);
private:
// Linked list implementation of stack of states.
RegExpParserState* previous_state_;
......@@ -497,10 +503,15 @@ class RegExpParser BASE_EMBEDDED {
RegExpBuilder* builder_;
// Stored disjunction type (capture, look-ahead or grouping), if any.
SubexpressionType group_type_;
// Stored read direction.
RegExpLookaround::Type lookaround_type_;
// Stored disjunction's capture index (if any).
int disjunction_capture_index_;
};
// Return the 1-indexed RegExpCapture object, allocate if necessary.
RegExpCapture* GetCapture(int index);
Isolate* isolate() { return isolate_; }
Zone* zone() const { return zone_; }
......@@ -518,6 +529,7 @@ class RegExpParser BASE_EMBEDDED {
FlatStringReader* in_;
uc32 current_;
int next_pos_;
int captures_started_;
// The capture count is only valid after we have scanned for captures.
int capture_count_;
bool has_more_;
......
......@@ -34,9 +34,11 @@ class RegExpMacroAssemblerARM: public NativeRegExpMacroAssembler {
// A "greedy loop" is a loop that is both greedy and with a simple
// body. It has a particularly simple implementation.
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
virtual void CheckNotAtStart(Label* on_not_at_start);
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start);
virtual void CheckNotBackReference(int start_reg, bool read_backward,
Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
bool read_backward,
Label* on_no_match);
virtual void CheckNotCharacter(unsigned c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(unsigned c,
......@@ -119,9 +121,9 @@ class RegExpMacroAssemblerARM: public NativeRegExpMacroAssembler {
// When adding local variables remember to push space for them in
// the frame in GetCode.
static const int kSuccessfulCaptures = kInputString - kPointerSize;
static const int kInputStartMinusOne = kSuccessfulCaptures - kPointerSize;
static const int kStringStartMinusOne = kSuccessfulCaptures - kPointerSize;
// First register address. Following registers are below it on the stack.
static const int kRegisterZero = kInputStartMinusOne - kPointerSize;
static const int kRegisterZero = kStringStartMinusOne - kPointerSize;
// Initial size of code buffer.
static const size_t kRegExpCodeSize = 1024;
......
......@@ -39,9 +39,11 @@ class RegExpMacroAssemblerARM64: public NativeRegExpMacroAssembler {
// A "greedy loop" is a loop that is both greedy and with a simple
// body. It has a particularly simple implementation.
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
virtual void CheckNotAtStart(Label* on_not_at_start);
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start);
virtual void CheckNotBackReference(int start_reg, bool read_backward,
Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
bool read_backward,
Label* on_no_match);
virtual void CheckNotCharacter(unsigned c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(unsigned c,
......@@ -190,7 +192,7 @@ class RegExpMacroAssemblerARM64: public NativeRegExpMacroAssembler {
Register code_pointer() { return x20; }
// Register holding the value used for clearing capture registers.
Register non_position_value() { return w24; }
Register string_start_minus_one() { return w24; }
// The top 32 bit of this register is used to store this value
// twice. This is used for clearing more than one register at a time.
Register twice_non_position_value() { return x24; }
......
......@@ -57,15 +57,17 @@ V(CHECK_LT, 35, 8) /* bc8 pad8 uc16 addr32 */ \
V(CHECK_GT, 36, 8) /* bc8 pad8 uc16 addr32 */ \
V(CHECK_NOT_BACK_REF, 37, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_NOT_BACK_REF_NO_CASE, 38, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_NOT_REGS_EQUAL, 39, 12) /* bc8 regidx24 reg_idx32 addr32 */ \
V(CHECK_REGISTER_LT, 40, 12) /* bc8 reg_idx24 value32 addr32 */ \
V(CHECK_REGISTER_GE, 41, 12) /* bc8 reg_idx24 value32 addr32 */ \
V(CHECK_REGISTER_EQ_POS, 42, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_AT_START, 43, 8) /* bc8 pad24 addr32 */ \
V(CHECK_NOT_AT_START, 44, 8) /* bc8 pad24 addr32 */ \
V(CHECK_GREEDY, 45, 8) /* bc8 pad24 addr32 */ \
V(ADVANCE_CP_AND_GOTO, 46, 8) /* bc8 offset24 addr32 */ \
V(SET_CURRENT_POSITION_FROM_END, 47, 4) /* bc8 idx24 */
V(CHECK_NOT_BACK_REF_BACKWARD, 39, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD, 40, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_NOT_REGS_EQUAL, 41, 12) /* bc8 regidx24 reg_idx32 addr32 */ \
V(CHECK_REGISTER_LT, 42, 12) /* bc8 reg_idx24 value32 addr32 */ \
V(CHECK_REGISTER_GE, 43, 12) /* bc8 reg_idx24 value32 addr32 */ \
V(CHECK_REGISTER_EQ_POS, 44, 8) /* bc8 reg_idx24 addr32 */ \
V(CHECK_AT_START, 45, 8) /* bc8 pad24 addr32 */ \
V(CHECK_NOT_AT_START, 46, 8) /* bc8 offset24 addr32 */ \
V(CHECK_GREEDY, 47, 8) /* bc8 pad24 addr32 */ \
V(ADVANCE_CP_AND_GOTO, 48, 8) /* bc8 offset24 addr32 */ \
V(SET_CURRENT_POSITION_FROM_END, 49, 4) /* bc8 idx24 */
#define DECLARE_BYTECODES(name, code, length) \
static const int BC_##name = code;
......
......@@ -33,9 +33,11 @@ class RegExpMacroAssemblerIA32: public NativeRegExpMacroAssembler {
// A "greedy loop" is a loop that is both greedy and with a simple
// body. It has a particularly simple implementation.
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
virtual void CheckNotAtStart(Label* on_not_at_start);
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start);
virtual void CheckNotBackReference(int start_reg, bool read_backward,
Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
bool read_backward,
Label* on_no_match);
virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(uint32_t c,
......@@ -116,9 +118,9 @@ class RegExpMacroAssemblerIA32: public NativeRegExpMacroAssembler {
static const int kBackup_edi = kBackup_esi - kPointerSize;
static const int kBackup_ebx = kBackup_edi - kPointerSize;
static const int kSuccessfulCaptures = kBackup_ebx - kPointerSize;
static const int kInputStartMinusOne = kSuccessfulCaptures - kPointerSize;
static const int kStringStartMinusOne = kSuccessfulCaptures - kPointerSize;
// First register address. Following registers are below it on the stack.
static const int kRegisterZero = kInputStartMinusOne - kPointerSize;
static const int kRegisterZero = kStringStartMinusOne - kPointerSize;
// Initial size of code buffer.
static const size_t kRegExpCodeSize = 1024;
......
......@@ -270,7 +270,7 @@ static RegExpImpl::IrregexpResult RawMatch(Isolate* isolate,
break;
BYTECODE(LOAD_CURRENT_CHAR) {
int pos = current + (insn >> BYTECODE_SHIFT);
if (pos >= subject.length()) {
if (pos >= subject.length() || pos < 0) {
pc = code_base + Load32Aligned(pc + 4);
} else {
current_char = subject[pos];
......@@ -286,7 +286,7 @@ static RegExpImpl::IrregexpResult RawMatch(Isolate* isolate,
}
BYTECODE(LOAD_2_CURRENT_CHARS) {
int pos = current + (insn >> BYTECODE_SHIFT);
if (pos + 2 > subject.length()) {
if (pos + 2 > subject.length() || pos < 0) {
pc = code_base + Load32Aligned(pc + 4);
} else {
Char next = subject[pos + 1];
......@@ -306,7 +306,7 @@ static RegExpImpl::IrregexpResult RawMatch(Isolate* isolate,
BYTECODE(LOAD_4_CURRENT_CHARS) {
DCHECK(sizeof(Char) == 1);
int pos = current + (insn >> BYTECODE_SHIFT);
if (pos + 4 > subject.length()) {
if (pos + 4 > subject.length() || pos < 0) {
pc = code_base + Load32Aligned(pc + 4);
} else {
Char next1 = subject[pos + 1];
......@@ -497,46 +497,59 @@ static RegExpImpl::IrregexpResult RawMatch(Isolate* isolate,
BYTECODE(CHECK_NOT_BACK_REF) {
int from = registers[insn >> BYTECODE_SHIFT];
int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
if (from < 0 || len <= 0) {
pc += BC_CHECK_NOT_BACK_REF_LENGTH;
break;
}
if (current + len > subject.length()) {
pc = code_base + Load32Aligned(pc + 4);
break;
} else {
int i;
for (i = 0; i < len; i++) {
if (subject[from + i] != subject[current + i]) {
pc = code_base + Load32Aligned(pc + 4);
break;
}
if (from >= 0 && len > 0) {
if (current + len > subject.length() ||
CompareChars(&subject[from], &subject[current], len) != 0) {
pc = code_base + Load32Aligned(pc + 4);
break;
}
if (i < len) break;
current += len;
}
pc += BC_CHECK_NOT_BACK_REF_LENGTH;
break;
}
BYTECODE(CHECK_NOT_BACK_REF_BACKWARD) {
int from = registers[insn >> BYTECODE_SHIFT];
int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
if (from >= 0 && len > 0) {
if (current - len < 0 ||
CompareChars(&subject[from], &subject[current - len], len) != 0) {
pc = code_base + Load32Aligned(pc + 4);
break;
}
current -= len;
}
pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH;
break;
}
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
int from = registers[insn >> BYTECODE_SHIFT];
int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
if (from < 0 || len <= 0) {
pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
break;
if (from >= 0 && len > 0) {
if (current + len > subject.length() ||
!BackRefMatchesNoCase(isolate->interp_canonicalize_mapping(),
from, current, len, subject)) {
pc = code_base + Load32Aligned(pc + 4);
break;
}
current += len;
}
if (current + len > subject.length()) {
pc = code_base + Load32Aligned(pc + 4);
break;
} else {
if (BackRefMatchesNoCase(isolate->interp_canonicalize_mapping(),
from, current, len, subject)) {
current += len;
pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
} else {
pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
break;
}
BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) {
int from = registers[insn >> BYTECODE_SHIFT];
int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
if (from >= 0 && len > 0) {
if (current - len < 0 ||
!BackRefMatchesNoCase(isolate->interp_canonicalize_mapping(),
from, current - len, len, subject)) {
pc = code_base + Load32Aligned(pc + 4);
break;
}
current -= len;
}
pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH;
break;
}
BYTECODE(CHECK_AT_START)
......@@ -547,7 +560,7 @@ static RegExpImpl::IrregexpResult RawMatch(Isolate* isolate,
}
break;
BYTECODE(CHECK_NOT_AT_START)
if (current == 0) {
if (current + (insn >> BYTECODE_SHIFT) == 0) {
pc += BC_CHECK_NOT_AT_START_LENGTH;
} else {
pc = code_base + Load32Aligned(pc + 4);
......
This diff is collapsed.
......@@ -387,17 +387,17 @@ class DispatchTable : public ZoneObject {
VISIT(Text)
#define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \
VISIT(Disjunction) \
VISIT(Alternative) \
VISIT(Assertion) \
VISIT(CharacterClass) \
VISIT(Atom) \
VISIT(Quantifier) \
VISIT(Capture) \
VISIT(Lookahead) \
VISIT(BackReference) \
VISIT(Empty) \
#define FOR_EACH_REG_EXP_TREE_TYPE(VISIT) \
VISIT(Disjunction) \
VISIT(Alternative) \
VISIT(Assertion) \
VISIT(CharacterClass) \
VISIT(Atom) \
VISIT(Quantifier) \
VISIT(Capture) \
VISIT(Lookaround) \
VISIT(BackReference) \
VISIT(Empty) \
VISIT(Text)
......@@ -603,7 +603,7 @@ class RegExpNode: public ZoneObject {
RegExpCompiler* compiler,
int characters_filled_in,
bool not_at_start) = 0;
static const int kNodeIsTooComplexForGreedyLoops = -1;
static const int kNodeIsTooComplexForGreedyLoops = kMinInt;
virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; }
// Only returns the successor for a text node of length 1 that matches any
// character and that has no guards on it.
......@@ -827,14 +827,14 @@ class ActionNode: public SeqRegExpNode {
class TextNode: public SeqRegExpNode {
public:
TextNode(ZoneList<TextElement>* elms,
TextNode(ZoneList<TextElement>* elms, bool read_backward,
RegExpNode* on_success)
: SeqRegExpNode(on_success),
elms_(elms) { }
TextNode(RegExpCharacterClass* that,
: SeqRegExpNode(on_success), elms_(elms), read_backward_(read_backward) {}
TextNode(RegExpCharacterClass* that, bool read_backward,
RegExpNode* on_success)
: SeqRegExpNode(on_success),
elms_(new(zone()) ZoneList<TextElement>(1, zone())) {
elms_(new (zone()) ZoneList<TextElement>(1, zone())),
read_backward_(read_backward) {
elms_->Add(TextElement::CharClass(that), zone());
}
virtual void Accept(NodeVisitor* visitor);
......@@ -845,6 +845,7 @@ class TextNode: public SeqRegExpNode {
int characters_filled_in,
bool not_at_start);
ZoneList<TextElement>* elements() { return elms_; }
bool read_backward() { return read_backward_; }
void MakeCaseIndependent(Isolate* isolate, bool is_one_byte);
virtual int GreedyLoopTextLength();
virtual RegExpNode* GetSuccessorOfOmnivorousTextNode(
......@@ -873,6 +874,7 @@ class TextNode: public SeqRegExpNode {
int* checked_up_to);
int Length();
ZoneList<TextElement>* elms_;
bool read_backward_;
};
......@@ -925,15 +927,16 @@ class AssertionNode: public SeqRegExpNode {
class BackReferenceNode: public SeqRegExpNode {
public:
BackReferenceNode(int start_reg,
int end_reg,
BackReferenceNode(int start_reg, int end_reg, bool read_backward,
RegExpNode* on_success)
: SeqRegExpNode(on_success),
start_reg_(start_reg),
end_reg_(end_reg) { }
end_reg_(end_reg),
read_backward_(read_backward) {}
virtual void Accept(NodeVisitor* visitor);
int start_register() { return start_reg_; }
int end_register() { return end_reg_; }
bool read_backward() { return read_backward_; }
virtual void Emit(RegExpCompiler* compiler, Trace* trace);
virtual int EatsAtLeast(int still_to_find,
int recursion_depth,
......@@ -950,6 +953,7 @@ class BackReferenceNode: public SeqRegExpNode {
private:
int start_reg_;
int end_reg_;
bool read_backward_;
};
......@@ -1074,6 +1078,7 @@ class ChoiceNode: public RegExpNode {
return true;
}
virtual RegExpNode* FilterOneByte(int depth, bool ignore_case);
virtual bool read_backward() { return false; }
protected:
int GreedyLoopTextLengthForAlternative(GuardedAlternative* alternative);
......@@ -1150,12 +1155,12 @@ class NegativeLookaheadChoiceNode: public ChoiceNode {
class LoopChoiceNode: public ChoiceNode {
public:
explicit LoopChoiceNode(bool body_can_be_zero_length, Zone* zone)
LoopChoiceNode(bool body_can_be_zero_length, bool read_backward, Zone* zone)
: ChoiceNode(2, zone),
loop_node_(NULL),
continue_node_(NULL),
body_can_be_zero_length_(body_can_be_zero_length)
{ }
body_can_be_zero_length_(body_can_be_zero_length),
read_backward_(read_backward) {}
void AddLoopAlternative(GuardedAlternative alt);
void AddContinueAlternative(GuardedAlternative alt);
virtual void Emit(RegExpCompiler* compiler, Trace* trace);
......@@ -1169,6 +1174,7 @@ class LoopChoiceNode: public ChoiceNode {
RegExpNode* loop_node() { return loop_node_; }
RegExpNode* continue_node() { return continue_node_; }
bool body_can_be_zero_length() { return body_can_be_zero_length_; }
virtual bool read_backward() { return read_backward_; }
virtual void Accept(NodeVisitor* visitor);
virtual RegExpNode* FilterOneByte(int depth, bool ignore_case);
......@@ -1183,6 +1189,7 @@ class LoopChoiceNode: public ChoiceNode {
RegExpNode* loop_node_;
RegExpNode* continue_node_;
bool body_can_be_zero_length_;
bool read_backward_;
};
......@@ -1438,9 +1445,7 @@ class Trace {
at_start_ == UNKNOWN;
}
TriBool at_start() { return at_start_; }
void set_at_start(bool at_start) {
at_start_ = at_start ? TRUE_VALUE : FALSE_VALUE;
}
void set_at_start(TriBool at_start) { at_start_ = at_start; }
Label* backtrack() { return backtrack_; }
Label* loop_label() { return loop_label_; }
RegExpNode* stop_node() { return stop_node_; }
......
......@@ -33,9 +33,11 @@ class RegExpMacroAssemblerMIPS: public NativeRegExpMacroAssembler {
// A "greedy loop" is a loop that is both greedy and with a simple
// body. It has a particularly simple implementation.
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
virtual void CheckNotAtStart(Label* on_not_at_start);
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start);
virtual void CheckNotBackReference(int start_reg, bool read_backward,
Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
bool read_backward,
Label* on_no_match);
virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(uint32_t c,
......@@ -120,9 +122,9 @@ class RegExpMacroAssemblerMIPS: public NativeRegExpMacroAssembler {
// When adding local variables remember to push space for them in
// the frame in GetCode.
static const int kSuccessfulCaptures = kInputString - kPointerSize;
static const int kInputStartMinusOne = kSuccessfulCaptures - kPointerSize;
static const int kStringStartMinusOne = kSuccessfulCaptures - kPointerSize;
// First register address. Following registers are below it on the stack.
static const int kRegisterZero = kInputStartMinusOne - kPointerSize;
static const int kRegisterZero = kStringStartMinusOne - kPointerSize;
// Initial size of code buffer.
static const size_t kRegExpCodeSize = 1024;
......
......@@ -33,9 +33,11 @@ class RegExpMacroAssemblerMIPS: public NativeRegExpMacroAssembler {
// A "greedy loop" is a loop that is both greedy and with a simple
// body. It has a particularly simple implementation.
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
virtual void CheckNotAtStart(Label* on_not_at_start);
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start);
virtual void CheckNotBackReference(int start_reg, bool read_backward,
Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
bool read_backward,
Label* on_no_match);
virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(uint32_t c,
......@@ -125,9 +127,9 @@ class RegExpMacroAssemblerMIPS: public NativeRegExpMacroAssembler {
// When adding local variables remember to push space for them in
// the frame in GetCode.
static const int kSuccessfulCaptures = kInputString - kPointerSize;
static const int kInputStartMinusOne = kSuccessfulCaptures - kPointerSize;
static const int kStringStartMinusOne = kSuccessfulCaptures - kPointerSize;
// First register address. Following registers are below it on the stack.
static const int kRegisterZero = kInputStartMinusOne - kPointerSize;
static const int kRegisterZero = kStringStartMinusOne - kPointerSize;
#elif defined(MIPS_ABI_O32)
// Offsets from frame_pointer() of function parameters and stored registers.
......@@ -158,9 +160,9 @@ class RegExpMacroAssemblerMIPS: public NativeRegExpMacroAssembler {
// When adding local variables remember to push space for them in
// the frame in GetCode.
static const int kSuccessfulCaptures = kInputString - kPointerSize;
static const int kInputStartMinusOne = kSuccessfulCaptures - kPointerSize;
static const int kStringStartMinusOne = kSuccessfulCaptures - kPointerSize;
// First register address. Following registers are below it on the stack.
static const int kRegisterZero = kInputStartMinusOne - kPointerSize;
static const int kRegisterZero = kStringStartMinusOne - kPointerSize;
#else
# error "undefined MIPS ABI"
......
......@@ -273,8 +273,9 @@ void RegExpMacroAssemblerIrregexp::CheckAtStart(Label* on_at_start) {
}
void RegExpMacroAssemblerIrregexp::CheckNotAtStart(Label* on_not_at_start) {
Emit(BC_CHECK_NOT_AT_START, 0);
void RegExpMacroAssemblerIrregexp::CheckNotAtStart(int cp_offset,
Label* on_not_at_start) {
Emit(BC_CHECK_NOT_AT_START, cp_offset);
EmitOrLink(on_not_at_start);
}
......@@ -370,20 +371,23 @@ void RegExpMacroAssemblerIrregexp::CheckBitInTable(
void RegExpMacroAssemblerIrregexp::CheckNotBackReference(int start_reg,
bool read_backward,
Label* on_not_equal) {
DCHECK(start_reg >= 0);
DCHECK(start_reg <= kMaxRegister);
Emit(BC_CHECK_NOT_BACK_REF, start_reg);
Emit(read_backward ? BC_CHECK_NOT_BACK_REF_BACKWARD : BC_CHECK_NOT_BACK_REF,
start_reg);
EmitOrLink(on_not_equal);
}
void RegExpMacroAssemblerIrregexp::CheckNotBackReferenceIgnoreCase(
int start_reg,
Label* on_not_equal) {
int start_reg, bool read_backward, Label* on_not_equal) {
DCHECK(start_reg >= 0);
DCHECK(start_reg <= kMaxRegister);
Emit(BC_CHECK_NOT_BACK_REF_NO_CASE, start_reg);
Emit(read_backward ? BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD
: BC_CHECK_NOT_BACK_REF_NO_CASE,
start_reg);
EmitOrLink(on_not_equal);
}
......
......@@ -66,7 +66,7 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
virtual void CheckCharacterLT(uc16 limit, Label* on_less);
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
virtual void CheckAtStart(Label* on_at_start);
virtual void CheckNotAtStart(Label* on_not_at_start);
virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start);
virtual void CheckNotCharacter(unsigned c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(unsigned c,
unsigned mask,
......@@ -82,8 +82,10 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
uc16 to,
Label* on_not_in_range);
virtual void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set);
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckNotBackReference(int start_reg, bool read_backward,
Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
bool read_backward,
Label* on_no_match);
virtual void IfRegisterLT(int register_index, int comparand, Label* if_lt);
virtual void IfRegisterGE(int register_index, int comparand, Label* if_ge);
......
......@@ -13,9 +13,9 @@ RegExpMacroAssemblerTracer::RegExpMacroAssemblerTracer(
Isolate* isolate, RegExpMacroAssembler* assembler)
: RegExpMacroAssembler(isolate, assembler->zone()), assembler_(assembler) {
unsigned int type = assembler->Implementation();
DCHECK(type < 6);
const char* impl_names[] = {"IA32", "ARM", "ARM64",
"MIPS", "X64", "X87", "Bytecode"};
DCHECK(type < 8);
const char* impl_names[] = {"IA32", "ARM", "ARM64", "MIPS",
"PPC", "X64", "X87", "Bytecode"};
PrintF("RegExpMacroAssembler%s();\n", impl_names[type]);
}
......@@ -241,9 +241,11 @@ void RegExpMacroAssemblerTracer::CheckAtStart(Label* on_at_start) {
}
void RegExpMacroAssemblerTracer::CheckNotAtStart(Label* on_not_at_start) {
PrintF(" CheckNotAtStart(label[%08x]);\n", LabelToInt(on_not_at_start));
assembler_->CheckNotAtStart(on_not_at_start);
void RegExpMacroAssemblerTracer::CheckNotAtStart(int cp_offset,
Label* on_not_at_start) {
PrintF(" CheckNotAtStart(cp_offset=%d, label[%08x]);\n", cp_offset,
LabelToInt(on_not_at_start));
assembler_->CheckNotAtStart(cp_offset, on_not_at_start);
}
......@@ -349,19 +351,21 @@ void RegExpMacroAssemblerTracer::CheckBitInTable(
void RegExpMacroAssemblerTracer::CheckNotBackReference(int start_reg,
bool read_backward,
Label* on_no_match) {
PrintF(" CheckNotBackReference(register=%d, label[%08x]);\n", start_reg,
LabelToInt(on_no_match));
assembler_->CheckNotBackReference(start_reg, on_no_match);
PrintF(" CheckNotBackReference(register=%d, %s, label[%08x]);\n", start_reg,
read_backward ? "backward" : "forward", LabelToInt(on_no_match));
assembler_->CheckNotBackReference(start_reg, read_backward, on_no_match);
}
void RegExpMacroAssemblerTracer::CheckNotBackReferenceIgnoreCase(
int start_reg,
Label* on_no_match) {
PrintF(" CheckNotBackReferenceIgnoreCase(register=%d, label[%08x]);\n",
start_reg, LabelToInt(on_no_match));
assembler_->CheckNotBackReferenceIgnoreCase(start_reg, on_no_match);
int start_reg, bool read_backward, Label* on_no_match) {
PrintF(" CheckNotBackReferenceIgnoreCase(register=%d, %s, label[%08x]);\n",
start_reg, read_backward ? "backward" : "forward",
LabelToInt(on_no_match));
assembler_->CheckNotBackReferenceIgnoreCase(start_reg, read_backward,
on_no_match);
}
......
......@@ -30,9 +30,11 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
virtual void CheckCharacterLT(uc16 limit, Label* on_less);
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
virtual void CheckNotAtStart(Label* on_not_at_start);
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start);
virtual void CheckNotBackReference(int start_reg, bool read_backward,
Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
bool read_backward,
Label* on_no_match);
virtual void CheckNotCharacter(unsigned c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(unsigned c,
......
......@@ -71,9 +71,11 @@ class RegExpMacroAssembler {
virtual void CheckCharacterGT(uc16 limit, Label* on_greater) = 0;
virtual void CheckCharacterLT(uc16 limit, Label* on_less) = 0;
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position) = 0;
virtual void CheckNotAtStart(Label* on_not_at_start) = 0;
virtual void CheckNotBackReference(int start_reg, Label* on_no_match) = 0;
virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start) = 0;
virtual void CheckNotBackReference(int start_reg, bool read_backward,
Label* on_no_match) = 0;
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
bool read_backward,
Label* on_no_match) = 0;
// Check the current character for a match with a literal character. If we
// fail to match then goto the on_failure label. End of input always
......
......@@ -34,9 +34,11 @@ class RegExpMacroAssemblerX64: public NativeRegExpMacroAssembler {
// A "greedy loop" is a loop that is both greedy and with a simple
// body. It has a particularly simple implementation.
virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
virtual void CheckNotAtStart(Label* on_not_at_start);
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckNotAtStart(int cp_offset, Label* on_not_at_start);
virtual void CheckNotBackReference(int start_reg, bool read_backward,
Label* on_no_match);
virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
bool read_backward,
Label* on_no_match);
virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
virtual void CheckNotCharacterAfterAnd(uint32_t c,
......@@ -171,10 +173,10 @@ class RegExpMacroAssemblerX64: public NativeRegExpMacroAssembler {
static const int kSuccessfulCaptures = kLastCalleeSaveRegister - kPointerSize;
// When adding local variables remember to push space for them in
// the frame in GetCode.
static const int kInputStartMinusOne = kSuccessfulCaptures - kPointerSize;
static const int kStringStartMinusOne = kSuccessfulCaptures - kPointerSize;
// First register address. Following registers are below it on the stack.
static const int kRegisterZero = kInputStartMinusOne - kPointerSize;
static const int kRegisterZero = kStringStartMinusOne - kPointerSize;
// Initial size of code buffer.
static const size_t kRegExpCodeSize = 1024;
......
......@@ -159,7 +159,10 @@ static MinMaxPair CheckMinMaxMatch(const char* input) {
CHECK_EQ(max, min_max.max_match); \
}
TEST(Parser) {
void TestRegExpParser(bool lookbehind) {
FLAG_harmony_regexp_lookbehind = lookbehind;
CHECK_PARSE_ERROR("?");
CheckParseEq("abc", "'abc'");
......@@ -191,6 +194,13 @@ TEST(Parser) {
CheckParseEq("foo|(bar|baz)|quux", "(| 'foo' (^ (| 'bar' 'baz')) 'quux')");
CheckParseEq("foo(?=bar)baz", "(: 'foo' (-> + 'bar') 'baz')");
CheckParseEq("foo(?!bar)baz", "(: 'foo' (-> - 'bar') 'baz')");
if (lookbehind) {
CheckParseEq("foo(?<=bar)baz", "(: 'foo' (<- + 'bar') 'baz')");
CheckParseEq("foo(?<!bar)baz", "(: 'foo' (<- - 'bar') 'baz')");
} else {
CHECK_PARSE_ERROR("foo(?<=bar)baz");
CHECK_PARSE_ERROR("foo(?<!bar)baz");
}
CheckParseEq("()", "(^ %)");
CheckParseEq("(?=)", "(-> + %)");
CheckParseEq("[]", "^[\\x00-\\uffff]"); // Doesn't compile on windows
......@@ -267,9 +277,16 @@ TEST(Parser) {
CheckParseEq("(?=a){1,10}a", "(: (-> + 'a') 'a')");
CheckParseEq("(?=a){9,10}a", "(: (-> + 'a') 'a')");
CheckParseEq("(?!a)?a", "'a'");
CheckParseEq("\\1(a)", "(^ 'a')");
CheckParseEq("\\1(a)", "(: (<- 1) (^ 'a'))");
CheckParseEq("(?!(a))\\1", "(: (-> - (^ 'a')) (<- 1))");
CheckParseEq("(?!\\1(a\\1)\\1)\\1", "(: (-> - (: (^ 'a') (<- 1))) (<- 1))");
CheckParseEq("(?!\\1(a\\1)\\1)\\1",
"(: (-> - (: (<- 1) (^ 'a') (<- 1))) (<- 1))");
CheckParseEq("\\1\\2(a(?:\\1(b\\1\\2))\\2)\\1",
"(: (<- 1) (<- 2) (^ (: 'a' (^ 'b') (<- 2))) (<- 1))");
if (lookbehind) {
CheckParseEq("\\1\\2(a(?<=\\1(b\\1\\2))\\2)\\1",
"(: (<- 1) (<- 2) (^ (: 'a' (<- + (^ 'b')) (<- 2))) (<- 1))");
}
CheckParseEq("[\\0]", "[\\x00]");
CheckParseEq("[\\11]", "[\\x09]");
CheckParseEq("[\\11a]", "[\\x09 a]");
......@@ -400,6 +417,16 @@ TEST(Parser) {
}
TEST(ParserWithLookbehind) {
TestRegExpParser(true); // Lookbehind enabled.
}
TEST(ParserWithoutLookbehind) {
TestRegExpParser(true); // Lookbehind enabled.
}
TEST(ParserRegression) {
CheckParseEq("[A-Z$-][x]", "(! [A-Z $ -] [x])");
CheckParseEq("a{3,4*}", "(: 'a{3,' (# 0 - g '4') '}')");
......@@ -790,7 +817,7 @@ TEST(MacroAssemblerNativeSimple) {
Label fail, backtrack;
m.PushBacktrack(&fail);
m.CheckNotAtStart(NULL);
m.CheckNotAtStart(0, NULL);
m.LoadCurrentCharacter(2, NULL);
m.CheckNotCharacter('o', NULL);
m.LoadCurrentCharacter(1, NULL, false);
......@@ -857,7 +884,7 @@ TEST(MacroAssemblerNativeSimpleUC16) {
Label fail, backtrack;
m.PushBacktrack(&fail);
m.CheckNotAtStart(NULL);
m.CheckNotAtStart(0, NULL);
m.LoadCurrentCharacter(2, NULL);
m.CheckNotCharacter('o', NULL);
m.LoadCurrentCharacter(1, NULL, false);
......@@ -973,12 +1000,12 @@ TEST(MacroAssemblerNativeBackReferenceLATIN1) {
m.AdvanceCurrentPosition(2);
m.WriteCurrentPositionToRegister(1, 0);
Label nomatch;
m.CheckNotBackReference(0, &nomatch);
m.CheckNotBackReference(0, false, &nomatch);
m.Fail();
m.Bind(&nomatch);
m.AdvanceCurrentPosition(2);
Label missing_match;
m.CheckNotBackReference(0, &missing_match);
m.CheckNotBackReference(0, false, &missing_match);
m.WriteCurrentPositionToRegister(2, 0);
m.Succeed();
m.Bind(&missing_match);
......@@ -1023,12 +1050,12 @@ TEST(MacroAssemblerNativeBackReferenceUC16) {
m.AdvanceCurrentPosition(2);
m.WriteCurrentPositionToRegister(1, 0);
Label nomatch;
m.CheckNotBackReference(0, &nomatch);
m.CheckNotBackReference(0, false, &nomatch);
m.Fail();
m.Bind(&nomatch);
m.AdvanceCurrentPosition(2);
Label missing_match;
m.CheckNotBackReference(0, &missing_match);
m.CheckNotBackReference(0, false, &missing_match);
m.WriteCurrentPositionToRegister(2, 0);
m.Succeed();
m.Bind(&missing_match);
......@@ -1073,7 +1100,7 @@ TEST(MacroAssemblernativeAtStart) {
0);
Label not_at_start, newline, fail;
m.CheckNotAtStart(&not_at_start);
m.CheckNotAtStart(0, &not_at_start);
// Check that prevchar = '\n' and current = 'f'.
m.CheckCharacter('\n', &newline);
m.Bind(&fail);
......@@ -1138,16 +1165,16 @@ TEST(MacroAssemblerNativeBackRefNoCase) {
m.WriteCurrentPositionToRegister(2, 0);
m.AdvanceCurrentPosition(3);
m.WriteCurrentPositionToRegister(3, 0);
m.CheckNotBackReferenceIgnoreCase(2, &fail); // Match "AbC".
m.CheckNotBackReferenceIgnoreCase(2, &fail); // Match "ABC".
m.CheckNotBackReferenceIgnoreCase(2, false, &fail); // Match "AbC".
m.CheckNotBackReferenceIgnoreCase(2, false, &fail); // Match "ABC".
Label expected_fail;
m.CheckNotBackReferenceIgnoreCase(2, &expected_fail);
m.CheckNotBackReferenceIgnoreCase(2, false, &expected_fail);
m.Bind(&fail);
m.Fail();
m.Bind(&expected_fail);
m.AdvanceCurrentPosition(3); // Skip "xYz"
m.CheckNotBackReferenceIgnoreCase(2, &succ);
m.CheckNotBackReferenceIgnoreCase(2, false, &succ);
m.Fail();
m.Bind(&succ);
......@@ -1339,7 +1366,7 @@ TEST(MacroAssemblerNativeLotsOfRegisters) {
m.WriteCurrentPositionToRegister(0, 0);
m.WriteCurrentPositionToRegister(1, 1);
Label done;
m.CheckNotBackReference(0, &done); // Performs a system-stack push.
m.CheckNotBackReference(0, false, &done); // Performs a system-stack push.
m.Bind(&done);
m.PushRegister(large_number, RegExpMacroAssembler::kNoStackLimitCheck);
m.PopRegister(1);
......@@ -1388,7 +1415,7 @@ TEST(MacroAssembler) {
m.Fail();
m.Bind(&start);
m.PushBacktrack(&fail);
m.CheckNotAtStart(NULL);
m.CheckNotAtStart(0, NULL);
m.LoadCurrentCharacter(0, NULL);
m.CheckNotCharacter('f', NULL);
m.LoadCurrentCharacter(1, NULL);
......
// Copyright 2015 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-regexp-lookbehind
// Simple fixed-length matches.
assertEquals(["a"], "a".match(/^.(?<=a)/));
assertNull("b".match(/^.(?<=a)/));
assertEquals(["foo"], "foo1".match(/^f..(?<=.oo)/));
assertEquals(["foo"], "foo2".match(/^f\w\w(?<=\woo)/));
assertNull("boo".match(/^f\w\w(?<=\woo)/));
assertNull("fao".match(/^f\w\w(?<=\woo)/));
assertNull("foa".match(/^f\w\w(?<=\woo)/));
assertEquals(["def"], "abcdef".match(/(?<=abc)\w\w\w/));
assertEquals(["def"], "abcdef".match(/(?<=a.c)\w\w\w/));
assertEquals(["def"], "abcdef".match(/(?<=a\wc)\w\w\w/));
assertEquals(["cde"], "abcdef".match(/(?<=a[a-z])\w\w\w/));
assertEquals(["def"], "abcdef".match(/(?<=a[a-z][a-z])\w\w\w/));
assertEquals(["def"], "abcdef".match(/(?<=a[a-z]{2})\w\w\w/));
assertEquals(["bcd"], "abcdef".match(/(?<=a{1})\w\w\w/));
assertEquals(["cde"], "abcdef".match(/(?<=a{1}b{1})\w\w\w/));
assertEquals(["def"], "abcdef".match(/(?<=a{1}[a-z]{2})\w\w\w/));
// Variable-length matches.
assertEquals(["def"], "abcdef".match(/(?<=[a|b|c]*)[^a|b|c]{3}/));
assertEquals(["def"], "abcdef".match(/(?<=\w*)[^a|b|c]{3}/));
// Start of line matches.
assertEquals(["def"], "abcdef".match(/(?<=^abc)def/));
assertEquals(["def"], "abcdef".match(/(?<=^[a-c]{3})def/));
assertEquals(["def"], "xyz\nabcdef".match(/(?<=^[a-c]{3})def/m));
assertEquals(["ab", "cd", "efg"], "ab\ncd\nefg".match(/(?<=^)\w+/gm));
assertEquals(["ab", "cd", "efg"], "ab\ncd\nefg".match(/\w+(?<=$)/gm));
assertEquals(["ab", "cd", "efg"], "ab\ncd\nefg".match(/(?<=^)\w+(?<=$)/gm));
assertNull("abcdef".match(/(?<=^[^a-c]{3})def/));
assertNull("foooo".match(/"^foooo(?<=^o+)$/));
assertNull("foooo".match(/"^foooo(?<=^o*)$/));
assertEquals(["foo"], "foo".match(/^foo(?<=^fo+)$/));
assertEquals(["foooo"], "foooo".match(/^foooo(?<=^fo*)/));
assertEquals(["foo", "f"], "foo".match(/^(f)oo(?<=^\1o+)$/));
assertEquals(["foo", "f"], "foo".match(/^(f)oo(?<=^\1o+)$/i));
assertEquals(["foo\u1234", "f"], "foo\u1234".match(/^(f)oo(?<=^\1o+).$/i));
assertEquals(["def"], "abcdefdef".match(/(?<=^\w+)def/));
assertEquals(["def", "def"], "abcdefdef".match(/(?<=^\w+)def/g));
// Word boundary matches.
assertEquals(["def"], "abc def".match(/(?<=\b)[d-f]{3}/));
assertEquals(["def"], "ab cdef".match(/(?<=\B)\w{3}/));
assertEquals(["def"], "ab cdef".match(/(?<=\B)(?<=c(?<=\w))\w{3}/));
assertNull("abcdef".match(/(?<=\b)[d-f]{3}/));
// Negative lookbehind.
assertEquals(["abc"], "abcdef".match(/(?<!abc)\w\w\w/));
assertEquals(["abc"], "abcdef".match(/(?<!a.c)\w\w\w/));
assertEquals(["abc"], "abcdef".match(/(?<!a\wc)\w\w\w/));
assertEquals(["abc"], "abcdef".match(/(?<!a[a-z])\w\w\w/));
assertEquals(["abc"], "abcdef".match(/(?<!a[a-z]{2})\w\w\w/));
assertNull("abcdef".match(/(?<!abc)def/));
assertNull("abcdef".match(/(?<!a.c)def/));
assertNull("abcdef".match(/(?<!a\wc)def/));
assertNull("abcdef".match(/(?<!a[a-z][a-z])def/));
assertNull("abcdef".match(/(?<!a[a-z]{2})def/));
assertNull("abcdef".match(/(?<!a{1}b{1})cde/));
assertNull("abcdef".match(/(?<!a{1}[a-z]{2})def/));
// Capturing matches.
assertEquals(["def", "c"], "abcdef".match(/(?<=(c))def/));
assertEquals(["def", "bc"], "abcdef".match(/(?<=(\w{2}))def/));
assertEquals(["def", "bc", "c"], "abcdef".match(/(?<=(\w(\w)))def/));
assertEquals(["def", "a"], "abcdef".match(/(?<=(\w){3})def/));
assertEquals(["d", "bc", undefined], "abcdef".match(/(?<=(bc)|(cd))./));
assertEquals(["c", "a", undefined],
"abcdef".match(/(?<=([ab]{1,2})\D|(abc))\w/));
assertEquals(["ab", "a", "b"], "abcdef".match(/\D(?<=([ab]+))(\w)/));
assertEquals(["c", "d"], "abcdef".match(/(?<=b|c)\w/g));
assertEquals(["cd", "ef"], "abcdef".match(/(?<=[b-e])\w{2}/g));
// Captures inside negative lookbehind. (They never capture.)
assertEquals(["de", undefined], "abcdef".match(/(?<!(^|[ab]))\w{2}/));
// Nested lookaround.
assertEquals(["ef"], "abcdef".match(/(?<=ab(?=c)\wd)\w\w/));
assertEquals(["ef", "bc"], "abcdef".match(/(?<=a(?=([^a]{2})d)\w{3})\w\w/));
assertEquals(["ef", "bc"],
"abcdef".match(/(?<=a(?=([bc]{2}(?<!a{2}))d)\w{3})\w\w/));
assertNull("abcdef".match(/(?<=a(?=([bc]{2}(?<!a*))d)\w{3})\w\w/));
assertEquals(["faaa"], "faaao".match(/^faaao?(?<=^f[oa]+(?=o))/));
// Back references.
assertEquals(["b", "b", "bb"], "abb".match(/(.)(?<=(\1\1))/));
assertEquals(["B", "B", "bB"], "abB".match(/(.)(?<=(\1\1))/i));
assertEquals(["aB", "aB", "a"], "aabAaBa".match(/((\w)\w)(?<=\1\2\1)/i));
assertEquals(["Ba", "Ba", "a"], "aabAaBa".match(/(\w(\w))(?<=\1\2\1)/i));
assertEquals(["b", "b", "B"], "abaBbAa".match(/(?=(\w))(?<=(\1))./i));
assertEquals(["foo", "'", "foo"], " 'foo' ".match(/(?<=(.))(\w+)(?=\1)/));
assertEquals(["foo", "\"", "foo"], " \"foo\" ".match(/(?<=(.))(\w+)(?=\1)/));
assertNull(" .foo\" ".match(/(?<=(.))(\w+)(?=\1)/));
assertNull("ab".match(/(.)(?<=\1\1\1)/));
assertNull("abb".match(/(.)(?<=\1\1\1)/));
assertEquals(["b", "b"], "abbb".match(/(.)(?<=\1\1\1)/));
assertNull("ab".match(/(..)(?<=\1\1\1)/));
assertNull("abb".match(/(..)(?<=\1\1\1)/));
assertNull("aabb".match(/(..)(?<=\1\1\1)/));
assertNull("abab".match(/(..)(?<=\1\1\1)/));
assertNull("fabxbab".match(/(..)(?<=\1\1\1)/));
assertNull("faxabab".match(/(..)(?<=\1\1\1)/));
assertEquals(["ab", "ab"], "fababab".match(/(..)(?<=\1\1\1)/));
// Back references to captures inside the lookbehind.
assertEquals(["d", "C"], "abcCd".match(/(?<=\1(\w))d/i));
assertEquals(["d", "x"], "abxxd".match(/(?<=\1([abx]))d/));
assertEquals(["c", "ab"], "ababc".match(/(?<=\1(\w+))c/));
assertEquals(["c", "b"], "ababbc".match(/(?<=\1(\w+))c/));
assertNull("ababdc".match(/(?<=\1(\w+))c/));
assertEquals(["c", "abab"], "ababc".match(/(?<=(\w+)\1)c/));
// Alternations are tried left to right,
// and we do not backtrack into a lookbehind.
assertEquals(["xabcd", "cd", ""], "xabcd".match(/.*(?<=(..|...|....))(.*)/));
assertEquals(["xabcd", "bcd", ""], "xabcd".match(/.*(?<=(xx|...|....))(.*)/));
assertEquals(["xxabcd", "bcd", ""], "xxabcd".match(/.*(?<=(xx|...))(.*)/));
assertEquals(["xxabcd", "xx", "abcd"], "xxabcd".match(/.*(?<=(xx|xxx))(.*)/));
// We do not backtrack into a lookbehind.
// The lookbehind captures "abc" so that \1 does not match. We do not backtrack
// to capture only "bc" in the lookbehind.
assertNull("abcdbc".match(/(?<=([abc]+)).\1/));
// Greedy loop.
assertEquals(["c", "bbbbbb"], "abbbbbbc".match(/(?<=(b+))c/));
assertEquals(["c", "b1234"], "ab1234c".match(/(?<=(b\d+))c/));
assertEquals(["c", "b12b23b34"], "ab12b23b34c".match(/(?<=((?:b\d{2})+))c/));
// Sticky
var re1 = /(?<=^(\w+))def/g;
assertEquals(["def", "abc"], re1.exec("abcdefdef"));
assertEquals(["def", "abcdef"], re1.exec("abcdefdef"));
var re2 = /\Bdef/g;
assertEquals(["def"], re2.exec("abcdefdef"));
assertEquals(["def"], re2.exec("abcdefdef"));
// Misc
assertNull("abcdef".match(/(?<=$abc)def/));
assertEquals(["foo"], "foo".match(/^foo(?<=foo)$/));
assertEquals(["foo"], "foo".match(/^f.o(?<=foo)$/));
assertNull("fno".match(/^f.o(?<=foo)$/));
assertNull("foo".match(/^foo(?<!foo)$/));
assertNull("foo".match(/^f.o(?<!foo)$/));
assertEquals(["fno"], "fno".match(/^f.o(?<!foo)$/));
assertEquals(["foooo"], "foooo".match(/^foooo(?<=fo+)$/));
assertEquals(["foooo"], "foooo".match(/^foooo(?<=fo*)$/));
assertEquals(["abc", "abc"], /(abc\1)/.exec("abc"));
assertEquals(["abc", "abc"], /(abc\1)/.exec("abc\u1234"));
assertEquals(["abc", "abc"], /(abc\1)/i.exec("abc"));
assertEquals(["abc", "abc"], /(abc\1)/i.exec("abc\u1234"));
var oob_subject = "abcdefghijklmnabcdefghijklmn".substr(14);
assertNull(oob_subject.match(/(?=(abcdefghijklmn))(?<=\1)a/i));
assertNull(oob_subject.match(/(?=(abcdefghijklmn))(?<=\1)a/));
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment