Commit 57d202d8, authored by yangguo, committed by Commit bot

[regexp] correctly advance zero length matches for global/unicode.

R=erik.corry@gmail.com
BUG=v8:2952
LOG=N

Review URL: https://codereview.chromium.org/1630633002

Cr-Commit-Position: refs/heads/master@{#33550}
parent 66e2a786
......@@ -805,9 +805,12 @@ Handle<HeapObject> RegExpMacroAssemblerARM::GetCode(Handle<String> source) {
__ cmp(current_input_offset(), Operand::Zero());
__ b(eq, &exit_label_);
// Advance current position after a zero-length match.
Label advance;
__ bind(&advance);
__ add(current_input_offset(),
current_input_offset(),
Operand((mode_ == UC16) ? 2 : 1));
if (global_unicode()) CheckNotInSurrogatePair(0, &advance);
}
__ b(&load_char_start_regexp);
......
......@@ -998,9 +998,12 @@ Handle<HeapObject> RegExpMacroAssemblerARM64::GetCode(Handle<String> source) {
// Offset from the end is zero if we already reached the end.
__ Cbz(current_input_offset(), &return_w0);
// Advance current position after a zero-length match.
Label advance;
__ bind(&advance);
__ Add(current_input_offset(),
current_input_offset(),
Operand((mode_ == UC16) ? 2 : 1));
if (global_unicode()) CheckNotInSurrogatePair(0, &advance);
}
__ B(&load_char_start_regexp);
......
......@@ -829,13 +829,15 @@ Handle<HeapObject> RegExpMacroAssemblerIA32::GetCode(Handle<String> source) {
__ test(edi, edi);
__ j(zero, &exit_label_, Label::kNear);
// Advance current position after a zero-length match.
Label advance;
__ bind(&advance);
if (mode_ == UC16) {
__ add(edi, Immediate(2));
} else {
__ inc(edi);
}
if (global_unicode()) CheckNotInSurrogatePair(0, &advance);
}
__ jmp(&load_char_start_regexp);
} else {
__ mov(eax, Immediate(SUCCESS));
......
......@@ -47,7 +47,10 @@ int32_t* RegExpImpl::GlobalCache::FetchNext() {
register_array_size_);
} else {
int last_start_index = last_match[0];
if (last_start_index == last_end_index) last_end_index++;
if (last_start_index == last_end_index) {
// Zero-length match. Advance by one code point.
last_end_index = AdvanceZeroLength(last_end_index);
}
if (last_end_index > subject_->length()) {
num_matches_ = 0; // Signal failed match.
return NULL;
......
......@@ -638,7 +638,6 @@ Handle<JSArray> RegExpImpl::SetLastMatchInfo(Handle<JSArray> last_match_info,
RegExpImpl::GlobalCache::GlobalCache(Handle<JSRegExp> regexp,
Handle<String> subject,
bool is_global,
Isolate* isolate)
: register_array_(NULL),
register_array_size_(0),
......@@ -663,7 +662,8 @@ RegExpImpl::GlobalCache::GlobalCache(Handle<JSRegExp> regexp,
}
}
if (is_global && !interpreted) {
DCHECK_NE(0, regexp->GetFlags() & JSRegExp::kGlobal);
if (!interpreted) {
register_array_size_ =
Max(registers_per_match_, Isolate::kJSRegexpStaticOffsetsVectorSize);
max_matches_ = register_array_size_ / registers_per_match_;
......@@ -692,6 +692,16 @@ RegExpImpl::GlobalCache::GlobalCache(Handle<JSRegExp> regexp,
last_match[1] = 0;
}
// Returns the index at which matching should resume after a zero-length match
// at |last_index|. Normally this is the next code unit. For a regexp with the
// unicode flag, a lead surrogate at |last_index| followed directly by a trail
// surrogate is stepped over as a whole.
int RegExpImpl::GlobalCache::AdvanceZeroLength(int last_index) {
  const int next_code_unit = last_index + 1;
  // Without the unicode flag, always advance by a single code unit.
  if ((regexp_->GetFlags() & JSRegExp::kUnicode) == 0) return next_code_unit;
  // A surrogate pair needs a second code unit inside the subject.
  if (next_code_unit >= subject_->length()) return next_code_unit;
  if (!unibrow::Utf16::IsLeadSurrogate(subject_->Get(last_index))) {
    return next_code_unit;
  }
  if (!unibrow::Utf16::IsTrailSurrogate(subject_->Get(next_code_unit))) {
    return next_code_unit;
  }
  // Advance over both halves of the surrogate pair.
  return last_index + 2;
}
// -------------------------------------------------------------------
// Implementation of the Irregexp regular expression engine.
......@@ -6623,6 +6633,7 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(
bool ignore_case = flags & JSRegExp::kIgnoreCase;
bool is_sticky = flags & JSRegExp::kSticky;
bool is_global = flags & JSRegExp::kGlobal;
bool is_unicode = flags & JSRegExp::kUnicode;
RegExpCompiler compiler(isolate, zone, data->capture_count, flags,
is_one_byte);
......@@ -6742,10 +6753,13 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(
}
if (is_global) {
macro_assembler.set_global_mode(
(data->tree->min_match() > 0)
? RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK
: RegExpMacroAssembler::GLOBAL);
RegExpMacroAssembler::GlobalMode mode = RegExpMacroAssembler::GLOBAL;
if (data->tree->min_match() > 0) {
mode = RegExpMacroAssembler::GLOBAL_NO_ZERO_LENGTH_CHECK;
} else if (is_unicode) {
mode = RegExpMacroAssembler::GLOBAL_UNICODE;
}
macro_assembler.set_global_mode(mode);
}
return compiler.Assemble(&macro_assembler,
......
......@@ -122,7 +122,6 @@ class RegExpImpl {
public:
GlobalCache(Handle<JSRegExp> regexp,
Handle<String> subject,
bool is_global,
Isolate* isolate);
INLINE(~GlobalCache());
......@@ -138,6 +137,8 @@ class RegExpImpl {
INLINE(bool HasException()) { return num_matches_ < 0; }
private:
int AdvanceZeroLength(int last_index);
int num_matches_;
int max_matches_;
int current_match_index_;
......
......@@ -808,9 +808,12 @@ Handle<HeapObject> RegExpMacroAssemblerMIPS::GetCode(Handle<String> source) {
__ Branch(&exit_label_, eq, current_input_offset(),
Operand(zero_reg));
// Advance current position after a zero-length match.
Label advance;
__ bind(&advance);
__ Addu(current_input_offset(),
current_input_offset(),
Operand((mode_ == UC16) ? 2 : 1));
if (global_unicode()) CheckNotInSurrogatePair(0, &advance);
}
__ Branch(&load_char_start_regexp);
......
......@@ -848,9 +848,12 @@ Handle<HeapObject> RegExpMacroAssemblerMIPS::GetCode(Handle<String> source) {
__ Branch(&exit_label_, eq, current_input_offset(),
Operand(zero_reg));
// Advance current position after a zero-length match.
Label advance;
__ bind(&advance);
__ Daddu(current_input_offset(),
current_input_offset(),
Operand((mode_ == UC16) ? 2 : 1));
if (global_unicode()) CheckNotInSurrogatePair(0, &advance);
}
__ Branch(&load_char_start_regexp);
......
......@@ -166,21 +166,27 @@ class RegExpMacroAssembler {
void set_slow_safe(bool ssc) { slow_safe_compiler_ = ssc; }
bool slow_safe() { return slow_safe_compiler_; }
enum GlobalMode { NOT_GLOBAL, GLOBAL, GLOBAL_NO_ZERO_LENGTH_CHECK };
// Selects how the generated code behaves for global regexps.
// RegExpEngine::Compile picks the mode from the regexp flags and the
// pattern's minimum match length.
enum GlobalMode {
// Only mode for which global() returns false.
NOT_GLOBAL,
// Global; chosen when the pattern's min_match() is greater than zero.
GLOBAL_NO_ZERO_LENGTH_CHECK,
// Global with the zero-length match check; the default for global regexps.
GLOBAL,
// Global with the zero-length match check, chosen when the unicode flag is
// also set. The backends then call CheckNotInSurrogatePair after advancing
// past a zero-length match.
GLOBAL_UNICODE
};
// Set whether the regular expression has the global flag. Exiting due to
// a failure in a global regexp may still mean success overall.
inline void set_global_mode(GlobalMode mode) { global_mode_ = mode; }
inline bool global() { return global_mode_ != NOT_GLOBAL; }
inline bool global_with_zero_length_check() {
return global_mode_ == GLOBAL;
return global_mode_ == GLOBAL || global_mode_ == GLOBAL_UNICODE;
}
inline bool global_unicode() { return global_mode_ == GLOBAL_UNICODE; }
Isolate* isolate() const { return isolate_; }
Zone* zone() const { return zone_; }
private:
bool slow_safe_compiler_;
bool global_mode_;
GlobalMode global_mode_;
Isolate* isolate_;
Zone* zone_;
};
......
......@@ -877,11 +877,14 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
__ testp(rdi, rdi);
__ j(zero, &exit_label_, Label::kNear);
// Advance current position after a zero-length match.
Label advance;
__ bind(&advance);
if (mode_ == UC16) {
__ addq(rdi, Immediate(2));
} else {
__ incq(rdi);
}
if (global_unicode()) CheckNotInSurrogatePair(0, &advance);
}
__ jmp(&load_char_start_regexp);
......
......@@ -492,7 +492,7 @@ MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithString(
}
}
RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
if (global_cache.HasException()) return isolate->heap()->exception();
int32_t* current_match = global_cache.FetchNext();
......@@ -568,7 +568,7 @@ MUST_USE_RESULT static Object* StringReplaceGlobalRegExpWithEmptyString(
}
}
RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
if (global_cache.HasException()) return isolate->heap()->exception();
int32_t* current_match = global_cache.FetchNext();
......@@ -876,7 +876,7 @@ static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
}
}
RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
if (global_cache.HasException()) return isolate->heap()->exception();
// Ensured in Runtime_RegExpExecMultiple.
......
......@@ -341,7 +341,7 @@ RUNTIME_FUNCTION(Runtime_StringMatch) {
RUNTIME_ASSERT(regexp_info->HasFastObjectElements());
RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate);
RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
if (global_cache.HasException()) return isolate->heap()->exception();
int capture_count = regexp->CaptureCount();
......
// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-unicode-regexps
// Regression test for advancing past zero-length matches in global regexps,
// with and without the unicode flag.
// L is a lead surrogate and T a trail surrogate, so L + T is one surrogate
// pair: a single code point that occupies two code units.
var L = "\ud800";
var T = "\udc00";
var x = "x";
var r = /()/g; // Global, but not unicode.
// Zero-length matches do not advance lastIndex.
assertEquals(["", ""], r.exec(L + T + L + T));
assertEquals(0, r.lastIndex);
// Index 1 lies between the two halves of the first surrogate pair.
r.lastIndex = 1;
assertEquals(["", ""], r.exec(L + T + L + T));
assertEquals(1, r.lastIndex);
var u = /()/ug; // Global and unicode.
// Zero-length matches do not advance lastIndex.
assertEquals(["", ""], u.exec(L + T + L + T));
assertEquals(0, u.lastIndex);
// Starting in the middle of a surrogate pair, lastIndex is expected to end
// up at 0, the start of that pair, rather than staying at 1.
u.lastIndex = 1;
assertEquals(["", ""], u.exec(L + T + L + T));
assertEquals(0, u.lastIndex);
// However, with repeating matches, lastIndex does not matter.
// We do advance from match to match.
r.lastIndex = 2;
// Without the unicode flag, an empty match is expected at every code unit
// boundary, so "x" also lands between L and T.
assertEquals(x + L + x + T + x + L + x + T + x,
(L + T + L + T).replace(r, "x"));
// With unicode flag, we advance code point by code point.
u.lastIndex = 3;
assertEquals(x + L + T + x + L + T + x,
(L + T + L + T).replace(u, "x"));
// Test that exhausting the global match cache is fine.
assertEquals((x + L + T).repeat(1000) + x,
(L + T).repeat(1000).replace(u, "x"));
// Same thing for RegExp.prototype.match.
// Non-unicode: five empty matches for four code units (one per boundary).
r.lastIndex = 1;
assertEquals(["","","","",""], (L + T + L + T).match(r));
r.lastIndex = 2;
assertEquals(["","","","",""], (L + T + L + T).match(r));
// Unicode: three empty matches for two code points.
u.lastIndex = 1;
assertEquals(["","",""], (L + T + L + T).match(u));
u.lastIndex = 2;
assertEquals(["","",""], (L + T + L + T).match(u));
// 1000 surrogate pairs yield 1001 empty matches.
var expected = [];
for (var i = 0; i <= 1000; i++) expected.push("");
assertEquals(expected, (L + T).repeat(1000).match(u));
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment