Commit 85e085b7 authored by yangguo, committed by Commit bot

RegExp: remove last match info override.

With ES6 21.2.5.8 (RegExp.prototype[@@replace]), step 13, we no longer have
to keep up the illusion that matching and calling the replace function are
interleaved. This is observable through unspecified static properties such
as RegExp.$1.

Last match info not working yet.

R=littledan@chromium.org

Review URL: https://codereview.chromium.org/1418703003

Cr-Commit-Position: refs/heads/master@{#31593}
parent d43bbd06
......@@ -2,8 +2,6 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
var $regexpLastMatchInfoOverride;
(function(global, utils) {
%CheckIsBootstrapping();
......@@ -44,12 +42,6 @@ var RegExpLastMatchInfo = new InternalPackedArray(
0 // REGEXP_FIRST_CAPTURE + 1
);
// Override last match info with an array of actual substrings.
// Used internally by replace regexp with function.
// The array has the format of an "apply" argument for a replacement
// function.
$regexpLastMatchInfoOverride = null;
// -------------------------------------------------------------------
// A recursive descent parser for Patterns according to the grammar of
......@@ -114,7 +106,6 @@ function RegExpCompileJS(pattern, flags) {
function DoRegExpExec(regexp, string, index) {
var result = %_RegExpExec(regexp, string, index, RegExpLastMatchInfo);
if (result !== null) $regexpLastMatchInfoOverride = null;
return result;
}
......@@ -149,7 +140,6 @@ function RegExpExecNoTests(regexp, string, start) {
// Must be called with RegExp, string and positive integer as arguments.
var matchInfo = %_RegExpExec(regexp, string, start, RegExpLastMatchInfo);
if (matchInfo !== null) {
$regexpLastMatchInfoOverride = null;
// ES6 21.2.5.2.2 step 18.
if (FLAG_harmony_regexps && regexp.sticky) {
regexp.lastIndex = matchInfo[CAPTURE1];
......@@ -193,7 +183,6 @@ function RegExpExecJS(string) {
}
// Successful match.
$regexpLastMatchInfoOverride = null;
if (updateLastIndex) {
this.lastIndex = RegExpLastMatchInfo[CAPTURE1];
}
......@@ -233,7 +222,6 @@ function RegExpTest(string) {
this.lastIndex = 0;
return false;
}
$regexpLastMatchInfoOverride = null;
this.lastIndex = RegExpLastMatchInfo[CAPTURE1];
return true;
} else {
......@@ -254,7 +242,6 @@ function RegExpTest(string) {
this.lastIndex = 0;
return false;
}
$regexpLastMatchInfoOverride = null;
return true;
}
}
......@@ -291,9 +278,6 @@ function RegExpToString() {
// on the captures array of the last successful match and the subject string
// of the last successful match.
function RegExpGetLastMatch() {
if ($regexpLastMatchInfoOverride !== null) {
return OVERRIDE_MATCH($regexpLastMatchInfoOverride);
}
var regExpSubject = LAST_SUBJECT(RegExpLastMatchInfo);
return %_SubString(regExpSubject,
RegExpLastMatchInfo[CAPTURE0],
......@@ -302,11 +286,6 @@ function RegExpGetLastMatch() {
function RegExpGetLastParen() {
if ($regexpLastMatchInfoOverride) {
var override = $regexpLastMatchInfoOverride;
if (override.length <= 3) return '';
return override[override.length - 3];
}
var length = NUMBER_OF_CAPTURES(RegExpLastMatchInfo);
if (length <= 2) return ''; // There were no captures.
// We match the SpiderMonkey behavior: return the substring defined by the
......@@ -325,14 +304,8 @@ function RegExpGetLastParen() {
function RegExpGetLeftContext() {
var start_index;
var subject;
if (!$regexpLastMatchInfoOverride) {
start_index = RegExpLastMatchInfo[CAPTURE0];
subject = LAST_SUBJECT(RegExpLastMatchInfo);
} else {
var override = $regexpLastMatchInfoOverride;
start_index = OVERRIDE_POS(override);
subject = OVERRIDE_SUBJECT(override);
}
start_index = RegExpLastMatchInfo[CAPTURE0];
subject = LAST_SUBJECT(RegExpLastMatchInfo);
return %_SubString(subject, 0, start_index);
}
......@@ -340,15 +313,8 @@ function RegExpGetLeftContext() {
function RegExpGetRightContext() {
var start_index;
var subject;
if (!$regexpLastMatchInfoOverride) {
start_index = RegExpLastMatchInfo[CAPTURE1];
subject = LAST_SUBJECT(RegExpLastMatchInfo);
} else {
var override = $regexpLastMatchInfoOverride;
subject = OVERRIDE_SUBJECT(override);
var match = OVERRIDE_MATCH(override);
start_index = OVERRIDE_POS(override) + match.length;
}
start_index = RegExpLastMatchInfo[CAPTURE1];
subject = LAST_SUBJECT(RegExpLastMatchInfo);
return %_SubString(subject, start_index, subject.length);
}
......@@ -358,12 +324,6 @@ function RegExpGetRightContext() {
// called with indices from 1 to 9.
function RegExpMakeCaptureGetter(n) {
return function foo() {
if ($regexpLastMatchInfoOverride) {
if (n < $regexpLastMatchInfoOverride.length - 2) {
return OVERRIDE_CAPTURE($regexpLastMatchInfoOverride, n);
}
return '';
}
var index = n * 2;
if (index >= NUMBER_OF_CAPTURES(RegExpLastMatchInfo)) return '';
var matchStart = RegExpLastMatchInfo[CAPTURE(index)];
......
......@@ -157,7 +157,6 @@ function StringMatchJS(regexp) {
if (IS_REGEXP(regexp)) {
if (!regexp.global) return RegExpExecNoTests(regexp, subject, 0);
var result = %StringMatch(subject, regexp, RegExpLastMatchInfo);
if (result !== null) $regexpLastMatchInfoOverride = null;
regexp.lastIndex = 0;
return result;
}
......@@ -244,24 +243,8 @@ function StringReplace(search, replace) {
// Global regexp search, string replace.
search.lastIndex = 0;
if ($regexpLastMatchInfoOverride == null) {
return %StringReplaceGlobalRegExpWithString(
subject, search, replace, RegExpLastMatchInfo);
} else {
// We use this hack to detect whether StringReplaceRegExpWithString
// found at least one hit. In that case we need to remove any
// override.
var saved_subject = RegExpLastMatchInfo[LAST_SUBJECT_INDEX];
RegExpLastMatchInfo[LAST_SUBJECT_INDEX] = 0;
var answer = %StringReplaceGlobalRegExpWithString(
subject, search, replace, RegExpLastMatchInfo);
if (%_IsSmi(RegExpLastMatchInfo[LAST_SUBJECT_INDEX])) {
RegExpLastMatchInfo[LAST_SUBJECT_INDEX] = saved_subject;
} else {
$regexpLastMatchInfoOverride = null;
}
return answer;
}
return %StringReplaceGlobalRegExpWithString(
subject, search, replace, RegExpLastMatchInfo);
}
if (search.global) {
......@@ -438,22 +421,16 @@ function StringReplaceGlobalRegExpWithFunction(subject, regexp, replace) {
// input string and some replacements that were returned from the replace
// function.
var match_start = 0;
var override = new InternalPackedArray(null, 0, subject);
for (var i = 0; i < len; i++) {
var elem = res[i];
if (%_IsSmi(elem)) {
// Integers represent slices of the original string. Use these to
// get the offsets we need for the override array (so things like
// RegExp.leftContext work during the callback function.
// Integers represent slices of the original string.
if (elem > 0) {
match_start = (elem >> 11) + (elem & 0x7ff);
} else {
match_start = res[++i] - elem;
}
} else {
override[0] = elem;
override[1] = match_start;
$regexpLastMatchInfoOverride = override;
var func_result = replace(elem, match_start, subject);
// Overwrite the i'th element in the results with the string we got
// back from the callback function.
......@@ -467,7 +444,6 @@ function StringReplaceGlobalRegExpWithFunction(subject, regexp, replace) {
if (!%_IsSmi(elem)) {
// elem must be an Array.
// Use the apply argument as backing for global RegExp properties.
$regexpLastMatchInfoOverride = elem;
var func_result = %Apply(replace, UNDEFINED, elem, 0, elem.length);
// Overwrite the i'th element in the results with the string we got
// back from the callback function.
......@@ -475,7 +451,7 @@ function StringReplaceGlobalRegExpWithFunction(subject, regexp, replace) {
}
}
}
var result = %StringBuilderConcat(res, res.length, subject);
var result = %StringBuilderConcat(res, len, subject);
resultArray.length = 0;
reusableReplaceArray = resultArray;
return result;
......
......@@ -6410,7 +6410,9 @@ bool RegExpEngine::TooMuchRegExpCode(Handle<String> pattern) {
Object* RegExpResultsCache::Lookup(Heap* heap, String* key_string,
Object* key_pattern, ResultsCacheType type) {
Object* key_pattern,
FixedArray** last_match_cache,
ResultsCacheType type) {
FixedArray* cache;
if (!key_string->IsInternalizedString()) return Smi::FromInt(0);
if (type == STRING_SPLIT_SUBSTRINGS) {
......@@ -6426,23 +6428,25 @@ Object* RegExpResultsCache::Lookup(Heap* heap, String* key_string,
uint32_t hash = key_string->Hash();
uint32_t index = ((hash & (kRegExpResultsCacheSize - 1)) &
~(kArrayEntriesPerCacheEntry - 1));
if (cache->get(index + kStringOffset) == key_string &&
cache->get(index + kPatternOffset) == key_pattern) {
return cache->get(index + kArrayOffset);
}
index =
((index + kArrayEntriesPerCacheEntry) & (kRegExpResultsCacheSize - 1));
if (cache->get(index + kStringOffset) == key_string &&
cache->get(index + kPatternOffset) == key_pattern) {
return cache->get(index + kArrayOffset);
if (cache->get(index + kStringOffset) != key_string ||
cache->get(index + kPatternOffset) != key_pattern) {
index =
((index + kArrayEntriesPerCacheEntry) & (kRegExpResultsCacheSize - 1));
if (cache->get(index + kStringOffset) != key_string ||
cache->get(index + kPatternOffset) != key_pattern) {
return Smi::FromInt(0);
}
}
return Smi::FromInt(0);
*last_match_cache = FixedArray::cast(cache->get(index + kLastMatchOffset));
return cache->get(index + kArrayOffset);
}
void RegExpResultsCache::Enter(Isolate* isolate, Handle<String> key_string,
Handle<Object> key_pattern,
Handle<FixedArray> value_array,
Handle<FixedArray> last_match_cache,
ResultsCacheType type) {
Factory* factory = isolate->factory();
Handle<FixedArray> cache;
......@@ -6464,6 +6468,7 @@ void RegExpResultsCache::Enter(Isolate* isolate, Handle<String> key_string,
cache->set(index + kStringOffset, *key_string);
cache->set(index + kPatternOffset, *key_pattern);
cache->set(index + kArrayOffset, *value_array);
cache->set(index + kLastMatchOffset, *last_match_cache);
} else {
uint32_t index2 =
((index + kArrayEntriesPerCacheEntry) & (kRegExpResultsCacheSize - 1));
......@@ -6471,13 +6476,16 @@ void RegExpResultsCache::Enter(Isolate* isolate, Handle<String> key_string,
cache->set(index2 + kStringOffset, *key_string);
cache->set(index2 + kPatternOffset, *key_pattern);
cache->set(index2 + kArrayOffset, *value_array);
cache->set(index2 + kLastMatchOffset, *last_match_cache);
} else {
cache->set(index2 + kStringOffset, Smi::FromInt(0));
cache->set(index2 + kPatternOffset, Smi::FromInt(0));
cache->set(index2 + kArrayOffset, Smi::FromInt(0));
cache->set(index2 + kLastMatchOffset, Smi::FromInt(0));
cache->set(index + kStringOffset, *key_string);
cache->set(index + kPatternOffset, *key_pattern);
cache->set(index + kArrayOffset, *value_array);
cache->set(index + kLastMatchOffset, *last_match_cache);
}
}
// If the array is a reasonably short list of substrings, convert it into a
......
......@@ -1666,12 +1666,12 @@ class RegExpResultsCache : public AllStatic {
// Attempt to retrieve a cached result. On failure, 0 is returned as a Smi.
// On success, the returned result is guaranteed to be a COW-array.
static Object* Lookup(Heap* heap, String* key_string, Object* key_pattern,
ResultsCacheType type);
FixedArray** last_match_out, ResultsCacheType type);
// Attempt to add value_array to the cache specified by type. On success,
// value_array is turned into a COW-array.
static void Enter(Isolate* isolate, Handle<String> key_string,
Handle<Object> key_pattern, Handle<FixedArray> value_array,
ResultsCacheType type);
Handle<FixedArray> last_match_cache, ResultsCacheType type);
static void Clear(FixedArray* cache);
static const int kRegExpResultsCacheSize = 0x100;
......@@ -1680,6 +1680,7 @@ class RegExpResultsCache : public AllStatic {
static const int kStringOffset = 0;
static const int kPatternOffset = 1;
static const int kArrayOffset = 2;
static const int kLastMatchOffset = 3;
};
} // namespace internal
......
......@@ -693,8 +693,10 @@ RUNTIME_FUNCTION(Runtime_StringSplit) {
RUNTIME_ASSERT(pattern_length > 0);
if (limit == 0xffffffffu) {
FixedArray* last_match_cache_unused;
Handle<Object> cached_answer(
RegExpResultsCache::Lookup(isolate->heap(), *subject, *pattern,
&last_match_cache_unused,
RegExpResultsCache::STRING_SPLIT_SUBSTRINGS),
isolate);
if (*cached_answer != Smi::FromInt(0)) {
......@@ -757,6 +759,7 @@ RUNTIME_FUNCTION(Runtime_StringSplit) {
if (limit == 0xffffffffu) {
if (result->HasFastObjectElements()) {
RegExpResultsCache::Enter(isolate, subject, pattern, elements,
isolate->factory()->empty_fixed_array(),
RegExpResultsCache::STRING_SPLIT_SUBSTRINGS);
}
}
......@@ -1017,23 +1020,23 @@ static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
static const int kMinLengthToCache = 0x1000;
if (subject_length > kMinLengthToCache) {
Handle<Object> cached_answer(
RegExpResultsCache::Lookup(isolate->heap(), *subject, regexp->data(),
RegExpResultsCache::REGEXP_MULTIPLE_INDICES),
isolate);
if (*cached_answer != Smi::FromInt(0)) {
FixedArray* last_match_cache;
Object* cached_answer = RegExpResultsCache::Lookup(
isolate->heap(), *subject, regexp->data(), &last_match_cache,
RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
if (cached_answer->IsFixedArray()) {
int capture_registers = (capture_count + 1) * 2;
int32_t* last_match = NewArray<int32_t>(capture_registers);
for (int i = 0; i < capture_registers; i++) {
last_match[i] = Smi::cast(last_match_cache->get(i))->value();
}
Handle<FixedArray> cached_fixed_array =
Handle<FixedArray>(FixedArray::cast(*cached_answer));
Handle<FixedArray>(FixedArray::cast(cached_answer));
// The cache FixedArray is a COW-array and can therefore be reused.
JSArray::SetContent(result_array, cached_fixed_array);
// The actual length of the result array is stored in the last element of
// the backing store (the backing FixedArray may have a larger capacity).
Object* cached_fixed_array_last_element =
cached_fixed_array->get(cached_fixed_array->length() - 1);
Smi* js_array_length = Smi::cast(cached_fixed_array_last_element);
result_array->set_length(js_array_length);
RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
NULL);
last_match);
DeleteArray(last_match);
return *result_array;
}
}
......@@ -1121,19 +1124,24 @@ static Object* SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
}
RegExpImpl::SetLastMatchInfo(last_match_array, subject, capture_count,
NULL);
global_cache.LastSuccessfulMatch());
if (subject_length > kMinLengthToCache) {
// Store the length of the result array into the last element of the
// backing FixedArray.
builder.EnsureCapacity(1);
Handle<FixedArray> fixed_array = builder.array();
fixed_array->set(fixed_array->length() - 1,
Smi::FromInt(builder.length()));
// Store the last successful match into the array for caching.
// TODO(yangguo): do not expose last match to JS and simplify caching.
int capture_registers = (capture_count + 1) * 2;
Handle<FixedArray> last_match_cache =
isolate->factory()->NewFixedArray(capture_registers);
int32_t* last_match = global_cache.LastSuccessfulMatch();
for (int i = 0; i < capture_registers; i++) {
last_match_cache->set(i, Smi::FromInt(last_match[i]));
}
Handle<FixedArray> result_array = builder.array();
result_array->Shrink(builder.length());
// Cache the result and turn the FixedArray into a COW array.
RegExpResultsCache::Enter(isolate, subject,
handle(regexp->data(), isolate), fixed_array,
RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
RegExpResultsCache::Enter(
isolate, subject, handle(regexp->data(), isolate), result_array,
last_match_cache, RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
}
return *builder.ToJSArray(result_array);
} else {
......@@ -1149,8 +1157,8 @@ RUNTIME_FUNCTION(Runtime_RegExpExecMultiple) {
HandleScope handles(isolate);
DCHECK(args.length() == 4);
CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
CONVERT_ARG_HANDLE_CHECKED(String, subject, 1);
CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 2);
CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3);
RUNTIME_ASSERT(last_match_info->HasFastObjectElements());
......
......@@ -119,7 +119,7 @@ for (var i = 4; i < 10; ++i) {
// case the function uses the static properties of the regexp constructor.
re = /(.)/g;
function f() { return RegExp.$1; };
assertEquals('abcd', 'abcd'.replace(re, f));
assertEquals('dddd', 'abcd'.replace(re, f));
// lastParen where the last parenthesis didn't match.
assertEquals(["foo",undefined], /foo(?:a(x))?/.exec("foobx"),
......
......@@ -163,6 +163,30 @@ replaceTest("0a1b2cx", short, /(x)(?=(.))/g, function r(m, c1, c2, i, s) {
assertEquals(3, ctr, "replace(/x/g,func) num-match");
replaceTest("ABCD", "abcd", /(.)/g, function r(m, c1, i, s) {
assertEquals("d", RegExp.lastMatch);
assertEquals("d", RegExp.$1);
assertEquals("abc", RegExp.leftContext);
return m.toUpperCase();
});
var long = "";
while (long.length < 0x2000) {
long += String.fromCharCode(65 + Math.random() * 26);
}
for (var i = 0; i < 3; i++) {
replaceTest(long.toLowerCase(), long, /(..)/g, function r(m, c1, i, s) {
var expected = long.substring(0x1ffe, 0x2000);
assertEquals(expected, RegExp.lastMatch);
assertEquals(expected, RegExp.$1);
assertEquals(long.substring(0, 0x1ffe), RegExp.leftContext);
return m.toLowerCase();
});
}
// Test special cases of replacement parts longer than 1<<11.
var longstring = "xyzzy";
longstring = longstring + longstring;
......@@ -194,20 +218,6 @@ replaceTest("aundefinedbundefinedcundefined",
// Test nested calls to replace, including that it sets RegExp.$& correctly.
function replacer(m,i,s) {
assertEquals(m,RegExp['$&']);
return "[" + RegExp['$&'] + "-"
+ m.replace(/./g,"$&$&") + "-"
+ m.replace(/./g,function() { return RegExp['$&']; })
+ "-" + RegExp['$&'] + "]";
}
replaceTest("[ab-aabb-ab-b][az-aazz-az-z]",
"abaz", /a./g, replacer);
replaceTest("[ab-aabb-ab-b][az-aazz-az-z]",
"abaz", /a(.)/g, replacer);
var str = 'She sells seashells by the seashore.';
var re = /sh/g;
assertEquals('She sells sea$schells by the sea$schore.',
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment