Commit 498b074b authored by lrn@chromium.org

Run string replace regexp with function in C++ code loop.

Reuses the result array to save on allocation.
Matches Safari's behavior.

Review URL: http://codereview.chromium.org/1109010

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@4269 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 0737ec8d
......@@ -344,6 +344,7 @@ function RegExpToString() {
// on the captures array of the last successful match and the subject string
// of the last successful match.
function RegExpGetLastMatch() {
if (lastMatchInfoOverride) { return lastMatchInfoOverride[0]; }
var regExpSubject = LAST_SUBJECT(lastMatchInfo);
return SubString(regExpSubject,
lastMatchInfo[CAPTURE0],
......@@ -352,6 +353,11 @@ function RegExpGetLastMatch() {
function RegExpGetLastParen() {
if (lastMatchInfoOverride) {
var override = lastMatchInfoOverride;
if (override.length <= 3) return '';
return override[override.length - 3];
}
var length = NUMBER_OF_CAPTURES(lastMatchInfo);
if (length <= 2) return ''; // There were no captures.
// We match the SpiderMonkey behavior: return the substring defined by the
......@@ -368,17 +374,32 @@ function RegExpGetLastParen() {
// Returns the part of the last matched subject that precedes the match.
// When lastMatchInfoOverride is set, the match start index and the subject
// are read from the end of the override array (second-to-last and last
// elements); otherwise they come from lastMatchInfo.
function RegExpGetLeftContext() {
  var start_index;
  var subject;
  if (!lastMatchInfoOverride) {
    start_index = lastMatchInfo[CAPTURE0];
    subject = LAST_SUBJECT(lastMatchInfo);
  } else {
    var override = lastMatchInfoOverride;
    start_index = override[override.length - 2];
    subject = override[override.length - 1];
  }
  return SubString(subject, 0, start_index);
}
// Returns the part of the last matched subject that follows the match.
// When lastMatchInfoOverride is set, the subject is the last element of the
// override array, the match start index is the second-to-last element and
// the matched string is element 0; otherwise the end of the match comes from
// lastMatchInfo.
function RegExpGetRightContext() {
  var start_index;
  var subject;
  if (!lastMatchInfoOverride) {
    start_index = lastMatchInfo[CAPTURE1];
    subject = LAST_SUBJECT(lastMatchInfo);
  } else {
    var override = lastMatchInfoOverride;
    subject = override[override.length - 1];
    // The right context starts where the match ends: match start plus the
    // length of the matched string (not the length of the whole subject,
    // which would always yield an empty result).
    start_index = override[override.length - 2] + override[0].length;
  }
  return SubString(subject, start_index, subject.length);
}
......@@ -387,6 +408,10 @@ function RegExpGetRightContext() {
// called with indices from 1 to 9.
function RegExpMakeCaptureGetter(n) {
return function() {
if (lastMatchInfoOverride) {
if (n < lastMatchInfoOverride.length - 2) return lastMatchInfoOverride[n];
return '';
}
var index = n * 2;
if (index >= NUMBER_OF_CAPTURES(lastMatchInfo)) return '';
var matchStart = lastMatchInfo[CAPTURE(index)];
......@@ -411,6 +436,12 @@ var lastMatchInfo = [
0, // REGEXP_FIRST_CAPTURE + 1
];
// Override last match info with an array of actual substrings.
// Used internally by replace regexp with function.
// The array has the format of an "apply" argument for a replacement
// function. The readers in this file treat element 0 as the matched string,
// the following elements as captures (the third-from-last being the last
// capture when the array has more than three elements), the second-to-last
// element as the match start index and the last element as the subject.
// While this is null the regular lastMatchInfo array is used instead.
var lastMatchInfoOverride = null;
// -------------------------------------------------------------------
function SetupRegExp() {
......
This diff is collapsed.
......@@ -153,6 +153,7 @@ namespace internal {
/* Regular expressions */ \
F(RegExpCompile, 3, 1) \
F(RegExpExec, 4, 1) \
F(RegExpExecMultiple, 4, 1) \
\
/* Strings */ \
F(StringCharCodeAt, 2, 1) \
......
......@@ -405,97 +405,91 @@ function addCaptureString(builder, matchInfo, index) {
builder.addSpecialSlice(start, end);
};
// Scratch array that StringReplaceRegExpWithFunction passes to
// %RegExpExecMultiple. The function sets this variable to null while it is
// using the array and stores an (emptied) array back when it is done.
// TODO(lrn): This array will survive indefinitely if replace is never
// called again. However, it will be empty, since the contents are cleared
// in the finally block.
var reusableReplaceArray = $Array(16);
// Helper function for replacing regular expressions with the result of a
// function application in String.prototype.replace. The function application
// must be interleaved with the regexp matching (contrary to ECMA-262
// 15.5.4.11) to mimic SpiderMonkey and KJS behavior when the function uses
// the static properties of the RegExp constructor. Example:
// 'abcd'.replace(/(.)/g, function() { return RegExp.$1; }
// should be 'abcd' and not 'dddd' (or anything else).
// function application in String.prototype.replace.
function StringReplaceRegExpWithFunction(subject, regexp, replace) {
var matchInfo = DoRegExpExec(regexp, subject, 0);
if (IS_NULL(matchInfo)) return subject;
var result = new ReplaceResultBuilder(subject);
// There's at least one match. If the regexp is global, we have to loop
// over all matches. The loop is not in C++ code here like the one in
// RegExp.prototype.exec, because of the interleaved function application.
// Unfortunately, that means this code is nearly duplicated, here and in
// jsregexp.cc.
if (regexp.global) {
var previous = 0;
var startOfMatch;
if (NUMBER_OF_CAPTURES(matchInfo) == 2) {
// Both branches contain essentially the same loop except for the call
// to the replace function. The branch is put outside of the loop for
// speed
do {
startOfMatch = matchInfo[CAPTURE0];
result.addSpecialSlice(previous, startOfMatch);
previous = matchInfo[CAPTURE1];
var match = SubString(subject, startOfMatch, previous);
// Don't call directly to avoid exposing the built-in global object.
result.add(replace.call(null, match, startOfMatch, subject));
// Can't use matchInfo any more from here, since the function could
// overwrite it.
// Continue with the next match.
// Increment previous if we matched an empty string, as per ECMA-262
// 15.5.4.10.
if (previous == startOfMatch) {
// Add the skipped character to the output, if any.
if (previous < subject.length) {
result.addSpecialSlice(previous, previous + 1);
}
previous++;
// Per ECMA-262 15.10.6.2, if the previous index is greater than the
// string length, there is no match
if (previous > subject.length) {
return result.generate();
}
}
matchInfo = DoRegExpExec(regexp, subject, previous);
} while (!IS_NULL(matchInfo));
var resultArray = reusableReplaceArray;
if (resultArray) {
reusableReplaceArray = null;
} else {
do {
startOfMatch = matchInfo[CAPTURE0];
result.addSpecialSlice(previous, startOfMatch);
previous = matchInfo[CAPTURE1];
result.add(ApplyReplacementFunction(replace, matchInfo, subject));
// Can't use matchInfo any more from here, since the function could
// overwrite it.
// Continue with the next match.
// Increment previous if we matched an empty string, as per ECMA-262
// 15.5.4.10.
if (previous == startOfMatch) {
// Add the skipped character to the output, if any.
if (previous < subject.length) {
result.addSpecialSlice(previous, previous + 1);
// Inside a nested replace (replace called from the replacement function
// of another replace) or we have failed to set the reusable array
// back due to an exception in a replacement function. Create a new
// array to use in the future, or until the original is written back.
resultArray = $Array(16);
}
try {
// Must handle exceptions thrown by the replace functions correctly,
// including unregistering global regexps.
var res = %RegExpExecMultiple(regexp,
subject,
lastMatchInfo,
resultArray);
regexp.lastIndex = 0;
if (IS_NULL(res)) {
// No matches at all.
return subject;
}
var len = res.length;
var i = 0;
if (NUMBER_OF_CAPTURES(lastMatchInfo) == 2) {
var match_start = 0;
while (i < len) {
var elem = res[i];
if (%_IsSmi(elem)) {
if (elem > 0) {
match_start = (elem >> 11) + (elem & 0x7ff);
} else {
match_start = res[++i] - elem;
}
} else {
var func_result = replace.call(null, elem, match_start, subject);
if (!IS_STRING(func_result)) func_result = TO_STRING(func_result);
res[i] = func_result;
match_start += elem.length;
}
previous++;
// Per ECMA-262 15.10.6.2, if the previous index is greater than the
// string length, there is no match
if (previous > subject.length) {
return result.generate();
i++;
}
} else {
while (i < len) {
var elem = res[i];
if (!%_IsSmi(elem)) {
// elem must be an Array.
// Use the apply argument as backing for global RegExp properties.
lastMatchInfoOverride = elem;
var func_result = replace.apply(null, elem);
if (!IS_STRING(func_result)) func_result = TO_STRING(func_result);
res[i] = func_result;
}
i++;
}
matchInfo = DoRegExpExec(regexp, subject, previous);
} while (!IS_NULL(matchInfo));
}
var result = new ReplaceResultBuilder(subject, res);
return result.generate();
} finally {
lastMatchInfoOverride = null;
resultArray.length = 0;
reusableReplaceArray = resultArray;
}
// Tack on the final right substring after the last match.
result.addSpecialSlice(previous, subject.length);
} else { // Not a global regexp, no need to loop.
var matchInfo = DoRegExpExec(regexp, subject, 0);
if (IS_NULL(matchInfo)) return subject;
var result = new ReplaceResultBuilder(subject);
result.addSpecialSlice(0, matchInfo[CAPTURE0]);
var endOfMatch = matchInfo[CAPTURE1];
result.add(ApplyReplacementFunction(replace, matchInfo, subject));
// Can't use matchInfo any more from here, since the function could
// overwrite it.
result.addSpecialSlice(endOfMatch, subject.length);
return result.generate();
}
return result.generate();
}
......@@ -894,8 +888,11 @@ function StringSup() {
// ReplaceResultBuilder support.
function ReplaceResultBuilder(str) {
this.__proto__ = void 0;
this.elements = new $Array();
if (%_ArgumentsLength() > 1) {
this.elements = %_Arguments(1);
} else {
this.elements = new $Array();
}
this.special_string = str;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment