Commit 498b074b authored by lrn@chromium.org's avatar lrn@chromium.org

Run string replace regexp with function in C++ code loop.

Reuses the result array to save on allocation.
Matches Safari's behavior.

Review URL: http://codereview.chromium.org/1109010

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@4269 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 0737ec8d
...@@ -344,6 +344,7 @@ function RegExpToString() { ...@@ -344,6 +344,7 @@ function RegExpToString() {
// on the captures array of the last successful match and the subject string // on the captures array of the last successful match and the subject string
// of the last successful match. // of the last successful match.
function RegExpGetLastMatch() { function RegExpGetLastMatch() {
if (lastMatchInfoOverride) { return lastMatchInfoOverride[0]; }
var regExpSubject = LAST_SUBJECT(lastMatchInfo); var regExpSubject = LAST_SUBJECT(lastMatchInfo);
return SubString(regExpSubject, return SubString(regExpSubject,
lastMatchInfo[CAPTURE0], lastMatchInfo[CAPTURE0],
...@@ -352,6 +353,11 @@ function RegExpGetLastMatch() { ...@@ -352,6 +353,11 @@ function RegExpGetLastMatch() {
function RegExpGetLastParen() { function RegExpGetLastParen() {
if (lastMatchInfoOverride) {
var override = lastMatchInfoOverride;
if (override.length <= 3) return '';
return override[override.length - 3];
}
var length = NUMBER_OF_CAPTURES(lastMatchInfo); var length = NUMBER_OF_CAPTURES(lastMatchInfo);
if (length <= 2) return ''; // There were no captures. if (length <= 2) return ''; // There were no captures.
// We match the SpiderMonkey behavior: return the substring defined by the // We match the SpiderMonkey behavior: return the substring defined by the
...@@ -368,17 +374,32 @@ function RegExpGetLastParen() { ...@@ -368,17 +374,32 @@ function RegExpGetLastParen() {
// Returns the part of the last subject string that precedes the last match.
// When lastMatchInfoOverride is not set, the match start and the subject are
// read from lastMatchInfo. When it is set, the match position is taken from
// the second-to-last element of the override array and the subject from its
// last element.
function RegExpGetLeftContext() { function RegExpGetLeftContext() {
return SubString(LAST_SUBJECT(lastMatchInfo), var start_index;
0, var subject;
lastMatchInfo[CAPTURE0]); if (!lastMatchInfoOverride) {
start_index = lastMatchInfo[CAPTURE0];
subject = LAST_SUBJECT(lastMatchInfo);
} else {
var override = lastMatchInfoOverride;
start_index = override[override.length - 2];
subject = override[override.length - 1];
}
return SubString(subject, 0, start_index);
} }
// Returns the part of the last subject string that follows the last match.
// When lastMatchInfoOverride is not set, the end of the match and the
// subject are read from lastMatchInfo. When it is set, the override array
// holds the matched text at index 0, the match position at length - 2 and
// the subject string at length - 1.
function RegExpGetRightContext() {
  var start_index;
  var subject;
  if (!lastMatchInfoOverride) {
    start_index = lastMatchInfo[CAPTURE1];
    subject = LAST_SUBJECT(lastMatchInfo);
  } else {
    var override = lastMatchInfoOverride;
    subject = override[override.length - 1];
    // The match ends at its start position plus the length of the matched
    // text (override[0]). Adding subject.length instead would always land
    // at or past the end of the subject and yield an empty right context.
    start_index = override[override.length - 2] + override[0].length;
  }
  return SubString(subject, start_index, subject.length);
}
...@@ -387,6 +408,10 @@ function RegExpGetRightContext() { ...@@ -387,6 +408,10 @@ function RegExpGetRightContext() {
// called with indices from 1 to 9. // called with indices from 1 to 9.
function RegExpMakeCaptureGetter(n) { function RegExpMakeCaptureGetter(n) {
return function() { return function() {
if (lastMatchInfoOverride) {
if (n < lastMatchInfoOverride.length - 2) return lastMatchInfoOverride[n];
return '';
}
var index = n * 2; var index = n * 2;
if (index >= NUMBER_OF_CAPTURES(lastMatchInfo)) return ''; if (index >= NUMBER_OF_CAPTURES(lastMatchInfo)) return '';
var matchStart = lastMatchInfo[CAPTURE(index)]; var matchStart = lastMatchInfo[CAPTURE(index)];
...@@ -411,6 +436,12 @@ var lastMatchInfo = [ ...@@ -411,6 +436,12 @@ var lastMatchInfo = [
0, // REGEXP_FIRST_CAPTURE + 1 0, // REGEXP_FIRST_CAPTURE + 1
]; ];
// Override last match info with an array of actual substrings.
// Used internally by replace regexp with function.
// The array has the format of an "apply" argument for a replacement
// function. As read by the RegExp getters in this file, that is: the
// matched text at index 0, capture n at index n, the match position at
// index length - 2 and the subject string at index length - 1.
// null (the initial value) means no override is active, and the getters
// fall back to lastMatchInfo.
var lastMatchInfoOverride = null;
// ------------------------------------------------------------------- // -------------------------------------------------------------------
function SetupRegExp() { function SetupRegExp() {
......
This diff is collapsed.
...@@ -153,6 +153,7 @@ namespace internal { ...@@ -153,6 +153,7 @@ namespace internal {
/* Regular expressions */ \ /* Regular expressions */ \
F(RegExpCompile, 3, 1) \ F(RegExpCompile, 3, 1) \
F(RegExpExec, 4, 1) \ F(RegExpExec, 4, 1) \
F(RegExpExecMultiple, 4, 1) \
\ \
/* Strings */ \ /* Strings */ \
F(StringCharCodeAt, 2, 1) \ F(StringCharCodeAt, 2, 1) \
......
...@@ -405,97 +405,91 @@ function addCaptureString(builder, matchInfo, index) { ...@@ -405,97 +405,91 @@ function addCaptureString(builder, matchInfo, index) {
builder.addSpecialSlice(start, end); builder.addSpecialSlice(start, end);
}; };
// Scratch array passed to %RegExpExecMultiple by
// StringReplaceRegExpWithFunction. That function sets this variable to null
// while it is using the array and stores the array back, with its length
// reset to 0, in its finally block. A call that finds null here (for example
// a nested replace) allocates a fresh array instead.
// TODO(lrn): This array will survive indefinitely if replace is never
// called again. However, it will be empty, since the contents are cleared
// in the finally block.
var reusableReplaceArray = $Array(16);
// Helper function for replacing regular expressions with the result of a // Helper function for replacing regular expressions with the result of a
// function application in String.prototype.replace. The function application // function application in String.prototype.replace.
// must be interleaved with the regexp matching (contrary to ECMA-262
// 15.5.4.11) to mimic SpiderMonkey and KJS behavior when the function uses
// the static properties of the RegExp constructor. Example:
// 'abcd'.replace(/(.)/g, function() { return RegExp.$1; }
// should be 'abcd' and not 'dddd' (or anything else).
function StringReplaceRegExpWithFunction(subject, regexp, replace) { function StringReplaceRegExpWithFunction(subject, regexp, replace) {
var matchInfo = DoRegExpExec(regexp, subject, 0);
if (IS_NULL(matchInfo)) return subject;
var result = new ReplaceResultBuilder(subject);
// There's at least one match. If the regexp is global, we have to loop
// over all matches. The loop is not in C++ code here like the one in
// RegExp.prototype.exec, because of the interleaved function application.
// Unfortunately, that means this code is nearly duplicated, here and in
// jsregexp.cc.
if (regexp.global) { if (regexp.global) {
var previous = 0; var resultArray = reusableReplaceArray;
var startOfMatch; if (resultArray) {
if (NUMBER_OF_CAPTURES(matchInfo) == 2) { reusableReplaceArray = null;
// Both branches contain essentially the same loop except for the call
// to the replace function. The branch is put outside of the loop for
// speed
do {
startOfMatch = matchInfo[CAPTURE0];
result.addSpecialSlice(previous, startOfMatch);
previous = matchInfo[CAPTURE1];
var match = SubString(subject, startOfMatch, previous);
// Don't call directly to avoid exposing the built-in global object.
result.add(replace.call(null, match, startOfMatch, subject));
// Can't use matchInfo any more from here, since the function could
// overwrite it.
// Continue with the next match.
// Increment previous if we matched an empty string, as per ECMA-262
// 15.5.4.10.
if (previous == startOfMatch) {
// Add the skipped character to the output, if any.
if (previous < subject.length) {
result.addSpecialSlice(previous, previous + 1);
}
previous++;
// Per ECMA-262 15.10.6.2, if the previous index is greater than the
// string length, there is no match
if (previous > subject.length) {
return result.generate();
}
}
matchInfo = DoRegExpExec(regexp, subject, previous);
} while (!IS_NULL(matchInfo));
} else { } else {
do { // Inside a nested replace (replace called from the replacement function
startOfMatch = matchInfo[CAPTURE0]; // of another replace) or we have failed to set the reusable array
result.addSpecialSlice(previous, startOfMatch); // back due to an exception in a replacement function. Create a new
previous = matchInfo[CAPTURE1]; // array to use in the future, or until the original is written back.
result.add(ApplyReplacementFunction(replace, matchInfo, subject)); resultArray = $Array(16);
// Can't use matchInfo any more from here, since the function could }
// overwrite it. try {
// Continue with the next match. // Must handle exceptions thrown by the replace functions correctly,
// Increment previous if we matched an empty string, as per ECMA-262 // including unregistering global regexps.
// 15.5.4.10. var res = %RegExpExecMultiple(regexp,
if (previous == startOfMatch) { subject,
// Add the skipped character to the output, if any. lastMatchInfo,
if (previous < subject.length) { resultArray);
result.addSpecialSlice(previous, previous + 1); regexp.lastIndex = 0;
if (IS_NULL(res)) {
// No matches at all.
return subject;
}
var len = res.length;
var i = 0;
if (NUMBER_OF_CAPTURES(lastMatchInfo) == 2) {
var match_start = 0;
while (i < len) {
var elem = res[i];
if (%_IsSmi(elem)) {
if (elem > 0) {
match_start = (elem >> 11) + (elem & 0x7ff);
} else {
match_start = res[++i] - elem;
}
} else {
var func_result = replace.call(null, elem, match_start, subject);
if (!IS_STRING(func_result)) func_result = TO_STRING(func_result);
res[i] = func_result;
match_start += elem.length;
} }
previous++; i++;
// Per ECMA-262 15.10.6.2, if the previous index is greater than the }
// string length, there is no match } else {
if (previous > subject.length) { while (i < len) {
return result.generate(); var elem = res[i];
if (!%_IsSmi(elem)) {
// elem must be an Array.
// Use the apply argument as backing for global RegExp properties.
lastMatchInfoOverride = elem;
var func_result = replace.apply(null, elem);
if (!IS_STRING(func_result)) func_result = TO_STRING(func_result);
res[i] = func_result;
} }
i++;
} }
matchInfo = DoRegExpExec(regexp, subject, previous); }
} while (!IS_NULL(matchInfo)); var result = new ReplaceResultBuilder(subject, res);
return result.generate();
} finally {
lastMatchInfoOverride = null;
resultArray.length = 0;
reusableReplaceArray = resultArray;
} }
// Tack on the final right substring after the last match.
result.addSpecialSlice(previous, subject.length);
} else { // Not a global regexp, no need to loop. } else { // Not a global regexp, no need to loop.
var matchInfo = DoRegExpExec(regexp, subject, 0);
if (IS_NULL(matchInfo)) return subject;
var result = new ReplaceResultBuilder(subject);
result.addSpecialSlice(0, matchInfo[CAPTURE0]); result.addSpecialSlice(0, matchInfo[CAPTURE0]);
var endOfMatch = matchInfo[CAPTURE1]; var endOfMatch = matchInfo[CAPTURE1];
result.add(ApplyReplacementFunction(replace, matchInfo, subject)); result.add(ApplyReplacementFunction(replace, matchInfo, subject));
// Can't use matchInfo any more from here, since the function could // Can't use matchInfo any more from here, since the function could
// overwrite it. // overwrite it.
result.addSpecialSlice(endOfMatch, subject.length); result.addSpecialSlice(endOfMatch, subject.length);
return result.generate();
} }
return result.generate();
} }
...@@ -894,8 +888,11 @@ function StringSup() { ...@@ -894,8 +888,11 @@ function StringSup() {
// ReplaceResultBuilder support. // ReplaceResultBuilder support.
function ReplaceResultBuilder(str) { function ReplaceResultBuilder(str) {
this.__proto__ = void 0; if (%_ArgumentsLength() > 1) {
this.elements = new $Array(); this.elements = %_Arguments(1);
} else {
this.elements = new $Array();
}
this.special_string = str; this.special_string = str;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment