Commit 578e70a5 authored by sandholm@chromium.org

Improve regexp split, replace and test.

Review URL: http://codereview.chromium.org/5959002

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@6065 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 83b28cd1
...@@ -32,7 +32,7 @@ const $RegExp = global.RegExp; ...@@ -32,7 +32,7 @@ const $RegExp = global.RegExp;
// A recursive descent parser for Patterns according to the grammar of // A recursive descent parser for Patterns according to the grammar of
// ECMA-262 15.10.1, with deviations noted below. // ECMA-262 15.10.1, with deviations noted below.
function DoConstructRegExp(object, pattern, flags, isConstructorCall) { function DoConstructRegExp(object, pattern, flags) {
// RegExp : Called as constructor; see ECMA-262, section 15.10.4. // RegExp : Called as constructor; see ECMA-262, section 15.10.4.
if (IS_REGEXP(pattern)) { if (IS_REGEXP(pattern)) {
if (!IS_UNDEFINED(flags)) { if (!IS_UNDEFINED(flags)) {
...@@ -80,7 +80,7 @@ function DoConstructRegExp(object, pattern, flags, isConstructorCall) { ...@@ -80,7 +80,7 @@ function DoConstructRegExp(object, pattern, flags, isConstructorCall) {
function RegExpConstructor(pattern, flags) { function RegExpConstructor(pattern, flags) {
if (%_IsConstructCall()) { if (%_IsConstructCall()) {
DoConstructRegExp(this, pattern, flags, true); DoConstructRegExp(this, pattern, flags);
} else { } else {
// RegExp : Called as function; see ECMA-262, section 15.10.3.1. // RegExp : Called as function; see ECMA-262, section 15.10.3.1.
if (IS_REGEXP(pattern) && IS_UNDEFINED(flags)) { if (IS_REGEXP(pattern) && IS_UNDEFINED(flags)) {
...@@ -104,9 +104,9 @@ function CompileRegExp(pattern, flags) { ...@@ -104,9 +104,9 @@ function CompileRegExp(pattern, flags) {
// the empty string. For compatibility with JSC, we match their // the empty string. For compatibility with JSC, we match their
// behavior. // behavior.
if (IS_UNDEFINED(pattern) && %_ArgumentsLength() != 0) { if (IS_UNDEFINED(pattern) && %_ArgumentsLength() != 0) {
DoConstructRegExp(this, 'undefined', flags, false); DoConstructRegExp(this, 'undefined', flags);
} else { } else {
DoConstructRegExp(this, pattern, flags, false); DoConstructRegExp(this, pattern, flags);
} }
} }
...@@ -150,12 +150,12 @@ function BuildResultFromMatchInfo(lastMatchInfo, s) { ...@@ -150,12 +150,12 @@ function BuildResultFromMatchInfo(lastMatchInfo, s) {
function RegExpExecNoTests(regexp, string, start) { function RegExpExecNoTests(regexp, string, start) {
// Must be called with RegExp, string and positive integer as arguments. // Must be called with RegExp, string and positive integer as arguments.
var matchInfo = DoRegExpExec(regexp, string, start); var matchInfo = %_RegExpExec(regexp, string, start, lastMatchInfo);
var result = null;
if (matchInfo !== null) { if (matchInfo !== null) {
result = BuildResultFromMatchInfo(matchInfo, string); lastMatchInfoOverride = null;
return BuildResultFromMatchInfo(matchInfo, string);
} }
return result; return null;
} }
...@@ -261,11 +261,14 @@ function RegExpTest(string) { ...@@ -261,11 +261,14 @@ function RegExpTest(string) {
%_StringCharCodeAt(this.source, 2) != 63) { // '?' %_StringCharCodeAt(this.source, 2) != 63) { // '?'
if (!%_ObjectEquals(regexp_key, this)) { if (!%_ObjectEquals(regexp_key, this)) {
regexp_key = this; regexp_key = this;
regexp_val = new $RegExp(this.source.substring(2, this.source.length), regexp_val = new $RegExp(SubString(this.source, 2, this.source.length),
(this.ignoreCase ? 'i' : '') (!this.ignoreCase
+ (this.multiline ? 'm' : '')); ? !this.multiline ? "" : "m"
: !this.multiline ? "i" : "im"));
}
if (%_RegExpExec(regexp_val, string, 0, lastMatchInfo) === null) {
return false;
} }
if (!regexp_val.test(string)) return false;
} }
%_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, string, lastIndex]); %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, string, lastIndex]);
// matchIndices is either null or the lastMatchInfo array. // matchIndices is either null or the lastMatchInfo array.
......
...@@ -159,7 +159,7 @@ function StringLocaleCompare(other) { ...@@ -159,7 +159,7 @@ function StringLocaleCompare(other) {
function StringMatch(regexp) { function StringMatch(regexp) {
var subject = TO_STRING_INLINE(this); var subject = TO_STRING_INLINE(this);
if (IS_REGEXP(regexp)) { if (IS_REGEXP(regexp)) {
if (!regexp.global) return regexp.exec(subject); if (!regexp.global) return RegExpExecNoTests(regexp, subject, 0);
%_Log('regexp', 'regexp-match,%0S,%1r', [subject, regexp]); %_Log('regexp', 'regexp-match,%0S,%1r', [subject, regexp]);
// lastMatchInfo is defined in regexp.js. // lastMatchInfo is defined in regexp.js.
return %StringMatch(subject, regexp, lastMatchInfo); return %StringMatch(subject, regexp, lastMatchInfo);
...@@ -245,17 +245,18 @@ function StringReplace(search, replace) { ...@@ -245,17 +245,18 @@ function StringReplace(search, replace) {
// Expand the $-expressions in the string and return a new string with // Expand the $-expressions in the string and return a new string with
// the result. // the result.
function ExpandReplacement(string, subject, matchInfo, builder) { function ExpandReplacement(string, subject, matchInfo, builder) {
var length = string.length;
var builder_elements = builder.elements;
var next = %StringIndexOf(string, '$', 0); var next = %StringIndexOf(string, '$', 0);
if (next < 0) { if (next < 0) {
builder.add(string); if (length > 0) builder_elements.push(string);
return; return;
} }
// Compute the number of captures; see ECMA-262, 15.5.4.11, p. 102. // Compute the number of captures; see ECMA-262, 15.5.4.11, p. 102.
var m = NUMBER_OF_CAPTURES(matchInfo) >> 1; // Includes the match. var m = NUMBER_OF_CAPTURES(matchInfo) >> 1; // Includes the match.
if (next > 0) builder.add(SubString(string, 0, next)); if (next > 0) builder_elements.push(SubString(string, 0, next));
var length = string.length;
while (true) { while (true) {
var expansion = '$'; var expansion = '$';
...@@ -264,7 +265,7 @@ function ExpandReplacement(string, subject, matchInfo, builder) { ...@@ -264,7 +265,7 @@ function ExpandReplacement(string, subject, matchInfo, builder) {
var peek = %_StringCharCodeAt(string, position); var peek = %_StringCharCodeAt(string, position);
if (peek == 36) { // $$ if (peek == 36) { // $$
++position; ++position;
builder.add('$'); builder_elements.push('$');
} else if (peek == 38) { // $& - match } else if (peek == 38) { // $& - match
++position; ++position;
builder.addSpecialSlice(matchInfo[CAPTURE0], builder.addSpecialSlice(matchInfo[CAPTURE0],
...@@ -301,14 +302,14 @@ function ExpandReplacement(string, subject, matchInfo, builder) { ...@@ -301,14 +302,14 @@ function ExpandReplacement(string, subject, matchInfo, builder) {
// digit capture references, we can only enter here when a // digit capture references, we can only enter here when a
// single digit capture reference is outside the range of // single digit capture reference is outside the range of
// captures. // captures.
builder.add('$'); builder_elements.push('$');
--position; --position;
} }
} else { } else {
builder.add('$'); builder_elements.push('$');
} }
} else { } else {
builder.add('$'); builder_elements.push('$');
} }
// Go to the next $ in the string. // Go to the next $ in the string.
...@@ -318,13 +319,15 @@ function ExpandReplacement(string, subject, matchInfo, builder) { ...@@ -318,13 +319,15 @@ function ExpandReplacement(string, subject, matchInfo, builder) {
// haven't reached the end, we need to append the suffix. // haven't reached the end, we need to append the suffix.
if (next < 0) { if (next < 0) {
if (position < length) { if (position < length) {
builder.add(SubString(string, position, length)); builder_elements.push(SubString(string, position, length));
} }
return; return;
} }
// Append substring between the previous and the next $ character. // Append substring between the previous and the next $ character.
builder.add(SubString(string, position, next)); if (next > position) {
builder_elements.push(SubString(string, position, next));
}
} }
}; };
...@@ -559,23 +562,22 @@ function StringSplit(separator, limit) { ...@@ -559,23 +562,22 @@ function StringSplit(separator, limit) {
var currentIndex = 0; var currentIndex = 0;
var startIndex = 0; var startIndex = 0;
var startMatch = 0;
var result = []; var result = [];
outer_loop: outer_loop:
while (true) { while (true) {
if (startIndex === length) { if (startIndex === length) {
result.push(subject.slice(currentIndex, length)); result.push(SubString(subject, currentIndex, length));
break; break;
} }
var matchInfo = splitMatch(separator, subject, currentIndex, startIndex); var matchInfo = DoRegExpExec(separator, subject, startIndex);
if (matchInfo == null || length === (startMatch = matchInfo[CAPTURE0])) {
if (IS_NULL(matchInfo)) { result.push(SubString(subject, currentIndex, length));
result.push(subject.slice(currentIndex, length));
break; break;
} }
var endIndex = matchInfo[CAPTURE1]; var endIndex = matchInfo[CAPTURE1];
// We ignore a zero-length match at the currentIndex. // We ignore a zero-length match at the currentIndex.
...@@ -584,7 +586,12 @@ function StringSplit(separator, limit) { ...@@ -584,7 +586,12 @@ function StringSplit(separator, limit) {
continue; continue;
} }
result.push(SubString(subject, currentIndex, matchInfo[CAPTURE0])); if (currentIndex + 1 == startMatch) {
result.push(%_StringCharAt(subject, currentIndex));
} else {
result.push(%_SubString(subject, currentIndex, startMatch));
}
if (result.length === limit) break; if (result.length === limit) break;
var matchinfo_len = NUMBER_OF_CAPTURES(matchInfo) + REGEXP_FIRST_CAPTURE; var matchinfo_len = NUMBER_OF_CAPTURES(matchInfo) + REGEXP_FIRST_CAPTURE;
...@@ -609,19 +616,6 @@ function StringSplit(separator, limit) { ...@@ -609,19 +616,6 @@ function StringSplit(separator, limit) {
} }
// ECMA-262 section 15.5.4.14
// Helper function used by split. This version returns the matchInfo
// instead of allocating a new array with basically the same information.
function splitMatch(separator, subject, current_index, start_index) {
var matchInfo = DoRegExpExec(separator, subject, start_index);
if (matchInfo == null) return null;
// Section 15.5.4.14 paragraph two says that we do not allow zero length
// matches at the end of the string.
if (matchInfo[CAPTURE0] === subject.length) return null;
return matchInfo;
}
// ECMA-262 section 15.5.4.15 // ECMA-262 section 15.5.4.15
function StringSubstring(start, end) { function StringSubstring(start, end) {
var s = TO_STRING_INLINE(this); var s = TO_STRING_INLINE(this);
...@@ -844,24 +838,21 @@ function ReplaceResultBuilder(str) { ...@@ -844,24 +838,21 @@ function ReplaceResultBuilder(str) {
ReplaceResultBuilder.prototype.add = function(str) { ReplaceResultBuilder.prototype.add = function(str) {
str = TO_STRING_INLINE(str); str = TO_STRING_INLINE(str);
if (str.length > 0) { if (str.length > 0) this.elements.push(str);
var elements = this.elements;
elements[elements.length] = str;
}
} }
ReplaceResultBuilder.prototype.addSpecialSlice = function(start, end) { ReplaceResultBuilder.prototype.addSpecialSlice = function(start, end) {
var len = end - start; var len = end - start;
if (start < 0 || len <= 0) return; if (start < 0 || len <= 0) return;
var elements = this.elements;
if (start < 0x80000 && len < 0x800) { if (start < 0x80000 && len < 0x800) {
elements[elements.length] = (start << 11) | len; this.elements.push((start << 11) | len);
} else { } else {
// 0 < len <= String::kMaxLength and Smi::kMaxValue >= String::kMaxLength, // 0 < len <= String::kMaxLength and Smi::kMaxValue >= String::kMaxLength,
// so -len is a smi. // so -len is a smi.
elements[elements.length] = -len; var elements = this.elements;
elements[elements.length] = start; elements.push(-len);
elements.push(start);
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment