Commit 90e9e99b authored by lrn@chromium.org

RegExp replace with empty string optimization by Sandholm.

Review URL: http://codereview.chromium.org/2809048

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5021 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 8a446330
......@@ -420,7 +420,7 @@ enum NegativeZeroHandling {
kIgnoreNegativeZero
};
class GenericUnaryOpStub : public CodeStub {
public:
GenericUnaryOpStub(Token::Value op,
......
......@@ -96,6 +96,12 @@ Handle<String> Factory::NewStringFromTwoByte(Vector<const uc16> string,
}
// Allocates a raw ASCII string of |length| characters by forwarding to
// Heap::AllocateRawAsciiString(length, pretenure) and yields it as a
// Handle<String>.
// NOTE(review): CALL_HEAP_FUNCTION is defined outside this view; presumably it
// deals with allocation failure and produces the returned handle -- confirm.
Handle<String> Factory::NewRawAsciiString(int length,
PretenureFlag pretenure) {
CALL_HEAP_FUNCTION(Heap::AllocateRawAsciiString(length, pretenure), String);
}
Handle<String> Factory::NewRawTwoByteString(int length,
PretenureFlag pretenure) {
CALL_HEAP_FUNCTION(Heap::AllocateRawTwoByteString(length, pretenure), String);
......
......@@ -95,12 +95,16 @@ class Factory : public AllStatic {
Vector<const char> str,
PretenureFlag pretenure = NOT_TENURED);
static Handle<String> NewStringFromTwoByte(Vector<const uc16> str,
static Handle<String> NewStringFromTwoByte(
Vector<const uc16> str,
PretenureFlag pretenure = NOT_TENURED);
// Allocates and partially initializes a TwoByte String. The characters of
// the string are uninitialized. Currently used in regexp code only, where
// they are pretenured.
// Allocates and partially initializes an ASCII or TwoByte String. The
// characters of the string are uninitialized. Currently used in regexp code
// only, where they are pretenured.
static Handle<String> NewRawAsciiString(
int length,
PretenureFlag pretenure = NOT_TENURED);
static Handle<String> NewRawTwoByteString(
int length,
PretenureFlag pretenure = NOT_TENURED);
......
......@@ -2284,6 +2284,134 @@ static Object* StringReplaceRegExpWithString(String* subject,
return *(builder.ToString());
}
// Specialized String.prototype.replace for an empty replacement string:
// builds a copy of |subject| with the regexp matches removed.
//
// - subject:         flat string to search (asserted flat on entry).
// - regexp:          the regular expression; if its flags are not global,
//                    only the first match is removed.
// - last_match_info: array that RegExpImpl::Exec fills with capture
//                    positions; captures 0 and 1 are read as the start and
//                    end of the whole match.
//
// Returns Failure::Exception() if RegExpImpl::Exec yields a null handle, the
// unmodified subject if there is no match at all, Heap::empty_string() if
// nothing remains after removal, and otherwise a freshly allocated sequential
// string holding the remaining characters.
template <typename ResultSeqString>
static Object* StringReplaceRegExpWithEmptyString(ResultSeqString* subject,
                                                  JSRegExp* regexp,
                                                  JSArray* last_match_info) {
  ASSERT(subject->IsFlat());

  HandleScope handles;

  Handle<String> subject_handle(subject);
  Handle<JSRegExp> regexp_handle(regexp);
  Handle<JSArray> last_match_info_handle(last_match_info);
  Handle<Object> match = RegExpImpl::Exec(regexp_handle,
                                          subject_handle,
                                          0,
                                          last_match_info_handle);
  if (match.is_null()) return Failure::Exception();
  if (match->IsNull()) return *subject_handle;

  ASSERT(last_match_info_handle->HasFastElements());

  HandleScope loop_scope;
  int start, end;
  {
    AssertNoAllocation match_info_array_is_not_in_a_handle;
    FixedArray* match_info_array =
        FixedArray::cast(last_match_info_handle->elements());

    start = RegExpImpl::GetCapture(match_info_array, 0);
    end = RegExpImpl::GetCapture(match_info_array, 1);
  }

  // Read the length through the handle, not through the raw |subject|
  // pointer: RegExpImpl::Exec above may have allocated, after which the raw
  // pointer must no longer be dereferenced.
  int length = subject_handle->length();
  // Size after removing the first match. Further matches can only make the
  // result shorter, so this is an upper bound for the global case.
  int new_length = length - (end - start);
  if (new_length == 0) {
    return Heap::empty_string();
  }
  // TODO(sandholm) try to use types statically to determine this.
  Handle<ResultSeqString> answer;
  if (subject_handle->IsAsciiRepresentation()) {
    answer =
        Handle<ResultSeqString>::cast(Factory::NewRawAsciiString(new_length));
  } else {
    answer =
        Handle<ResultSeqString>::cast(Factory::NewRawTwoByteString(new_length));
  }

  // If the regexp isn't global, only match once.
  if (!regexp_handle->GetFlags().is_global()) {
    // Copy the part before the match ...
    if (start > 0) {
      String::WriteToFlat(*subject_handle,
                          answer->GetChars(),
                          0,
                          start);
    }
    // ... and the part after it, directly behind the first part.
    if (end < length) {
      String::WriteToFlat(*subject_handle,
                          answer->GetChars() + start,
                          end,
                          length);
    }
    return *answer;
  }

  int prev = 0;  // Index of end of last match.
  int next = 0;  // Start of next search (prev unless last match was empty).
  int position = 0;  // Number of characters written to answer so far.

  do {
    if (prev < start) {
      // Add substring subject[prev;start] to answer string.
      String::WriteToFlat(*subject_handle,
                          answer->GetChars() + position,
                          prev,
                          start);
      position += start - prev;
    }
    prev = end;
    next = end;
    // Continue from where the match ended, unless it was an empty match.
    if (start == end) {
      // Step past an empty match so the search makes progress; stop once the
      // next search position would lie beyond the end of the subject.
      next++;
      if (next > length) break;
    }
    match = RegExpImpl::Exec(regexp_handle,
                             subject_handle,
                             next,
                             last_match_info_handle);
    if (match.is_null()) return Failure::Exception();
    if (match->IsNull()) break;

    ASSERT(last_match_info_handle->HasFastElements());
    HandleScope loop_scope;
    {
      AssertNoAllocation match_info_array_is_not_in_a_handle;
      FixedArray* match_info_array =
          FixedArray::cast(last_match_info_handle->elements());
      start = RegExpImpl::GetCapture(match_info_array, 0);
      end = RegExpImpl::GetCapture(match_info_array, 1);
    }
  } while (true);

  if (prev < length) {
    // Add substring subject[prev;length] to answer string.
    String::WriteToFlat(*subject_handle,
                        answer->GetChars() + position,
                        prev,
                        length);
    position += length - prev;
  }

  if (position == 0) {
    return Heap::empty_string();
  }

  // Shorten string and fill: the string was allocated for new_length
  // characters but only |position| were written, so set the real length and
  // cover the unused tail of the allocation with a filler object.
  int string_size = ResultSeqString::SizeFor(position);
  int allocated_string_size = ResultSeqString::SizeFor(new_length);
  int delta = allocated_string_size - string_size;

  answer->set_length(position);
  if (delta == 0) return *answer;

  Address end_of_string = answer->address() + string_size;
  Heap::CreateFillerObjectAt(end_of_string, delta);

  return *answer;
}
static Object* Runtime_StringReplaceRegExpWithString(Arguments args) {
ASSERT(args.length() == 4);
......@@ -2311,6 +2439,18 @@ static Object* Runtime_StringReplaceRegExpWithString(Arguments args) {
ASSERT(last_match_info->HasFastElements());
if (replacement->length() == 0) {
if (subject->IsAsciiRepresentation()) {
return StringReplaceRegExpWithEmptyString(SeqAsciiString::cast(subject),
regexp,
last_match_info);
} else {
return StringReplaceRegExpWithEmptyString(SeqTwoByteString::cast(subject),
regexp,
last_match_info);
}
}
return StringReplaceRegExpWithString(subject,
regexp,
replacement,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment