Commit e2af4529 authored by lrn@chromium.org's avatar lrn@chromium.org

String.replace implemented in C++.


git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@1506 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 30af089b
......@@ -600,7 +600,6 @@ Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,
if (previous_index > subject->length() || previous_index < 0) {
// Per ECMA-262 15.10.6.2, if the previous index is greater than the
// string length, there is no match.
matches = Factory::null_value();
return result;
} else {
#ifdef DEBUG
......@@ -666,81 +665,28 @@ Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> regexp,
bool rc;
Handle<String> original_subject = subject;
if (FLAG_regexp_native) {
#ifndef ARM
Handle<Code> code(IrregexpNativeCode(*regexp, is_ascii));
// Character offsets into string.
int start_offset = previous_index;
int end_offset = subject->length(shape);
if (shape.IsCons()) {
subject = Handle<String>(ConsString::cast(*subject)->first());
} else if (shape.IsSliced()) {
SlicedString* slice = SlicedString::cast(*subject);
start_offset += slice->start();
end_offset += slice->start();
subject = Handle<String>(slice->buffer());
}
// String is now either Sequential or External
StringShape flatshape(*subject);
bool is_ascii = flatshape.IsAsciiRepresentation();
int char_size_shift = is_ascii ? 0 : 1;
RegExpMacroAssemblerIA32::Result res;
if (flatshape.IsExternal()) {
const byte* address;
if (is_ascii) {
ExternalAsciiString* ext = ExternalAsciiString::cast(*subject);
address = reinterpret_cast<const byte*>(ext->resource()->data());
} else {
ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject);
address = reinterpret_cast<const byte*>(ext->resource()->data());
}
res = RegExpMacroAssemblerIA32::Execute(
*code,
const_cast<Address*>(&address),
start_offset << char_size_shift,
end_offset << char_size_shift,
offsets_vector,
previous_index == 0);
} else { // Sequential string
ASSERT(StringShape(*subject).IsSequential());
Address char_address =
is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress()
: SeqTwoByteString::cast(*subject)->GetCharsAddress();
int byte_offset = char_address - reinterpret_cast<Address>(*subject);
res = RegExpMacroAssemblerIA32::Execute(
*code,
reinterpret_cast<Address*>(subject.location()),
byte_offset + (start_offset << char_size_shift),
byte_offset + (end_offset << char_size_shift),
offsets_vector,
previous_index == 0);
}
if (UseNativeRegexp()) {
#ifdef ARM
UNREACHABLE();
#else
Handle<Code> code(RegExpImpl::IrregexpNativeCode(*regexp, is_ascii));
RegExpMacroAssemblerIA32::Result res =
RegExpMacroAssemblerIA32::Match(code,
subject,
offsets_vector,
offsets_vector_length,
previous_index);
if (res == RegExpMacroAssemblerIA32::EXCEPTION) {
ASSERT(Top::has_pending_exception());
return Handle<Object>::null();
}
rc = (res == RegExpMacroAssemblerIA32::SUCCESS);
ASSERT(res == RegExpMacroAssemblerIA32::SUCCESS
|| res == RegExpMacroAssemblerIA32::FAILURE);
if (rc) {
// Capture values are relative to start_offset only.
for (int i = 0; i < offsets_vector_length; i++) {
if (offsets_vector[i] >= 0) {
offsets_vector[i] += previous_index;
}
}
}
} else {
#else
// Unimplemented on ARM, fall through to bytecode.
}
{
rc = (res == RegExpMacroAssemblerIA32::SUCCESS);
#endif
} else {
for (int i = number_of_capture_registers - 1; i >= 0; i--) {
offsets_vector[i] = -1;
}
......@@ -757,6 +703,9 @@ Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> regexp,
}
FixedArray* array = last_match_info->elements();
// Clear previous input/string values to avoid potential memory leak.
SetLastSubject(array, Heap::empty_string());
SetLastInput(array, Heap::empty_string());
ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead);
// The captures come in (start, end+1) pairs.
for (int i = 0; i < number_of_capture_registers; i += 2) {
......@@ -4716,9 +4665,9 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(RegExpCompileData* data,
NodeInfo info = *node->info();
if (FLAG_regexp_native) {
if (RegExpImpl::UseNativeRegexp()) {
#ifdef ARM
// Unimplemented, fall-through to bytecode implementation.
UNREACHABLE();
#else // IA32
RegExpMacroAssemblerIA32::Mode mode;
if (is_ascii) {
......
......@@ -36,6 +36,13 @@ class RegExpMacroAssembler;
class RegExpImpl {
public:
// Tells callers whether regular expressions should run as native code.
// When ARM is defined the answer is always false; otherwise it mirrors
// the FLAG_regexp_native flag.
static inline bool UseNativeRegexp() {
#ifndef ARM
  const bool native_enabled = FLAG_regexp_native;
#else
  const bool native_enabled = false;
#endif
  return native_enabled;
}
// Creates a regular expression literal in the old space.
// This function calls the garbage collector if necessary.
static Handle<Object> CreateRegExpLiteral(Handle<JSFunction> constructor,
......@@ -117,6 +124,7 @@ class RegExpImpl {
static const int kFirstCapture = 1;
static const int kLastMatchOverhead = 3;
// Used to access the lastMatchInfo array.
// Reads capture entry |index| from |array|. Entries are stored as Smis
// beginning at slot kFirstCapture.
static int GetCapture(FixedArray* array, int index) {
  const int slot = kFirstCapture + index;
  Object* entry = array->get(slot);
  return Smi::cast(entry)->value();
}
......@@ -139,12 +147,11 @@ class RegExpImpl {
array->set(index + kFirstCapture, Smi::FromInt(to));
}
private:
static String* last_ascii_string_;
static String* two_byte_cached_string_;
static bool EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii);
// Reads the Smi stored at slot kLastCaptureCount of |array| and returns
// its integer value.
static int GetLastCaptureCount(FixedArray* array) {
  Object* raw_count = array->get(kLastCaptureCount);
  return Smi::cast(raw_count)->value();
}
// For acting on the JSRegExp data FixedArray.
static int IrregexpMaxRegisterCount(FixedArray* re);
static void SetIrregexpMaxRegisterCount(FixedArray* re, int value);
static int IrregexpNumberOfCaptures(FixedArray* re);
......@@ -152,6 +159,13 @@ class RegExpImpl {
static ByteArray* IrregexpByteCode(FixedArray* re, bool is_ascii);
static Code* IrregexpNativeCode(FixedArray* re, bool is_ascii);
private:
static String* last_ascii_string_;
static String* two_byte_cached_string_;
static bool EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii);
// On a successful match, the result is a JSArray containing
// captured positions. On a failure, the result is the null value.
// Returns an empty handle in case of an exception.
......@@ -171,10 +185,6 @@ class RegExpImpl {
int character_position,
int utf8_position);
// Used to access the lastMatchInfo array.
// Reads the Smi stored at slot kLastCaptureCount of |array| and returns
// its integer value.
static int GetLastCaptureCount(FixedArray* array) {
  Object* raw_count = array->get(kLastCaptureCount);
  return Smi::cast(raw_count)->value();
}
// A one element cache of the last utf8_subject string and its length. The
// subject JS String object is cached in the heap. We also cache a
// translation between position and utf8 position.
......
......@@ -30,6 +30,7 @@
#include "disassembler.h"
#include "disasm.h"
#include "macro-assembler.h"
#include "jsregexp.h"
namespace v8 { namespace internal {
......@@ -696,11 +697,8 @@ void JSRegExp::JSRegExpVerify() {
break;
}
case JSRegExp::IRREGEXP: {
bool is_native = FLAG_regexp_native;
#ifdef ARM
// No native regexp on arm yet.
is_native = false;
#endif
bool is_native = RegExpImpl::UseNativeRegexp();
FixedArray* arr = FixedArray::cast(data());
Object* ascii_data = arr->get(JSRegExp::kIrregexpASCIICodeIndex);
ASSERT(ascii_data->IsTheHole()
......
......@@ -2316,6 +2316,19 @@ JSRegExp::Type JSRegExp::TypeTag() {
}
int JSRegExp::CaptureCount() {
switch (TypeTag()) {
case ATOM:
return 0;
case IRREGEXP:
return Smi::cast(DataAt(kIrregexpCaptureCountIndex))->value();
default:
UNREACHABLE();
return -1;
}
}
JSRegExp::Flags JSRegExp::GetFlags() {
ASSERT(this->data()->IsFixedArray());
Object* data = this->data();
......
......@@ -3003,6 +3003,7 @@ class JSRegExp: public JSObject {
DECL_ACCESSORS(data, Object)
inline Type TypeTag();
inline int CaptureCount();
inline Flags GetFlags();
inline String* Pattern();
inline Object* DataAt(int index);
......
......@@ -200,9 +200,9 @@ function RegExpExec(string) {
// Section 15.10.6.3 doesn't actually make sense, but the intention seems to be
// that test is defined in terms of String.prototype.exec even if the method is
// called on a non-RegExp object. However, it probably means the original
// value of String.prototype.exec, which is what everybody else implements.
// that test is defined in terms of RegExp.prototype.exec. However, it probably
// means the original value of RegExp.prototype.exec, which is what everybody
// else implements.
function RegExpTest(string) {
if (!IS_REGEXP(this)) {
throw MakeTypeError('method_called_on_incompatible',
......
......@@ -978,6 +978,79 @@ void RegExpMacroAssemblerIA32::WriteStackPointerToRegister(int reg) {
}
// Runs the compiled regexp |regexp_code| against |subject|, starting the
// search at character index |previous_index|.
//
// The subject is first reduced to its underlying string: a cons string is
// replaced by its first part, and a sliced string by its buffer (with both
// offsets shifted by the slice start). The character data is then handed to
// Execute() using byte offsets.
//
// On SUCCESS, every non-negative entry among the first
// |offsets_vector_length| entries of |offsets_vector| is increased by
// |previous_index|. The return value is whatever Execute() returned.
//
// NOTE(review): using only first() of a cons string presumably relies on the
// caller having flattened the subject beforehand - confirm at the call sites.
RegExpMacroAssemblerIA32::Result RegExpMacroAssemblerIA32::Match(
Handle<Code> regexp_code,
Handle<String> subject,
int* offsets_vector,
int offsets_vector_length,
int previous_index) {
StringShape shape(*subject);
// Character offsets into string.
int start_offset = previous_index;
int end_offset = subject->length(shape);
if (shape.IsCons()) {
// Continue with the first part of the cons string; the offsets stay as is.
subject =
Handle<String>(String::cast(ConsString::cast(*subject)->first()));
} else if (shape.IsSliced()) {
// Continue with the slice's buffer; translate both offsets into positions
// inside that buffer.
SlicedString* slice = SlicedString::cast(*subject);
start_offset += slice->start();
end_offset += slice->start();
subject = Handle<String>(String::cast(slice->buffer()));
}
// String is now either Sequential or External
StringShape flatshape(*subject);
bool is_ascii = flatshape.IsAsciiRepresentation();
// Shift that turns a character offset into a byte offset: no shift for
// ASCII data, a shift by one (times two) otherwise.
int char_size_shift = is_ascii ? 0 : 1;
RegExpMacroAssemblerIA32::Result res;
if (flatshape.IsExternal()) {
// External string: the characters are obtained from the string's resource,
// so the offsets passed on are plain byte offsets from that data pointer.
const byte* address;
if (is_ascii) {
ExternalAsciiString* ext = ExternalAsciiString::cast(*subject);
address = reinterpret_cast<const byte*>(ext->resource()->data());
} else {
ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject);
address = reinterpret_cast<const byte*>(ext->resource()->data());
}
// NOTE(review): the last argument presumably tells the generated code
// whether matching begins at the very start of the input - confirm
// against Execute().
res = Execute(*regexp_code,
const_cast<Address*>(&address),
start_offset << char_size_shift,
end_offset << char_size_shift,
offsets_vector,
previous_index == 0);
} else { // Sequential string
ASSERT(StringShape(*subject).IsSequential());
Address char_address =
is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress()
: SeqTwoByteString::cast(*subject)->GetCharsAddress();
// Distance in bytes from the start of the string object to its first
// character; added to both offsets below.
int byte_offset = char_address - reinterpret_cast<Address>(*subject);
// The handle's location is passed instead of the raw object address.
// NOTE(review): presumably so the generated code can still find the string
// if it is moved while the regexp runs - confirm against Execute().
res = Execute(*regexp_code,
reinterpret_cast<Address*>(subject.location()),
byte_offset + (start_offset << char_size_shift),
byte_offset + (end_offset << char_size_shift),
offsets_vector,
previous_index == 0);
}
if (res == RegExpMacroAssemblerIA32::SUCCESS) {
// Capture values are relative to start_offset only.
// Negative entries are left untouched.
for (int i = 0; i < offsets_vector_length; i++) {
if (offsets_vector[i] >= 0) {
offsets_vector[i] += previous_index;
}
}
}
return res;
}
// Private methods:
......
......@@ -113,6 +113,12 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
virtual void ClearRegisters(int reg_from, int reg_to);
virtual void WriteStackPointerToRegister(int reg);
// Matches the compiled code |regexp| against |subject|, starting at
// character index |previous_index|. Match offsets are written into
// |offsets_vector|, of which |offsets_vector_length| entries are adjusted
// by |previous_index| after a successful match. Returns the Result
// produced by Execute().
static Result Match(Handle<Code> regexp,
Handle<String> subject,
int* offsets_vector,
int offsets_vector_length,
int previous_index);
static Result Execute(Code* code,
Address* input,
int start_offset,
......
This diff is collapsed.
......@@ -146,6 +146,7 @@ namespace v8 { namespace internal {
F(StringLastIndexOf, 3) \
F(StringLocaleCompare, 2) \
F(StringSlice, 3) \
F(StringReplaceRegExpWithString, 4) \
\
/* Numbers */ \
F(NumberToRadixString, 2) \
......
......@@ -237,50 +237,15 @@ function StringReplace(search, replace) {
// lastMatchArray without erroneously affecting the properties on the global
// RegExp object.
var reusableMatchInfo = [2, -1, -1, "", ""];
var reusableMatchArray = [ void 0 ];
// Helper function for regular expressions in String.prototype.replace.
function StringReplaceRegExp(subject, regexp, replace) {
// Compute an array of matches; each match is really a list of
// captures - pairs of (start, end) indexes into the subject string.
var matches;
if (regexp.global) {
matches = DoRegExpExecGlobal(regexp, subject);
if (matches.length == 0) return subject;
} else {
var lastMatchInfo = DoRegExpExec(regexp, subject, 0);
if (IS_NULL(lastMatchInfo)) return subject;
reusableMatchArray[0] = lastMatchInfo;
matches = reusableMatchArray;
}
// Determine the number of matches.
var length = matches.length;
// Build the resulting string of subject slices and replacements.
var result = new ReplaceResultBuilder(subject);
var previous = 0;
// The caller of StringReplaceRegExp must ensure that replace is not a
// function.
replace = ToString(replace);
if (%StringIndexOf(replace, "$", 0) < 0) {
for (var i = 0; i < length; i++) {
var matchInfo = matches[i];
result.addSpecialSlice(previous, matchInfo[CAPTURE0]);
result.add(replace);
previous = matchInfo[CAPTURE1]; // continue after match
}
} else {
for (var i = 0; i < length; i++) {
var matchInfo = matches[i];
result.addSpecialSlice(previous, matchInfo[CAPTURE0]);
ExpandReplacement(replace, subject, matchInfo, result);
previous = matchInfo[CAPTURE1]; // continue after match
}
}
result.addSpecialSlice(previous, subject.length);
return result.generate();
return %StringReplaceRegExpWithString(subject,
regexp,
replace,
lastMatchInfo);
};
......
// Copyright 2009 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/**
* @fileoverview Test String.prototype.replace
*/
/**
 * Asserts that subject.replace(pattern, replacement) equals result.
 * The assertion label is a textual rendering of the call being tested.
 */
function replaceTest(result, subject, pattern, replacement) {
  var label = "\"" + subject + "\".replace(";
  label = label + pattern + ", " + replacement + ")";
  var actual = subject.replace(pattern, replacement);
  assertEquals(result, actual, label);
}
// Shared subject string. Every group below runs the same six cases:
// pattern "x" or "b", given as a string, a non-global regexp and a
// global regexp.
var short = "xaxbxcx";

// Empty replacement: the match is simply removed.
replaceTest("axbxcx", short, "x", "");
replaceTest("axbxcx", short, /x/, "");
replaceTest("abc", short, /x/g, "");
replaceTest("xaxxcx", short, "b", "");
replaceTest("xaxxcx", short, /b/, "");
replaceTest("xaxxcx", short, /b/g, "");

// Replacement text without any "$" sequences.
replaceTest("[]axbxcx", short, "x", "[]");
replaceTest("[]axbxcx", short, /x/, "[]");
replaceTest("[]a[]b[]c[]", short, /x/g, "[]");
replaceTest("xax[]xcx", short, "b", "[]");
replaceTest("xax[]xcx", short, /b/, "[]");
replaceTest("xax[]xcx", short, /b/g, "[]");

// "$$" produces a single literal "$".
replaceTest("[$]axbxcx", short, "x", "[$$]");
replaceTest("[$]axbxcx", short, /x/, "[$$]");
replaceTest("[$]a[$]b[$]c[$]", short, /x/g, "[$$]");
replaceTest("xax[$]xcx", short, "b", "[$$]");
replaceTest("xax[$]xcx", short, /b/, "[$$]");
replaceTest("xax[$]xcx", short, /b/g, "[$$]");

// "$`" produces the part of the subject that precedes the match.
replaceTest("[]axbxcx", short, "x", "[$`]");
replaceTest("[]axbxcx", short, /x/, "[$`]");
replaceTest("[]a[xa]b[xaxb]c[xaxbxc]", short, /x/g, "[$`]");
replaceTest("xax[xax]xcx", short, "b", "[$`]");
replaceTest("xax[xax]xcx", short, /b/, "[$`]");
replaceTest("xax[xax]xcx", short, /b/g, "[$`]");

// "$&" produces the matched substring itself.
replaceTest("[x]axbxcx", short, "x", "[$&]");
replaceTest("[x]axbxcx", short, /x/, "[$&]");
replaceTest("[x]a[x]b[x]c[x]", short, /x/g, "[$&]");
replaceTest("xax[b]xcx", short, "b", "[$&]");
replaceTest("xax[b]xcx", short, /b/, "[$&]");
replaceTest("xax[b]xcx", short, /b/g, "[$&]");

// "$'" produces the part of the subject that follows the match.
replaceTest("[axbxcx]axbxcx", short, "x", "[$']");
replaceTest("[axbxcx]axbxcx", short, /x/, "[$']");
replaceTest("[axbxcx]a[bxcx]b[cx]c[]", short, /x/g, "[$']");
replaceTest("xax[xcx]xcx", short, "b", "[$']");
replaceTest("xax[xcx]xcx", short, /b/, "[$']");
replaceTest("xax[xcx]xcx", short, /b/g, "[$']");

// "$1" stays literal when the pattern has no capture group, and is
// replaced by the (here empty) capture when the pattern has one.
replaceTest("[$1]axbxcx", short, "x", "[$1]");
replaceTest("[$1]axbxcx", short, /x/, "[$1]");
replaceTest("[]axbxcx", short, /x()/, "[$1]");
replaceTest("[$1]a[$1]b[$1]c[$1]", short, /x/g, "[$1]");
replaceTest("[]a[]b[]c[]", short, /x()/g, "[$1]");
replaceTest("xax[$1]xcx", short, "b", "[$1]");
replaceTest("xax[$1]xcx", short, /b/, "[$1]");
replaceTest("xax[]xcx", short, /b()/, "[$1]");
replaceTest("xax[$1]xcx", short, /b/g, "[$1]");
replaceTest("xax[]xcx", short, /b()/g, "[$1]");

// Mixed "$$" and one-/two-digit capture references with two capture groups.
// References that do not name an existing group ($002, $3, $03) are kept
// literally, and a trailing digit that would overshoot ($11, $21, $020) is
// treated as ordinary text after the shorter reference.
replaceTest("[$$$1$$a1abb1bb0$002$3$03][$$$1$$b1bcc1cc0$002$3$03]c",
"abc", /(.)(?=(.))/g, "[$$$$$$1$$$$$11$01$2$21$02$020$002$3$03]");
// Replace with functions.
// Each replacer checks the arguments it is called with and returns the
// current value of a running counter. The resulting string therefore shows
// the order of the calls, and ctr ends up holding the number of matches.

// String pattern: only the first occurrence is replaced.
var ctr = 0;
replaceTest("0axbxcx", short, "x", function r(m, i, s) {
  assertEquals(3, arguments.length, "replace('x',func) func-args");
  assertEquals("x", m, "replace('x',func(m,..))");
  assertEquals(0, i, "replace('x',func(..,i,..))");
  assertEquals(short, s, "replace('x',func(..,s))");
  return String(ctr++);
});
assertEquals(1, ctr, "replace('x',func) num-match");

// Non-global regexp: only the first match is replaced.
ctr = 0;
replaceTest("0axbxcx", short, /x/, function r(m, i, s) {
  assertEquals(3, arguments.length, "replace(/x/,func) func-args");
  assertEquals("x", m, "replace(/x/,func(m,..))");
  assertEquals(0, i, "replace(/x/,func(..,i,..))");
  assertEquals(short, s, "replace(/x/,func(..,s))");
  return String(ctr++);
});
assertEquals(1, ctr, "replace(/x/,func) num-match");

// Global regexp: all four matches are replaced; they sit at every other
// index, hence the expected position ctr * 2.
ctr = 0;
replaceTest("0a1b2c3", short, /x/g, function r(m, i, s) {
  assertEquals(3, arguments.length, "replace(/x/g,func) func-args");
  assertEquals("x", m, "replace(/x/g,func(m,..))");
  assertEquals(ctr * 2, i, "replace(/x/g,func(..,i,..))");
  assertEquals(short, s, "replace(/x/g,func(..,s))");
  return String(ctr++);
});
assertEquals(4, ctr, "replace(/x/g,func) num-match");

// Global regexp with two captures: the captures are passed between the
// match and the index. The final "x" has no following character, so the
// lookahead fails there and only three matches are replaced.
ctr = 0;
replaceTest("0a1b2cx", short, /(x)(?=(.))/g, function r(m, c1, c2, i, s) {
  assertEquals(5, arguments.length, "replace(/(x)(?=(.))/g,func) func-args");
  assertEquals("x", m, "replace(/(x)(?=(.))/g,func(m,..))");
  assertEquals("x", c1, "replace(/(x)(?=(.))/g,func(..,c1,..))");
  assertEquals(["a","b","c"][ctr], c2, "replace(/(x)(?=(.))/g,func(..,c2,..))");
  assertEquals(ctr * 2, i, "replace(/(x)(?=(.))/g,func(..,i,..))");
  assertEquals(short, s, "replace(/(x)(?=(.))/g,func(..,s))");
  return String(ctr++);
});
// The label names the pattern actually used by this test.
assertEquals(3, ctr, "replace(/(x)(?=(.))/g,func) num-match");
// Test special cases of replacement parts longer than 1<<11.
// Start from a 5-character seed and double it eleven times.
var longstring = "xyzzy";
longstring = longstring + longstring;
longstring = longstring + longstring;
longstring = longstring + longstring;
longstring = longstring + longstring;
longstring = longstring + longstring;
longstring = longstring + longstring;
longstring = longstring + longstring;
longstring = longstring + longstring;
longstring = longstring + longstring;
longstring = longstring + longstring;
longstring = longstring + longstring;
// longstring.length == 5 << 11
// The capture $1 is all of longstring, so "$1$1" must expand to two copies.
replaceTest(longstring + longstring,
"<" + longstring + ">", /<(.*)>/g, "$1$1");
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment