Commit e2af4529 authored by lrn@chromium.org

String.replace implemented in C++.


git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@1506 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 30af089b
......@@ -600,7 +600,6 @@ Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,
if (previous_index > subject->length() || previous_index < 0) {
// Per ECMA-262 15.10.6.2, if the previous index is greater than the
// string length, there is no match.
matches = Factory::null_value();
return result;
} else {
#ifdef DEBUG
......@@ -666,81 +665,28 @@ Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> regexp,
bool rc;
Handle<String> original_subject = subject;
if (FLAG_regexp_native) {
#ifndef ARM
Handle<Code> code(IrregexpNativeCode(*regexp, is_ascii));
// Character offsets into string.
int start_offset = previous_index;
int end_offset = subject->length(shape);
if (shape.IsCons()) {
subject = Handle<String>(ConsString::cast(*subject)->first());
} else if (shape.IsSliced()) {
SlicedString* slice = SlicedString::cast(*subject);
start_offset += slice->start();
end_offset += slice->start();
subject = Handle<String>(slice->buffer());
}
// String is now either Sequential or External
StringShape flatshape(*subject);
bool is_ascii = flatshape.IsAsciiRepresentation();
int char_size_shift = is_ascii ? 0 : 1;
RegExpMacroAssemblerIA32::Result res;
if (flatshape.IsExternal()) {
const byte* address;
if (is_ascii) {
ExternalAsciiString* ext = ExternalAsciiString::cast(*subject);
address = reinterpret_cast<const byte*>(ext->resource()->data());
} else {
ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject);
address = reinterpret_cast<const byte*>(ext->resource()->data());
}
res = RegExpMacroAssemblerIA32::Execute(
*code,
const_cast<Address*>(&address),
start_offset << char_size_shift,
end_offset << char_size_shift,
offsets_vector,
previous_index == 0);
} else { // Sequential string
ASSERT(StringShape(*subject).IsSequential());
Address char_address =
is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress()
: SeqTwoByteString::cast(*subject)->GetCharsAddress();
int byte_offset = char_address - reinterpret_cast<Address>(*subject);
res = RegExpMacroAssemblerIA32::Execute(
*code,
reinterpret_cast<Address*>(subject.location()),
byte_offset + (start_offset << char_size_shift),
byte_offset + (end_offset << char_size_shift),
offsets_vector,
previous_index == 0);
}
if (UseNativeRegexp()) {
#ifdef ARM
UNREACHABLE();
#else
Handle<Code> code(RegExpImpl::IrregexpNativeCode(*regexp, is_ascii));
RegExpMacroAssemblerIA32::Result res =
RegExpMacroAssemblerIA32::Match(code,
subject,
offsets_vector,
offsets_vector_length,
previous_index);
if (res == RegExpMacroAssemblerIA32::EXCEPTION) {
ASSERT(Top::has_pending_exception());
return Handle<Object>::null();
}
rc = (res == RegExpMacroAssemblerIA32::SUCCESS);
ASSERT(res == RegExpMacroAssemblerIA32::SUCCESS
|| res == RegExpMacroAssemblerIA32::FAILURE);
if (rc) {
// Capture values are relative to start_offset only.
for (int i = 0; i < offsets_vector_length; i++) {
if (offsets_vector[i] >= 0) {
offsets_vector[i] += previous_index;
}
}
}
} else {
#else
// Unimplemented on ARM, fall through to bytecode.
}
{
rc = (res == RegExpMacroAssemblerIA32::SUCCESS);
#endif
} else {
for (int i = number_of_capture_registers - 1; i >= 0; i--) {
offsets_vector[i] = -1;
}
......@@ -757,6 +703,9 @@ Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> regexp,
}
FixedArray* array = last_match_info->elements();
// Clear previous input/string values to avoid potential memory leak.
SetLastSubject(array, Heap::empty_string());
SetLastInput(array, Heap::empty_string());
ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead);
// The captures come in (start, end+1) pairs.
for (int i = 0; i < number_of_capture_registers; i += 2) {
......@@ -4716,9 +4665,9 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(RegExpCompileData* data,
NodeInfo info = *node->info();
if (FLAG_regexp_native) {
if (RegExpImpl::UseNativeRegexp()) {
#ifdef ARM
// Unimplemented, fall-through to bytecode implementation.
UNREACHABLE();
#else // IA32
RegExpMacroAssemblerIA32::Mode mode;
if (is_ascii) {
......
......@@ -36,6 +36,13 @@ class RegExpMacroAssembler;
class RegExpImpl {
public:
// Tells whether regexps should be run as native code. ARM builds always
// answer false; every other build follows the FLAG_regexp_native flag.
static inline bool UseNativeRegexp() {
#ifndef ARM
  return FLAG_regexp_native;
#else
  return false;
#endif
}
// Creates a regular expression literal in the old space.
// This function calls the garbage collector if necessary.
static Handle<Object> CreateRegExpLiteral(Handle<JSFunction> constructor,
......@@ -117,6 +124,7 @@ class RegExpImpl {
static const int kFirstCapture = 1;
static const int kLastMatchOverhead = 3;
// Used to access the lastMatchInfo array.
// Reads capture register |index| from |array|. Registers are stored as Smis
// starting at slot kFirstCapture.
static int GetCapture(FixedArray* array, int index) {
  Object* slot = array->get(index + kFirstCapture);
  return Smi::cast(slot)->value();
}
......@@ -139,12 +147,11 @@ class RegExpImpl {
array->set(index + kFirstCapture, Smi::FromInt(to));
}
private:
static String* last_ascii_string_;
static String* two_byte_cached_string_;
static bool EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii);
// Returns the Smi value stored in slot kLastCaptureCount of |array|.
static int GetLastCaptureCount(FixedArray* array) {
  Object* count = array->get(kLastCaptureCount);
  return Smi::cast(count)->value();
}
// For acting on the JSRegExp data FixedArray.
static int IrregexpMaxRegisterCount(FixedArray* re);
static void SetIrregexpMaxRegisterCount(FixedArray* re, int value);
static int IrregexpNumberOfCaptures(FixedArray* re);
......@@ -152,6 +159,13 @@ class RegExpImpl {
static ByteArray* IrregexpByteCode(FixedArray* re, bool is_ascii);
static Code* IrregexpNativeCode(FixedArray* re, bool is_ascii);
private:
static String* last_ascii_string_;
static String* two_byte_cached_string_;
static bool EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii);
// On a successful match, the result is a JSArray containing
// captured positions. On a failure, the result is the null value.
// Returns an empty handle in case of an exception.
......@@ -171,10 +185,6 @@ class RegExpImpl {
int character_position,
int utf8_position);
// Used to access the lastMatchInfo array.
// Returns the Smi value stored in slot kLastCaptureCount of |array|.
static int GetLastCaptureCount(FixedArray* array) {
return Smi::cast(array->get(kLastCaptureCount))->value();
}
// A one element cache of the last utf8_subject string and its length. The
// subject JS String object is cached in the heap. We also cache a
// translation between position and utf8 position.
......
......@@ -30,6 +30,7 @@
#include "disassembler.h"
#include "disasm.h"
#include "macro-assembler.h"
#include "jsregexp.h"
namespace v8 { namespace internal {
......@@ -696,11 +697,8 @@ void JSRegExp::JSRegExpVerify() {
break;
}
case JSRegExp::IRREGEXP: {
bool is_native = FLAG_regexp_native;
#ifdef ARM
// No native regexp on arm yet.
is_native = false;
#endif
bool is_native = RegExpImpl::UseNativeRegexp();
FixedArray* arr = FixedArray::cast(data());
Object* ascii_data = arr->get(JSRegExp::kIrregexpASCIICodeIndex);
ASSERT(ascii_data->IsTheHole()
......
......@@ -2316,6 +2316,19 @@ JSRegExp::Type JSRegExp::TypeTag() {
}
int JSRegExp::CaptureCount() {
switch (TypeTag()) {
case ATOM:
return 0;
case IRREGEXP:
return Smi::cast(DataAt(kIrregexpCaptureCountIndex))->value();
default:
UNREACHABLE();
return -1;
}
}
JSRegExp::Flags JSRegExp::GetFlags() {
ASSERT(this->data()->IsFixedArray());
Object* data = this->data();
......
......@@ -3003,6 +3003,7 @@ class JSRegExp: public JSObject {
DECL_ACCESSORS(data, Object)
inline Type TypeTag();
inline int CaptureCount();
inline Flags GetFlags();
inline String* Pattern();
inline Object* DataAt(int index);
......
......@@ -200,9 +200,9 @@ function RegExpExec(string) {
// Section 15.10.6.3 doesn't actually make sense, but the intention seems to be
// that test is defined in terms of String.prototype.exec even if the method is
// called on a non-RegExp object. However, it probably means the original
// value of String.prototype.exec, which is what everybody else implements.
// that test is defined in terms of String.prototype.exec. However, it probably
// means the original value of String.prototype.exec, which is what everybody
// else implements.
function RegExpTest(string) {
if (!IS_REGEXP(this)) {
throw MakeTypeError('method_called_on_incompatible',
......
......@@ -978,6 +978,79 @@ void RegExpMacroAssemblerIA32::WriteStackPointerToRegister(int reg) {
}
// Runs the native regexp code |regexp_code| on |subject|, starting at
// character index |previous_index|.
//
// A cons string is replaced by its first part and a sliced string by its
// underlying buffer (with the offsets shifted by the slice start), so the
// string handed to Execute() is either sequential or external.
//
// On SUCCESS every non-negative entry of |offsets_vector| is shifted by
// |previous_index|. The result of Execute() is returned unchanged.
RegExpMacroAssemblerIA32::Result RegExpMacroAssemblerIA32::Match(
    Handle<Code> regexp_code,
    Handle<String> subject,
    int* offsets_vector,
    int offsets_vector_length,
    int previous_index) {
  StringShape subject_shape(*subject);

  // Character offsets into the underlying string.
  int first_char = previous_index;
  int limit_char = subject->length(subject_shape);

  if (subject_shape.IsCons()) {
    ConsString* cons = ConsString::cast(*subject);
    subject = Handle<String>(String::cast(cons->first()));
  } else if (subject_shape.IsSliced()) {
    SlicedString* sliced = SlicedString::cast(*subject);
    first_char += sliced->start();
    limit_char += sliced->start();
    subject = Handle<String>(String::cast(sliced->buffer()));
  }

  // The string is now either sequential or external.
  StringShape flat_shape(*subject);
  const bool is_ascii = flat_shape.IsAsciiRepresentation();
  // Two-byte strings use two bytes per character, hence a shift of one.
  const int char_size_shift = is_ascii ? 0 : 1;
  const int first_byte = first_char << char_size_shift;
  const int limit_byte = limit_char << char_size_shift;
  const bool at_start = (previous_index == 0);

  RegExpMacroAssemblerIA32::Result res;
  if (!flat_shape.IsExternal()) {
    // Sequential string: offsets are relative to the string object itself.
    ASSERT(StringShape(*subject).IsSequential());
    Address chars =
        is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress()
                 : SeqTwoByteString::cast(*subject)->GetCharsAddress();
    const int header_bytes = chars - reinterpret_cast<Address>(*subject);
    res = Execute(*regexp_code,
                  reinterpret_cast<Address*>(subject.location()),
                  header_bytes + first_byte,
                  header_bytes + limit_byte,
                  offsets_vector,
                  at_start);
  } else {
    // External string: offsets are relative to the resource's character data.
    const byte* data;
    if (is_ascii) {
      ExternalAsciiString* external = ExternalAsciiString::cast(*subject);
      data = reinterpret_cast<const byte*>(external->resource()->data());
    } else {
      ExternalTwoByteString* external = ExternalTwoByteString::cast(*subject);
      data = reinterpret_cast<const byte*>(external->resource()->data());
    }
    res = Execute(*regexp_code,
                  const_cast<Address*>(&data),
                  first_byte,
                  limit_byte,
                  offsets_vector,
                  at_start);
  }

  if (res != RegExpMacroAssemblerIA32::SUCCESS) return res;

  // Capture values are relative to start_offset only.
  for (int i = 0; i < offsets_vector_length; i++) {
    if (offsets_vector[i] >= 0) offsets_vector[i] += previous_index;
  }
  return res;
}
// Private methods:
......
......@@ -113,6 +113,12 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
virtual void ClearRegisters(int reg_from, int reg_to);
virtual void WriteStackPointerToRegister(int reg);
static Result Match(Handle<Code> regexp,
Handle<String> subject,
int* offsets_vector,
int offsets_vector_length,
int previous_index);
static Result Execute(Code* code,
Address* input,
int start_offset,
......
......@@ -1180,6 +1180,527 @@ static Object* Runtime_CharFromCode(Arguments args) {
return Heap::empty_string();
}
// Forward declarations.

// Bit layout of a subject slice encoded in a single integer: the low
// kStringBuilderConcatHelperLengthBits bits hold the slice length and the
// following kStringBuilderConcatHelperPositionBits bits hold its start
// position. The BitField typedefs below are derived from these constants so
// that the widths are stated in exactly one place.
static const int kStringBuilderConcatHelperLengthBits = 11;
static const int kStringBuilderConcatHelperPositionBits = 19;

template <typename schar>
static inline void StringBuilderConcatHelper(String*,
                                             StringShape,
                                             schar*,
                                             FixedArray*,
                                             int);

typedef BitField<int, 0, kStringBuilderConcatHelperLengthBits>
    StringBuilderSubstringLength;
typedef BitField<int,
                 kStringBuilderConcatHelperLengthBits,
                 kStringBuilderConcatHelperPositionBits>
    StringBuilderSubstringPosition;
// Collects the pieces of a String.replace result and joins them into one
// sequential string.
//
// A piece is either a slice of the subject string or an arbitrary string.
// Short subject slices are stored as a single Smi that packs length and start
// position (see StringBuilderSubstringLength/Position); everything else is
// stored as a String object.
//
// GC safety: growing the parts array allocates, and an allocation made
// through Factory may trigger a garbage collection that moves heap objects.
// Capacity is therefore always ensured *before* a handle is dereferenced to
// the raw pointer that gets stored.
class ReplacementStringBuilder {
 public:
  // |subject| is the string that slices refer to. |estimated_part_count| is
  // the initial capacity of the parts array and must be positive.
  ReplacementStringBuilder(Handle<String> subject, int estimated_part_count)
      : subject_(subject),
        parts_(Factory::NewFixedArray(estimated_part_count)),
        part_count_(0),
        character_count_(0),
        is_ascii_(StringShape(*subject).IsAsciiRepresentation()) {
    // Require a non-zero initial size. Ensures that doubling the size to
    // extend the array will work.
    ASSERT(estimated_part_count > 0);
  }

  // Appends the subject characters in [from, to). Empty ranges are ignored.
  void AddSubjectSlice(int from, int to) {
    ASSERT(from >= 0);
    int length = to - from;
    ASSERT(length >= 0);
    if (length > 0) {
      // Can we encode the slice in 11 bits for length and 19 bits for
      // start position - as used by StringBuilderConcatHelper?
      if (StringBuilderSubstringLength::is_valid(length) &&
          StringBuilderSubstringPosition::is_valid(from)) {
        int encoded_slice = StringBuilderSubstringLength::encode(length) |
                            StringBuilderSubstringPosition::encode(from);
        EnsureCapacity();
        AddElement(Smi::FromInt(encoded_slice));
      } else {
        Handle<String> slice = Factory::NewStringSlice(subject_, from, to);
        // Grow first: growing may move the slice, so dereference afterwards.
        EnsureCapacity();
        AddElement(*slice);
      }
      IncrementCharacterCount(length);
    }
  }

  // Appends |string|. Empty strings are ignored. A string that is not in
  // ASCII representation makes the final result a two-byte string.
  void AddString(Handle<String> string) {
    StringShape shape(*string);
    int length = string->length(shape);
    if (length > 0) {
      // Grow first: growing may move the string, so dereference afterwards.
      EnsureCapacity();
      AddElement(*string);
      if (!shape.IsAsciiRepresentation()) {
        is_ascii_ = false;
      }
      IncrementCharacterCount(length);
    }
  }

  // Joins all collected parts into a freshly allocated sequential string.
  // Returns the empty string when nothing was added.
  Handle<String> ToString() {
    if (part_count_ == 0) {
      return Factory::empty_string();
    }

    Handle<String> joined_string;
    if (is_ascii_) {
      joined_string = NewRawAsciiString(character_count_);
      // Raw pointers are used from here on; nothing below may allocate.
      AssertNoAllocation no_alloc;
      SeqAsciiString* seq = SeqAsciiString::cast(*joined_string);
      char* char_buffer = seq->GetChars();
      StringBuilderConcatHelper(*subject_,
                                StringShape(*subject_),
                                char_buffer,
                                *parts_,
                                part_count_);
    } else {
      // Non-ASCII.
      joined_string = NewRawTwoByteString(character_count_);
      // Raw pointers are used from here on; nothing below may allocate.
      AssertNoAllocation no_alloc;
      SeqTwoByteString* seq = SeqTwoByteString::cast(*joined_string);
      uc16* char_buffer = seq->GetChars();
      StringBuilderConcatHelper(*subject_,
                                StringShape(*subject_),
                                char_buffer,
                                *parts_,
                                part_count_);
    }
    return joined_string;
  }

  // Adds |by| to the running result length. Reports a fatal out-of-memory
  // error if the total would exceed Smi::kMaxValue.
  void IncrementCharacterCount(int by) {
    if (character_count_ > Smi::kMaxValue - by) {
      V8::FatalProcessOutOfMemory("String.replace result too large.");
    }
    character_count_ += by;
  }

 private:
  Handle<String> NewRawAsciiString(int size) {
    CALL_HEAP_FUNCTION(Heap::AllocateRawAsciiString(size), String);
  }

  Handle<String> NewRawTwoByteString(int size) {
    CALL_HEAP_FUNCTION(Heap::AllocateRawTwoByteString(size), String);
  }

  // Makes room for one more element, doubling the parts array when it is
  // full. This may allocate, so no raw heap pointer obtained before the call
  // may be used after it.
  void EnsureCapacity() {
    if (parts_->length() > part_count_) return;
    // Guard against a zero-length array, for which doubling would not grow.
    int new_length = part_count_ > 0 ? part_count_ * 2 : 1;
    Handle<FixedArray> extended_array = Factory::NewFixedArray(new_length);
    parts_->CopyTo(0, *extended_array, 0, part_count_);
    parts_ = extended_array;
  }

  // Stores |element| in the next free slot. Never allocates; the caller must
  // have called EnsureCapacity() before producing the raw pointer.
  void AddElement(Object* element) {
    ASSERT(element->IsSmi() || element->IsString());
    ASSERT(parts_->length() > part_count_);
    parts_->set(part_count_, element);
    part_count_++;
  }

  Handle<String> subject_;
  Handle<FixedArray> parts_;
  int part_count_;
  int character_count_;
  bool is_ascii_;
};
class CompiledReplacement {
public:
CompiledReplacement()
: parts_(1), replacement_substrings_(0) {}
void Compile(Handle<String> replacement,
int capture_count,
int subject_length);
void Apply(ReplacementStringBuilder* builder,
int match_from,
int match_to,
Handle<JSArray> last_match_info);
// Number of distinct parts of the replacement pattern.
int parts() {
return parts_.length();
}
private:
enum PartType {
SUBJECT_PREFIX = 1,
SUBJECT_SUFFIX,
SUBJECT_CAPTURE,
REPLACEMENT_SUBSTRING,
REPLACEMENT_STRING,
NUMBER_OF_PART_TYPES
};
struct ReplacementPart {
static inline ReplacementPart SubjectMatch() {
return ReplacementPart(SUBJECT_CAPTURE, 0);
}
static inline ReplacementPart SubjectCapture(int capture_index) {
return ReplacementPart(SUBJECT_CAPTURE, capture_index);
}
static inline ReplacementPart SubjectPrefix() {
return ReplacementPart(SUBJECT_PREFIX, 0);
}
static inline ReplacementPart SubjectSuffix(int subject_length) {
return ReplacementPart(SUBJECT_SUFFIX, subject_length);
}
static inline ReplacementPart ReplacementString() {
return ReplacementPart(REPLACEMENT_STRING, 0);
}
static inline ReplacementPart ReplacementSubString(int from, int to) {
ASSERT(from >= 0);
ASSERT(to > from);
return ReplacementPart(-from, to);
}
// If tag <= 0 then it is the negation of a start index of a substring of
// the replacement pattern, otherwise it's a value from PartType.
ReplacementPart(int tag, int data)
: tag(tag), data(data) {
// Must be non-positive or a PartType value.
ASSERT(tag < NUMBER_OF_PART_TYPES);
}
// Either a value of PartType or a non-positive number that is
// the negation of an index into the replacement string.
int tag;
// The data value's interpretation depends on the value of tag:
// tag == SUBJECT_PREFIX ||
// tag == SUBJECT_SUFFIX: data is unused.
// tag == SUBJECT_CAPTURE: data is the number of the capture.
// tag == REPLACEMENT_SUBSTRING ||
// tag == REPLACEMENT_STRING: data is index into array of substrings
// of the replacement string.
// tag <= 0: Temporary representation of the substring of the replacement
// string ranging over -tag .. data.
// Is replaced by REPLACEMENT_{SUB,}STRING when we create the
// substring objects.
int data;
};
template<typename Char>
static void ParseReplacementPattern(ZoneList<ReplacementPart>* parts,
Vector<Char> characters,
int capture_count,
int subject_length) {
int length = characters.length();
int last = 0;
for (int i = 0; i < length; i++) {
Char c = characters[i];
if (c == '$') {
int next_index = i + 1;
if (next_index == length) { // No next character!
break;
}
Char c2 = characters[next_index];
switch (c2) {
case '$':
if (i > last) {
// There is a substring before. Include the first "$".
parts->Add(ReplacementPart::ReplacementSubString(last, next_index));
last = next_index + 1; // Continue after the second "$".
} else {
// Let the next substring start with the second "$".
last = next_index;
}
i = next_index;
break;
case '`':
if (i > last) {
parts->Add(ReplacementPart::ReplacementSubString(last, i));
}
parts->Add(ReplacementPart::SubjectPrefix());
i = next_index;
last = i + 1;
break;
case '\'':
if (i > last) {
parts->Add(ReplacementPart::ReplacementSubString(last, i));
}
parts->Add(ReplacementPart::SubjectSuffix(subject_length));
i = next_index;
last = i + 1;
break;
case '&':
if (i > last) {
parts->Add(ReplacementPart::ReplacementSubString(last, i));
}
parts->Add(ReplacementPart::SubjectMatch());
i = next_index;
last = i + 1;
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9': {
int capture_ref = c2 - '0';
if (capture_ref > capture_count) {
i = next_index;
continue;
}
int second_digit_index = next_index + 1;
if (second_digit_index < length) {
// Peek ahead to see if we have two digits.
Char c3 = characters[second_digit_index];
if ('0' <= c3 && c3 <= '9') { // Double digits.
int double_digit_ref = capture_ref * 10 + c3 - '0';
if (double_digit_ref <= capture_count) {
next_index = second_digit_index;
capture_ref = double_digit_ref;
}
}
}
if (capture_ref > 0) {
if (i > last) {
parts->Add(ReplacementPart::ReplacementSubString(last, i));
}
parts->Add(ReplacementPart::SubjectCapture(capture_ref));
last = next_index + 1;
}
i = next_index;
break;
}
default:
i = next_index;
break;
}
}
}
if (length > last) {
if (last == 0) {
parts->Add(ReplacementPart::ReplacementString());
} else {
parts->Add(ReplacementPart::ReplacementSubString(last, length));
}
}
}
ZoneList<ReplacementPart> parts_;
ZoneList<Handle<String> > replacement_substrings_;
};
// Parses the flat string |replacement| into parts_, then turns every literal
// piece into a String object stored in replacement_substrings_. Afterwards
// the data field of each literal part is its index into that list.
void CompiledReplacement::Compile(Handle<String> replacement,
                                  int capture_count,
                                  int subject_length) {
  StringShape shape(*replacement);
  ASSERT(replacement->IsFlat(shape));
  if (!shape.IsAsciiRepresentation()) {
    ASSERT(shape.IsTwoByteRepresentation());
    AssertNoAllocation no_alloc;
    ParseReplacementPattern(&parts_,
                            replacement->ToUC16Vector(),
                            capture_count,
                            subject_length);
  } else {
    AssertNoAllocation no_alloc;
    ParseReplacementPattern(&parts_,
                            replacement->ToAsciiVector(),
                            capture_count,
                            subject_length);
  }

  // Find substrings of replacement string and create them as String objects.
  int next_substring = 0;
  const int part_count = parts_.length();
  for (int i = 0; i < part_count; i++) {
    ReplacementPart& part = parts_[i];
    if (part.tag == REPLACEMENT_STRING) {
      // The pattern is used in full.
      replacement_substrings_.Add(replacement);
      part.data = next_substring++;
    } else if (part.tag <= 0) {
      // A replacement string slice covering -tag .. data.
      Handle<String> slice =
          Factory::NewStringSlice(replacement, -part.tag, part.data);
      replacement_substrings_.Add(slice);
      part.tag = REPLACEMENT_SUBSTRING;
      part.data = next_substring++;
    }
  }
}
// Appends the expansion of every compiled part to |builder| for the match
// covering [match_from, match_to). Capture positions are read from
// |last_match_info|.
void CompiledReplacement::Apply(ReplacementStringBuilder* builder,
                                int match_from,
                                int match_to,
                                Handle<JSArray> last_match_info) {
  const int part_count = parts_.length();
  for (int i = 0; i < part_count; i++) {
    const ReplacementPart part = parts_[i];
    if (part.tag == SUBJECT_PREFIX) {
      builder->AddSubjectSlice(0, match_from);
    } else if (part.tag == SUBJECT_SUFFIX) {
      // data holds the subject length for suffix parts.
      builder->AddSubjectSlice(match_to, part.data);
    } else if (part.tag == SUBJECT_CAPTURE) {
      const int capture = part.data;
      // Re-read the elements on every use; the raw pointer is not kept
      // across the builder call below.
      FixedArray* match_info = last_match_info->elements();
      const int from = RegExpImpl::GetCapture(match_info, capture * 2);
      const int to = RegExpImpl::GetCapture(match_info, capture * 2 + 1);
      // Skip captures that did not participate or matched nothing.
      if (from >= 0 && to > from) {
        builder->AddSubjectSlice(from, to);
      }
    } else if (part.tag == REPLACEMENT_SUBSTRING ||
               part.tag == REPLACEMENT_STRING) {
      builder->AddString(replacement_substrings_[part.data]);
    } else {
      UNREACHABLE();
    }
  }
}
// Replaces the match (or, for global regexps, every match) of |regexp| in
// |subject| by the expansion of |replacement|. Both strings must be flat.
// Returns |subject| itself when nothing matches, the new string otherwise,
// and Failure::Exception() if executing the regexp yields an empty handle.
static Object* StringReplaceRegExpWithString(String* subject,
                                             JSRegExp* regexp,
                                             String* replacement,
                                             JSArray* last_match_info) {
  ASSERT(subject->IsFlat(StringShape(subject)));
  ASSERT(replacement->IsFlat(StringShape(replacement)));

  HandleScope handles;

  int length = subject->length();
  Handle<String> subject_handle(subject);
  Handle<JSRegExp> regexp_handle(regexp);
  Handle<String> replacement_handle(replacement);
  Handle<JSArray> last_match_info_handle(last_match_info);

  Handle<Object> match = RegExpImpl::Exec(regexp_handle,
                                          subject_handle,
                                          0,
                                          last_match_info_handle);
  if (match.is_null()) return Failure::Exception();
  if (match->IsNull()) return *subject_handle;

  int capture_count = regexp_handle->CaptureCount();

  // CompiledReplacement uses zone allocation.
  ZoneScope zone(DELETE_ON_EXIT);
  CompiledReplacement compiled_replacement;
  compiled_replacement.Compile(replacement_handle,
                               capture_count,
                               length);

  const bool is_global = regexp_handle->GetFlags().is_global();

  // Guessing the number of parts that the final result string is built
  // from. Global regexps can match any number of times, so we guess
  // conservatively.
  const int parts_per_match = compiled_replacement.parts() + 1;
  const int expected_parts = parts_per_match * (is_global ? 4 : 1) + 1;
  ReplacementStringBuilder builder(subject_handle, expected_parts);

  // Index of end of last match.
  int prev = 0;

  while (true) {
    ASSERT(last_match_info_handle->HasFastElements());
    FixedArray* match_info_array = last_match_info_handle->elements();

    ASSERT_EQ(capture_count * 2 + 2,
              RegExpImpl::GetLastCaptureCount(match_info_array));
    const int start = RegExpImpl::GetCapture(match_info_array, 0);
    const int end = RegExpImpl::GetCapture(match_info_array, 1);

    // Copy the unmatched text between the previous match and this one.
    if (prev < start) {
      builder.AddSubjectSlice(prev, start);
    }
    compiled_replacement.Apply(&builder,
                               start,
                               end,
                               last_match_info_handle);
    prev = end;

    // Only continue checking for global regexps.
    if (!is_global) break;

    // Continue from where the match ended, unless it was an empty match.
    int next = end;
    if (start == end) {
      next = end + 1;
      if (next > length) break;
    }

    match = RegExpImpl::Exec(regexp_handle,
                             subject_handle,
                             next,
                             last_match_info_handle);
    if (match.is_null()) return Failure::Exception();
    if (match->IsNull()) break;
  }

  // Copy whatever follows the last match.
  if (prev < length) {
    builder.AddSubjectSlice(prev, length);
  }

  return *(builder.ToString());
}
// Runtime entry point. Arguments: subject string, regexp, replacement string
// and the last-match-info array. Flattens both strings (returning the failure
// object if flattening fails) and hands over to
// StringReplaceRegExpWithString.
static Object* Runtime_StringReplaceRegExpWithString(Arguments args) {
  ASSERT(args.length() == 4);

  CONVERT_CHECKED(String, subject, args[0]);
  StringShape subject_shape(subject);
  if (!subject->IsFlat(subject_shape)) {
    Object* flattened = subject->TryFlatten(subject_shape);
    if (flattened->IsFailure()) return flattened;
    subject = String::cast(flattened);
  }

  CONVERT_CHECKED(String, replacement, args[2]);
  StringShape replacement_shape(replacement);
  if (!replacement->IsFlat(replacement_shape)) {
    Object* flattened = replacement->TryFlatten(replacement_shape);
    if (flattened->IsFailure()) return flattened;
    replacement = String::cast(flattened);
  }

  CONVERT_CHECKED(JSRegExp, regexp, args[1]);
  CONVERT_CHECKED(JSArray, last_match_info, args[3]);

  ASSERT(last_match_info->HasFastElements());

  return StringReplaceRegExpWithString(subject,
                                       regexp,
                                       replacement,
                                       last_match_info);
}
// Cap on the maximal shift in the Boyer-Moore implementation. By setting a
// limit, we can fix the size of tables.
......@@ -2922,9 +3443,9 @@ static inline void StringBuilderConcatHelper(String* special,
for (int i = 0; i < array_length; i++) {
Object* element = fixed_array->get(i);
if (element->IsSmi()) {
int len = Smi::cast(element)->value();
int pos = len >> 11;
len &= 0x7ff;
int encoded_slice = Smi::cast(element)->value();
int pos = StringBuilderSubstringPosition::decode(encoded_slice);
int len = StringBuilderSubstringLength::decode(encoded_slice);
String::WriteToFlat(special,
special_shape,
sink + position,
......
......@@ -146,6 +146,7 @@ namespace v8 { namespace internal {
F(StringLastIndexOf, 3) \
F(StringLocaleCompare, 2) \
F(StringSlice, 3) \
F(StringReplaceRegExpWithString, 4) \
\
/* Numbers */ \
F(NumberToRadixString, 2) \
......
......@@ -237,50 +237,15 @@ function StringReplace(search, replace) {
// lastMatchArray without erroneously affecting the properties on the global
// RegExp object.
var reusableMatchInfo = [2, -1, -1, "", ""];
var reusableMatchArray = [ void 0 ];
// Helper function for regular expressions in String.prototype.replace.
// Replaces the match (or all matches, for a global regexp) of regexp in
// subject by the expansion of replace, and returns the resulting string.
// NOTE(review): this listing appears to interleave two versions of the body;
// confirm against the real file before relying on it.
function StringReplaceRegExp(subject, regexp, replace) {
// Compute an array of matches; each match is really a list of
// captures - pairs of (start, end) indexes into the subject string.
var matches;
if (regexp.global) {
matches = DoRegExpExecGlobal(regexp, subject);
if (matches.length == 0) return subject;
} else {
var lastMatchInfo = DoRegExpExec(regexp, subject, 0);
if (IS_NULL(lastMatchInfo)) return subject;
reusableMatchArray[0] = lastMatchInfo;
matches = reusableMatchArray;
}
// Determine the number of matches.
var length = matches.length;
// Build the resulting string of subject slices and replacements.
var result = new ReplaceResultBuilder(subject);
var previous = 0;
// The caller of StringReplaceRegExp must ensure that replace is not a
// function.
replace = ToString(replace);
// A replacement without "$" has nothing to expand, so it is added as is.
if (%StringIndexOf(replace, "$", 0) < 0) {
for (var i = 0; i < length; i++) {
var matchInfo = matches[i];
result.addSpecialSlice(previous, matchInfo[CAPTURE0]);
result.add(replace);
previous = matchInfo[CAPTURE1]; // continue after match
}
} else {
for (var i = 0; i < length; i++) {
var matchInfo = matches[i];
result.addSpecialSlice(previous, matchInfo[CAPTURE0]);
ExpandReplacement(replace, subject, matchInfo, result);
previous = matchInfo[CAPTURE1]; // continue after match
}
}
// Add the part of the subject that follows the last match.
result.addSpecialSlice(previous, subject.length);
return result.generate();
// NOTE(review): as listed, the statement below follows an unconditional
// return and is therefore unreachable.
return %StringReplaceRegExpWithString(subject,
regexp,
replace,
lastMatchInfo);
};
......
// Copyright 2009 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following
// disclaimer in the documentation and/or other materials provided
// with the distribution.
// * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/**
* @fileoverview Test String.prototype.replace
*/
// Asserts that subject.replace(pattern, replacement) equals result. The
// failure message spells out the call that was made.
function replaceTest(result, subject, pattern, replacement) {
  var description = "\"" + subject + "\".replace(";
  description = description + pattern + ", " + replacement + ")";
  var actual = subject.replace(pattern, replacement);
  assertEquals(result, actual, description);
}
// Subject shared by the tests below. Every test group runs with a string
// pattern, a non-global regexp and a global regexp.
var short = "xaxbxcx";
// Empty replacement.
replaceTest("axbxcx", short, "x", "");
replaceTest("axbxcx", short, /x/, "");
replaceTest("abc", short, /x/g, "");
replaceTest("xaxxcx", short, "b", "");
replaceTest("xaxxcx", short, /b/, "");
replaceTest("xaxxcx", short, /b/g, "");
// Literal replacement without any "$".
replaceTest("[]axbxcx", short, "x", "[]");
replaceTest("[]axbxcx", short, /x/, "[]");
replaceTest("[]a[]b[]c[]", short, /x/g, "[]");
replaceTest("xax[]xcx", short, "b", "[]");
replaceTest("xax[]xcx", short, /b/, "[]");
replaceTest("xax[]xcx", short, /b/g, "[]");
// "$$" inserts a single "$".
replaceTest("[$]axbxcx", short, "x", "[$$]");
replaceTest("[$]axbxcx", short, /x/, "[$$]");
replaceTest("[$]a[$]b[$]c[$]", short, /x/g, "[$$]");
replaceTest("xax[$]xcx", short, "b", "[$$]");
replaceTest("xax[$]xcx", short, /b/, "[$$]");
replaceTest("xax[$]xcx", short, /b/g, "[$$]");
// "$`" inserts the part of the subject before the match.
replaceTest("[]axbxcx", short, "x", "[$`]");
replaceTest("[]axbxcx", short, /x/, "[$`]");
replaceTest("[]a[xa]b[xaxb]c[xaxbxc]", short, /x/g, "[$`]");
replaceTest("xax[xax]xcx", short, "b", "[$`]");
replaceTest("xax[xax]xcx", short, /b/, "[$`]");
replaceTest("xax[xax]xcx", short, /b/g, "[$`]");
// "$&" inserts the matched text.
replaceTest("[x]axbxcx", short, "x", "[$&]");
replaceTest("[x]axbxcx", short, /x/, "[$&]");
replaceTest("[x]a[x]b[x]c[x]", short, /x/g, "[$&]");
replaceTest("xax[b]xcx", short, "b", "[$&]");
replaceTest("xax[b]xcx", short, /b/, "[$&]");
replaceTest("xax[b]xcx", short, /b/g, "[$&]");
// "$'" inserts the part of the subject after the match.
replaceTest("[axbxcx]axbxcx", short, "x", "[$']");
replaceTest("[axbxcx]axbxcx", short, /x/, "[$']");
replaceTest("[axbxcx]a[bxcx]b[cx]c[]", short, /x/g, "[$']");
replaceTest("xax[xcx]xcx", short, "b", "[$']");
replaceTest("xax[xcx]xcx", short, /b/, "[$']");
replaceTest("xax[xcx]xcx", short, /b/g, "[$']");
// "$1" is kept literally unless the pattern has a first capture group.
replaceTest("[$1]axbxcx", short, "x", "[$1]");
replaceTest("[$1]axbxcx", short, /x/, "[$1]");
replaceTest("[]axbxcx", short, /x()/, "[$1]");
replaceTest("[$1]a[$1]b[$1]c[$1]", short, /x/g, "[$1]");
replaceTest("[]a[]b[]c[]", short, /x()/g, "[$1]");
replaceTest("xax[$1]xcx", short, "b", "[$1]");
replaceTest("xax[$1]xcx", short, /b/, "[$1]");
replaceTest("xax[]xcx", short, /b()/, "[$1]");
replaceTest("xax[$1]xcx", short, /b/g, "[$1]");
replaceTest("xax[]xcx", short, /b()/g, "[$1]");
// Mix of "$$", one- and two-digit references with two capture groups.
replaceTest("[$$$1$$a1abb1bb0$002$3$03][$$$1$$b1bcc1cc0$002$3$03]c",
"abc", /(.)(?=(.))/g, "[$$$$$$1$$$$$11$01$2$21$02$020$002$3$03]");
// Replace with functions.
// Each callback checks the arguments it receives and returns the running
// call count, which ends up in the result string; ctr is then compared with
// the expected number of calls.
var ctr = 0;
replaceTest("0axbxcx", short, "x", function r(m, i, s) {
assertEquals(3, arguments.length, "replace('x',func) func-args");
assertEquals("x", m, "replace('x',func(m,..))");
assertEquals(0, i, "replace('x',func(..,i,..))");
assertEquals(short, s, "replace('x',func(..,s))");
return String(ctr++);
});
assertEquals(1, ctr, "replace('x',func) num-match");
ctr = 0;
replaceTest("0axbxcx", short, /x/, function r(m, i, s) {
assertEquals(3, arguments.length, "replace(/x/,func) func-args");
assertEquals("x", m, "replace(/x/,func(m,..))");
assertEquals(0, i, "replace(/x/,func(..,i,..))");
assertEquals(short, s, "replace(/x/,func(..,s))");
return String(ctr++);
});
assertEquals(1, ctr, "replace(/x/,func) num-match");
ctr = 0;
// Global regexp: the callback runs once per match, at indexes 0, 2, 4, 6.
replaceTest("0a1b2c3", short, /x/g, function r(m, i, s) {
assertEquals(3, arguments.length, "replace(/x/g,func) func-args");
assertEquals("x", m, "replace(/x/g,func(m,..))");
assertEquals(ctr * 2, i, "replace(/x/g,func(..,i,.))");
assertEquals(short, s, "replace(/x/g,func(..,s))");
return String(ctr++);
});
assertEquals(4, ctr, "replace(/x/g,func) num-match");
ctr = 0;
// With capture groups the callback receives the captures before the index.
// The lookahead needs a following character, so the final "x" is not matched.
replaceTest("0a1b2cx", short, /(x)(?=(.))/g, function r(m, c1, c2, i, s) {
assertEquals(5, arguments.length, "replace(/(x)(?=(.))/g,func) func-args");
assertEquals("x", m, "replace(/(x)(?=(.))/g,func(m,..))");
assertEquals("x", c1, "replace(/(x)(?=(.))/g,func(..,c1,..))");
assertEquals(["a","b","c"][ctr], c2, "replace(/(x)(?=(.))/g,func(..,c2,..))");
assertEquals(ctr * 2, i, "replace(/(x)(?=(.))/g,func(..,i,..))");
assertEquals(short, s, "replace(/(x)(?=(.))/g,func(..,s))");
return String(ctr++);
});
assertEquals(3, ctr, "replace(/x/g,func) num-match");
// Test special cases of replacement parts longer than 1<<11.
// Doubling "xyzzy" eleven times gives a string of length 5 << 11.
var longstring = (function () {
  var text = "xyzzy";
  for (var doubling = 0; doubling < 11; doubling++) {
    text = text + text;
  }
  return text;
})();
replaceTest(longstring + longstring,
            "<" + longstring + ">", /<(.*)>/g, "$1$1");
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment