Commit bd8816ef authored by lrn@chromium.org

Moved String.prototype.match implementation to C++.

Some extra runtime assertions added.


git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@1608 ce2b1a6d-e550-0410-aec6-3dcde31c8c00
parent 7c35a5d8
...@@ -205,25 +205,6 @@ Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp, ...@@ -205,25 +205,6 @@ Handle<Object> RegExpImpl::Exec(Handle<JSRegExp> regexp,
} }
// Runs a "global" execution of |regexp| over |subject| by forwarding to the
// implementation that matches the regexp's type tag (atom or Irregexp).
// The forwarded call's result is returned unchanged. For Irregexp, an empty
// handle is only expected while an exception is pending.
Handle<Object> RegExpImpl::ExecGlobal(Handle<JSRegExp> regexp,
                                      Handle<String> subject,
                                      Handle<JSArray> last_match_info) {
  switch (regexp->TypeTag()) {
    case JSRegExp::IRREGEXP: {
      Handle<Object> matches =
          IrregexpExecGlobal(regexp, subject, last_match_info);
      ASSERT(!matches.is_null() || Top::has_pending_exception());
      return matches;
    }
    case JSRegExp::ATOM:
      return AtomExecGlobal(regexp, subject, last_match_info);
    default:
      break;
  }
  // Any other type tag is not expected here.
  UNREACHABLE();
  return Handle<Object>::null();
}
// RegExp Atom implementation: Simple string search using indexOf. // RegExp Atom implementation: Simple string search using indexOf.
...@@ -273,55 +254,6 @@ Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re, ...@@ -273,55 +254,6 @@ Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
} }
// Collects every occurrence of an atom (plain string) pattern in |subject|,
// scanning left to right from index 0.
//
// Returns a JSArray with one entry per occurrence. Each entry is a JSArray
// laid out like the last-match-info array: a capture count of 2 followed by
// the start and end offsets of the occurrence (the subject and input fields
// are left unset). Once the scan runs out of matches, |last_match_info| is
// updated to describe the final occurrence, if there was one.
Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re,
                                          Handle<String> subject,
                                          Handle<JSArray> last_match_info) {
  Handle<String> pattern(
      String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)));
  ASSERT(last_match_info->HasFastElements());
  Handle<JSArray> all_matches = Factory::NewJSArray(1);

  int found = 0;
  int subject_len = subject->length();
  int pattern_len = pattern->length();
  // An empty pattern matches without consuming anything, so the scan has to
  // step one extra character after each match to make progress.
  int extra_step = (pattern_len == 0) ? 1 : 0;
  int previous_start = -1;

  for (int search_from = 0; ; ) {
    HandleScope scope;

    // Only search while the pattern can still fit in the remaining subject.
    int start = -1;
    if (search_from + pattern_len <= subject_len) {
      start = Runtime::StringMatch(subject, pattern, search_from);
    }

    if (start == -1) {
      // No further occurrence: publish the last one found (if any) into the
      // caller-supplied last-match-info array and stop.
      if (previous_start != -1) {
        Handle<FixedArray> info(last_match_info->elements());
        SetAtomLastCapture(*info,
                           *subject,
                           previous_start,
                           previous_start + pattern->length());
      }
      break;
    }

    int stop = start + pattern_len;

    // Build a per-match array shaped like the static last_match_info array
    // attached to the global RegExp object; the subject and input slots are
    // deliberately not filled in.
    Handle<FixedArray> captures = Factory::NewFixedArray(kFirstCapture + 2);
    SetLastCaptureCount(*captures, 2);
    SetCapture(*captures, 0, start);
    SetCapture(*captures, 1, stop);
    Handle<JSArray> entry = Factory::NewJSArrayWithElements(captures);
    SetElement(all_matches, found, entry);
    found++;

    search_from = stop + extra_step;
    previous_start = start;
  }
  return all_matches;
}
// Irregexp implementation. // Irregexp implementation.
...@@ -331,8 +263,7 @@ Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re, ...@@ -331,8 +263,7 @@ Handle<Object> RegExpImpl::AtomExecGlobal(Handle<JSRegExp> re,
// from the source pattern. // from the source pattern.
// If compilation fails, an exception is thrown and this function // If compilation fails, an exception is thrown and this function
// returns false. // returns false.
bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re, bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii) {
bool is_ascii) {
int index; int index;
if (is_ascii) { if (is_ascii) {
index = JSRegExp::kIrregexpASCIICodeIndex; index = JSRegExp::kIrregexpASCIICodeIndex;
...@@ -460,22 +391,20 @@ void RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re, ...@@ -460,22 +391,20 @@ void RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re,
} }
Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp, Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp,
Handle<String> subject, Handle<String> subject,
int index, int previous_index,
Handle<JSArray> last_match_info) { Handle<JSArray> last_match_info) {
ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP);
// Prepare space for the return values. // Prepare space for the return values.
int number_of_capture_registers = int number_of_capture_registers =
(IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2;
OffsetsVector offsets(number_of_capture_registers); OffsetsVector offsets(number_of_capture_registers);
int previous_index = index;
#ifdef DEBUG #ifdef DEBUG
if (FLAG_trace_regexp_bytecodes) { if (FLAG_trace_regexp_bytecodes) {
String* pattern = regexp->Pattern(); String* pattern = jsregexp->Pattern();
PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
} }
...@@ -487,104 +416,12 @@ Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp, ...@@ -487,104 +416,12 @@ Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp,
last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead); last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead);
return IrregexpExecOnce(regexp, int* offsets_vector = offsets.vector();
number_of_capture_registers, int offsets_vector_length = offsets.length();
last_match_info,
subject,
previous_index,
offsets.vector(),
offsets.length());
}
// Executes an Irregexp |regexp| repeatedly over |subject|, starting at
// index 0, and collects every match.
//
// Returns a JSArray holding one JSArray per match. Each per-match array is a
// copy of the first kFirstCapture + number_of_capture_registers elements of
// the array returned by IrregexpExecOnce for that match. The result is empty
// if nothing matched. Returns an empty handle if a match attempt returned an
// empty handle (an exception is then expected to be pending).
Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,
                                              Handle<String> subject,
                                              Handle<JSArray> last_match_info) {
  ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);

  // Prepare space for the return values.
  int number_of_capture_registers =
      (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2;
  OffsetsVector offsets(number_of_capture_registers);

  int previous_index = 0;

  Handle<JSArray> result = Factory::NewJSArray(0);
  int result_length = 0;
  Handle<Object> matches;

  if (!subject->IsFlat()) {
    FlattenString(subject);
  }

  last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead);

  while (true) {
    if (previous_index > subject->length() || previous_index < 0) {
      // Per ECMA-262 15.10.6.2, if the previous index is greater than the
      // string length, there is no match.
      return result;
    }

#ifdef DEBUG
    if (FLAG_trace_regexp_bytecodes) {
      String* pattern = regexp->Pattern();
      PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString()));
      PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString()));
    }
#endif

    // Handles created for one match attempt are released per iteration.
    HandleScope scope;
    matches = IrregexpExecOnce(regexp,
                               number_of_capture_registers,
                               last_match_info,
                               subject,
                               previous_index,
                               offsets.vector(),
                               offsets.length());

    if (matches.is_null()) {
      ASSERT(Top::has_pending_exception());
      return matches;
    }

    if (!matches->IsJSArray()) {
      // No (further) match: hand back what has been collected so far.
      ASSERT(matches->IsNull());
      return result;
    }

    // Create an array that looks like the static last_match_info array
    // that is attached to the global RegExp object. We will be returning
    // an array of these.
    int match_length = kFirstCapture + number_of_capture_registers;
    Handle<JSArray> latest_match = Factory::NewJSArray(match_length);

    int match_start;
    int match_end;
    {
      // The raw FixedArray pointer is only used inside this scope, where
      // nothing allocates. Everything needed from it (the copy and both
      // capture offsets) is extracted here so that the pointer is never
      // touched after the SetElement call below, which presumably has to
      // grow |result| (it was created via NewJSArray(0)) and so may allocate.
      AssertNoAllocation no_allocation;
      FixedArray* match_array = JSArray::cast(*matches)->elements();
      match_array->CopyTo(0,
                          latest_match->elements(),
                          0,
                          match_length);
      match_start = GetCapture(match_array, 0);
      match_end = GetCapture(match_array, 1);
    }

    SetElement(result, result_length, latest_match);
    result_length++;

    // Continue after the match; step over an empty match so that the loop
    // always makes progress.
    previous_index = match_end;
    if (match_start == match_end) {
      previous_index++;
    }
  }
}
Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> jsregexp,
int number_of_capture_registers,
Handle<JSArray> last_match_info,
Handle<String> subject,
int previous_index,
int* offsets_vector,
int offsets_vector_length) {
ASSERT(subject->IsFlat());
bool rc; bool rc;
// Dispatch to the correct RegExp implementation.
Handle<String> original_subject = subject; Handle<String> original_subject = subject;
Handle<FixedArray> regexp(FixedArray::cast(jsregexp->data())); Handle<FixedArray> regexp(FixedArray::cast(jsregexp->data()));
if (UseNativeRegexp()) { if (UseNativeRegexp()) {
...@@ -631,6 +468,8 @@ Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> jsregexp, ...@@ -631,6 +468,8 @@ Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> jsregexp,
previous_index); previous_index);
} }
// Handle results from RegExp implementation.
if (!rc) { if (!rc) {
return Factory::null_value(); return Factory::null_value();
} }
......
...@@ -94,20 +94,15 @@ class RegExpImpl { ...@@ -94,20 +94,15 @@ class RegExpImpl {
int index, int index,
Handle<JSArray> lastMatchInfo); Handle<JSArray> lastMatchInfo);
// Finds every occurrence of the atom (plain string) pattern of regexp in
// subject, scanning from index 0.
// The result is a JSArray with one JSArray per occurrence, each holding a
// capture count of 2 and the start and end offsets of that occurrence.
// lastMatchInfo is updated to describe the final occurrence, if any.
static Handle<Object> AtomExecGlobal(Handle<JSRegExp> regexp,
Handle<String> subject,
Handle<JSArray> lastMatchInfo);
// Execute an Irregexp bytecode pattern. // Execute an Irregexp bytecode pattern.
// On a successful match, the result is a JSArray containing
// captured positions. On a failure, the result is the null value.
// Returns an empty handle in case of an exception.
static Handle<Object> IrregexpExec(Handle<JSRegExp> regexp, static Handle<Object> IrregexpExec(Handle<JSRegExp> regexp,
Handle<String> subject, Handle<String> subject,
int index, int index,
Handle<JSArray> lastMatchInfo); Handle<JSArray> lastMatchInfo);
// Executes an Irregexp pattern repeatedly over subject, starting at index 0.
// The result is a JSArray with one JSArray per match (empty if there was no
// match at all), each laid out like the lastMatchInfo array.
// Returns an empty handle in case of an exception.
static Handle<Object> IrregexpExecGlobal(Handle<JSRegExp> regexp,
Handle<String> subject,
Handle<JSArray> lastMatchInfo);
// Offsets in the lastMatchInfo array. // Offsets in the lastMatchInfo array.
static const int kLastCaptureCount = 0; static const int kLastCaptureCount = 0;
static const int kLastSubject = 1; static const int kLastSubject = 1;
...@@ -155,17 +150,6 @@ class RegExpImpl { ...@@ -155,17 +150,6 @@ class RegExpImpl {
static bool EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii); static bool EnsureCompiledIrregexp(Handle<JSRegExp> re, bool is_ascii);
// Runs one match attempt of the Irregexp code for jsregexp against
// subject16.
// On a successful match, the result is a JSArray containing
// captured positions. On a failure, the result is the null value.
// Returns an empty handle in case of an exception.
// NOTE(review): the visible caller passes the number of capture registers as
// num_captures, the index to resume matching from as previous_index, and an
// OffsetsVector's vector()/length() as ovector/ovector_length. The subject
// appears to be required to be flat -- confirm against the definition.
static Handle<Object> IrregexpExecOnce(Handle<JSRegExp> jsregexp,
int num_captures,
Handle<JSArray> lastMatchInfo,
Handle<String> subject16,
int previous_index,
int* ovector,
int ovector_length);
// Set the subject cache. The previous string buffer is not deleted, so the // Set the subject cache. The previous string buffer is not deleted, so the
// caller should ensure that it doesn't leak. // caller should ensure that it doesn't leak.
static void SetSubjectCache(String* subject, static void SetSubjectCache(String* subject,
......
...@@ -314,7 +314,7 @@ Time::Time() { ...@@ -314,7 +314,7 @@ Time::Time() {
// Initialize timestamp from a JavaScript timestamp. // Initialize timestamp from a JavaScript timestamp.
Time::Time(double jstime) { Time::Time(double jstime) {
t() = static_cast<uint64_t>(jstime) * kTimeScaler + kTimeEpoc; t() = static_cast<int64_t>(jstime) * kTimeScaler + kTimeEpoc;
} }
......
...@@ -962,6 +962,8 @@ RegExpMacroAssemblerIA32::Result RegExpMacroAssemblerIA32::Match( ...@@ -962,6 +962,8 @@ RegExpMacroAssemblerIA32::Result RegExpMacroAssemblerIA32::Match(
int previous_index) { int previous_index) {
ASSERT(subject->IsFlat()); ASSERT(subject->IsFlat());
ASSERT(previous_index >= 0);
ASSERT(previous_index <= subject->length());
// No allocations before calling the regexp, but we can't use // No allocations before calling the regexp, but we can't use
// AssertNoAllocation, since regexps might be preempted, and another thread // AssertNoAllocation, since regexps might be preempted, and another thread
......
...@@ -1038,7 +1038,9 @@ static Object* Runtime_RegExpExec(Arguments args) { ...@@ -1038,7 +1038,9 @@ static Object* Runtime_RegExpExec(Arguments args) {
CONVERT_CHECKED(Smi, index, args[2]); CONVERT_CHECKED(Smi, index, args[2]);
CONVERT_CHECKED(JSArray, raw_last_match_info, args[3]); CONVERT_CHECKED(JSArray, raw_last_match_info, args[3]);
Handle<JSArray> last_match_info(raw_last_match_info); Handle<JSArray> last_match_info(raw_last_match_info);
CHECK(last_match_info->HasFastElements()); RUNTIME_ASSERT(last_match_info->HasFastElements());
RUNTIME_ASSERT(index->value() >= 0);
RUNTIME_ASSERT(index->value() <= subject->length());
Handle<Object> result = RegExpImpl::Exec(regexp, Handle<Object> result = RegExpImpl::Exec(regexp,
subject, subject,
index->value(), index->value(),
...@@ -1048,23 +1050,6 @@ static Object* Runtime_RegExpExec(Arguments args) { ...@@ -1048,23 +1050,6 @@ static Object* Runtime_RegExpExec(Arguments args) {
} }
// Runtime entry point for a global regexp execution.
//
// Arguments:
//   [0] the JSRegExp to execute,
//   [1] the subject String,
//   [2] the last-match-info JSArray (checked to have fast elements).
//
// Returns the object produced by RegExpImpl::ExecGlobal, or
// Failure::Exception() when that call yields an empty handle.
static Object* Runtime_RegExpExecGlobal(Arguments args) {
  HandleScope scope;
  ASSERT(args.length() == 3);

  // Type-check all three arguments, then wrap them in handles.
  CONVERT_CHECKED(JSRegExp, regexp_arg, args[0]);
  CONVERT_CHECKED(String, subject_arg, args[1]);
  CONVERT_CHECKED(JSArray, match_info_arg, args[2]);
  Handle<JSRegExp> regexp(regexp_arg);
  Handle<String> subject(subject_arg);
  Handle<JSArray> last_match_info(match_info_arg);
  CHECK(last_match_info->HasFastElements());

  Handle<Object> outcome =
      RegExpImpl::ExecGlobal(regexp, subject, last_match_info);
  if (!outcome.is_null()) return *outcome;
  return Failure::Exception();
}
static Object* Runtime_MaterializeRegExpLiteral(Arguments args) { static Object* Runtime_MaterializeRegExpLiteral(Arguments args) {
HandleScope scope; HandleScope scope;
ASSERT(args.length() == 4); ASSERT(args.length() == 4);
...@@ -2337,6 +2322,57 @@ static Object* Runtime_StringSlice(Arguments args) { ...@@ -2337,6 +2322,57 @@ static Object* Runtime_StringSlice(Arguments args) {
} }
// Runtime implementation of String.prototype.match for global regexps.
//
// Arguments:
//   [0] subject      - the String to search.
//   [1] regexp       - the JSRegExp to apply.
//   [2] regexp_info  - the last-match-info JSArray handed to
//                      RegExpImpl::Exec; the offsets of each match are read
//                      back from its first two capture slots.
//
// Returns the null value if the regexp does not match at all, otherwise a
// JSArray containing one string slice of |subject| per match. Returns
// Failure::Exception() if RegExpImpl::Exec returns an empty handle.
static Object* Runtime_StringMatch(Arguments args) {
  ASSERT_EQ(3, args.length());

  CONVERT_ARG_CHECKED(String, subject, 0);
  CONVERT_ARG_CHECKED(JSRegExp, regexp, 1);
  CONVERT_ARG_CHECKED(JSArray, regexp_info, 2);
  // The capture offsets are read straight out of the array's elements below,
  // so validate the array the same way Runtime_RegExpExec does.
  RUNTIME_ASSERT(regexp_info->HasFastElements());
  HandleScope handles;

  Handle<Object> match = RegExpImpl::Exec(regexp, subject, 0, regexp_info);

  if (match.is_null()) {
    return Failure::Exception();
  }
  if (match->IsNull()) {
    return Heap::null_value();
  }
  int length = subject->length();

  // Match offsets are gathered as (start, end) pairs in zone memory first;
  // the result strings are only created once all matches are known.
  ZoneScope zone_space(DELETE_ON_EXIT);
  ZoneList<int> offsets(8);
  do {
    int start;
    int end;
    {
      // Raw element access: nothing may allocate while |elements| is live.
      AssertNoAllocation no_alloc;
      FixedArray* elements = regexp_info->elements();
      start = Smi::cast(elements->get(RegExpImpl::kFirstCapture))->value();
      end = Smi::cast(elements->get(RegExpImpl::kFirstCapture + 1))->value();
    }
    offsets.Add(start);
    offsets.Add(end);
    // Resume after the match; step over an empty match so that the loop
    // always makes progress.
    int index = start < end ? end : end + 1;
    if (index > length) break;
    match = RegExpImpl::Exec(regexp, subject, index, regexp_info);
    if (match.is_null()) {
      return Failure::Exception();
    }
  } while (!match->IsNull());

  int matches = offsets.length() / 2;
  Handle<FixedArray> elements = Factory::NewFixedArray(matches);
  for (int i = 0; i < matches; i++) {
    int from = offsets.at(i * 2);
    int to = offsets.at(i * 2 + 1);
    // Allocate the slice before dereferencing |elements|: if the handle were
    // dereferenced first and the allocation then moved the array, the store
    // would go through a stale raw pointer.
    Handle<Object> slice = Factory::NewStringSlice(subject, from, to);
    elements->set(i, *slice);
  }
  Handle<JSArray> result = Factory::NewJSArrayWithElements(elements);
  result->set_length(Smi::FromInt(matches));
  return *result;
}
static Object* Runtime_NumberToRadixString(Arguments args) { static Object* Runtime_NumberToRadixString(Arguments args) {
NoHandleAllocation ha; NoHandleAllocation ha;
ASSERT(args.length() == 2); ASSERT(args.length() == 2);
......
...@@ -138,7 +138,6 @@ namespace v8 { namespace internal { ...@@ -138,7 +138,6 @@ namespace v8 { namespace internal {
/* Regular expressions */ \ /* Regular expressions */ \
F(RegExpCompile, 3) \ F(RegExpCompile, 3) \
F(RegExpExec, 4) \ F(RegExpExec, 4) \
F(RegExpExecGlobal, 3) \
\ \
/* Strings */ \ /* Strings */ \
F(StringCharCodeAt, 2) \ F(StringCharCodeAt, 2) \
...@@ -147,6 +146,7 @@ namespace v8 { namespace internal { ...@@ -147,6 +146,7 @@ namespace v8 { namespace internal {
F(StringLocaleCompare, 2) \ F(StringLocaleCompare, 2) \
F(StringSlice, 3) \ F(StringSlice, 3) \
F(StringReplaceRegExpWithString, 4) \ F(StringReplaceRegExpWithString, 4) \
F(StringMatch, 3) \
\ \
/* Numbers */ \ /* Numbers */ \
F(NumberToRadixString, 2) \ F(NumberToRadixString, 2) \
......
...@@ -157,21 +157,8 @@ function StringMatch(regexp) { ...@@ -157,21 +157,8 @@ function StringMatch(regexp) {
if (!regexp.global) return regexp.exec(subject); if (!regexp.global) return regexp.exec(subject);
%_Log('regexp', 'regexp-match,%0S,%1r', [subject, regexp]); %_Log('regexp', 'regexp-match,%0S,%1r', [subject, regexp]);
var matches = DoRegExpExecGlobal(regexp, subject); // lastMatchInfo is defined in regexp-delay.js.
return %StringMatch(subject, regexp, lastMatchInfo);
// If the regexp did not match, return null.
if (matches.length == 0) return null;
// Build the result array.
var result = new $Array(match_string);
for (var i = 0; i < matches.length; ++i) {
var matchInfo = matches[i];
var match_string = subject.slice(matchInfo[CAPTURE0],
matchInfo[CAPTURE1]);
result[i] = match_string;
}
return result;
} }
......
...@@ -30,15 +30,25 @@ function CheckMatch(re, str, matches) { ...@@ -30,15 +30,25 @@ function CheckMatch(re, str, matches) {
var result = str.match(re); var result = str.match(re);
if (matches.length > 0) { if (matches.length > 0) {
assertEquals(matches.length, result.length); assertEquals(matches.length, result.length);
for (idx in matches) { var lastExpected;
var lastFrom;
var lastLength;
for (var idx = 0; idx < matches.length; idx++) {
var from = matches[idx][0]; var from = matches[idx][0];
var length = matches[idx][1]; var length = matches[idx][1];
var expected = str.substr(from, length); var expected = str.substr(from, length);
assertEquals(expected, result[idx]); var name = str + "[" + from + ".." + (from+length) + "]";
assertEquals(expected, result[idx], name);
if (re.global || idx == 0) {
lastExpected = expected;
lastFrom = from;
lastLength = length;
}
} }
assertEquals(expected, RegExp.lastMatch); assertEquals(lastExpected, RegExp.lastMatch, "lastMatch");
assertEquals(str.substr(0, from), RegExp.leftContext); assertEquals(str.substr(0, lastFrom), RegExp.leftContext, "leftContext");
assertEquals(str.substr(from + length), RegExp.rightContext); assertEquals(
str.substr(lastFrom + lastLength), RegExp.rightContext, "rightContext");
} else { } else {
assertTrue(result === null); assertTrue(result === null);
} }
...@@ -58,3 +68,10 @@ assertEquals("xxxdefxxxdefxxx", "xxxabcxxxabcxxx".replace(/abc/g, "def")); ...@@ -58,3 +68,10 @@ assertEquals("xxxdefxxxdefxxx", "xxxabcxxxabcxxx".replace(/abc/g, "def"));
assertEquals("o-o-oofo-ofo", "ofooofoooofofooofo".replace(/foo/g, "-")); assertEquals("o-o-oofo-ofo", "ofooofoooofofooofo".replace(/foo/g, "-"));
assertEquals("deded", "deded".replace(/x/g, "-")); assertEquals("deded", "deded".replace(/x/g, "-"));
assertEquals("-a-b-c-d-e-f-", "abcdef".replace(new RegExp("", "g"), "-")); assertEquals("-a-b-c-d-e-f-", "abcdef".replace(new RegExp("", "g"), "-"));
CheckMatch(/a(.)/, "xyzzyabxyzzzyacxyzzy", [[5, 2], [6, 1]]);
CheckMatch(/a(.)/g, "xyzzyabxyzzyacxyzzy", [[5, 2], [12, 2]]);
CheckMatch(/a|(?:)/g, "aba", [[0, 1], [1, 0], [2, 1], [3, 0]]);
CheckMatch(/a|(?:)/g, "baba", [[0, 0], [1, 1], [2, 0], [3, 1], [4, 0]]);
CheckMatch(/a|(?:)/g, "bab", [[0, 0], [1, 1], [2, 0], [3, 0]]);
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment