Commit 05a55992, authored by jgruber, committed by Commit bot

[regexp] Port split

This CL ports RegExp.prototype[@@split] to C++. Performance
regressions are expected due to:

* Slow RegExpImpl::Exec implementation instead of RegExpExec
  stub. We should be able to improve this by straight-lining
  RegExpImpl::Exec.
* Slow Factory::NewSubString instead of SubStringStub.
* Slow elements access to lastMatchInfo.

These points will be addressed in a follow-up CL.

BUG=v8:5339

Review-Url: https://codereview.chromium.org/2401643002
Cr-Commit-Position: refs/heads/master@{#40161}
parent 86ec0923
......@@ -1748,6 +1748,13 @@ void Genesis::InitializeGlobal(Handle<JSGlobalObject> global_object,
Builtins::kRegExpPrototypeSearch, 1, false);
InstallFunction(prototype, fun, factory->search_symbol(), DONT_ENUM);
}
{
Handle<JSFunction> fun = SimpleCreateFunction(
isolate, factory->InternalizeUtf8String("[Symbol.split]"),
Builtins::kRegExpPrototypeSplit, 2, false);
InstallFunction(prototype, fun, factory->split_symbol(), DONT_ENUM);
}
}
{
......
This diff is collapsed.
......@@ -590,6 +590,7 @@ namespace internal {
CPP(RegExpPrototypeSearch) \
CPP(RegExpPrototypeSourceGetter) \
CPP(RegExpPrototypeSpeciesGetter) \
CPP(RegExpPrototypeSplit) \
TFJ(RegExpPrototypeStickyGetter, 1) \
CPP(RegExpPrototypeTest) \
CPP(RegExpPrototypeToString) \
......
......@@ -121,159 +121,6 @@ function RegExpSubclassExec(regexp, string, exec) {
%SetForceInlineFlag(RegExpSubclassExec);
function AtSurrogatePair(subject, index) {
if (index + 1 >= subject.length) return false;
var first = %_StringCharCodeAt(subject, index);
if (first < 0xD800 || first > 0xDBFF) return false;
var second = %_StringCharCodeAt(subject, index + 1);
return second >= 0xDC00 && second <= 0xDFFF;
}
// Fast path implementation of RegExp.prototype[Symbol.split] which
// doesn't properly call the underlying exec, @@species methods
function RegExpSplit(string, limit) {
if (!IS_REGEXP(this)) {
throw %make_type_error(kIncompatibleMethodReceiver,
"RegExp.prototype.@@split", this);
}
var separator = this;
var subject = TO_STRING(string);
limit = (IS_UNDEFINED(limit)) ? kMaxUint32 : TO_UINT32(limit);
var length = subject.length;
if (limit === 0) return [];
if (length === 0) {
if (DoRegExpExec(separator, subject, 0, 0) !== null) return [];
return [subject];
}
var currentIndex = 0;
var startIndex = 0;
var startMatch = 0;
var result = new InternalArray();
outer_loop:
while (true) {
if (startIndex === length) {
result[result.length] = %_SubString(subject, currentIndex, length);
break;
}
var matchInfo = DoRegExpExec(separator, subject, startIndex);
if (matchInfo === null || length === (startMatch = matchInfo[CAPTURE0])) {
result[result.length] = %_SubString(subject, currentIndex, length);
break;
}
var endIndex = matchInfo[CAPTURE1];
// We ignore a zero-length match at the currentIndex.
if (startIndex === endIndex && endIndex === currentIndex) {
if (REGEXP_UNICODE(this) && AtSurrogatePair(subject, startIndex)) {
startIndex += 2;
} else {
startIndex++;
}
continue;
}
result[result.length] = %_SubString(subject, currentIndex, startMatch);
if (result.length === limit) break;
var matchinfo_len = NUMBER_OF_CAPTURES(matchInfo) + REGEXP_FIRST_CAPTURE;
for (var i = REGEXP_FIRST_CAPTURE + 2; i < matchinfo_len; ) {
var start = matchInfo[i++];
var end = matchInfo[i++];
if (end != -1) {
result[result.length] = %_SubString(subject, start, end);
} else {
result[result.length] = UNDEFINED;
}
if (result.length === limit) break outer_loop;
}
startIndex = currentIndex = endIndex;
}
var array_result = [];
%MoveArrayContents(result, array_result);
return array_result;
}
// ES#sec-regexp.prototype-@@split
// RegExp.prototype [ @@split ] ( string, limit )
//
// Splits |string| around matches of the receiver and returns the pieces,
// interleaved with each match's capture groups, in a new array.
//
// string: coerced with TO_STRING before anything else is read.
// limit:  maximum number of elements to produce; kMaxUint32 when undefined,
//         otherwise converted with TO_UINT32.
// Throws a TypeError (kIncompatibleMethodReceiver) when the receiver is not
// a receiver object.
function RegExpSubclassSplit(string, limit) {
if (!IS_RECEIVER(this)) {
throw %make_type_error(kIncompatibleMethodReceiver,
"RegExp.prototype.@@split", this);
}
string = TO_STRING(string);
// The species constructor and the flags are read before the fast-path
// check below, so those lookups happen on every call.
var constructor = SpeciesConstructor(this, GlobalRegExp);
var flags = TO_STRING(this.flags);
// TODO(adamk): this fast path is wrong as we don't ensure that 'exec'
// is actually a data property on RegExp.prototype.
// Fast path: a plain RegExp whose species constructor is GlobalRegExp and
// whose exec is RegExpExecJS is handed to RegExpSplit.
if (IS_REGEXP(this) && constructor === GlobalRegExp) {
var exec = this.exec;
if (exec === RegExpExecJS) {
return %_Call(RegExpSplit, this, string, limit);
}
}
var unicode = %StringIndexOf(flags, 'u', 0) >= 0;
var sticky = %StringIndexOf(flags, 'y', 0) >= 0;
// The splitter is a new object built from the receiver with 'y' appended
// to the flags when it is not already there; lastIndex is only ever
// written on the splitter, not on the receiver.
// NOTE(review): presumably 'y' makes each exec match only at lastIndex --
// confirm against the spec.
var newFlags = sticky ? flags : flags + "y";
var splitter = new constructor(this, newFlags);
var array = new GlobalArray();
var arrayIndex = 0;
var lim = (IS_UNDEFINED(limit)) ? kMaxUint32 : TO_UINT32(limit);
var size = string.length;
// Start of the piece that has not been emitted yet.
var prevStringIndex = 0;
if (lim === 0) return array;
var result;
// Empty subject: the result is [string] when the splitter does not match
// and an empty array when it does.
if (size === 0) {
result = RegExpSubclassExec(splitter, string);
if (IS_NULL(result)) %AddElement(array, 0, string);
return array;
}
// Position at which the next match attempt is made.
var stringIndex = prevStringIndex;
while (stringIndex < size) {
splitter.lastIndex = stringIndex;
result = RegExpSubclassExec(splitter, string);
if (IS_NULL(result)) {
// No match here: move forward by the amount AdvanceStringIndex returns.
stringIndex += AdvanceStringIndex(string, stringIndex, unicode);
} else {
// End of the match as reported by the splitter, clamped to the string.
var end = MinSimple(TO_LENGTH(splitter.lastIndex), size);
if (end === prevStringIndex) {
// The match ends where the pending piece starts, so it is skipped and
// the search position advances as for a failed match.
stringIndex += AdvanceStringIndex(string, stringIndex, unicode);
} else {
// Emit the text between the previous split point and this match.
%AddElement(
array, arrayIndex,
%_SubString(string, prevStringIndex, stringIndex));
arrayIndex++;
if (arrayIndex === lim) return array;
prevStringIndex = end;
// Append result[1] .. result[length - 1] (index 0 is not copied),
// stopping as soon as the limit is reached.
var numberOfCaptures = MaxSimple(TO_LENGTH(result.length), 0);
for (var i = 1; i < numberOfCaptures; i++) {
%AddElement(array, arrayIndex, result[i]);
arrayIndex++;
if (arrayIndex === lim) return array;
}
stringIndex = prevStringIndex;
}
}
}
// Whatever follows the last split point becomes the final element.
%AddElement(array, arrayIndex,
%_SubString(string, prevStringIndex, size));
return array;
}
%FunctionRemovePrototype(RegExpSubclassSplit);
// Legacy implementation of RegExp.prototype[Symbol.replace] which
// doesn't properly call the underlying exec method.
......@@ -673,7 +520,6 @@ function RegExpSubclassReplace(string, replace) {
utils.InstallFunctions(GlobalRegExp.prototype, DONT_ENUM, [
replaceSymbol, RegExpSubclassReplace,
splitSymbol, RegExpSubclassSplit,
]);
%InstallToContext(["regexp_last_match_info", RegExpLastMatchInfo]);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment