Commit 80f3c6ce authored by jgruber's avatar jgruber Committed by Commit bot

[regexp] Port test, match, and search

This CL ports RegExp.prototype.test, RegExp.prototype.match
and RegExp.prototype.search to C++.

Performance regressions are expected but should be improved
in an upcoming CL.

BUG=v8:5339

Review-Url: https://codereview.chromium.org/2394713003
Cr-Commit-Position: refs/heads/master@{#40122}
parent 640ae3f9
......@@ -1702,8 +1702,12 @@ void Genesis::InitializeGlobal(Handle<JSGlobalObject> global_object,
JSObject::AddProperty(prototype, factory->constructor_string(),
regexp_fun, DONT_ENUM);
SimpleInstallFunction(prototype, "exec", Builtins::kRegExpPrototypeExec,
1, true, DONT_ENUM);
{
Handle<JSFunction> fun = SimpleInstallFunction(
prototype, "exec", Builtins::kRegExpPrototypeExec, 1, true,
DONT_ENUM);
native_context()->set_regexp_exec_function(*fun);
}
SimpleInstallGetter(prototype, factory->flags_string(),
Builtins::kRegExpPrototypeFlagsGetter, true);
......@@ -1726,6 +1730,22 @@ void Genesis::InitializeGlobal(Handle<JSGlobalObject> global_object,
SimpleInstallFunction(prototype, factory->toString_string(),
Builtins::kRegExpPrototypeToString, 0, false,
DONT_ENUM);
SimpleInstallFunction(prototype, "test", Builtins::kRegExpPrototypeTest,
1, false, DONT_ENUM);
{
Handle<JSFunction> fun = SimpleCreateFunction(
isolate, factory->InternalizeUtf8String("[Symbol.match]"),
Builtins::kRegExpPrototypeMatch, 1, false);
InstallFunction(prototype, fun, factory->match_symbol(), DONT_ENUM);
}
{
Handle<JSFunction> fun = SimpleCreateFunction(
isolate, factory->InternalizeUtf8String("[Symbol.search]"),
Builtins::kRegExpPrototypeSearch, 1, false);
InstallFunction(prototype, fun, factory->search_symbol(), DONT_ENUM);
}
}
{
......@@ -1812,7 +1832,7 @@ void Genesis::InitializeGlobal(Handle<JSGlobalObject> global_object,
// ECMA-262, section 15.10.7.5.
PropertyAttributes writable =
static_cast<PropertyAttributes>(DONT_ENUM | DONT_DELETE);
DataDescriptor field(factory->last_index_string(),
DataDescriptor field(factory->lastIndex_string(),
JSRegExp::kLastIndexFieldIndex, writable,
Representation::Tagged());
initial_map->AppendDescriptor(&field);
......
......@@ -216,7 +216,7 @@ compiler::Node* LoadLastIndex(CodeStubAssembler* a, compiler::Node* context,
{
// Load through the GetProperty stub.
Node* const name =
a->HeapConstant(a->isolate()->factory()->last_index_string());
a->HeapConstant(a->isolate()->factory()->lastIndex_string());
Callable getproperty_callable = CodeFactory::GetProperty(a->isolate());
var_value.Bind(a->CallStub(getproperty_callable, context, regexp, name));
a->Goto(&out);
......@@ -249,7 +249,7 @@ void StoreLastIndex(CodeStubAssembler* a, compiler::Node* context,
// Store through runtime.
// TODO(ishell): Use SetPropertyStub here once available.
Node* const name =
a->HeapConstant(a->isolate()->factory()->last_index_string());
a->HeapConstant(a->isolate()->factory()->lastIndex_string());
Node* const language_mode = a->SmiConstant(Smi::FromInt(STRICT));
a->CallRuntime(Runtime::kSetProperty, context, regexp, name, value,
language_mode);
......@@ -976,5 +976,264 @@ BUILTIN(RegExpRightContextGetter) {
return *isolate->factory()->NewSubString(last_subject, start_index, len);
}
namespace {
V8_INLINE bool HasInitialRegExpMap(Isolate* isolate, Handle<JSReceiver> recv) {
return recv->map() == isolate->regexp_function()->initial_map();
}
} // namespace
// ES#sec-regexpexec Runtime Semantics: RegExpExec ( R, S )
// Also takes an optional exec method in case our caller
// has already fetched exec.
MaybeHandle<Object> RegExpExec(Isolate* isolate, Handle<JSReceiver> regexp,
Handle<String> string, Handle<Object> exec) {
if (exec->IsUndefined(isolate)) {
ASSIGN_RETURN_ON_EXCEPTION(
isolate, exec,
Object::GetProperty(
regexp, isolate->factory()->NewStringFromAsciiChecked("exec")),
Object);
}
if (exec->IsCallable()) {
const int argc = 1;
ScopedVector<Handle<Object>> argv(argc);
argv[0] = string;
Handle<Object> result;
ASSIGN_RETURN_ON_EXCEPTION(
isolate, result,
Execution::Call(isolate, exec, regexp, argc, argv.start()), Object);
if (!result->IsJSReceiver() && !result->IsNull(isolate)) {
THROW_NEW_ERROR(isolate,
NewTypeError(MessageTemplate::kInvalidRegExpExecResult),
Object);
}
return result;
}
if (!regexp->IsJSRegExp()) {
THROW_NEW_ERROR(isolate,
NewTypeError(MessageTemplate::kIncompatibleMethodReceiver,
isolate->factory()->NewStringFromAsciiChecked(
"RegExp.prototype.exec"),
regexp),
Object);
}
{
Handle<JSFunction> regexp_exec = isolate->regexp_exec_function();
const int argc = 1;
ScopedVector<Handle<Object>> argv(argc);
argv[0] = string;
return Execution::Call(isolate, exec, regexp_exec, argc, argv.start());
}
}
// ES#sec-regexp.prototype.test
// RegExp.prototype.test ( S )
BUILTIN(RegExpPrototypeTest) {
HandleScope scope(isolate);
CHECK_RECEIVER(JSReceiver, recv, "RegExp.prototype.test");
Handle<Object> string_obj = args.atOrUndefined(isolate, 1);
Handle<String> string;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, string,
Object::ToString(isolate, string_obj));
Handle<Object> result;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, result,
RegExpExec(isolate, recv, string, isolate->factory()->undefined_value()));
return isolate->heap()->ToBoolean(!result->IsNull(isolate));
}
namespace {
// ES#sec-advancestringindex
// AdvanceStringIndex ( S, index, unicode )
int AdvanceStringIndex(Isolate* isolate, Handle<String> string, int index,
bool unicode) {
int increment = 1;
if (unicode && index < string->length()) {
const uint16_t first = string->Get(index);
if (first >= 0xD800 && first <= 0xDBFF && string->length() > index + 1) {
const uint16_t second = string->Get(index + 1);
if (second >= 0xDC00 && second <= 0xDFFF) {
increment = 2;
}
}
}
return increment;
}
MaybeHandle<Object> SetLastIndex(Isolate* isolate, Handle<JSReceiver> recv,
int value) {
if (HasInitialRegExpMap(isolate, recv)) {
JSRegExp::cast(*recv)->SetLastIndex(value);
return recv;
} else {
return Object::SetProperty(recv, isolate->factory()->lastIndex_string(),
handle(Smi::FromInt(value), isolate), STRICT);
}
}
MaybeHandle<Object> GetLastIndex(Isolate* isolate, Handle<JSReceiver> recv) {
if (HasInitialRegExpMap(isolate, recv)) {
return handle(JSRegExp::cast(*recv)->LastIndex(), isolate);
} else {
return Object::GetProperty(recv, isolate->factory()->lastIndex_string());
}
}
MaybeHandle<Object> SetAdvancedStringIndex(Isolate* isolate,
Handle<JSReceiver> regexp,
Handle<String> string,
bool unicode) {
Handle<Object> last_index_obj;
ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
GetLastIndex(isolate, regexp), Object);
ASSIGN_RETURN_ON_EXCEPTION(isolate, last_index_obj,
Object::ToLength(isolate, last_index_obj), Object);
const int last_index = Handle<Smi>::cast(last_index_obj)->value();
const int new_last_index =
last_index + AdvanceStringIndex(isolate, string, last_index, unicode);
return SetLastIndex(isolate, regexp, new_last_index);
}
} // namespace
// ES#sec-regexp.prototype-@@match
// RegExp.prototype [ @@match ] ( string )
BUILTIN(RegExpPrototypeMatch) {
HandleScope scope(isolate);
CHECK_RECEIVER(JSReceiver, recv, "RegExp.prototype.@@match");
Handle<Object> string_obj = args.atOrUndefined(isolate, 1);
Handle<String> string;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, string,
Object::ToString(isolate, string_obj));
Handle<Object> global_obj;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, global_obj,
JSReceiver::GetProperty(recv, isolate->factory()->global_string()));
const bool global = global_obj->BooleanValue();
if (!global) {
RETURN_RESULT_OR_FAILURE(isolate,
RegExpExec(isolate, recv, string,
isolate->factory()->undefined_value()));
}
Handle<Object> unicode_obj;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, unicode_obj,
JSReceiver::GetProperty(recv, isolate->factory()->unicode_string()));
const bool unicode = unicode_obj->BooleanValue();
RETURN_FAILURE_ON_EXCEPTION(isolate, SetLastIndex(isolate, recv, 0));
static const int kInitialArraySize = 8;
Handle<FixedArray> elems =
isolate->factory()->NewFixedArrayWithHoles(kInitialArraySize);
int n = 0;
for (;; n++) {
Handle<Object> result;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, result, RegExpExec(isolate, recv, string,
isolate->factory()->undefined_value()));
if (result->IsNull(isolate)) {
if (n == 0) return isolate->heap()->null_value();
break;
}
Handle<Object> match_obj;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match_obj,
Object::GetElement(isolate, result, 0));
Handle<String> match;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, match,
Object::ToString(isolate, match_obj));
elems = FixedArray::SetAndGrow(elems, n, match);
if (match->length() == 0) {
RETURN_FAILURE_ON_EXCEPTION(
isolate, SetAdvancedStringIndex(isolate, recv, string, unicode));
}
}
elems->Shrink(n);
return *isolate->factory()->NewJSArrayWithElements(elems);
}
// ES#sec-regexp.prototype-@@search
// RegExp.prototype [ @@search ] ( string )
BUILTIN(RegExpPrototypeSearch) {
HandleScope scope(isolate);
CHECK_RECEIVER(JSReceiver, recv, "RegExp.prototype.@@search");
Handle<Object> string_obj = args.atOrUndefined(isolate, 1);
Handle<String> string;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, string,
Object::ToString(isolate, string_obj));
Handle<Object> previous_last_index_obj;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, previous_last_index_obj,
GetLastIndex(isolate, recv));
if (!previous_last_index_obj->IsSmi() ||
Smi::cast(*previous_last_index_obj)->value() != 0) {
RETURN_FAILURE_ON_EXCEPTION(isolate, SetLastIndex(isolate, recv, 0));
}
Handle<Object> result;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, result,
RegExpExec(isolate, recv, string, isolate->factory()->undefined_value()));
Handle<Object> current_last_index_obj;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, current_last_index_obj,
GetLastIndex(isolate, recv));
Maybe<bool> is_last_index_unchanged =
Object::Equals(current_last_index_obj, previous_last_index_obj);
if (is_last_index_unchanged.IsNothing()) return isolate->pending_exception();
if (!is_last_index_unchanged.FromJust()) {
if (previous_last_index_obj->IsSmi()) {
RETURN_FAILURE_ON_EXCEPTION(
isolate, SetLastIndex(isolate, recv,
Smi::cast(*previous_last_index_obj)->value()));
} else {
RETURN_FAILURE_ON_EXCEPTION(
isolate,
Object::SetProperty(recv, isolate->factory()->lastIndex_string(),
previous_last_index_obj, STRICT));
}
}
if (result->IsNull(isolate)) return Smi::FromInt(-1);
RETURN_RESULT_OR_FAILURE(
isolate, Object::GetProperty(result, isolate->factory()->index_string()));
}
} // namespace internal
} // namespace v8
......@@ -583,10 +583,13 @@ namespace internal {
TFJ(RegExpPrototypeFlagsGetter, 1) \
TFJ(RegExpPrototypeGlobalGetter, 1) \
TFJ(RegExpPrototypeIgnoreCaseGetter, 1) \
CPP(RegExpPrototypeMatch) \
TFJ(RegExpPrototypeMultilineGetter, 1) \
CPP(RegExpPrototypeSearch) \
CPP(RegExpPrototypeSourceGetter) \
CPP(RegExpPrototypeSpeciesGetter) \
TFJ(RegExpPrototypeStickyGetter, 1) \
CPP(RegExpPrototypeTest) \
CPP(RegExpPrototypeToString) \
TFJ(RegExpPrototypeUnicodeGetter, 1) \
CPP(RegExpRightContextGetter) \
......
......@@ -198,6 +198,7 @@ enum ContextLookupFlags {
V(PROXY_FUNCTION_INDEX, JSFunction, proxy_function) \
V(PROXY_FUNCTION_MAP_INDEX, Map, proxy_function_map) \
V(PROXY_MAP_INDEX, Map, proxy_map) \
V(REGEXP_EXEC_FUNCTION_INDEX, JSFunction, regexp_exec_function) \
V(REGEXP_FUNCTION_INDEX, JSFunction, regexp_function) \
V(REGEXP_RESULT_MAP_INDEX, Map, regexp_result_map) \
V(SCRIPT_CONTEXT_TABLE_INDEX, ScriptContextTable, script_context_table) \
......
......@@ -95,7 +95,7 @@
V(isView_string, "isView") \
V(KeyedLoadMonomorphic_string, "KeyedLoadMonomorphic") \
V(KeyedStoreMonomorphic_string, "KeyedStoreMonomorphic") \
V(last_index_string, "lastIndex") \
V(lastIndex_string, "lastIndex") \
V(length_string, "length") \
V(line_string, "line") \
V(literal_string, "literal") \
......
......@@ -103,7 +103,6 @@ macro RETURN_NEW_RESULT_FROM_MATCH_INFO(MATCHINFO, STRING)
endmacro
// ES#sec-regexpexec Runtime Semantics: RegExpExec ( R, S )
// Also takes an optional exec method in case our caller
// has already fetched exec.
......@@ -123,19 +122,6 @@ function RegExpSubclassExec(regexp, string, exec) {
%SetForceInlineFlag(RegExpSubclassExec);
// ES#sec-regexp.prototype.test RegExp.prototype.test ( S )
function RegExpSubclassTest(string) {
if (!IS_RECEIVER(this)) {
throw %make_type_error(kIncompatibleMethodReceiver,
'RegExp.prototype.test', this);
}
string = TO_STRING(string);
var match = RegExpSubclassExec(this, string);
return !IS_NULL(match);
}
%FunctionRemovePrototype(RegExpSubclassTest);
function AtSurrogatePair(subject, index) {
if (index + 1 >= subject.length) return false;
var first = %_StringCharCodeAt(subject, index);
......@@ -289,39 +275,6 @@ function RegExpSubclassSplit(string, limit) {
%FunctionRemovePrototype(RegExpSubclassSplit);
// ES#sec-regexp.prototype-@@match
// RegExp.prototype [ @@match ] ( string )
function RegExpSubclassMatch(string) {
if (!IS_RECEIVER(this)) {
throw %make_type_error(kIncompatibleMethodReceiver,
"RegExp.prototype.@@match", this);
}
string = TO_STRING(string);
var global = this.global;
if (!global) return RegExpSubclassExec(this, string);
var unicode = this.unicode;
this.lastIndex = 0;
var array = new InternalArray();
var n = 0;
var result;
while (true) {
result = RegExpSubclassExec(this, string);
if (IS_NULL(result)) {
if (n === 0) return null;
break;
}
var matchStr = TO_STRING(result[0]);
array[n] = matchStr;
if (matchStr === "") SetAdvancedStringIndex(this, string, unicode);
n++;
}
var resultArray = [];
%MoveArrayContents(array, resultArray);
return resultArray;
}
%FunctionRemovePrototype(RegExpSubclassMatch);
// Legacy implementation of RegExp.prototype[Symbol.replace] which
// doesn't properly call the underlying exec method.
......@@ -716,32 +669,11 @@ function RegExpSubclassReplace(string, replace) {
%FunctionRemovePrototype(RegExpSubclassReplace);
// ES#sec-regexp.prototype-@@search
// RegExp.prototype [ @@search ] ( string )
function RegExpSubclassSearch(string) {
if (!IS_RECEIVER(this)) {
throw %make_type_error(kIncompatibleMethodReceiver,
"RegExp.prototype.@@search", this);
}
string = TO_STRING(string);
var previousLastIndex = this.lastIndex;
if (previousLastIndex != 0) this.lastIndex = 0;
var result = RegExpSubclassExec(this, string);
var currentLastIndex = this.lastIndex;
if (currentLastIndex != previousLastIndex) this.lastIndex = previousLastIndex;
if (IS_NULL(result)) return -1;
return result.index;
}
%FunctionRemovePrototype(RegExpSubclassSearch);
// -------------------------------------------------------------------
utils.InstallFunctions(GlobalRegExp.prototype, DONT_ENUM, [
"test", RegExpSubclassTest,
matchSymbol, RegExpSubclassMatch,
replaceSymbol, RegExpSubclassReplace,
searchSymbol, RegExpSubclassSearch,
splitSymbol, RegExpSubclassSplit,
]);
......
......@@ -7030,6 +7030,18 @@ void JSRegExp::SetDataAt(int index, Object* value) {
FixedArray::cast(data())->set(index, value);
}
void JSRegExp::SetLastIndex(int index) {
static const int offset =
kSize + JSRegExp::kLastIndexFieldIndex * kPointerSize;
Smi* value = Smi::FromInt(index);
WRITE_FIELD(this, offset, value);
}
Object* JSRegExp::LastIndex() {
static const int offset =
kSize + JSRegExp::kLastIndexFieldIndex * kPointerSize;
return READ_FIELD(this, offset);
}
ElementsKind JSObject::GetElementsKind() {
ElementsKind kind = map()->elements_kind();
......
......@@ -16506,7 +16506,7 @@ MaybeHandle<JSRegExp> JSRegExp::Initialize(Handle<JSRegExp> regexp,
} else {
// Map has changed, so use generic, but slower, method.
RETURN_ON_EXCEPTION(isolate, JSReceiver::SetProperty(
regexp, factory->last_index_string(),
regexp, factory->lastIndex_string(),
Handle<Smi>(Smi::kZero, isolate), STRICT),
JSRegExp);
}
......
......@@ -8551,6 +8551,9 @@ class JSRegExp: public JSObject {
// Set implementation data after the object has been prepared.
inline void SetDataAt(int index, Object* value);
inline void SetLastIndex(int index);
inline Object* LastIndex();
static int code_index(bool is_latin1) {
if (is_latin1) {
return kIrregexpLatin1CodeIndex;
......
......@@ -78,7 +78,7 @@ bytecodes: [
/* 15 S> */ B(LdrUndefined), R(0),
B(CreateArrayLiteral), U8(0), U8(0), U8(9),
B(Star), R(1),
B(CallJSRuntime), U8(142), R(0), U8(2),
B(CallJSRuntime), U8(143), R(0), U8(2),
/* 44 S> */ B(Return),
]
constant pool: [
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment