Commit eb10dc4c authored by jgruber's avatar jgruber Committed by Commit bot

[regexp] Use consistent map checks for fast paths

These map checks were implemented for TF code already. This CL makes
sure that parts implemented in C++ follow the same logic, which is:

An object is an unmodified regexp if:
1) it's a receiver,
2) its map is the initial regexp map,
3) its prototype is a receiver,
4) and its prototype's map is the initial prototype's initial map.

We can now be smarter in @@replace and @@split since checking maps
(unlike the previous check of RegExp.prototype.exec) is not observable,
so we can perform fast-path checks at a time of our choosing.

BUG=v8:5339,v8:5434,v8:5123

Review-Url: https://chromiumcodereview.appspot.com/2434983002
Cr-Commit-Position: refs/heads/master@{#40501}
parent da2f6103
......@@ -1290,23 +1290,17 @@ BUILTIN(RegExpPrototypeSplit) {
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(isolate, string,
Object::ToString(isolate, string_obj));
if (RegExpUtils::IsUnmodifiedRegExp(isolate, recv)) {
RETURN_RESULT_OR_FAILURE(
isolate,
RegExpSplit(isolate, Handle<JSRegExp>::cast(recv), string, limit_obj));
}
Handle<JSFunction> regexp_fun = isolate->regexp_function();
Handle<Object> ctor;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, ctor, SpeciesConstructor(isolate, recv, regexp_fun));
if (recv->IsJSRegExp() && *ctor == *regexp_fun) {
Handle<Object> exec;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, exec, JSObject::GetProperty(
recv, factory->NewStringFromAsciiChecked("exec")));
if (RegExpUtils::IsBuiltinExec(exec)) {
RETURN_RESULT_OR_FAILURE(
isolate, RegExpSplit(isolate, Handle<JSRegExp>::cast(recv), string,
limit_obj));
}
}
Handle<Object> flags_obj;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, flags_obj, JSObject::GetProperty(recv, factory->flags_string()));
......
......@@ -136,13 +136,24 @@ Maybe<bool> RegExpUtils::IsRegExp(Isolate* isolate, Handle<Object> object) {
return Just(object->IsJSRegExp());
}
bool RegExpUtils::IsBuiltinExec(Handle<Object> exec) {
if (!exec->IsJSFunction()) return false;
bool RegExpUtils::IsUnmodifiedRegExp(Isolate* isolate, Handle<Object> obj) {
// TODO(ishell): Update this check once map changes for constant field
// tracking are landing.
Code* code = Handle<JSFunction>::cast(exec)->code();
if (code == nullptr) return false;
if (!obj->IsJSReceiver()) return false;
return (code->builtin_index() == Builtins::kRegExpPrototypeExec);
JSReceiver* recv = JSReceiver::cast(*obj);
// Check the receiver's map.
Handle<JSFunction> regexp_function = isolate->regexp_function();
if (recv->map() != regexp_function->initial_map()) return false;
// Check the receiver's prototype's map.
Object* proto = recv->map()->prototype();
if (!proto->IsJSReceiver()) return false;
Handle<Map> initial_proto_initial_map = isolate->regexp_prototype_map();
return (JSReceiver::cast(proto)->map() == *initial_proto_initial_map);
}
int RegExpUtils::AdvanceStringIndex(Isolate* isolate, Handle<String> string,
......
......@@ -34,8 +34,9 @@ class RegExpUtils : public AllStatic {
// Includes checking of the match property.
static Maybe<bool> IsRegExp(Isolate* isolate, Handle<Object> object);
// Checks whether exec is identical to the initial RegExp.prototype.exec.
static bool IsBuiltinExec(Handle<Object> exec);
// Checks whether the given object is an unmodified JSRegExp instance.
// Neither the object's map, nor its prototype's map may be modified.
static bool IsUnmodifiedRegExp(Isolate* isolate, Handle<Object> obj);
// ES#sec-advancestringindex
// AdvanceStringIndex ( S, index, unicode )
......
......@@ -1500,6 +1500,13 @@ RUNTIME_FUNCTION(Runtime_RegExpReplace) {
string = String::Flatten(string);
// Fast-path for unmodified JSRegExps.
if (RegExpUtils::IsUnmodifiedRegExp(isolate, recv)) {
RETURN_RESULT_OR_FAILURE(
isolate, RegExpReplace(isolate, Handle<JSRegExp>::cast(recv), string,
replace_obj));
}
const int length = string->length();
const bool functional_replace = replace_obj->IsCallable();
......@@ -1527,35 +1534,14 @@ RUNTIME_FUNCTION(Runtime_RegExpReplace) {
RegExpUtils::SetLastIndex(isolate, recv, 0));
}
// TODO(jgruber): Here and at all other fast path checks, rely on map checks
// instead.
// TODO(jgruber): We could speed up the fast path by checking flags
// afterwards, but that would violate the spec (which states that exec is
// accessed after global and unicode).
// TODO(adamk): this fast path is wrong as we doesn't ensure that 'exec'
// is actually a data property on RegExp.prototype.
Handle<Object> exec = factory->undefined_value();
if (recv->IsJSRegExp()) {
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, exec, JSObject::GetProperty(
recv, factory->NewStringFromAsciiChecked("exec")));
if (RegExpUtils::IsBuiltinExec(exec)) {
RETURN_RESULT_OR_FAILURE(
isolate, RegExpReplace(isolate, Handle<JSRegExp>::cast(recv), string,
replace_obj));
}
}
Zone zone(isolate->allocator(), ZONE_NAME);
ZoneVector<Handle<Object>> results(&zone);
while (true) {
Handle<Object> result;
ASSIGN_RETURN_FAILURE_ON_EXCEPTION(
isolate, result, RegExpUtils::RegExpExec(isolate, recv, string, exec));
// Ensure exec will be read again on the next loop through.
exec = factory->undefined_value();
isolate, result, RegExpUtils::RegExpExec(isolate, recv, string,
factory->undefined_value()));
if (result->IsNull(isolate)) break;
......
......@@ -45,8 +45,12 @@ assertEquals(4, get_count);
// Overridden flag getters affect string.replace
// TODO(adamk): Add more tests here once we've switched
// to use [[OriginalFlags]] in more cases.
assertEquals(expected, string.replace(r3, "X"));
assertEquals(5, get_count);
// TODO(jgruber): This exact case actually causes an infinite loop in the spec
// (@@replace sees global = true while BuiltinExec sees global = false).
// Comment the test for now and remove / fix once this has been resolved on
// the spec side.
//assertEquals(expected, string.replace(r3, "X"));
//assertEquals(5, get_count);
function testName(name) {
......
......@@ -105,10 +105,6 @@
'built-ins/RegExp/prototype/Symbol.replace/y-init-lastindex': [FAIL],
'built-ins/RegExp/prototype/Symbol.replace/y-set-lastindex': [FAIL],
# https://bugs.chromium.org/p/v8/issues/detail?id=5123
'built-ins/RegExp/prototype/Symbol.replace/coerce-global': [FAIL],
'built-ins/RegExp/prototype/Symbol.replace/coerce-unicode': [FAIL],
###### END REGEXP SUBCLASSING SECTION ######
# https://code.google.com/p/v8/issues/detail?id=4360
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment