Commit 33a4faa4, authored by jgruber, committed by Commit bot

[regexp] Port RegExp.prototype[@@replace]

This moves the implementation of @@replace from regexp.js to builtins-regexp.cc
(the TurboFan fast path) and runtime-regexp.cc (slow path). The fast path
handles all cases in which the regexp itself is an unmodified JSRegExp
instance, the given 'replace' argument is not callable and does not contain any
'$' characters (i.e. we are doing a string replacement).

BUG=v8:5339

Review-Url: https://codereview.chromium.org/2398423002
Cr-Commit-Position: refs/heads/master@{#40253}
parent 897d89ee
......@@ -1700,7 +1700,6 @@ void Genesis::InitializeGlobal(Handle<JSGlobalObject> global_object,
shared->set_instance_class_name(isolate->heap()->RegExp_string());
shared->DontAdaptArguments();
shared->set_length(2);
{
// RegExp.prototype setup.
......@@ -1746,6 +1745,13 @@ void Genesis::InitializeGlobal(Handle<JSGlobalObject> global_object,
InstallFunction(prototype, fun, factory->match_symbol(), DONT_ENUM);
}
{
Handle<JSFunction> fun = SimpleCreateFunction(
isolate, factory->InternalizeUtf8String("[Symbol.replace]"),
Builtins::kRegExpPrototypeReplace, 2, true);
InstallFunction(prototype, fun, factory->replace_symbol(), DONT_ENUM);
}
{
Handle<JSFunction> fun = SimpleCreateFunction(
isolate, factory->InternalizeUtf8String("[Symbol.search]"),
......@@ -1759,6 +1765,10 @@ void Genesis::InitializeGlobal(Handle<JSGlobalObject> global_object,
Builtins::kRegExpPrototypeSplit, 2, false);
InstallFunction(prototype, fun, factory->split_symbol(), DONT_ENUM);
}
// Store the initial RegExp.prototype map. This is used in fast-path
// checks. Do not alter the prototype after this point.
isolate->native_context()->set_regexp_prototype_map(prototype->map());
}
{
......
This diff is collapsed.
......@@ -587,6 +587,7 @@ namespace internal {
TFJ(RegExpPrototypeIgnoreCaseGetter, 1) \
CPP(RegExpPrototypeMatch) \
TFJ(RegExpPrototypeMultilineGetter, 1) \
TFJ(RegExpPrototypeReplace, 3) \
CPP(RegExpPrototypeSearch) \
CPP(RegExpPrototypeSourceGetter) \
CPP(RegExpPrototypeSpeciesGetter) \
......
This diff is collapsed.
......@@ -360,6 +360,19 @@ class CodeStubAssembler : public compiler::CodeAssembler {
compiler::Node* parent,
compiler::Node* offset);
// Allocate a one-byte ConsString with the given length, first and second
// parts. |length| is expected to be tagged, and |first| and |second| are
// expected to be one-byte strings.
compiler::Node* AllocateOneByteConsString(compiler::Node* length,
compiler::Node* first,
compiler::Node* second);
// Allocate a two-byte ConsString with the given length, first and second
// parts. |length| is expected to be tagged, and |first| and |second| are
// expected to be two-byte strings.
compiler::Node* AllocateTwoByteConsString(compiler::Node* length,
compiler::Node* first,
compiler::Node* second);
// Allocate a RegExpResult with the given length (the number of captures,
// including the match itself), index (the index where the match starts),
// and input string. |length| and |index| are expected to be tagged, and
......@@ -420,12 +433,16 @@ class CodeStubAssembler : public compiler::CodeAssembler {
ParameterMode mode = INTEGER_PARAMETERS);
// Copies |character_count| elements from |from_string| to |to_string|
// starting at the |from_index|'th character. |from_index| and
// |character_count| must be Smis s.t.
// 0 <= |from_index| <= |from_index| + |character_count| < from_string.length.
// starting at the |from_index|'th character. |from_string| and |to_string|
// must be either both one-byte strings or both two-byte strings.
// |from_index|, |to_index| and |character_count| must be Smis s.t.
// 0 <= |from_index| <= |from_index| + |character_count| <= from_string.length
// and
// 0 <= |to_index| <= |to_index| + |character_count| <= to_string.length.
void CopyStringCharacters(compiler::Node* from_string,
compiler::Node* to_string,
compiler::Node* from_index,
compiler::Node* to_index,
compiler::Node* character_count,
String::Encoding encoding);
......@@ -516,6 +533,19 @@ class CodeStubAssembler : public compiler::CodeAssembler {
compiler::Node* SubString(compiler::Node* context, compiler::Node* string,
compiler::Node* from, compiler::Node* to);
// Return a new string object produced by concatenating |first| with |second|.
compiler::Node* StringConcat(compiler::Node* context, compiler::Node* first,
compiler::Node* second);
// Return the first index >= {from} at which {needle_char} was found in
// {string}, or -1 if such an index does not exist. The returned value is
// a Smi, {string} is expected to be a String, {needle_char} is an intptr,
// and {from} is expected to be tagged.
compiler::Node* StringIndexOfChar(compiler::Node* context,
compiler::Node* string,
compiler::Node* needle_char,
compiler::Node* from);
compiler::Node* StringFromCodePoint(compiler::Node* codepoint,
UnicodeEncoding encoding);
......
......@@ -203,6 +203,7 @@ enum ContextLookupFlags {
V(PROXY_MAP_INDEX, Map, proxy_map) \
V(REGEXP_EXEC_FUNCTION_INDEX, JSFunction, regexp_exec_function) \
V(REGEXP_FUNCTION_INDEX, JSFunction, regexp_function) \
V(REGEXP_PROTOTYPE_MAP_INDEX, Map, regexp_prototype_map) \
V(REGEXP_RESULT_MAP_INDEX, Map, regexp_result_map) \
V(SCRIPT_CONTEXT_TABLE_INDEX, ScriptContextTable, script_context_table) \
V(SCRIPT_FUNCTION_INDEX, JSFunction, script_function) \
......
......@@ -208,7 +208,6 @@ function PostNatives(utils) {
"promise_state_symbol",
"reflect_apply",
"reflect_construct",
"regexp_flags_symbol",
"to_string_tag_symbol",
"object_to_string",
"species_symbol",
......
This diff is collapsed.
......@@ -11652,6 +11652,101 @@ int String::IndexOf(Isolate* isolate, Handle<String> receiver,
start_index);
}
// ES#sec-getsubstitution
// Expands the $-expressions found in |replacement| using the data supplied by
// |match| and returns the resulting string. Recognized expressions:
//   $$        a literal '$'
//   $&        the matched substring (match->GetMatch())
//   $`        the text preceding the match (match->GetPrefix())
//   $'        the text following the match (match->GetSuffix())
//   $n, $nn   capture group n / nn, for 1 <= index < match->CaptureCount()
// Any other '$' is copied through verbatim. If |replacement| contains no '$'
// at all, the (flattened) |replacement| itself is returned. A failing
// match->GetCapture() call makes this function return early through
// ASSIGN_RETURN_ON_EXCEPTION.
MaybeHandle<String> String::GetSubstitution(Isolate* isolate, Match* match,
                                            Handle<String> replacement) {
  Factory* factory = isolate->factory();

  const int replacement_length = replacement->length();
  const int captures_length = match->CaptureCount();

  replacement = String::Flatten(replacement);

  Handle<String> dollar_string =
      factory->LookupSingleCharacterStringFromCode('$');
  int dollar_ix = String::IndexOf(isolate, replacement, dollar_string, 0);
  // Nothing to expand.
  if (dollar_ix < 0) return replacement;

  IncrementalStringBuilder builder(isolate);

  // Copy the literal text in front of the first '$'.
  if (dollar_ix > 0) {
    builder.AppendString(factory->NewSubString(replacement, 0, dollar_ix));
  }

  for (;;) {
    // |cursor| is the index of the first character not yet consumed; it starts
    // right behind the current '$'.
    int cursor = dollar_ix + 1;

    if (cursor >= replacement_length) {
      // A trailing '$' stands for itself.
      builder.AppendCharacter('$');
    } else {
      const uint16_t peek = replacement->Get(cursor);
      switch (peek) {
        case '$':  // $$
          ++cursor;
          builder.AppendCharacter('$');
          break;
        case '&':  // $& - match
          ++cursor;
          builder.AppendString(match->GetMatch());
          break;
        case '`':  // $` - prefix
          ++cursor;
          builder.AppendString(match->GetPrefix());
          break;
        case '\'':  // $' - suffix
          ++cursor;
          builder.AppendString(match->GetSuffix());
          break;
        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9': {
          // Valid indices are $1 .. $9, $01 .. $09 and $10 .. $99.
          int capture_ix = peek - '0';
          int digit_count = 1;

          // Prefer the two-digit reading, but only if it names an existing
          // capture; otherwise fall back to the single digit.
          if (cursor + 1 < replacement_length) {
            const uint16_t second = replacement->Get(cursor + 1);
            if (second >= '0' && second <= '9') {
              const int two_digit_ix = capture_ix * 10 + (second - '0');
              if (two_digit_ix < captures_length) {
                capture_ix = two_digit_ix;
                digit_count = 2;
              }
            }
          }

          if (capture_ix == 0 || capture_ix >= captures_length) {
            // Not a capture reference: emit the '$' and leave the digits to
            // be copied as ordinary text.
            builder.AppendCharacter('$');
            break;
          }

          bool capture_exists;
          Handle<String> capture;
          ASSIGN_RETURN_ON_EXCEPTION(
              isolate, capture, match->GetCapture(capture_ix, &capture_exists),
              String);
          if (capture_exists) builder.AppendString(capture);
          cursor += digit_count;
          break;
        }
        default:
          // '$' followed by an unrecognized character is kept literally.
          builder.AppendCharacter('$');
          break;
      }
    }

    // Go to the next $ in the replacement.
    dollar_ix = String::IndexOf(isolate, replacement, dollar_string, cursor);

    if (dollar_ix < 0) {
      // No more $ characters: append whatever is left of the replacement and
      // finish.
      if (cursor < replacement_length) {
        builder.AppendString(
            factory->NewSubString(replacement, cursor, replacement_length));
      }
      return builder.Finish();
    }

    // Append the literal text between the consumed expression and the next $.
    if (dollar_ix > cursor) {
      builder.AppendString(
          factory->NewSubString(replacement, cursor, dollar_ix));
    }
  }

  UNREACHABLE();
  return MaybeHandle<String>();
}
namespace { // for String.Prototype.lastIndexOf
template <typename schar, typename pchar>
......
......@@ -9484,6 +9484,25 @@ class String: public Name {
static Object* LastIndexOf(Isolate* isolate, Handle<Object> receiver,
Handle<Object> search, Handle<Object> position);
// Encapsulates logic related to a match and its capture groups as required
// by GetSubstitution. GetSubstitution only talks to the match through this
// abstract interface.
class Match {
public:
// Returns the matched substring; GetSubstitution inserts it for "$&".
virtual Handle<String> GetMatch() = 0;
// Returns capture group |i|. GetSubstitution only asks for indices in the
// range 1 <= i < CaptureCount() and appends the returned string only when
// |*capture_exists| has been set to true. The result is a MaybeHandle;
// GetSubstitution returns early if it cannot be unwrapped.
virtual MaybeHandle<String> GetCapture(int i, bool* capture_exists) = 0;
// Returns the string GetSubstitution inserts for "$`" (the prefix).
virtual Handle<String> GetPrefix() = 0;
// Returns the string GetSubstitution inserts for "$'" (the suffix).
virtual Handle<String> GetSuffix() = 0;
// Exclusive upper bound for capture indices accepted by GetSubstitution.
// NOTE(review): presumably this counts the match itself as index 0, since
// index 0 is never treated as a capture reference -- confirm with
// implementers.
virtual int CaptureCount() = 0;
// Virtual destructor so implementations can be destroyed through a Match*.
virtual ~Match() {}
};
// ES#sec-getsubstitution
// GetSubstitution(matched, str, position, captures, replacement)
// Expand the $-expressions in the string and return a new string with
// the result.
MUST_USE_RESULT static MaybeHandle<String> GetSubstitution(
Isolate* isolate, Match* match, Handle<String> replacement);
// String equality operations.
inline bool Equals(String* other);
inline static bool Equals(Handle<String> one, Handle<String> two);
......
This diff is collapsed.
......@@ -461,10 +461,10 @@ namespace internal {
F(StringSplit, 3, 1) \
F(RegExpExec, 4, 1) \
F(RegExpFlags, 1, 1) \
F(RegExpReplace, 3, 1) \
F(RegExpSource, 1, 1) \
F(RegExpConstructResult, 3, 1) \
F(RegExpInitializeAndCompile, 3, 1) \
F(RegExpExecMultiple, 4, 1) \
F(RegExpExecReThrow, 4, 1) \
F(IsRegExp, 1, 1)
......
......@@ -174,12 +174,6 @@ class FixedArrayBuilder {
int capacity() { return array_->length(); }
Handle<JSArray> ToJSArray(Handle<JSArray> target_array) {
JSArray::SetContent(target_array, array_);
target_array->set_length(Smi::FromInt(length_));
return target_array;
}
private:
Handle<FixedArray> array_;
......
......@@ -78,7 +78,7 @@ bytecodes: [
/* 15 S> */ B(LdrUndefined), R(0),
B(CreateArrayLiteral), U8(0), U8(0), U8(9),
B(Star), R(1),
B(CallJSRuntime), U8(145), R(0), U8(2),
B(CallJSRuntime), U8(146), R(0), U8(2),
/* 44 S> */ B(Return),
]
constant pool: [
......
......@@ -731,3 +731,11 @@ assertEquals(["acbc", "c", "c"], /a(.\2)b(\1)/.exec("aabcacbc"));
// \u{daff}\u{e000} is not a surrogate pair, while \u{daff}\u{dfff} is.
assertEquals(["\u{daff}", "\u{e000}"], "\u{daff}\u{e000}".split(/[a-z]{0,1}/u));
assertEquals(["\u{daff}\u{dfff}"], "\u{daff}\u{dfff}".split(/[a-z]{0,1}/u));
// Test that changing a property on RegExp.prototype results in us taking the
// slow path, which executes RegExp.prototype.exec instead of our
// RegExpExecStub.
const RegExpPrototypeExec = RegExp.prototype.exec;
try {
  RegExp.prototype.exec = function() { throw new Error(); };
  assertThrows(() => "abc".replace(/./, ""));
} finally {
  // Always restore the original exec, even if the assertion above fails, so
  // the patched RegExp.prototype cannot leak into anything that runs later.
  RegExp.prototype.exec = RegExpPrototypeExec;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment