Commit 9403edfa authored by jgruber's avatar jgruber Committed by Commit bot

[regexp] Named capture support for string replacements

This implements support for named captures in
RegExp.prototype[@@replace] for when the replaceValue is not callable.

Named captures can be referenced from replacement strings by using the
"$<name>" syntax. A couple of examples:

let re = /(?<fst>.)(?<snd>.)/u;
"abcd".replace(re, "$<snd>$<fst>")  // "bacd"
"abcd".replace(re, "$2$1")     // "bacd" (numbered refs work as always)
"abcd".replace(re, "$<snd")    // SyntaxError (unterminated named ref)
"abcd".replace(re, "$<42$1>")  // "cd" (invalid name)
"abcd".replace(re, "$<thd>")   // "cd" (non-existent name)
"abcd".replace(/(?<fst>.)|(?<snd>.)/u, "$<snd>")  // "cd" (non-matched capture)

Support is currently behind the --harmony-regexp-named-captures flag.

BUG=v8:5437

Review-Url: https://codereview.chromium.org/2775303002
Cr-Original-Commit-Position: refs/heads/master@{#44171}
Committed: https://chromium.googlesource.com/v8/v8/+/17f13863b64b25eccf565e0aa9c4c441f0562b84
Review-Url: https://codereview.chromium.org/2775303002
Cr-Commit-Position: refs/heads/master@{#44182}
parent b95b6362
......@@ -456,6 +456,7 @@ class ErrorUtils : public AllStatic {
T(ReduceNoInitial, "Reduce of empty array with no initial value") \
T(RegExpFlags, \
"Cannot supply flags when constructing one RegExp from another") \
T(RegExpInvalidReplaceString, "Invalid replacement string: '%'") \
T(RegExpNonObject, "% getter called on non-object %") \
T(RegExpNonRegExp, "% getter called on non-RegExp object") \
T(ReinitializeIntl, "Trying to re-initialize % object.") \
......
......@@ -11403,6 +11403,8 @@ int String::IndexOf(Isolate* isolate, Handle<String> receiver,
MaybeHandle<String> String::GetSubstitution(Isolate* isolate, Match* match,
Handle<String> replacement) {
DCHECK_IMPLIES(match->HasNamedCaptures(), FLAG_harmony_regexp_named_captures);
Factory* factory = isolate->factory();
const int replacement_length = replacement->length();
......@@ -11489,6 +11491,37 @@ MaybeHandle<String> String::GetSubstitution(Isolate* isolate, Match* match,
continue_from_ix = peek_ix + advance;
break;
}
case '<': { // $<name> - named capture
if (!match->HasNamedCaptures()) {
builder.AppendCharacter('$');
continue_from_ix = peek_ix;
break;
}
Handle<String> bracket_string =
factory->LookupSingleCharacterStringFromCode('>');
const int closing_bracket_ix =
String::IndexOf(isolate, replacement, bracket_string, peek_ix + 1);
if (closing_bracket_ix == -1) {
THROW_NEW_ERROR(
isolate,
NewSyntaxError(MessageTemplate::kRegExpInvalidReplaceString,
replacement),
String);
}
Handle<String> capture_name =
factory->NewSubString(replacement, peek_ix + 1, closing_bracket_ix);
bool capture_exists;
Handle<String> capture;
ASSIGN_RETURN_ON_EXCEPTION(
isolate, capture,
match->GetNamedCapture(capture_name, &capture_exists), String);
if (capture_exists) builder.AppendString(capture);
continue_from_ix = closing_bracket_ix + 1;
break;
}
default:
builder.AppendCharacter('$');
continue_from_ix = peek_ix;
......@@ -11659,6 +11692,15 @@ bool String::IsUtf8EqualTo(Vector<const char> str, bool allow_prefix_match) {
return (allow_prefix_match || i == slen) && remaining_in_str == 0;
}
template <>
bool String::IsEqualTo(Vector<const uint8_t> str) {
return IsOneByteEqualTo(str);
}
template <>
bool String::IsEqualTo(Vector<const uc16> str) {
return IsTwoByteEqualTo(str);
}
bool String::IsOneByteEqualTo(Vector<const uint8_t> str) {
int slen = length();
......
......@@ -8269,6 +8269,7 @@ class JSRegExp: public JSObject {
Handle<String> flags_string);
inline Type TypeTag();
// Number of captures (without the match itself).
inline int CaptureCount();
inline Flags GetFlags();
inline String* Pattern();
......@@ -8341,7 +8342,7 @@ class JSRegExp: public JSObject {
// Number of captures in the compiled regexp.
static const int kIrregexpCaptureCountIndex = kDataIndex + 5;
// Maps names of named capture groups (at indices 2i) to their corresponding
// capture group indices (at indices 2i + 1).
// (1-based) capture group indices (at indices 2i + 1).
static const int kIrregexpCaptureNameMapIndex = kDataIndex + 6;
static const int kIrregexpDataSize = kIrregexpCaptureNameMapIndex + 1;
......@@ -9196,10 +9197,15 @@ class String: public Name {
class Match {
public:
virtual Handle<String> GetMatch() = 0;
virtual MaybeHandle<String> GetCapture(int i, bool* capture_exists) = 0;
virtual Handle<String> GetPrefix() = 0;
virtual Handle<String> GetSuffix() = 0;
virtual int CaptureCount() = 0;
virtual bool HasNamedCaptures() = 0;
virtual MaybeHandle<String> GetCapture(int i, bool* capture_exists) = 0;
virtual MaybeHandle<String> GetNamedCapture(Handle<String> name,
bool* capture_exists) = 0;
virtual ~Match() {}
};
......@@ -9214,6 +9220,11 @@ class String: public Name {
inline bool Equals(String* other);
inline static bool Equals(Handle<String> one, Handle<String> two);
bool IsUtf8EqualTo(Vector<const char> str, bool allow_prefix_match = false);
// Dispatches to Is{One,Two}ByteEqualTo.
template <typename Char>
bool IsEqualTo(Vector<const Char> str);
bool IsOneByteEqualTo(Vector<const uint8_t> str);
bool IsTwoByteEqualTo(Vector<const uc16> str);
......
This diff is collapsed.
......@@ -31,13 +31,20 @@ RUNTIME_FUNCTION(Runtime_GetSubstitution) {
: match_(match), prefix_(prefix), suffix_(suffix) {}
Handle<String> GetMatch() override { return match_; }
Handle<String> GetPrefix() override { return prefix_; }
Handle<String> GetSuffix() override { return suffix_; }
int CaptureCount() override { return 0; }
bool HasNamedCaptures() override { return false; }
MaybeHandle<String> GetCapture(int i, bool* capture_exists) override {
*capture_exists = false;
return match_; // Return arbitrary string handle.
}
Handle<String> GetPrefix() override { return prefix_; }
Handle<String> GetSuffix() override { return suffix_; }
int CaptureCount() override { return 0; }
MaybeHandle<String> GetNamedCapture(Handle<String> name,
bool* capture_exists) override {
UNREACHABLE();
return MaybeHandle<String>();
}
private:
Handle<String> match_, prefix_, suffix_;
......
......@@ -217,7 +217,7 @@ bytecodes: [
B(TestTypeOf), U8(5),
B(JumpIfFalse), U8(4),
B(Jump), U8(18),
B(Wide), B(LdaSmi), I16(129),
B(Wide), B(LdaSmi), I16(130),
B(Star), R(12),
B(LdaConstant), U8(11),
B(Star), R(13),
......@@ -701,7 +701,7 @@ bytecodes: [
B(TestTypeOf), U8(5),
B(JumpIfFalse), U8(4),
B(Jump), U8(18),
B(Wide), B(LdaSmi), I16(129),
B(Wide), B(LdaSmi), I16(130),
B(Star), R(12),
B(LdaConstant), U8(11),
B(Star), R(13),
......@@ -1219,7 +1219,7 @@ bytecodes: [
B(TestTypeOf), U8(5),
B(JumpIfFalse), U8(4),
B(Jump), U8(18),
B(Wide), B(LdaSmi), I16(129),
B(Wide), B(LdaSmi), I16(130),
B(Star), R(12),
B(LdaConstant), U8(11),
B(Star), R(13),
......@@ -1627,7 +1627,7 @@ bytecodes: [
B(TestTypeOf), U8(5),
B(JumpIfFalse), U8(4),
B(Jump), U8(18),
B(Wide), B(LdaSmi), I16(129),
B(Wide), B(LdaSmi), I16(130),
B(Star), R(11),
B(LdaConstant), U8(10),
B(Star), R(12),
......
......@@ -85,7 +85,7 @@ bytecodes: [
B(TestTypeOf), U8(5),
B(JumpIfFalse), U8(4),
B(Jump), U8(18),
B(Wide), B(LdaSmi), I16(129),
B(Wide), B(LdaSmi), I16(130),
B(Star), R(12),
B(LdaConstant), U8(8),
B(Star), R(13),
......@@ -226,7 +226,7 @@ bytecodes: [
B(TestTypeOf), U8(5),
B(JumpIfFalse), U8(4),
B(Jump), U8(18),
B(Wide), B(LdaSmi), I16(129),
B(Wide), B(LdaSmi), I16(130),
B(Star), R(13),
B(LdaConstant), U8(8),
B(Star), R(14),
......@@ -380,7 +380,7 @@ bytecodes: [
B(TestTypeOf), U8(5),
B(JumpIfFalse), U8(4),
B(Jump), U8(18),
B(Wide), B(LdaSmi), I16(129),
B(Wide), B(LdaSmi), I16(130),
B(Star), R(12),
B(LdaConstant), U8(8),
B(Star), R(13),
......@@ -524,7 +524,7 @@ bytecodes: [
B(TestTypeOf), U8(5),
B(JumpIfFalse), U8(4),
B(Jump), U8(18),
B(Wide), B(LdaSmi), I16(129),
B(Wide), B(LdaSmi), I16(130),
B(Star), R(11),
B(LdaConstant), U8(10),
B(Star), R(12),
......
......@@ -493,7 +493,7 @@ bytecodes: [
B(TestTypeOf), U8(5),
B(JumpIfFalse), U8(4),
B(Jump), U8(18),
B(Wide), B(LdaSmi), I16(129),
B(Wide), B(LdaSmi), I16(130),
B(Star), R(11),
B(LdaConstant), U8(10),
B(Star), R(12),
......
......@@ -208,3 +208,59 @@ function toSlowMode(re) {
});
assertEquals("bacd", result);
}
// @@replace with a string replacement argument (no named captures).
{
let re = /(.)(.)/u;
assertEquals("$<snd>$<fst>cd", "abcd".replace(re, "$<snd>$<fst>"));
assertEquals("bacd", "abcd".replace(re, "$2$1"));
assertEquals("$<sndcd", "abcd".replace(re, "$<snd"));
assertEquals("$<42a>cd", "abcd".replace(re, "$<42$1>"));
assertEquals("$<thd>cd", "abcd".replace(re, "$<thd>"));
assertEquals("$<a>cd", "abcd".replace(re, "$<$1>"));
}
// @@replace with a string replacement argument (global, named captures).
{
let re = /(?<fst>.)(?<snd>.)/gu;
assertEquals("badc", "abcd".replace(re, "$<snd>$<fst>"));
assertEquals("badc", "abcd".replace(re, "$2$1"));
assertThrows(() => "abcd".replace(re, "$<snd"), SyntaxError);
assertEquals("", "abcd".replace(re, "$<42$1>"));
assertEquals("", "abcd".replace(re, "$<thd>"));
assertEquals("", "abcd".replace(re, "$<$1>"));
}
// @@replace with a string replacement argument (non-global, named captures).
{
let re = /(?<fst>.)(?<snd>.)/u;
assertEquals("bacd", "abcd".replace(re, "$<snd>$<fst>"));
assertEquals("bacd", "abcd".replace(re, "$2$1"));
assertThrows(() => "abcd".replace(re, "$<snd"), SyntaxError);
assertEquals("cd", "abcd".replace(re, "$<42$1>"));
assertEquals("cd", "abcd".replace(re, "$<thd>"));
assertEquals("cd", "abcd".replace(re, "$<$1>"));
}
// @@replace with a string replacement argument (slow, global, named captures).
{
let re = toSlowMode(/(?<fst>.)(?<snd>.)/gu);
assertEquals("badc", "abcd".replace(re, "$<snd>$<fst>"));
assertEquals("badc", "abcd".replace(re, "$2$1"));
assertThrows(() => "abcd".replace(re, "$<snd"), SyntaxError);
assertEquals("", "abcd".replace(re, "$<42$1>"));
assertEquals("", "abcd".replace(re, "$<thd>"));
assertEquals("", "abcd".replace(re, "$<$1>"));
}
// @@replace with a string replacement argument (slow, non-global,
// named captures).
{
let re = toSlowMode(/(?<fst>.)(?<snd>.)/u);
assertEquals("bacd", "abcd".replace(re, "$<snd>$<fst>"));
assertEquals("bacd", "abcd".replace(re, "$2$1"));
assertThrows(() => "abcd".replace(re, "$<snd"), SyntaxError);
assertEquals("cd", "abcd".replace(re, "$<42$1>"));
assertEquals("cd", "abcd".replace(re, "$<thd>"));
assertEquals("cd", "abcd".replace(re, "$<$1>"));
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment