Commit 98b8ca89 authored by Martin Bidlingmaier, committed by Commit Bot

[regexp] Support capture groups in experimental engine

This commit adds support for capture groups (as in e.g. /x(123|abc)y/)
in the experimental regexp engine.  Now every InterpreterThread owns a
register array containing (sub)match boundaries. There is a new
instruction to record the current input index in some register.

Submatches in quantifier bodies should be reported only if they occur
during the last repetition.  Thus we reset those registers before
attempting to match the body of a quantifier.  This is implemented with
another new instruction.

Because of concerns about the growing size of the NfaInterpreter object
(which is allocated on the stack), this commit replaces the
`SmallVector` members of the NfaInterpreter with zone-allocated arrays.
Register arrays, which for a fixed regexp are all the same size, are
allocated with a RecyclingZoneAllocator for cheap memory reclamation via
a linked list of equally-sized free blocks.

Possible optimizations for management of register array memory:
1. If there are few registers per thread, then it is likely faster to
   store them inline in the InterpreterThread struct.
2. re2 implements copy-on-write:  InterpreterThreads can share the same
   register array. If a thread attempts to write to a shared register
   array, the register array is cloned first.
3. The register at index 1 contains the end of the match; this is only
   written to right before an ACCEPT statement.  We could make ACCEPT
   equivalent to what's currently CAPTURE 1 followed by ACCEPT.  We
   could then save the memory for register 1 for threads that haven't
   finished yet.  This is particularly interesting if now optimization 1
   kicks in.

Cq-Include-Trybots: luci.v8.try:v8_linux64_fyi_rel_ng
Bug: v8:10765
Change-Id: I2c0503206ce331e13ac9912945bb66736d740197
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2390770
Commit-Queue: Martin Bidlingmaier <mbid@google.com>
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#69929}
parent 10ffb113
......@@ -257,11 +257,15 @@ TNode<JSRegExpResult> RegExpBuiltinsAssembler::ConstructNewResultFromMatchInfo(
TNode<FixedArray> data =
CAST(LoadObjectField(regexp, JSRegExp::kDataOffset));
// We reach this point only if captures exist, implying that this is an
// IRREGEXP JSRegExp.
CSA_ASSERT(this,
SmiEqual(CAST(LoadFixedArrayElement(data, JSRegExp::kTagIndex)),
SmiConstant(JSRegExp::IRREGEXP)));
// We reach this point only if captures exist, implying that the assigned
// regexp engine must be able to handle captures.
CSA_ASSERT(
this,
Word32Or(
SmiEqual(CAST(LoadFixedArrayElement(data, JSRegExp::kTagIndex)),
SmiConstant(JSRegExp::IRREGEXP)),
SmiEqual(CAST(LoadFixedArrayElement(data, JSRegExp::kTagIndex)),
SmiConstant(JSRegExp::EXPERIMENTAL))));
// The names fixed array associates names at even indices with a capture
// index at odd indices.
......
......@@ -1242,10 +1242,8 @@ void JSRegExp::JSRegExpVerify(Isolate* isolate) {
CHECK_EQ(arr.get(JSRegExp::kIrregexpMaxRegisterCountIndex),
uninitialized);
// TODO(mbid,v8:10765): Once the EXPERIMENTAL regexps support captures,
// the capture count should be allowed to be a Smi >= 0.
CHECK_EQ(arr.get(JSRegExp::kIrregexpCaptureCountIndex), Smi::FromInt(0));
CHECK_EQ(arr.get(JSRegExp::kIrregexpCaptureNameMapIndex), uninitialized);
CHECK(arr.get(JSRegExp::kIrregexpCaptureCountIndex).IsSmi());
CHECK_GE(Smi::ToInt(arr.get(JSRegExp::kIrregexpCaptureCountIndex)), 0);
CHECK_EQ(arr.get(JSRegExp::kIrregexpTicksUntilTierUpIndex),
uninitialized);
CHECK_EQ(arr.get(JSRegExp::kIrregexpBacktrackLimit), uninitialized);
......@@ -1282,6 +1280,7 @@ void JSRegExp::JSRegExpVerify(Isolate* isolate) {
CHECK_IMPLIES(uc16_data.IsSmi(), uc16_bytecode.IsSmi());
CHECK(arr.get(JSRegExp::kIrregexpCaptureCountIndex).IsSmi());
CHECK_GE(Smi::ToInt(arr.get(JSRegExp::kIrregexpCaptureCountIndex)), 0);
CHECK(arr.get(JSRegExp::kIrregexpMaxRegisterCountIndex).IsSmi());
CHECK(arr.get(JSRegExp::kIrregexpTicksUntilTierUpIndex).IsSmi());
CHECK(arr.get(JSRegExp::kIrregexpBacktrackLimit).IsSmi());
......
......@@ -67,7 +67,7 @@ String JSRegExp::Pattern() {
Object JSRegExp::CaptureNameMap() {
DCHECK(this->data().IsFixedArray());
DCHECK_EQ(TypeTag(), IRREGEXP);
DCHECK(TypeSupportsCaptures(TypeTag()));
Object value = DataAt(kIrregexpCaptureNameMapIndex);
DCHECK_NE(value, Smi::FromInt(JSRegExp::kUninitializedValue));
return value;
......@@ -85,6 +85,14 @@ void JSRegExp::SetDataAt(int index, Object value) {
FixedArray::cast(data()).set(index, value);
}
// Records the capture name map in the regexp's data array at
// kIrregexpCaptureNameMapIndex. A null handle is stored as Smi zero; any
// other handle is dereferenced and the FixedArray itself is stored.
void JSRegExp::SetCaptureNameMap(Handle<FixedArray> capture_name_map) {
  if (!capture_name_map.is_null()) {
    SetDataAt(JSRegExp::kIrregexpCaptureNameMapIndex, *capture_name_map);
    return;
  }
  // No map was supplied: store the Smi zero placeholder instead.
  SetDataAt(JSRegExp::kIrregexpCaptureNameMapIndex, Smi::zero());
}
bool JSRegExp::HasCompiledCode() const {
if (TypeTag() != IRREGEXP) return false;
Smi uninitialized = Smi::FromInt(kUninitializedValue);
......
......@@ -89,6 +89,9 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> {
void MarkTierUpForNextExec();
inline Type TypeTag() const;
// Returns true exactly when `t` is IRREGEXP or EXPERIMENTAL, i.e. the type
// tags for which callers in this change expect capture data to be available.
static bool TypeSupportsCaptures(Type t) {
  if (t == IRREGEXP) return true;
  return t == EXPERIMENTAL;
}
// Maximum number of captures allowed.
static constexpr int kMaxCaptures = 1 << 16;
......@@ -105,6 +108,7 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> {
inline Object DataAt(int index) const;
// Set implementation data after the object has been prepared.
inline void SetDataAt(int index, Object value);
inline void SetCaptureNameMap(Handle<FixedArray> capture_name_map);
static constexpr int code_index(bool is_latin1) {
return is_latin1 ? kIrregexpLatin1CodeIndex : kIrregexpUC16CodeIndex;
......
......@@ -41,6 +41,12 @@ std::ostream& operator<<(std::ostream& os, const RegExpInstruction& inst) {
case RegExpInstruction::ACCEPT:
os << "ACCEPT";
break;
case RegExpInstruction::SET_REGISTER_TO_CP:
os << "SET_REGISTER_TO_CP " << inst.payload.register_index;
break;
case RegExpInstruction::CLEAR_REGISTER:
os << "CLEAR_REGISTER " << inst.payload.register_index;
break;
}
return os;
}
......
......@@ -46,6 +46,10 @@
// - JMP: Instead of incrementing the PC value after execution of this
// instruction by 1, set PC of this thread to the value specified in the
// instruction payload and continue there.
// - SET_REGISTER_TO_CP: Set a register specified in the payload to the current
// position (CP) within the input, then continue with the next instruction.
// - CLEAR_REGISTER: Clear the register specified in the payload by resetting
// it to the initial value -1.
//
// Special care must be exercised with respect to thread priority. It is
// possible that more than one thread executes an ACCEPT statement. The output
......@@ -91,6 +95,8 @@ struct RegExpInstruction {
FORK,
JMP,
ACCEPT,
SET_REGISTER_TO_CP,
CLEAR_REGISTER,
};
struct Uc16Range {
......@@ -125,12 +131,28 @@ struct RegExpInstruction {
return result;
}
// Builds a SET_REGISTER_TO_CP instruction. When executed, it stores the
// current input position (CP) in the register named by `register_index`.
static RegExpInstruction SetRegisterToCp(int32_t register_index) {
  RegExpInstruction inst;
  inst.payload.register_index = register_index;
  inst.opcode = SET_REGISTER_TO_CP;
  return inst;
}
// Builds a CLEAR_REGISTER instruction. When executed, it resets the register
// named by `register_index` to its initial value -1.
static RegExpInstruction ClearRegister(int32_t register_index) {
  RegExpInstruction inst;
  inst.payload.register_index = register_index;
  inst.opcode = CLEAR_REGISTER;
  return inst;
}
Opcode opcode;
union {
// Payload of CONSUME_RANGE:
Uc16Range consume_range;
// Payload of FORK and JMP, the next/forked program counter (pc):
int32_t pc;
// Payload of SET_REGISTER_TO_CP and CLEAR_REGISTER:
int32_t register_index;
} payload;
STATIC_ASSERT(sizeof(payload) == 4);
};
......
......@@ -21,9 +21,7 @@ class CanBeHandledVisitor final : private RegExpVisitor {
public:
static bool Check(RegExpTree* node, JSRegExp::Flags flags, int capture_count,
Zone* zone) {
if (!AreSuitableFlags(flags) || capture_count > 0) {
return false;
}
if (!AreSuitableFlags(flags)) return false;
CanBeHandledVisitor visitor(zone);
node->Accept(&visitor, nullptr);
return visitor.result_;
......@@ -151,9 +149,7 @@ class CanBeHandledVisitor final : private RegExpVisitor {
}
void* VisitCapture(RegExpCapture* node, void*) override {
// TODO(mbid, v8:10765): This can be implemented with the NFA interpreter,
// but not with the lazy DFA. See also re2.
result_ = false;
node->body()->Accept(this, nullptr);
return nullptr;
}
......@@ -287,7 +283,9 @@ class CompileVisitor : private RegExpVisitor {
Zone* zone) {
CompileVisitor compiler(zone);
compiler.code_.Add(RegExpInstruction::SetRegisterToCp(0), zone);
tree->Accept(&compiler, nullptr);
compiler.code_.Add(RegExpInstruction::SetRegisterToCp(1), zone);
compiler.code_.Add(RegExpInstruction::Accept(), zone);
return std::move(compiler.code_);
......@@ -404,11 +402,35 @@ class CompileVisitor : private RegExpVisitor {
return nullptr;
}
// Emits CLEAR_REGISTER instructions for the capture registers in `indices`.
// An empty interval emits nothing. Otherwise the interval must start at an
// even index and end at an odd one (checked in debug builds).
void ClearRegisters(Interval indices) {
  if (indices.is_empty()) return;
  DCHECK_EQ(indices.from() % 2, 0);
  DCHECK_EQ(indices.to() % 2, 1);
  // Only every second register, starting at from(), is cleared: that is the
  // `begin` register of each capture. A cleared `begin` already marks the
  // capture as undefined, whatever value the matching `end` register holds.
  int begin_register = indices.from();
  while (begin_register <= indices.to()) {
    code_.Add(RegExpInstruction::ClearRegister(begin_register), zone_);
    begin_register += 2;
  }
}
void* VisitQuantifier(RegExpQuantifier* node, void*) override {
// First repeat the body `min()` times.
for (int i = 0; i != node->min(); ++i) {
// Emit the body, but clear registers occurring in body first.
//
// TODO(mbid,v8:10765): It's not always necessary to a) capture registers
// and b) clear them. For example, we don't have to capture anything for
// the first 4 repetitions if node->min() >= 5, and then we don't have to
// clear registers in the first node->min() repetitions.
// Later, and if node->min() == 0, we don't have to clear registers before
// the first optional repetition.
Interval body_registers = node->body()->CaptureRegisters();
auto emit_body = [&]() {
ClearRegisters(body_registers);
node->body()->Accept(this, nullptr);
}
};
// First repeat the body `min()` times.
for (int i = 0; i != node->min(); ++i) emit_body();
switch (node->quantifier_type()) {
case RegExpQuantifier::POSSESSIVE:
......@@ -430,7 +452,7 @@ class CompileVisitor : private RegExpVisitor {
DeferredLabel end;
AddForkTo(end, code_, zone_);
node->body()->Accept(this, nullptr);
emit_body();
AddJmpTo(begin, code_, zone_);
std::move(end).Bind(code_);
......@@ -452,7 +474,7 @@ class CompileVisitor : private RegExpVisitor {
DeferredLabel end;
for (int i = node->min(); i != node->max(); ++i) {
AddForkTo(end, code_, zone_);
node->body()->Accept(this, nullptr);
emit_body();
}
std::move(end).Bind(code_);
}
......@@ -478,7 +500,7 @@ class CompileVisitor : private RegExpVisitor {
DCHECK_EQ(body.index(), code_.length());
node->body()->Accept(this, nullptr);
emit_body();
AddForkTo(body, code_, zone_);
std::move(end).Bind(code_);
......@@ -509,20 +531,24 @@ class CompileVisitor : private RegExpVisitor {
DCHECK_EQ(body.index(), code_.length());
node->body()->Accept(this, nullptr);
emit_body();
}
std::move(end).Bind(code_);
}
break;
}
}
return nullptr;
}
void* VisitCapture(RegExpCapture* node, void*) override {
// TODO(mbid,v8:10765): Support this case.
UNREACHABLE();
int index = node->index();
int start_register = RegExpCapture::StartRegister(index);
int end_register = RegExpCapture::EndRegister(index);
code_.Add(RegExpInstruction::SetRegisterToCp(start_register), zone_);
node->body()->Accept(this, nullptr);
code_.Add(RegExpInstruction::SetRegisterToCp(end_register), zone_);
return nullptr;
}
void* VisitGroup(RegExpGroup* node, void*) override {
......
......@@ -11,15 +11,10 @@
namespace v8 {
namespace internal {
class Zone;
class ExperimentalRegExpInterpreter final : public AllStatic {
public:
// A half-open range in a string denoting a (sub)match. Used to access
// output registers of regexp execution grouped by [begin, end) pairs.
struct MatchRange {
int32_t begin; // inclusive
int32_t end; // exclusive
};
// Executes a bytecode program in breadth-first NFA mode, without
// backtracking, to find matching substrings. Tries to find up to
// `max_match_num` matches in `input`, starting at `start_index`. Returns
......@@ -27,11 +22,14 @@ class ExperimentalRegExpInterpreter final : public AllStatic {
// are written to `matches_out`. Provided in variants for one-byte and
// two-byte strings.
static int FindMatchesNfaOneByte(Vector<const RegExpInstruction> bytecode,
int capture_count,
Vector<const uint8_t> input, int start_index,
MatchRange* matches_out, int max_match_num);
int32_t* output_registers,
int output_register_count, Zone* zone);
static int FindMatchesNfaTwoByte(Vector<const RegExpInstruction> bytecode,
Vector<const uc16> input, int start_index,
MatchRange* matches_out, int max_match_num);
int capture_count, Vector<const uc16> input,
int start_index, int32_t* output_registers,
int output_register_count, Zone* zone);
};
} // namespace internal
......
......@@ -44,7 +44,7 @@ bool ExperimentalRegExp::IsCompiled(Handle<JSRegExp> re, Isolate* isolate) {
Smi::FromInt(JSRegExp::kUninitializedValue);
}
void ExperimentalRegExp::Compile(Isolate* isolate, Handle<JSRegExp> re) {
bool ExperimentalRegExp::Compile(Isolate* isolate, Handle<JSRegExp> re) {
DCHECK_EQ(re->TypeTag(), JSRegExp::EXPERIMENTAL);
#ifdef VERIFY_HEAP
re->JSRegExpVerify(isolate);
......@@ -63,11 +63,15 @@ void ExperimentalRegExp::Compile(Isolate* isolate, Handle<JSRegExp> re) {
FlatStringReader reader(isolate, source);
DCHECK(!isolate->has_pending_exception());
// The pattern was already parsed during initialization, so it should never
// fail here:
bool parse_success =
RegExpParser::ParseRegExp(isolate, &zone, &reader, flags, &parse_result);
CHECK(parse_success);
if (!parse_success) {
// The pattern was already parsed successfully during initialization, so
// the only way parsing can fail now is because of stack overflow.
CHECK_EQ(parse_result.error, RegExpError::kStackOverflow);
USE(RegExp::ThrowRegExpException(isolate, re, source, parse_result.error));
return false;
}
ZoneList<RegExpInstruction> bytecode =
ExperimentalRegExpCompiler::Compile(parse_result.tree, flags, &zone);
......@@ -84,6 +88,10 @@ void ExperimentalRegExp::Compile(Isolate* isolate, Handle<JSRegExp> re) {
Handle<Code> trampoline = BUILTIN_CODE(isolate, RegExpExperimentalTrampoline);
re->SetDataAt(JSRegExp::kIrregexpLatin1CodeIndex, *trampoline);
re->SetDataAt(JSRegExp::kIrregexpUC16CodeIndex, *trampoline);
re->SetCaptureNameMap(parse_result.capture_name_map);
return true;
}
Vector<RegExpInstruction> AsInstructionSequence(ByteArray raw_bytes) {
......@@ -94,11 +102,9 @@ Vector<RegExpInstruction> AsInstructionSequence(ByteArray raw_bytes) {
return Vector<RegExpInstruction>(inst_begin, inst_num);
}
using MatchRange = ExperimentalRegExpInterpreter::MatchRange;
// Returns the number of matches.
int32_t ExperimentalRegExp::ExecRaw(JSRegExp regexp, String subject,
int32_t* output_registers,
int32_t ExperimentalRegExp::ExecRaw(Isolate* isolate, JSRegExp regexp,
String subject, int32_t* output_registers,
int32_t output_register_count,
int32_t subject_index) {
DisallowHeapAllocation no_gc;
......@@ -118,21 +124,22 @@ int32_t ExperimentalRegExp::ExecRaw(JSRegExp regexp, String subject,
StdoutStream{} << bytecode << std::endl;
}
int register_count_per_match =
JSRegExp::RegistersForCaptureCount(regexp.CaptureCount());
DCHECK(subject.IsFlat());
String::FlatContent subject_content = subject.GetFlatContent(no_gc);
DCHECK_EQ(output_register_count % 2, 0);
MatchRange* matches = reinterpret_cast<MatchRange*>(output_registers);
const int32_t max_match_num = output_register_count / 2;
Zone zone(isolate->allocator(), ZONE_NAME);
if (subject_content.IsOneByte()) {
return ExperimentalRegExpInterpreter::FindMatchesNfaOneByte(
bytecode, subject_content.ToOneByteVector(), subject_index, matches,
max_match_num);
bytecode, register_count_per_match, subject_content.ToOneByteVector(),
subject_index, output_registers, output_register_count, &zone);
} else {
return ExperimentalRegExpInterpreter::FindMatchesNfaTwoByte(
bytecode, subject_content.ToUC16Vector(), subject_index, matches,
max_match_num);
bytecode, register_count_per_match, subject_content.ToUC16Vector(),
subject_index, output_registers, output_register_count, &zone);
}
}
......@@ -156,7 +163,7 @@ int32_t ExperimentalRegExp::MatchForCallFromJs(
JSRegExp regexp_obj = JSRegExp::cast(Object(regexp));
return ExecRaw(regexp_obj, subject_string, output_registers,
return ExecRaw(isolate, regexp_obj, subject_string, output_registers,
output_register_count, start_position);
}
......@@ -170,22 +177,28 @@ MaybeHandle<Object> ExperimentalRegExp::Exec(
regexp->JSRegExpVerify(isolate);
#endif
if (!IsCompiled(regexp, isolate)) {
Compile(isolate, regexp);
if (!IsCompiled(regexp, isolate) && !Compile(isolate, regexp)) {
DCHECK(isolate->has_pending_exception());
return MaybeHandle<Object>();
}
DCHECK(IsCompiled(regexp, isolate));
subject = String::Flatten(isolate, subject);
MatchRange match;
int32_t* output_registers = &match.begin;
int32_t output_register_count = sizeof(MatchRange) / sizeof(int32_t);
int capture_count = regexp->CaptureCount();
int output_register_count = JSRegExp::RegistersForCaptureCount(capture_count);
int32_t* output_registers;
std::unique_ptr<int32_t[]> output_registers_release;
if (output_register_count <= Isolate::kJSRegexpStaticOffsetsVectorSize) {
output_registers = isolate->jsregexp_static_offsets_vector();
} else {
output_registers = NewArray<int32_t>(output_register_count);
output_registers_release.reset(output_registers);
}
int num_matches = ExecRaw(*regexp, *subject, output_registers,
int num_matches = ExecRaw(isolate, *regexp, *subject, output_registers,
output_register_count, subject_index);
if (num_matches == 0) {
......
......@@ -25,7 +25,8 @@ class ExperimentalRegExp final : public AllStatic {
Handle<String> pattern, JSRegExp::Flags flags,
int capture_count);
static bool IsCompiled(Handle<JSRegExp> re, Isolate* isolate);
static void Compile(Isolate* isolate, Handle<JSRegExp> re);
V8_WARN_UNUSED_RESULT
static bool Compile(Isolate* isolate, Handle<JSRegExp> re);
// Execution:
static int32_t MatchForCallFromJs(Address subject, int32_t start_position,
......@@ -38,7 +39,7 @@ class ExperimentalRegExp final : public AllStatic {
static MaybeHandle<Object> Exec(Isolate* isolate, Handle<JSRegExp> regexp,
Handle<String> subject, int index,
Handle<RegExpMatchInfo> last_match_info);
static int32_t ExecRaw(JSRegExp regexp, String subject,
static int32_t ExecRaw(Isolate* isolate, JSRegExp regexp, String subject,
int32_t* output_registers,
int32_t output_register_count, int32_t subject_index);
......
This diff is collapsed.
......@@ -74,6 +74,13 @@ class RegExp final : public AllStatic {
Isolate* isolate, Handle<JSRegExp> re, Handle<String> pattern,
JSRegExp::Flags flags, uint32_t backtrack_limit);
// Ensures that a regexp is fully compiled and ready to be executed on a
// subject string. Returns true on success. Returns false on failure, in
// which case an exception will be pending.
V8_WARN_UNUSED_RESULT static bool EnsureFullyCompiled(Isolate* isolate,
Handle<JSRegExp> re,
Handle<String> subject);
enum CallOrigin : int {
kFromRuntime = 0,
kFromJs = 1,
......@@ -97,16 +104,6 @@ class RegExp final : public AllStatic {
RE_EXCEPTION = kInternalRegExpException,
};
// Prepare a RegExp for being executed one or more times (using
// IrregexpExecOnce) on the subject.
// This ensures that the regexp is compiled for the subject, and that
// the subject is flat.
// Returns the number of integer spaces required by IrregexpExecOnce
// as its "registers" argument. If the regexp cannot be compiled,
// an exception is set as pending, and this function returns negative.
static int IrregexpPrepare(Isolate* isolate, Handle<JSRegExp> regexp,
Handle<String> subject);
// Set last match info. If match is nullptr, then setting captures is
// omitted.
static Handle<RegExpMatchInfo> SetLastMatchInfo(
......@@ -124,6 +121,14 @@ class RegExp final : public AllStatic {
RegExpNode* node);
static const int kRegExpTooLargeToOptimize = 20 * KB;
V8_WARN_UNUSED_RESULT
static MaybeHandle<Object> ThrowRegExpException(Isolate* isolate,
Handle<JSRegExp> re,
Handle<String> pattern,
RegExpError error);
static void ThrowRegExpException(Isolate* isolate, Handle<JSRegExp> re,
RegExpError error_text);
};
// Uses a special global mode of irregexp-generated code to perform a global
......
......@@ -322,7 +322,7 @@ bool CompiledReplacement::Compile(Isolate* isolate, Handle<JSRegExp> regexp,
FixedArray capture_name_map;
if (capture_count > 0) {
DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
DCHECK(JSRegExp::TypeSupportsCaptures(regexp->TypeTag()));
Object maybe_capture_name_map = regexp->CaptureNameMap();
if (maybe_capture_name_map.IsFixedArray()) {
capture_name_map = FixedArray::cast(maybe_capture_name_map);
......@@ -611,13 +611,9 @@ V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithString(
int capture_count = regexp->CaptureCount();
int subject_length = subject->length();
JSRegExp::Type typeTag = regexp->TypeTag();
if (typeTag == JSRegExp::IRREGEXP) {
// Ensure the RegExp is compiled so we can access the capture-name map.
if (RegExp::IrregexpPrepare(isolate, regexp, subject) == -1) {
DCHECK(isolate->has_pending_exception());
return ReadOnlyRoots(isolate).exception();
}
// Ensure the RegExp is compiled so we can access the capture-name map.
if (!RegExp::EnsureFullyCompiled(isolate, regexp, subject)) {
return ReadOnlyRoots(isolate).exception();
}
// CompiledReplacement uses zone allocation.
......@@ -627,7 +623,7 @@ V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithString(
isolate, regexp, replacement, capture_count, subject_length);
// Shortcut for simple non-regexp global replacements
if (typeTag == JSRegExp::ATOM && simple_replace) {
if (regexp->TypeTag() == JSRegExp::ATOM && simple_replace) {
if (subject->IsOneByteRepresentation() &&
replacement->IsOneByteRepresentation()) {
return StringReplaceGlobalAtomRegExpWithString<SeqOneByteString>(
......@@ -1460,8 +1456,7 @@ RUNTIME_FUNCTION(Runtime_StringReplaceNonGlobalRegExpWithFunction) {
bool has_named_captures = false;
Handle<FixedArray> capture_map;
if (m > 1) {
// The existence of capture groups implies IRREGEXP kind.
DCHECK_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
DCHECK(JSRegExp::TypeSupportsCaptures(regexp->TypeTag()));
Object maybe_capture_map = regexp->CaptureNameMap();
if (maybe_capture_map.IsFixedArray()) {
......
......@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --allow-natives-syntax
// Flags: --allow-natives-syntax --no-enable-experimental-regexp-engine
const kNoBacktrackLimit = 0; // To match JSRegExp::kNoBacktrackLimit.
const re0 = %NewRegExpWithBacktrackLimit("(\\d+)+x", "", kNoBacktrackLimit);
......
......@@ -60,5 +60,16 @@ Test(/(?:asdf)/, "123asdfxyz", ["asdf"], 0);
Test(/(?:asdf)|123/, "xyz123asdf", ["123"], 0);
Test(/asdf(?:[0-9]|(?:xy|x)*)*/, "kkkasdf5xyx8xyyky", ["asdf5xyx8xy"], 0);
// Capturing groups.
Test(/()/, "asdf", ["", ""], 0);
Test(/(123)/, "asdf123xyz", ["123", "123"], 0);
Test(/asdf(123)xyz/, "asdf123xyz", ["asdf123xyz", "123"], 0);
Test(/(123|xyz)/, "123", ["123", "123"], 0);
Test(/(123|xyz)/, "xyz", ["xyz", "xyz"], 0);
Test(/(123)|(xyz)/, "123", ["123", "123", undefined], 0);
Test(/(123)|(xyz)/, "xyz", ["xyz", undefined, "xyz"], 0);
Test(/(?:(123)|(xyz))*/, "xyz123", ["xyz123", "123", undefined], 0);
Test(/((123)|(xyz)*)*/, "xyz123xyz", ["xyz123xyz", "xyz", undefined, "xyz"], 0);
// The global flag.
Test(/asdf/g, "fjasdfkkasdf", ["asdf"], 6);
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment