Commit 3f4f4a0d authored by Martin Bidlingmaier, committed by Commit Bot

[regexp] Handle zero-length matches in experimental engine

Cq-Include-Trybots: luci.v8.try:v8_linux64_fyi_rel_ng
Bug: v8:10765
Change-Id: I9e23fb5ba8a21aa1ddf96ab7936058f671bcc9ae
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2397895
Commit-Queue: Martin Bidlingmaier <mbid@google.com>
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#69736}
parent 3d40ec8d
......@@ -4,6 +4,7 @@
#include "src/regexp/experimental/experimental-compiler.h"
#include "src/regexp/experimental/experimental.h"
#include "src/zone/zone-list-inl.h"
namespace v8 {
......@@ -34,8 +35,10 @@ class CanBeHandledVisitor final : private RegExpVisitor {
// Returns true iff `flags` contains no flag outside the set accepted here
// (currently only the global flag).
static bool AreSuitableFlags(JSRegExp::Flags flags) {
  // TODO(mbid, v8:10765): We should be able to support all flags in the
  // future.
  static constexpr JSRegExp::Flags kAllowedFlags = JSRegExp::kGlobal;
  // The unicode flag may only be allowed once unicode is supported. The
  // condition must read "supported OR flag not allowed"; the negated form
  // (`!kSupportsUnicode || ...`) holds trivially while kSupportsUnicode is
  // false and therefore guards nothing.
  STATIC_ASSERT(ExperimentalRegExp::kSupportsUnicode ||
                (kAllowedFlags & JSRegExp::kUnicode) == 0);
  return (flags & ~kAllowedFlags) == 0;
}
void* VisitDisjunction(RegExpDisjunction* node, void*) override {
......
......@@ -6,6 +6,7 @@
#include "src/base/optional.h"
#include "src/base/small-vector.h"
#include "src/regexp/experimental/experimental.h"
namespace v8 {
namespace internal {
......@@ -84,15 +85,33 @@ class NfaInterpreter {
// `matches_out`. The search begins at the current input index. Returns the
// number of matches found.
int FindMatches(MatchRange* matches_out, int max_match_num) {
  // Repeatedly asks FindNextMatch() for a match, stores each one in
  // `matches_out`, and repositions the input index so the next search starts
  // after it. Stops when no further match is found or `max_match_num`
  // matches have been stored; returns the number stored.
  int match_num = 0;
  // `<` (not `!=`) so that a negative limit ends the loop immediately
  // instead of running until the matches are exhausted.
  while (match_num < max_match_num) {
    base::Optional<MatchRange> match = FindNextMatch();
    if (!match.has_value()) {
      break;
    }

    matches_out[match_num] = *match;
    ++match_num;

    const int match_length = match->end - match->begin;
    if (match_length != 0) {
      // Non-empty match: resume right after it.
      SetInputIndex(match->end);
    } else if (match->end == input_.length()) {
      // Zero-length match, input exhausted.
      SetInputIndex(match->end);
      break;
    } else {
      // Zero-length match, more input. We don't want to report more matches
      // here endlessly, so we advance by 1.
      SetInputIndex(match->end + 1);
      // TODO(mbid,v8:10765): If we're in unicode mode, we have to advance to
      // the next codepoint, not to the next code unit. See also
      // `RegExpUtils::AdvanceStringIndex`.
      STATIC_ASSERT(!ExperimentalRegExp::kSupportsUnicode);
    }
  }
  return match_num;
}
......
......@@ -41,6 +41,8 @@ class ExperimentalRegExp final : public AllStatic {
static int32_t ExecRaw(JSRegExp regexp, String subject,
int32_t* output_registers,
int32_t output_register_count, int32_t subject_index);
// Compile-time switch; false means unicode mode is not handled by the
// experimental engine. Code that depends on this (e.g. advancing by a single
// code unit after a zero-length match) guards itself with a STATIC_ASSERT on
// this constant so it is revisited when the value changes.
static constexpr bool kSupportsUnicode = false;
};
} // namespace internal
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment