Commit a2b9710f authored by Jakob Gruber, committed by V8 LUCI CQ

[regexp] More cleanups

- Anonymous namespaces instead of static functions.
- Comments.
- Reserve enough space in the range ZoneList.

Change-Id: Ie79fda770974796cd590a155dc5fd504472e5bc9
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3220341
Auto-Submit: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: Patrick Thier <pthier@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/main@{#77391}
parent b36b2074
...@@ -27,14 +27,16 @@ FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE) ...@@ -27,14 +27,16 @@ FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE) FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE)
#undef MAKE_TYPE_CASE #undef MAKE_TYPE_CASE
namespace {
static Interval ListCaptureRegisters(ZoneList<RegExpTree*>* children) { Interval ListCaptureRegisters(ZoneList<RegExpTree*>* children) {
Interval result = Interval::Empty(); Interval result = Interval::Empty();
for (int i = 0; i < children->length(); i++) for (int i = 0; i < children->length(); i++)
result = result.Union(children->at(i)->CaptureRegisters()); result = result.Union(children->at(i)->CaptureRegisters());
return result; return result;
} }
} // namespace
Interval RegExpAlternative::CaptureRegisters() { Interval RegExpAlternative::CaptureRegisters() {
return ListCaptureRegisters(nodes()); return ListCaptureRegisters(nodes());
...@@ -130,6 +132,7 @@ bool RegExpCapture::IsAnchoredAtStart() { return body()->IsAnchoredAtStart(); } ...@@ -130,6 +132,7 @@ bool RegExpCapture::IsAnchoredAtStart() { return body()->IsAnchoredAtStart(); }
bool RegExpCapture::IsAnchoredAtEnd() { return body()->IsAnchoredAtEnd(); } bool RegExpCapture::IsAnchoredAtEnd() { return body()->IsAnchoredAtEnd(); }
namespace {
// Convert regular expression trees to a simple sexp representation. // Convert regular expression trees to a simple sexp representation.
// This representation should be different from the input grammar // This representation should be different from the input grammar
...@@ -148,6 +151,7 @@ class RegExpUnparser final : public RegExpVisitor { ...@@ -148,6 +151,7 @@ class RegExpUnparser final : public RegExpVisitor {
Zone* zone_; Zone* zone_;
}; };
} // namespace
void* RegExpUnparser::VisitDisjunction(RegExpDisjunction* that, void* data) { void* RegExpUnparser::VisitDisjunction(RegExpDisjunction* that, void* data) {
os_ << "(|"; os_ << "(|";
...@@ -312,8 +316,9 @@ RegExpDisjunction::RegExpDisjunction(ZoneList<RegExpTree*>* alternatives) ...@@ -312,8 +316,9 @@ RegExpDisjunction::RegExpDisjunction(ZoneList<RegExpTree*>* alternatives)
} }
} }
namespace {
static int IncreaseBy(int previous, int increase) { int IncreaseBy(int previous, int increase) {
if (RegExpTree::kInfinity - previous < increase) { if (RegExpTree::kInfinity - previous < increase) {
return RegExpTree::kInfinity; return RegExpTree::kInfinity;
} else { } else {
...@@ -321,6 +326,7 @@ static int IncreaseBy(int previous, int increase) { ...@@ -321,6 +326,7 @@ static int IncreaseBy(int previous, int increase) {
} }
} }
} // namespace
RegExpAlternative::RegExpAlternative(ZoneList<RegExpTree*>* nodes) RegExpAlternative::RegExpAlternative(ZoneList<RegExpTree*>* nodes)
: nodes_(nodes) { : nodes_(nodes) {
......
...@@ -45,8 +45,10 @@ RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler, ...@@ -45,8 +45,10 @@ RegExpNode* RegExpText::ToNode(RegExpCompiler* compiler,
on_success); on_success);
} }
static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges, namespace {
const int* special_class, int length) {
bool CompareInverseRanges(ZoneList<CharacterRange>* ranges,
const int* special_class, int length) {
length--; // Remove final marker. length--; // Remove final marker.
DCHECK_EQ(kRangeEndMarker, special_class[length]); DCHECK_EQ(kRangeEndMarker, special_class[length]);
DCHECK_NE(0, ranges->length()); DCHECK_NE(0, ranges->length());
...@@ -74,8 +76,8 @@ static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges, ...@@ -74,8 +76,8 @@ static bool CompareInverseRanges(ZoneList<CharacterRange>* ranges,
return true; return true;
} }
static bool CompareRanges(ZoneList<CharacterRange>* ranges, bool CompareRanges(ZoneList<CharacterRange>* ranges, const int* special_class,
const int* special_class, int length) { int length) {
length--; // Remove final marker. length--; // Remove final marker.
DCHECK_EQ(kRangeEndMarker, special_class[length]); DCHECK_EQ(kRangeEndMarker, special_class[length]);
if (ranges->length() * 2 != length) { if (ranges->length() * 2 != length) {
...@@ -91,6 +93,8 @@ static bool CompareRanges(ZoneList<CharacterRange>* ranges, ...@@ -91,6 +93,8 @@ static bool CompareRanges(ZoneList<CharacterRange>* ranges,
return true; return true;
} }
} // namespace
bool RegExpCharacterClass::is_standard(Zone* zone) { bool RegExpCharacterClass::is_standard(Zone* zone) {
// TODO(lrn): Remove need for this function, by not throwing away information // TODO(lrn): Remove need for this function, by not throwing away information
// along the way. // along the way.
...@@ -442,6 +446,8 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler, ...@@ -442,6 +446,8 @@ RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
} }
} }
namespace {
int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) { int CompareFirstChar(RegExpTree* const* a, RegExpTree* const* b) {
RegExpAtom* atom1 = (*a)->AsAtom(); RegExpAtom* atom1 = (*a)->AsAtom();
RegExpAtom* atom2 = (*b)->AsAtom(); RegExpAtom* atom2 = (*b)->AsAtom();
...@@ -464,7 +470,7 @@ int CompareFirstCharCaseInsensitve(RegExpTree* const* a, RegExpTree* const* b) { ...@@ -464,7 +470,7 @@ int CompareFirstCharCaseInsensitve(RegExpTree* const* a, RegExpTree* const* b) {
#else #else
static unibrow::uchar Canonical( unibrow::uchar Canonical(
unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize, unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize,
unibrow::uchar c) { unibrow::uchar c) {
unibrow::uchar chars[unibrow::Ecma262Canonicalize::kMaxWidth]; unibrow::uchar chars[unibrow::Ecma262Canonicalize::kMaxWidth];
...@@ -491,6 +497,8 @@ int CompareFirstCharCaseIndependent( ...@@ -491,6 +497,8 @@ int CompareFirstCharCaseIndependent(
} }
#endif // V8_INTL_SUPPORT #endif // V8_INTL_SUPPORT
} // namespace
// We can stable sort runs of atoms, since the order does not matter if they // We can stable sort runs of atoms, since the order does not matter if they
// start with different characters. // start with different characters.
// Returns true if any consecutive atoms were found. // Returns true if any consecutive atoms were found.
...@@ -1035,8 +1043,10 @@ RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler, ...@@ -1035,8 +1043,10 @@ RegExpNode* RegExpAlternative::ToNode(RegExpCompiler* compiler,
return current; return current;
} }
static void AddClass(const int* elmv, int elmc, namespace {
ZoneList<CharacterRange>* ranges, Zone* zone) {
void AddClass(const int* elmv, int elmc, ZoneList<CharacterRange>* ranges,
Zone* zone) {
elmc--; elmc--;
DCHECK_EQ(kRangeEndMarker, elmv[elmc]); DCHECK_EQ(kRangeEndMarker, elmv[elmc]);
for (int i = 0; i < elmc; i += 2) { for (int i = 0; i < elmc; i += 2) {
...@@ -1045,8 +1055,8 @@ static void AddClass(const int* elmv, int elmc, ...@@ -1045,8 +1055,8 @@ static void AddClass(const int* elmv, int elmc,
} }
} }
static void AddClassNegated(const int* elmv, int elmc, void AddClassNegated(const int* elmv, int elmc,
ZoneList<CharacterRange>* ranges, Zone* zone) { ZoneList<CharacterRange>* ranges, Zone* zone) {
elmc--; elmc--;
DCHECK_EQ(kRangeEndMarker, elmv[elmc]); DCHECK_EQ(kRangeEndMarker, elmv[elmc]);
DCHECK_NE(0x0000, elmv[0]); DCHECK_NE(0x0000, elmv[0]);
...@@ -1061,6 +1071,8 @@ static void AddClassNegated(const int* elmv, int elmc, ...@@ -1061,6 +1071,8 @@ static void AddClassNegated(const int* elmv, int elmc,
ranges->Add(CharacterRange::Range(last, kMaxCodePoint), zone); ranges->Add(CharacterRange::Range(last, kMaxCodePoint), zone);
} }
} // namespace
void CharacterRange::AddClassEscape(StandardCharacterSet standard_character_set, void CharacterRange::AddClassEscape(StandardCharacterSet standard_character_set,
ZoneList<CharacterRange>* ranges, ZoneList<CharacterRange>* ranges,
bool add_unicode_case_equivalents, bool add_unicode_case_equivalents,
...@@ -1268,10 +1280,11 @@ ZoneList<CharacterRange>* CharacterSet::ranges(Zone* zone) { ...@@ -1268,10 +1280,11 @@ ZoneList<CharacterRange>* CharacterSet::ranges(Zone* zone) {
return ranges_; return ranges_;
} }
namespace {
// Move a number of elements in a zonelist to another position // Move a number of elements in a zonelist to another position
// in the same list. Handles overlapping source and target areas. // in the same list. Handles overlapping source and target areas.
static void MoveRanges(ZoneList<CharacterRange>* list, int from, int to, void MoveRanges(ZoneList<CharacterRange>* list, int from, int to, int count) {
int count) {
// Ranges are potentially overlapping. // Ranges are potentially overlapping.
if (from < to) { if (from < to) {
for (int i = count - 1; i >= 0; i--) { for (int i = count - 1; i >= 0; i--) {
...@@ -1284,8 +1297,8 @@ static void MoveRanges(ZoneList<CharacterRange>* list, int from, int to, ...@@ -1284,8 +1297,8 @@ static void MoveRanges(ZoneList<CharacterRange>* list, int from, int to,
} }
} }
static int InsertRangeInCanonicalList(ZoneList<CharacterRange>* list, int count, int InsertRangeInCanonicalList(ZoneList<CharacterRange>* list, int count,
CharacterRange insert) { CharacterRange insert) {
// Inserts a range into list[0..count[, which must be sorted // Inserts a range into list[0..count[, which must be sorted
// by from value and non-overlapping and non-adjacent, using at most // by from value and non-overlapping and non-adjacent, using at most
// list[0..count] for the result. Returns the number of resulting // list[0..count] for the result. Returns the number of resulting
...@@ -1340,6 +1353,8 @@ static int InsertRangeInCanonicalList(ZoneList<CharacterRange>* list, int count, ...@@ -1340,6 +1353,8 @@ static int InsertRangeInCanonicalList(ZoneList<CharacterRange>* list, int count,
return count - (end_pos - start_pos) + 1; return count - (end_pos - start_pos) + 1;
} }
} // namespace
void CharacterSet::Canonicalize() { void CharacterSet::Canonicalize() {
// Special/default classes are always considered canonical. The result // Special/default classes are always considered canonical. The result
// of calling ranges() will be sorted. // of calling ranges() will be sorted.
...@@ -1405,6 +1420,8 @@ void CharacterRange::Negate(ZoneList<CharacterRange>* ranges, ...@@ -1405,6 +1420,8 @@ void CharacterRange::Negate(ZoneList<CharacterRange>* ranges,
} }
} }
namespace {
// Scoped object to keep track of how much we unroll quantifier loops in the // Scoped object to keep track of how much we unroll quantifier loops in the
// regexp graph generator. // regexp graph generator.
class RegExpExpansionLimiter { class RegExpExpansionLimiter {
...@@ -1442,6 +1459,8 @@ class RegExpExpansionLimiter { ...@@ -1442,6 +1459,8 @@ class RegExpExpansionLimiter {
DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter); DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpExpansionLimiter);
}; };
} // namespace
RegExpNode* RegExpQuantifier::ToNode(int min, int max, bool is_greedy, RegExpNode* RegExpQuantifier::ToNode(int min, int max, bool is_greedy,
RegExpTree* body, RegExpCompiler* compiler, RegExpTree* body, RegExpCompiler* compiler,
RegExpNode* on_success, RegExpNode* on_success,
......
This diff is collapsed.
...@@ -971,8 +971,9 @@ RegExpParserState* RegExpParserImpl<CharT>::ParseOpenParenthesis( ...@@ -971,8 +971,9 @@ RegExpParserState* RegExpParserImpl<CharT>::ParseOpenParenthesis(
} }
#ifdef DEBUG #ifdef DEBUG
// Currently only used in an DCHECK. namespace {
static bool IsSpecialClassEscape(base::uc32 c) {
bool IsSpecialClassEscape(base::uc32 c) {
switch (c) { switch (c) {
case 'd': case 'd':
case 'D': case 'D':
...@@ -985,6 +986,8 @@ static bool IsSpecialClassEscape(base::uc32 c) { ...@@ -985,6 +986,8 @@ static bool IsSpecialClassEscape(base::uc32 c) {
return false; return false;
} }
} }
} // namespace
#endif #endif
// In order to know whether an escape is a backreference or not we have to scan // In order to know whether an escape is a backreference or not we have to scan
......
...@@ -154,9 +154,11 @@ bool RegExp::IsUnmodifiedRegExp(Isolate* isolate, Handle<JSRegExp> regexp) { ...@@ -154,9 +154,11 @@ bool RegExp::IsUnmodifiedRegExp(Isolate* isolate, Handle<JSRegExp> regexp) {
return RegExpUtils::IsUnmodifiedRegExp(isolate, regexp); return RegExpUtils::IsUnmodifiedRegExp(isolate, regexp);
} }
namespace {
// Identifies the sort of regexps where the regexp engine is faster // Identifies the sort of regexps where the regexp engine is faster
// than the code used for atom matches. // than the code used for atom matches.
static bool HasFewDifferentCharacters(Handle<String> pattern) { bool HasFewDifferentCharacters(Handle<String> pattern) {
int length = std::min(kMaxLookaheadForBoyerMoore, pattern->length()); int length = std::min(kMaxLookaheadForBoyerMoore, pattern->length());
if (length <= kPatternTooShortForBoyerMoore) return false; if (length <= kPatternTooShortForBoyerMoore) return false;
const int kMod = 128; const int kMod = 128;
...@@ -176,6 +178,8 @@ static bool HasFewDifferentCharacters(Handle<String> pattern) { ...@@ -176,6 +178,8 @@ static bool HasFewDifferentCharacters(Handle<String> pattern) {
return true; return true;
} }
} // namespace
// Generic RegExp methods. Dispatches to implementation specific methods. // Generic RegExp methods. Dispatches to implementation specific methods.
// static // static
...@@ -332,9 +336,11 @@ void RegExpImpl::AtomCompile(Isolate* isolate, Handle<JSRegExp> re, ...@@ -332,9 +336,11 @@ void RegExpImpl::AtomCompile(Isolate* isolate, Handle<JSRegExp> re,
re, pattern, JSRegExp::AsJSRegExpFlags(flags), match_pattern); re, pattern, JSRegExp::AsJSRegExpFlags(flags), match_pattern);
} }
static void SetAtomLastCapture(Isolate* isolate, namespace {
Handle<RegExpMatchInfo> last_match_info,
String subject, int from, int to) { void SetAtomLastCapture(Isolate* isolate,
Handle<RegExpMatchInfo> last_match_info, String subject,
int from, int to) {
SealHandleScope shs(isolate); SealHandleScope shs(isolate);
last_match_info->SetNumberOfCaptureRegisters(2); last_match_info->SetNumberOfCaptureRegisters(2);
last_match_info->SetLastSubject(subject); last_match_info->SetLastSubject(subject);
...@@ -343,6 +349,8 @@ static void SetAtomLastCapture(Isolate* isolate, ...@@ -343,6 +349,8 @@ static void SetAtomLastCapture(Isolate* isolate,
last_match_info->SetCapture(1, to); last_match_info->SetCapture(1, to);
} }
} // namespace
int RegExpImpl::AtomExecRaw(Isolate* isolate, Handle<JSRegExp> regexp, int RegExpImpl::AtomExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
Handle<String> subject, int index, int32_t* output, Handle<String> subject, int index, int32_t* output,
int output_size) { int output_size) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment