Commit f2a832ca authored by Martin Bidlingmaier, committed by Commit Bot

[regexp] Support more quantifiers in experimental engine

Prior to this commit, only quantifiers of the form /<x>*/, i.e. greedy
repetition an arbitrary number of times, were implemented.  Now a much
larger class is supported, e.g. + and ? and their non-greedy variants.
Because the need came up repeatedly during the implementation, the
commit also adds the Label and DeferredLabel classes to make patching
JMP and FORK target addresses easier.

Still not supported are the following quantifiers:
- Possessive quantifiers. I'm not entirely sure whether they can be
  implemented even in principle. RE2 doesn't support them.
- Quantifiers with large but finite minimum and maximum repetition
  counts, e.g. /<x>{9000,90000}/. The repetition counts are currently
  limited to a small value. This is because the body of such
  repetitions is unrolled explicitly, so the size of the bytecode is
  linear in the number of repetitions.

Cq-Include-Trybots: luci.v8.try:v8_linux64_fyi_rel_ng
Bug: v8:10765
Change-Id: Id04d893252588abb0f80c3cb33cfc707f6601ea0
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2387575
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#69759}
parent 9f9c4776
...@@ -443,11 +443,12 @@ class RegExpQuantifier final : public RegExpTree { ...@@ -443,11 +443,12 @@ class RegExpQuantifier final : public RegExpTree {
bool IsQuantifier() override; bool IsQuantifier() override;
int min_match() override { return min_match_; } int min_match() override { return min_match_; }
int max_match() override { return max_match_; } int max_match() override { return max_match_; }
int min() { return min_; } int min() const { return min_; }
int max() { return max_; } int max() const { return max_; }
bool is_possessive() { return quantifier_type_ == POSSESSIVE; } QuantifierType quantifier_type() const { return quantifier_type_; }
bool is_possessive() const { return quantifier_type_ == POSSESSIVE; }
bool is_non_greedy() { return quantifier_type_ == NON_GREEDY; } bool is_non_greedy() { return quantifier_type_ == NON_GREEDY; }
bool is_greedy() { return quantifier_type_ == GREEDY; } bool is_greedy() const { return quantifier_type_ == GREEDY; }
RegExpTree* body() { return body_; } RegExpTree* body() { return body_; }
private: private:
......
...@@ -37,10 +37,22 @@ Test(/\w\d/, "?a??a3!!!", ["a3"], 0); ...@@ -37,10 +37,22 @@ Test(/\w\d/, "?a??a3!!!", ["a3"], 0);
// surrogate characters that make up 💩. The leading surrogate is 0xD83D. // surrogate characters that make up 💩. The leading surrogate is 0xD83D.
Test(/[💩]/, "f💩", [String.fromCodePoint(0xD83D)], 0); Test(/[💩]/, "f💩", [String.fromCodePoint(0xD83D)], 0);
// Greedy quantifier for 0 or more matches. // Greedy and non-greedy quantifiers.
Test(/x*/, "asdfxk", [""], 0); Test(/x*/, "asdfxk", [""], 0);
Test(/xx*a/, "xxa", ["xxa"], 0); Test(/xx*a/, "xxa", ["xxa"], 0);
Test(/asdf*/, "aasdfffk", ["asdfff"], 0); Test(/x*[xa]/, "xxaa", ["xxa"], 0);
Test(/x*?[xa]/, "xxaa", ["x"], 0);
Test(/x*?a/, "xxaa", ["xxa"], 0);
Test(/x+a/, "axxa", ["xxa"], 0);
Test(/x+?[ax]/, "axxa", ["xx"], 0);
Test(/xx?[xa]/, "xxaa", ["xxa"], 0);
Test(/xx??[xa]/, "xxaa", ["xx"], 0);
Test(/xx??a/, "xxaa", ["xxa"], 0);
Test(/x{4}/, "xxxxxxxxx", ["xxxx"], 0);
Test(/x{4,}/, "xxxxxxxxx", ["xxxxxxxxx"], 0);
Test(/x{4,}?/, "xxxxxxxxx", ["xxxx"], 0);
Test(/x{2,4}/, "xxxxxxxxx", ["xxxx"], 0);
Test(/x{2,4}?/, "xxxxxxxxx", ["xx"], 0);
// Non-capturing groups and nested operators. // Non-capturing groups and nested operators.
Test(/(?:)/, "asdf", [""], 0); Test(/(?:)/, "asdf", [""], 0);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment