Commit e83ee945 authored by Erik Corry, committed by Commit Bot

RegExp: Add the ability to switch flags on and off within the regexp.

This is a reland of https://chromium-review.googlesource.com/c/v8/v8/+/752522
which was itself a reupload of
https://chromium-review.googlesource.com/c/v8/v8/+/571746 where reviews took
place.

R=yangguo@chromium.org

Bug: 
Change-Id: Ia4dbdd6e9a362e272753ff10dc66b7f72d81ee20
Reviewed-on: https://chromium-review.googlesource.com/753596
Reviewed-by: Yang Guo <yangguo@chromium.org>
Commit-Queue: Erik Corry <erikcorry@chromium.org>
Cr-Commit-Position: refs/heads/master@{#49129}
parent 6ebb5641
......@@ -959,6 +959,7 @@ DEFINE_BOOL(serialization_statistics, false,
// Regexp
DEFINE_BOOL(regexp_optimization, true, "generate optimized regexp code")
DEFINE_BOOL(regexp_mode_modifiers, false, "enable inline flags in regexp.")
// Testing flags test/cctest/test-{flags,api,serialization}.cc
DEFINE_BOOL(testing_bool_flag, true, "testing_bool_flag")
......
This diff is collapsed.
......@@ -21,6 +21,36 @@ class RegExpNode;
class RegExpTree;
class BoyerMooreLookahead;
// Reports whether the JSRegExp::kIgnoreCase bit is set in |flags|.
inline bool IgnoreCase(JSRegExp::Flags flags) {
  const bool ignore_case_set = (flags & JSRegExp::kIgnoreCase) != 0;
  return ignore_case_set;
}
// Reports whether the JSRegExp::kUnicode bit is set in |flags|.
inline bool IsUnicode(JSRegExp::Flags flags) {
  const bool unicode_set = (flags & JSRegExp::kUnicode) != 0;
  return unicode_set;
}
// Reports whether the JSRegExp::kSticky bit is set in |flags|.
inline bool IsSticky(JSRegExp::Flags flags) {
  const bool sticky_set = (flags & JSRegExp::kSticky) != 0;
  return sticky_set;
}
// Reports whether the JSRegExp::kGlobal bit is set in |flags|.
inline bool IsGlobal(JSRegExp::Flags flags) {
  const bool global_set = (flags & JSRegExp::kGlobal) != 0;
  return global_set;
}
// Reports whether the JSRegExp::kDotAll bit is set in |flags|.
inline bool DotAll(JSRegExp::Flags flags) {
  const bool dot_all_set = (flags & JSRegExp::kDotAll) != 0;
  return dot_all_set;
}
// Reports whether the JSRegExp::kMultiline bit is set in |flags|.
inline bool Multiline(JSRegExp::Flags flags) {
  const bool multiline_set = (flags & JSRegExp::kMultiline) != 0;
  return multiline_set;
}
// Returns true only when |flags| has both the unicode and the ignore-case
// bits set. In that combination ICU has to be used to compute the closure
// over case equivalents.
inline bool NeedsUnicodeCaseEquivalents(JSRegExp::Flags flags) {
  // Checked first so that IgnoreCase() is consulted only for unicode regexps,
  // matching the short-circuit order of a single && expression.
  if (!IsUnicode(flags)) return false;
  return IgnoreCase(flags);
}
class RegExpImpl {
public:
// Whether V8 is compiled with native regexp support or not.
......@@ -495,9 +525,7 @@ class RegExpNode: public ZoneObject {
// If we know that the input is one-byte then there are some nodes that can
// never match. This method returns a node that can be substituted for
// itself, or nullptr if the node can never match.
virtual RegExpNode* FilterOneByte(int depth, bool ignore_case) {
return this;
}
virtual RegExpNode* FilterOneByte(int depth) { return this; }
// Helper for FilterOneByte.
RegExpNode* replacement() {
DCHECK(info()->replacement_calculated);
......@@ -569,7 +597,7 @@ class SeqRegExpNode: public RegExpNode {
: RegExpNode(on_success->zone()), on_success_(on_success) { }
RegExpNode* on_success() { return on_success_; }
void set_on_success(RegExpNode* node) { on_success_ = node; }
virtual RegExpNode* FilterOneByte(int depth, bool ignore_case);
virtual RegExpNode* FilterOneByte(int depth);
virtual void FillInBMInfo(Isolate* isolate, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start) {
on_success_->FillInBMInfo(isolate, offset, budget - 1, bm, not_at_start);
......@@ -577,7 +605,7 @@ class SeqRegExpNode: public RegExpNode {
}
protected:
RegExpNode* FilterSuccessor(int depth, bool ignore_case);
RegExpNode* FilterSuccessor(int depth);
private:
RegExpNode* on_success_;
......@@ -682,13 +710,15 @@ class TextNode: public SeqRegExpNode {
static TextNode* CreateForCharacterRanges(Zone* zone,
ZoneList<CharacterRange>* ranges,
bool read_backward,
RegExpNode* on_success);
RegExpNode* on_success,
JSRegExp::Flags flags);
// Create TextNode for a surrogate pair with a range given for the
// lead and the trail surrogate each.
static TextNode* CreateForSurrogatePair(Zone* zone, CharacterRange lead,
CharacterRange trail,
bool read_backward,
RegExpNode* on_success);
RegExpNode* on_success,
JSRegExp::Flags flags);
virtual void Accept(NodeVisitor* visitor);
virtual void Emit(RegExpCompiler* compiler, Trace* trace);
virtual int EatsAtLeast(int still_to_find, int budget, bool not_at_start);
......@@ -705,7 +735,7 @@ class TextNode: public SeqRegExpNode {
virtual void FillInBMInfo(Isolate* isolate, int offset, int budget,
BoyerMooreLookahead* bm, bool not_at_start);
void CalculateOffsets();
virtual RegExpNode* FilterOneByte(int depth, bool ignore_case);
virtual RegExpNode* FilterOneByte(int depth);
private:
enum TextEmitPassType {
......@@ -715,7 +745,7 @@ class TextNode: public SeqRegExpNode {
CASE_CHARACTER_MATCH, // Case-independent single character check.
CHARACTER_CLASS_MATCH // Character class.
};
static bool SkipPass(int pass, bool ignore_case);
static bool SkipPass(TextEmitPassType pass, bool ignore_case);
static const int kFirstRealPass = SIMPLE_CHARACTER_MATCH;
static const int kLastPass = CHARACTER_CLASS_MATCH;
void TextEmitPass(RegExpCompiler* compiler,
......@@ -779,11 +809,12 @@ class AssertionNode: public SeqRegExpNode {
class BackReferenceNode: public SeqRegExpNode {
public:
BackReferenceNode(int start_reg, int end_reg, bool read_backward,
RegExpNode* on_success)
BackReferenceNode(int start_reg, int end_reg, JSRegExp::Flags flags,
bool read_backward, RegExpNode* on_success)
: SeqRegExpNode(on_success),
start_reg_(start_reg),
end_reg_(end_reg),
flags_(flags),
read_backward_(read_backward) {}
virtual void Accept(NodeVisitor* visitor);
int start_register() { return start_reg_; }
......@@ -805,6 +836,7 @@ class BackReferenceNode: public SeqRegExpNode {
private:
int start_reg_;
int end_reg_;
JSRegExp::Flags flags_;
bool read_backward_;
};
......@@ -929,7 +961,7 @@ class ChoiceNode: public RegExpNode {
virtual bool try_to_emit_quick_check_for_alternative(bool is_first) {
return true;
}
virtual RegExpNode* FilterOneByte(int depth, bool ignore_case);
virtual RegExpNode* FilterOneByte(int depth);
virtual bool read_backward() { return false; }
protected:
......@@ -1001,7 +1033,7 @@ class NegativeLookaroundChoiceNode : public ChoiceNode {
virtual bool try_to_emit_quick_check_for_alternative(bool is_first) {
return !is_first;
}
virtual RegExpNode* FilterOneByte(int depth, bool ignore_case);
virtual RegExpNode* FilterOneByte(int depth);
};
......@@ -1028,7 +1060,7 @@ class LoopChoiceNode: public ChoiceNode {
bool body_can_be_zero_length() { return body_can_be_zero_length_; }
virtual bool read_backward() { return read_backward_; }
virtual void Accept(NodeVisitor* visitor);
virtual RegExpNode* FilterOneByte(int depth, bool ignore_case);
virtual RegExpNode* FilterOneByte(int depth);
private:
// AddAlternative is made private for loop nodes because alternatives
......@@ -1435,11 +1467,8 @@ FOR_EACH_NODE_TYPE(DECLARE_VISIT)
// +-------+ +------------+
class Analysis: public NodeVisitor {
public:
Analysis(Isolate* isolate, JSRegExp::Flags flags, bool is_one_byte)
: isolate_(isolate),
flags_(flags),
is_one_byte_(is_one_byte),
error_message_(nullptr) {}
Analysis(Isolate* isolate, bool is_one_byte)
: isolate_(isolate), is_one_byte_(is_one_byte), error_message_(nullptr) {}
void EnsureAnalyzed(RegExpNode* node);
#define DECLARE_VISIT(Type) \
......@@ -1459,12 +1488,8 @@ FOR_EACH_NODE_TYPE(DECLARE_VISIT)
Isolate* isolate() const { return isolate_; }
bool ignore_case() const { return (flags_ & JSRegExp::kIgnoreCase) != 0; }
bool unicode() const { return (flags_ & JSRegExp::kUnicode) != 0; }
private:
Isolate* isolate_;
JSRegExp::Flags flags_;
bool is_one_byte_;
const char* error_message_;
......
......@@ -6,6 +6,7 @@
#define V8_REGEXP_REGEXP_AST_H_
#include "src/objects.h"
#include "src/objects/js-regexp.h"
#include "src/objects/string.h"
#include "src/utils.h"
#include "src/zone/zone-containers.h"
......@@ -144,7 +145,7 @@ class CharacterSet final BASE_EMBEDDED {
explicit CharacterSet(ZoneList<CharacterRange>* ranges)
: ranges_(ranges), standard_set_type_(0) {}
ZoneList<CharacterRange>* ranges(Zone* zone);
uc16 standard_set_type() { return standard_set_type_; }
uc16 standard_set_type() const { return standard_set_type_; }
void set_standard_set_type(uc16 special_set_type) {
standard_set_type_ = special_set_type;
}
......@@ -274,7 +275,8 @@ class RegExpAssertion final : public RegExpTree {
BOUNDARY,
NON_BOUNDARY
};
explicit RegExpAssertion(AssertionType type) : assertion_type_(type) {}
RegExpAssertion(AssertionType type, JSRegExp::Flags flags)
: assertion_type_(type), flags_(flags) {}
void* Accept(RegExpVisitor* visitor, void* data) override;
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
RegExpAssertion* AsAssertion() override;
......@@ -286,7 +288,8 @@ class RegExpAssertion final : public RegExpTree {
AssertionType assertion_type() { return assertion_type_; }
private:
AssertionType assertion_type_;
const AssertionType assertion_type_;
const JSRegExp::Flags flags_;
};
......@@ -300,12 +303,18 @@ class RegExpCharacterClass final : public RegExpTree {
NEGATED = 1 << 0,
CONTAINS_SPLIT_SURROGATE = 1 << 1,
};
typedef base::Flags<Flag> Flags;
explicit RegExpCharacterClass(ZoneList<CharacterRange>* ranges,
Flags flags = Flags())
: set_(ranges), flags_(flags) {}
explicit RegExpCharacterClass(uc16 type) : set_(type), flags_(0) {}
typedef base::Flags<Flag> CharacterClassFlags;
RegExpCharacterClass(
ZoneList<CharacterRange>* ranges, JSRegExp::Flags flags,
CharacterClassFlags character_class_flags = CharacterClassFlags())
: set_(ranges),
flags_(flags),
character_class_flags_(character_class_flags) {}
RegExpCharacterClass(uc16 type, JSRegExp::Flags flags)
: set_(type),
flags_(flags),
character_class_flags_(CharacterClassFlags()) {}
void* Accept(RegExpVisitor* visitor, void* data) override;
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
RegExpCharacterClass* AsCharacterClass() override;
......@@ -332,22 +341,25 @@ class RegExpCharacterClass final : public RegExpTree {
// D : non-ASCII digit
// . : non-newline
// * : All characters, for advancing unanchored regexp
uc16 standard_type() { return set_.standard_set_type(); }
uc16 standard_type() const { return set_.standard_set_type(); }
ZoneList<CharacterRange>* ranges(Zone* zone) { return set_.ranges(zone); }
bool is_negated() const { return (flags_ & NEGATED) != 0; }
bool is_negated() const { return (character_class_flags_ & NEGATED) != 0; }
JSRegExp::Flags flags() const { return flags_; }
bool contains_split_surrogate() const {
return (flags_ & CONTAINS_SPLIT_SURROGATE) != 0;
return (character_class_flags_ & CONTAINS_SPLIT_SURROGATE) != 0;
}
private:
CharacterSet set_;
const Flags flags_;
const JSRegExp::Flags flags_;
const CharacterClassFlags character_class_flags_;
};
class RegExpAtom final : public RegExpTree {
public:
explicit RegExpAtom(Vector<const uc16> data) : data_(data) {}
explicit RegExpAtom(Vector<const uc16> data, JSRegExp::Flags flags)
: data_(data), flags_(flags) {}
void* Accept(RegExpVisitor* visitor, void* data) override;
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
RegExpAtom* AsAtom() override;
......@@ -358,9 +370,12 @@ class RegExpAtom final : public RegExpTree {
void AppendToText(RegExpText* text, Zone* zone) override;
Vector<const uc16> data() { return data_; }
int length() { return data_.length(); }
JSRegExp::Flags flags() const { return flags_; }
bool ignore_case() const { return (flags_ & JSRegExp::kIgnoreCase) != 0; }
private:
Vector<const uc16> data_;
const JSRegExp::Flags flags_;
};
......@@ -532,9 +547,10 @@ class RegExpLookaround final : public RegExpTree {
class RegExpBackReference final : public RegExpTree {
public:
RegExpBackReference() : capture_(nullptr), name_(nullptr) {}
explicit RegExpBackReference(RegExpCapture* capture)
: capture_(capture), name_(nullptr) {}
explicit RegExpBackReference(JSRegExp::Flags flags)
: capture_(nullptr), name_(nullptr), flags_(flags) {}
RegExpBackReference(RegExpCapture* capture, JSRegExp::Flags flags)
: capture_(capture), name_(nullptr), flags_(flags) {}
void* Accept(RegExpVisitor* visitor, void* data) override;
RegExpNode* ToNode(RegExpCompiler* compiler, RegExpNode* on_success) override;
RegExpBackReference* AsBackReference() override;
......@@ -552,6 +568,7 @@ class RegExpBackReference final : public RegExpTree {
private:
RegExpCapture* capture_;
const ZoneVector<uc16>* name_;
const JSRegExp::Flags flags_;
};
......
This diff is collapsed.
......@@ -99,7 +99,7 @@ class BufferedZoneList {
// Accumulates RegExp atoms and assertions into lists of terms and alternatives.
class RegExpBuilder : public ZoneObject {
public:
RegExpBuilder(Zone* zone, bool ignore_case, bool unicode);
RegExpBuilder(Zone* zone, JSRegExp::Flags flags);
void AddCharacter(uc16 character);
void AddUnicodeCharacter(uc32 character);
void AddEscapedUnicodeCharacter(uc32 character);
......@@ -114,7 +114,14 @@ class RegExpBuilder : public ZoneObject {
void NewAlternative(); // '|'
bool AddQuantifierToAtom(int min, int max,
RegExpQuantifier::QuantifierType type);
void FlushText();
RegExpTree* ToRegExp();
JSRegExp::Flags flags() const { return flags_; }
void set_flags(JSRegExp::Flags flags) { flags_ = flags; }
bool ignore_case() const { return (flags_ & JSRegExp::kIgnoreCase) != 0; }
bool multiline() const { return (flags_ & JSRegExp::kMultiline) != 0; }
bool dotall() const { return (flags_ & JSRegExp::kDotAll) != 0; }
private:
static const uc16 kNoPendingSurrogate = 0;
......@@ -122,18 +129,15 @@ class RegExpBuilder : public ZoneObject {
void AddTrailSurrogate(uc16 trail_surrogate);
void FlushPendingSurrogate();
void FlushCharacters();
void FlushText();
void FlushTerms();
bool NeedsDesugaringForUnicode(RegExpCharacterClass* cc);
bool NeedsDesugaringForIgnoreCase(uc32 c);
Zone* zone() const { return zone_; }
bool ignore_case() const { return ignore_case_; }
bool unicode() const { return unicode_; }
bool unicode() const { return (flags_ & JSRegExp::kUnicode) != 0; }
Zone* zone_;
bool pending_empty_;
bool ignore_case_;
bool unicode_;
JSRegExp::Flags flags_;
ZoneList<uc16>* characters_;
uc16 pending_surrogate_;
BufferedZoneList<RegExpTree, 2> terms_;
......@@ -159,7 +163,6 @@ class RegExpParser BASE_EMBEDDED {
RegExpTree* ParsePattern();
RegExpTree* ParseDisjunction();
RegExpTree* ParseGroup();
RegExpTree* ParseCharacterClass();
// Parses a {...,...} quantifier and stores the range in the given
// out parameters.
......@@ -175,6 +178,7 @@ class RegExpParser BASE_EMBEDDED {
bool ParseUnicodeEscape(uc32* value);
bool ParseUnlimitedLengthHexNumber(int max_value, uc32* value);
bool ParsePropertyClass(ZoneList<CharacterRange>* result, bool negate);
RegExpTree* ParseCharacterClass(const RegExpBuilder* state);
uc32 ParseOctalLiteral();
......@@ -205,10 +209,9 @@ class RegExpParser BASE_EMBEDDED {
int captures_started() { return captures_started_; }
int position() { return next_pos_ - 1; }
bool failed() { return failed_; }
bool dotall() const { return dotall_; }
bool ignore_case() const { return ignore_case_; }
bool multiline() const { return multiline_; }
bool unicode() const { return unicode_; }
// The Unicode flag can't be changed using in-regexp syntax, so it's OK to
// just read the initial flag value here.
bool unicode() const { return (top_level_flags_ & JSRegExp::kUnicode) != 0; }
static bool IsSyntaxCharacterOrSlash(uc32 c);
......@@ -226,34 +229,35 @@ class RegExpParser BASE_EMBEDDED {
class RegExpParserState : public ZoneObject {
public:
// Push a state on the stack.
RegExpParserState(RegExpParserState* previous_state,
SubexpressionType group_type,
RegExpLookaround::Type lookaround_type,
int disjunction_capture_index,
const ZoneVector<uc16>* capture_name, bool ignore_case,
bool unicode, Zone* zone)
const ZoneVector<uc16>* capture_name,
JSRegExp::Flags flags, Zone* zone)
: previous_state_(previous_state),
builder_(new (zone) RegExpBuilder(zone, ignore_case, unicode)),
builder_(new (zone) RegExpBuilder(zone, flags)),
group_type_(group_type),
lookaround_type_(lookaround_type),
disjunction_capture_index_(disjunction_capture_index),
capture_name_(capture_name) {}
// Parser state of containing expression, if any.
RegExpParserState* previous_state() { return previous_state_; }
RegExpParserState* previous_state() const { return previous_state_; }
bool IsSubexpression() { return previous_state_ != nullptr; }
// RegExpBuilder building this regexp's AST.
RegExpBuilder* builder() { return builder_; }
RegExpBuilder* builder() const { return builder_; }
// Type of regexp being parsed (parenthesized group or entire regexp).
SubexpressionType group_type() { return group_type_; }
SubexpressionType group_type() const { return group_type_; }
// Lookahead or Lookbehind.
RegExpLookaround::Type lookaround_type() { return lookaround_type_; }
RegExpLookaround::Type lookaround_type() const { return lookaround_type_; }
// Index in captures array of first capture in this sub-expression, if any.
// Also the capture index of this sub-expression itself, if group_type
// is CAPTURE.
int capture_index() { return disjunction_capture_index_; }
int capture_index() const { return disjunction_capture_index_; }
// The name of the current sub-expression, if group_type is CAPTURE. Only
// used for named captures.
const ZoneVector<uc16>* capture_name() { return capture_name_; }
const ZoneVector<uc16>* capture_name() const { return capture_name_; }
bool IsNamedCapture() const { return capture_name_ != nullptr; }
......@@ -264,17 +268,17 @@ class RegExpParser BASE_EMBEDDED {
private:
// Linked list implementation of stack of states.
RegExpParserState* previous_state_;
RegExpParserState* const previous_state_;
// Builder for the stored disjunction.
RegExpBuilder* builder_;
RegExpBuilder* const builder_;
// Stored disjunction type (capture, look-ahead or grouping), if any.
SubexpressionType group_type_;
const SubexpressionType group_type_;
// Stored read direction.
RegExpLookaround::Type lookaround_type_;
const RegExpLookaround::Type lookaround_type_;
// Stored disjunction's capture index (if any).
int disjunction_capture_index_;
const int disjunction_capture_index_;
// Stored capture name (if any).
const ZoneVector<uc16>* capture_name_;
const ZoneVector<uc16>* const capture_name_;
};
// Return the 1-indexed RegExpCapture object, allocate if necessary.
......@@ -291,6 +295,7 @@ class RegExpParser BASE_EMBEDDED {
bool ParseNamedBackReference(RegExpBuilder* builder,
RegExpParserState* state);
RegExpParserState* ParseOpenParenthesis(RegExpParserState* state);
// After the initial parsing pass, patch corresponding RegExpCapture objects
// into all RegExpBackReferences. This is done after initial parsing in order
......@@ -323,10 +328,10 @@ class RegExpParser BASE_EMBEDDED {
ZoneList<RegExpBackReference*>* named_back_references_;
FlatStringReader* in_;
uc32 current_;
bool dotall_;
bool ignore_case_;
bool multiline_;
bool unicode_;
// These are the flags specified outside the regexp syntax, i.e. after the
// terminating '/' or in the second argument to the constructor. The current
// flags are stored on the RegExpBuilder.
JSRegExp::Flags top_level_flags_;
int next_pos_;
int captures_started_;
int capture_count_; // Only valid after we have scanned for captures.
......
......@@ -158,6 +158,8 @@
'es6/unicode-regexp-ignore-case-noi18n': [FAIL, ['no_i18n == True', PASS]],
'regress/regress-5036': [PASS, ['no_i18n == True', FAIL]],
'es7/regexp-ui-word': [PASS, ['no_i18n == True', FAIL]],
'regexp-modifiers-i18n': [PASS, ['no_i18n == True', FAIL]],
'regexp-modifiers-autogenerated-i18n': [PASS, ['no_i18n == True', FAIL]],
# desugaring regexp property class relies on ICU.
'harmony/regexp-property-*': [PASS, ['no_i18n == True', FAIL]],
......
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --regexp-mode-modifiers --harmony-regexp-property
// These regexps are just grepped out of the other tests we already have
// and the syntax changed from out-of-line i flag to inline i flag.
// These tests won't all run on the noi18n build of V8.
assertTrue(/(?i)\u00e5/u.test("\u00c5"));
assertTrue(/(?i)\u00e5/u.test("\u00e5"));
assertTrue(/(?i)\u00c5/u.test("\u00e5"));
assertTrue(/(?i)\u00c5/u.test("\u00c5"));
assertTrue(/(?i)\u212b/u.test("\u212b"));
assertFalse(/(?i)\u00df/u.test("SS"));
assertFalse(/(?i)\u1f8d/u.test("\u1f05\u03b9"));
assertTrue(/(?i)\u1f6b/u.test("\u1f63"));
assertTrue(/(?i)\u00e5/u.test("\u212b"));
assertTrue(/(?i)\u00e5/u.test("\u00c5"));
assertTrue(/(?i)\u00e5/u.test("\u00e5"));
assertTrue(/(?i)\u00e5/u.test("\u212b"));
assertTrue(/(?i)\u00c5/u.test("\u00e5"));
assertTrue(/(?i)\u00c5/u.test("\u212b"));
assertTrue(/(?i)\u00c5/u.test("\u00c5"));
assertTrue(/(?i)\u212b/u.test("\u00c5"));
assertTrue(/(?i)\u212b/u.test("\u00e5"));
assertTrue(/(?i)\u212b/u.test("\u212b"));
assertTrue(/(?i)\u{10400}/u.test("\u{10428}"));
assertTrue(/(?i)\ud801\udc00/u.test("\u{10428}"));
assertTrue(/(?i)[\u{10428}]/u.test("\u{10400}"));
assertTrue(/(?i)[\ud801\udc28]/u.test("\u{10400}"));
assertFalse(/(?i)\u00df/u.test("SS"));
assertFalse(/(?i)\u1f8d/u.test("\u1f05\u03b9"));
assertTrue(/(?i)\u1f8d/u.test("\u1f85"));
assertTrue(/(?i)\u1f6b/u.test("\u1f63"));
assertTrue(/(?i)\u00e5\u00e5\u00e5/u.test("\u212b\u00e5\u00c5"));
assertTrue(/(?i)AB\u{10400}/u.test("ab\u{10428}"));
assertTrue(/(?i)\w/u.test('\u017F'));
assertTrue(/(?i)\w/u.test('\u212A'));
assertFalse(/(?i)\W/u.test('\u017F'));
assertFalse(/(?i)\W/u.test('\u212A'));
assertFalse(/(?i)\W/u.test('s'));
assertFalse(/(?i)\W/u.test('S'));
assertFalse(/(?i)\W/u.test('K'));
assertFalse(/(?i)\W/u.test('k'));
assertTrue(/(?i)[\w]/u.test('\u017F'));
assertTrue(/(?i)[\w]/u.test('\u212A'));
assertFalse(/(?i)[\W]/u.test('\u017F'));
assertFalse(/(?i)[\W]/u.test('\u212A'));
assertFalse(/(?i)[\W]/u.test('s'));
assertFalse(/(?i)[\W]/u.test('S'));
assertFalse(/(?i)[\W]/u.test('K'));
assertFalse(/(?i)[\W]/u.test('k'));
assertTrue(/(?i)\b/u.test('\u017F'));
assertTrue(/(?i)\b/u.test('\u212A'));
assertTrue(/(?i)\b/u.test('s'));
assertTrue(/(?i)\b/u.test('S'));
assertFalse(/(?i)\B/u.test('\u017F'));
assertFalse(/(?i)\B/u.test('\u212A'));
assertFalse(/(?i)\B/u.test('s'));
assertFalse(/(?i)\B/u.test('S'));
assertFalse(/(?i)\B/u.test('K'));
assertFalse(/(?i)\B/u.test('k'));
assertTrue(/(?i)\p{Ll}/u.test("a"));
assertTrue(/(?i)\p{Ll}/u.test("\u{118D4}"));
assertTrue(/(?i)\p{Ll}/u.test("A"));
assertTrue(/(?i)\p{Ll}/u.test("\u{118B4}"));
assertTrue(/(?i)\P{Ll}/u.test("a"));
assertTrue(/(?i)\P{Ll}/u.test("\u{118D4}"));
assertTrue(/(?i)\P{Ll}/u.test("A"));
assertTrue(/(?i)\P{Ll}/u.test("\u{118B4}"));
assertTrue(/(?i)\p{Lu}/u.test("a"));
assertTrue(/(?i)\p{Lu}/u.test("\u{118D4}"));
assertTrue(/(?i)\p{Lu}/u.test("A"));
assertTrue(/(?i)\p{Lu}/u.test("\u{118B4}"));
assertTrue(/(?i)\P{Lu}/u.test("a"));
assertTrue(/(?i)\P{Lu}/u.test("\u{118D4}"));
assertTrue(/(?i)\P{Lu}/u.test("A"));
assertTrue(/(?i)\P{Lu}/u.test("\u{118B4}"));
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --regexp-mode-modifiers --harmony-regexp-property
// These regexps are just grepped out of the other tests we already have
// and the syntax changed from out-of-line i flag to inline i flag.
assertFalse(/(?i)x(...)\1/.test("x\u03a3\u03c2\u03c3\u03c2\u03c3"));
assertTrue(/(?i)\u03a3((?:))\1\1x/.test("\u03c2x"), "backref-UC16-empty");
assertTrue(/(?i)x(?:...|(...))\1x/.test("x\u03a3\u03c2\u03c3x"));
assertTrue(/(?i)x(?:...|(...))\1x/.test("x\u03c2\u03c3\u039b\u03a3\u03c2\u03bbx"));
assertFalse(/(?i)\xc1/.test('fooA'), "quickcheck-uc16-pattern-ascii-subject");
assertFalse(/(?i)x(...)\1/.test("xaaaaa"), "backref-ASCII-short");
assertTrue(/(?i)x((?:))\1\1x/.test("xx"), "backref-ASCII-empty");
assertTrue(/(?i)x(?:...|(...))\1x/.test("xabcx"), "backref-ASCII-uncaptured");
assertTrue(/(?i)x(?:...|(...))\1x/.test("xabcABCx"), "backref-ASCII-backtrack");
assertFalse(/(?i)f/.test('b'));
assertFalse(/(?i)[abc]f/.test('x'));
assertFalse(/(?i)[abc]f/.test('xa'));
assertFalse(/(?i)[abc]</.test('x'));
assertFalse(/(?i)[abc]</.test('xa'));
assertFalse(/(?i)f[abc]/.test('x'));
assertFalse(/(?i)f[abc]/.test('xa'));
assertFalse(/(?i)<[abc]/.test('x'));
assertFalse(/(?i)<[abc]/.test('xa'));
assertFalse(/(?i)[\u00e5]/.test("\u212b"));
assertFalse(/(?i)[\u212b]/.test("\u00e5\u1234"));
assertFalse(/(?i)[\u212b]/.test("\u00e5"));
assertFalse(/(?i)\u{10400}/.test("\u{10428}"));
assertFalse(/(?i)[\u00e5]/.test("\u212b"));
assertFalse(/(?i)[\u212b]/.test("\u00e5\u1234"));
assertFalse(/(?i)[\u212b]/.test("\u00e5"));
assertFalse(/(?i)\u{10400}/.test("\u{10428}"));
assertTrue(/(?i)[@-A]/.test("a"));
assertTrue(/(?i)[@-A]/.test("A"));
assertTrue(/(?i)[@-A]/.test("@"));
assertFalse(/(?i)[¿-À]/.test('¾'));
assertTrue(/(?i)[¿-À]/.test('¿'));
assertTrue(/(?i)[¿-À]/.test('À'));
assertTrue(/(?i)[¿-À]/.test('à'));
assertFalse(/(?i)[¿-À]/.test('á'));
assertFalse(/(?i)[¿-À]/.test('Á'));
assertFalse(/(?i)[¿-À]/.test('Á'));
assertFalse(/(?i)[Ö-×]/.test('Õ'));
assertTrue(/(?i)[Ö-×]/.test('Ö'));
assertTrue(/(?i)[Ö-×]/.test('ö'));
assertTrue(/(?i)[Ö-×]/.test('×'));
assertFalse(/(?i)[Ö-×]/.test('Ø'));
assertTrue(/(?i)(a[\u1000A])+/.test('aa'));
assertTrue(/(?i)\u0178/.test('\u00ff'));
assertTrue(/(?i)\u039c/.test('\u00b5'));
assertTrue(/(?i)\u039c/.test('\u03bc'));
assertTrue(/(?i)\u00b5/.test('\u03bc'));
assertTrue(/(?i)[\u039b-\u039d]/.test('\u00b5'));
assertFalse(/(?i)[^\u039b-\u039d]/.test('\u00b5'));
assertTrue(/(?m)^bar/.test("bar"));
assertTrue(/(?m)^bar/.test("bar\nfoo"));
assertTrue(/(?m)^bar/.test("foo\nbar"));
assertTrue(/(?m)bar$/.test("bar"));
assertTrue(/(?m)bar$/.test("bar\nfoo"));
assertTrue(/(?m)bar$/.test("foo\nbar"));
assertFalse(/(?m)^bxr/.test("bar"));
assertFalse(/(?m)^bxr/.test("bar\nfoo"));
assertFalse(/(?m)^bxr/.test("foo\nbar"));
assertFalse(/(?m)bxr$/.test("bar"));
assertFalse(/(?m)bxr$/.test("bar\nfoo"));
assertFalse(/(?m)bxr$/.test("foo\nbar"));
assertTrue(/(?m)^.*$/.test("\n"));
assertTrue(/(?m)^([()]|.)*$/.test("()\n()"));
assertTrue(/(?m)^([()]|.)*$/.test("()\n"));
assertTrue(/(?m)^[()]*$/.test("()\n."));
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --regexp-mode-modifiers --harmony-regexp-dotall
// The s flag switches dotall mode on and off. Combine it with changes to the
// i flag to test the parser. Each case is written once with bare modifiers
// such as (?s) and once with scoped groups such as (?s:...), with and without
// the /u flag. test() is a function declaration further down in this file,
// so it is hoisted and can be called here.
test(/.(?s).(?i-s).a(?-i)a/);
test(/.(?s:.)(?i:.a)a/);
test(/.(?s).(?i-s).a(?-i)a/u);
test(/.(?s:.)(?i:.a)a/u);
// Adding the m flag makes no difference to the expected results.
test(/.(?sm).(?i-s).a(?-i)a/);
// NOTE(review): this pattern contains no m flag and repeats an earlier case
// verbatim; presumably a scoped form such as (?sm:.) was intended. Confirm.
test(/.(?s:.)(?i:.a)a/);
test(/.(?sm).(?im-s).a(?m-i)a/u);
// NOTE(review): same observation as above for the /u variant.
test(/.(?s:.)(?i:.a)a/u);
// Checks |re| against six fixed subjects: the first three must match and the
// last three must not. The subjects are tried in the order listed.
function test(re) {
  const accepted = ["...aa", ".\n.aa", ".\n.Aa"];
  const rejected = ["\n\n.Aa", ".\n\nAa", ".\n.AA"];
  accepted.forEach((subject) => assertTrue(re.test(subject)));
  rejected.forEach((subject) => assertFalse(re.test(subject)));
}
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --regexp-mode-modifiers
// These tests won't all run on the noi18n build of V8.
aa(/(a)(?i)\1/u);
aa(/([az])(?i)\1/u);
// Asserts that |re| accepts "aa" and "aA" but rejects "Aa" and "AA".
function aa(re) {
  const expectations = [
    ["aa", true],
    ["aA", true],
    ["Aa", false],
    ["AA", false],
  ];
  for (const [subject, shouldMatch] of expectations) {
    const check = shouldMatch ? assertTrue : assertFalse;
    check(re.test(subject));
  }
}
aai(/(a)(?-i)\1/iu);
aai(/([az])(?-i)\1/iu);
// Asserts that |re| accepts "aa" and "AA" (both letters in the same case) but
// rejects the mixed-case subjects "aA" and "Aa".
function aai(re) {
  const expectations = [
    ["aa", true],
    ["aA", false],
    ["Aa", false],
    ["AA", true],
  ];
  for (const [subject, shouldMatch] of expectations) {
    const check = shouldMatch ? assertTrue : assertFalse;
    check(re.test(subject));
  }
}
abcd(/a(b(?i)c)d/u);
abcd(/[aw]([bx](?i)[cy])[dz]/u);
// Runs |re| against all 16 upper/lower-case spellings of "abcd", in the order
// abcd, abcD, abCd, abCD, aBcd, ... ABCD. Only "abcd" and "abCd" are expected
// to match; every other spelling must be rejected.
function abcd(re) {
  for (let bits = 0; bits < 16; bits++) {
    // Bit 8 capitalises a, bit 4 b, bit 2 c and bit 1 d.
    const subject = ((bits & 8) ? "A" : "a") +
                    ((bits & 4) ? "B" : "b") +
                    ((bits & 2) ? "C" : "c") +
                    ((bits & 1) ? "D" : "d");
    // A capital A, B or D must make the match fail; C may be either case.
    if ((bits & (8 | 4 | 1)) === 0) {
      assertTrue(re.test(subject));
    } else {
      assertFalse(re.test(subject));
    }
  }
}
abcdei(/a(b(?-i)c)d/iu);
abcdei(/[aw]([bx](?-i)[cy])[dz]/iu);
// Runs |re| against all 16 upper/lower-case spellings of "abcd", in the order
// abcd, abcD, abCd, abCD, aBcd, ... ABCD. A spelling is expected to match
// exactly when its third letter is a lower-case "c".
function abcdei(re) {
  for (let bits = 0; bits < 16; bits++) {
    // Bit 8 capitalises a, bit 4 b, bit 2 c and bit 1 d.
    const subject = ((bits & 8) ? "A" : "a") +
                    ((bits & 4) ? "B" : "b") +
                    ((bits & 2) ? "C" : "c") +
                    ((bits & 1) ? "D" : "d");
    // Only the case of c matters; a, b and d may be either case.
    if ((bits & 2) === 0) {
      assertTrue(re.test(subject));
    } else {
      assertFalse(re.test(subject));
    }
  }
}
abc(/a(?i:b)c/u);
abc(/[ax](?i:[by])[cz]/u);
// Runs |re| against all 8 upper/lower-case spellings of "abc", in the order
// abc, abC, aBc, aBC, Abc, AbC, ABc, ABC. Only "abc" and "aBc" are expected to
// match.
function abc(re) {
  for (let bits = 0; bits < 8; bits++) {
    // Bit 4 capitalises a, bit 2 b and bit 1 c.
    const subject = ((bits & 4) ? "A" : "a") +
                    ((bits & 2) ? "B" : "b") +
                    ((bits & 1) ? "C" : "c");
    // A capital A or C must make the match fail; b may be either case.
    if ((bits & (4 | 1)) === 0) {
      assertTrue(re.test(subject));
    } else {
      assertFalse(re.test(subject));
    }
  }
}
abci(/a(?-i:b)c/iu);
abci(/[ax](?-i:[by])[cz]/iu);
// Runs |re| against all 8 upper/lower-case spellings of "abc", in the order
// abc, abC, aBc, aBC, Abc, AbC, ABc, ABC. A spelling is expected to match
// exactly when its middle letter is a lower-case "b".
function abci(re) {
  for (let bits = 0; bits < 8; bits++) {
    // Bit 4 capitalises a, bit 2 b and bit 1 c.
    const subject = ((bits & 4) ? "A" : "a") +
                    ((bits & 2) ? "B" : "b") +
                    ((bits & 1) ? "C" : "c");
    // Only the case of b matters; a and c may be either case.
    if ((bits & 2) === 0) {
      assertTrue(re.test(subject));
    } else {
      assertFalse(re.test(subject));
    }
  }
}
// The following tests are taken from test/mjsunit/es7/regexp-ui-word.js but
// using inline syntax instead of the global /i flag.
assertTrue(/(?i)\w/u.test('\u017F'));
assertTrue(/(?i)\w/u.test('\u212A'));
assertFalse(/(?i)\W/u.test('\u017F'));
assertFalse(/(?i)\W/u.test('\u212A'));
assertFalse(/(?i)\W/u.test('s'));
assertFalse(/(?i)\W/u.test('S'));
assertFalse(/(?i)\W/u.test('K'));
assertFalse(/(?i)\W/u.test('k'));
assertTrue(/(?i)[\w]/u.test('\u017F'));
assertTrue(/(?i)[\w]/u.test('\u212A'));
assertFalse(/(?i)[\W]/u.test('\u017F'));
assertFalse(/(?i)[\W]/u.test('\u212A'));
assertFalse(/(?i)[\W]/u.test('s'));
assertFalse(/(?i)[\W]/u.test('S'));
assertFalse(/(?i)[\W]/u.test('K'));
assertFalse(/(?i)[\W]/u.test('k'));
assertTrue(/(?i)\b/u.test('\u017F'));
assertFalse(/(?i:)\b/u.test('\u017F'));
assertTrue(/(?i)\b/u.test('\u212A'));
assertFalse(/(?i:)\b/u.test('\u212A'));
assertTrue(/(?i)\b/u.test('s'));
assertTrue(/(?i)\b/u.test('S'));
assertFalse(/(?i)\B/u.test('\u017F'));
assertFalse(/(?i)\B/u.test('\u212A'));
assertFalse(/(?i)\B/u.test('s'));
assertFalse(/(?i)\B/u.test('S'));
assertFalse(/(?i)\B/u.test('K'));
assertFalse(/(?i)\B/u.test('k'));
assertEquals(["abcd\u017F", "\u017F"], /a.*?(.)(?i)\b/u.exec('abcd\u017F cd'));
assertEquals(["abcd\u212A", "\u212A"], /a.*?(.)(?i)\b/u.exec('abcd\u212A cd'));
assertEquals(["a\u017F", "\u017F"], /a.*?(?i:\B)(.)/u.exec('a\u017F '));
assertEquals(["a\u212A", "\u212A"], /a.*?(?i:\B)(.)/u.exec('a\u212A '));
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --regexp-mode-modifiers
aa(/(a)(?i)\1/);
aa(/([az])(?i)\1/);
// Asserts that |re| accepts "aa" and "aA" but rejects "Aa" and "AA".
function aa(re) {
  const expectations = [
    ["aa", true],
    ["aA", true],
    ["Aa", false],
    ["AA", false],
  ];
  for (const [subject, shouldMatch] of expectations) {
    const check = shouldMatch ? assertTrue : assertFalse;
    check(re.test(subject));
  }
}
aai(/(a)(?-i)\1/i);
aai(/([az])(?-i)\1/i);
// Asserts that |re| accepts "aa" and "AA" (both letters in the same case) but
// rejects the mixed-case subjects "aA" and "Aa".
function aai(re) {
  const expectations = [
    ["aa", true],
    ["aA", false],
    ["Aa", false],
    ["AA", true],
  ];
  for (const [subject, shouldMatch] of expectations) {
    const check = shouldMatch ? assertTrue : assertFalse;
    check(re.test(subject));
  }
}
abcd(/a(b(?i)c)d/);
abcd(/[aw]([bx](?i)[cy])[dz]/);
// Runs |re| against all 16 upper/lower-case spellings of "abcd", in the order
// abcd, abcD, abCd, abCD, aBcd, ... ABCD. Only "abcd" and "abCd" are expected
// to match; every other spelling must be rejected.
function abcd(re) {
  for (let bits = 0; bits < 16; bits++) {
    // Bit 8 capitalises a, bit 4 b, bit 2 c and bit 1 d.
    const subject = ((bits & 8) ? "A" : "a") +
                    ((bits & 4) ? "B" : "b") +
                    ((bits & 2) ? "C" : "c") +
                    ((bits & 1) ? "D" : "d");
    // A capital A, B or D must make the match fail; C may be either case.
    if ((bits & (8 | 4 | 1)) === 0) {
      assertTrue(re.test(subject));
    } else {
      assertFalse(re.test(subject));
    }
  }
}
abcdei(/a(b(?-i)c)d/i);
abcdei(/[aw]([bx](?-i)[cy])[dz]/i);
// Runs |re| against all 16 upper/lower-case spellings of "abcd", in the order
// abcd, abcD, abCd, abCD, aBcd, ... ABCD. A spelling is expected to match
// exactly when its third letter is a lower-case "c".
function abcdei(re) {
  for (let bits = 0; bits < 16; bits++) {
    // Bit 8 capitalises a, bit 4 b, bit 2 c and bit 1 d.
    const subject = ((bits & 8) ? "A" : "a") +
                    ((bits & 4) ? "B" : "b") +
                    ((bits & 2) ? "C" : "c") +
                    ((bits & 1) ? "D" : "d");
    // Only the case of c matters; a, b and d may be either case.
    if ((bits & 2) === 0) {
      assertTrue(re.test(subject));
    } else {
      assertFalse(re.test(subject));
    }
  }
}
abc(/a(?i:b)c/);
abc(/[ax](?i:[by])[cz]/);
// Runs |re| against all 8 upper/lower-case spellings of "abc", in the order
// abc, abC, aBc, aBC, Abc, AbC, ABc, ABC. Only "abc" and "aBc" are expected to
// match.
function abc(re) {
  for (let bits = 0; bits < 8; bits++) {
    // Bit 4 capitalises a, bit 2 b and bit 1 c.
    const subject = ((bits & 4) ? "A" : "a") +
                    ((bits & 2) ? "B" : "b") +
                    ((bits & 1) ? "C" : "c");
    // A capital A or C must make the match fail; b may be either case.
    if ((bits & (4 | 1)) === 0) {
      assertTrue(re.test(subject));
    } else {
      assertFalse(re.test(subject));
    }
  }
}
abci(/a(?-i:b)c/i);
abci(/[ax](?-i:[by])[cz]/i);
// Runs |re| against all 8 upper/lower-case spellings of "abc", in the order
// abc, abC, aBc, aBC, Abc, AbC, ABc, ABC. A spelling is expected to match
// exactly when its middle letter is a lower-case "b".
function abci(re) {
  for (let bits = 0; bits < 8; bits++) {
    // Bit 4 capitalises a, bit 2 b and bit 1 c.
    const subject = ((bits & 4) ? "A" : "a") +
                    ((bits & 2) ? "B" : "b") +
                    ((bits & 1) ? "C" : "c");
    // Only the case of b matters; a and c may be either case.
    if ((bits & 2) === 0) {
      assertTrue(re.test(subject));
    } else {
      assertFalse(re.test(subject));
    }
  }
}
// Malformed modifier syntax must make the RegExp constructor throw. For both
// the i and the m flag this covers an unterminated scoped group "(?x:", a
// doubled minus "(?--x)", and the same flag on both sides of the minus
// "(?x-x)".
assertThrows(() => new RegExp("foo(?i:"));
assertThrows(() => new RegExp("foo(?--i)"));
assertThrows(() => new RegExp("foo(?i-i)"));
assertThrows(() => new RegExp("foo(?m:"));
assertThrows(() => new RegExp("foo(?--m)"));
assertThrows(() => new RegExp("foo(?m-m)"));
// The pattern turns the m flag on with (?m) around the inner "^.$" and off
// again with (?-m) before the final "$". Of the subjects below only "\n.\n"
// is expected to match: replacing either newline with a space, or adding a
// character before or after, must make the match fail.
var re = /^\s(?m)^.$\s(?-m)$/;
assertTrue(re.test("\n.\n"));
assertFalse(re.test(" .\n"));
assertFalse(re.test("\n. "));
assertFalse(re.test(" . "));
assertFalse(re.test("_\n.\n"));
assertFalse(re.test("\n.\n_"));
assertFalse(re.test("_\n.\n_"));
assertEquals(["abcd", "d"], /a.*?(.)(?i)\b/.exec('abcd\u017F cd'));
assertEquals(["abcd", "d"], /a.*?(.)(?i)\b/.exec('abcd\u212A cd'));
assertEquals(["a\u017F ", " "], /a.*?(?i)\B(.)/.exec('a\u017F '));
assertEquals(["a\u212A ", " "], /a.*?(?i)\B(.)/.exec('a\u212A '));
// Nested flags.
var res = [
/^a(?i:b(?-i:c(?i:d)e)f)g$/,
/^a(?i:b(?-i)c(?i)d(?-i)e(?i)f)g$/,
/^(?-i:a(?i:b(?-i:c(?i:d)e)f)g)$/i,
/^(?-i:a(?i:b(?-i)c(?i)d(?-i)e(?i)f)g)$/i,
];
for (var idx = 0; idx < res.length; idx++) {
var re = res[idx];
for (var i = 0; i < 128; i++) {
var s = (i & 1) ? "A" : "a";
s += (i & 2) ? "B" : "b";
s += (i & 4) ? "C" : "c";
s += (i & 8) ? "D" : "d";
s += (i & 16) ? "E" : "e";
s += (i & 32) ? "F" : "f";
s += (i & 64) ? "G" : "g";
if ((i & (1 | 4 | 16 | 64)) != 0) {
assertFalse(re.test(s), s);
} else {
assertTrue(re.test(s), s);
}
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment