Commit cec39ad1, authored by jgruber, committed by Commit bot

[regexp] Add support for dotAll flag

The dotAll flag changes behavior of the dot '.' character to match every
possible single character instead of excluding certain line terminators.

The implementation is staged behind --harmony-regexp-dotall.

Spec proposal: https://github.com/mathiasbynens/es-regexp-dotall-flag

BUG=v8:6172

Review-Url: https://codereview.chromium.org/2780173002
Cr-Commit-Position: refs/heads/master@{#44295}
parent 85ff725c
......@@ -4692,11 +4692,12 @@ class V8_EXPORT RegExp : public Object {
*/
enum Flags {
  // Every flag occupies its own bit, so several flags can be combined with
  // bitwise OR.
  kNone = 0,
  kGlobal = 1 << 0,
  kIgnoreCase = 1 << 1,
  kMultiline = 1 << 2,
  kSticky = 1 << 3,
  kUnicode = 1 << 4,
  // With dotAll, '.' matches every single character, including the line
  // terminators it otherwise excludes.
  kDotAll = 1 << 5,
};
/**
......
......@@ -1228,6 +1228,11 @@ ExternalReference ExternalReference::address_of_regexp_stack_limit(
return ExternalReference(isolate->regexp_stack()->limit_address());
}
// Wraps the address of FLAG_harmony_regexp_dotall in an ExternalReference so
// that generated code can read the flag directly. |isolate| is not used.
ExternalReference ExternalReference::address_of_regexp_dotall_flag(
    Isolate* isolate) {
  ExternalReference flag_reference(&FLAG_harmony_regexp_dotall);
  return flag_reference;
}
// Wraps the store-buffer-top address reported by |isolate|'s heap in an
// ExternalReference.
ExternalReference ExternalReference::store_buffer_top(Isolate* isolate) {
  auto* heap = isolate->heap();
  return ExternalReference(heap->store_buffer_top_address());
}
......
......@@ -923,6 +923,9 @@ class ExternalReference BASE_EMBEDDED {
// Static variable RegExpStack::limit_address()
static ExternalReference address_of_regexp_stack_limit(Isolate* isolate);
// Direct access to FLAG_harmony_regexp_dotall.
static ExternalReference address_of_regexp_dotall_flag(Isolate* isolate);
// Static variables for RegExp.
static ExternalReference address_of_static_offsets_vector(Isolate* isolate);
static ExternalReference address_of_regexp_stack_memory_address(
......
......@@ -4004,6 +4004,22 @@ void Genesis::InitializeGlobal_harmony_promise_finally() {
}
}
void Genesis::InitializeGlobal_harmony_regexp_dotall() {
if (!FLAG_harmony_regexp_dotall) return;
Handle<JSFunction> constructor(native_context()->regexp_function());
Handle<JSObject> prototype(JSObject::cast(constructor->instance_prototype()));
SimpleInstallGetter(prototype, isolate()->factory()->dotAll_string(),
Builtins::kRegExpPrototypeDotAllGetter, true);
// The regexp prototype map has changed because we added a property
// to it, so we update the saved map.
Handle<Map> prototype_map(prototype->map());
Map::SetShouldBeFastPrototypeMap(prototype_map, true, isolate());
native_context()->set_regexp_prototype_map(*prototype_map);
}
#ifdef V8_I18N_SUPPORT
void Genesis::InitializeGlobal_datetime_format_to_parts() {
if (!FLAG_datetime_format_to_parts) return;
......
......@@ -737,6 +737,8 @@ namespace internal {
TFJ(RegExpPrototypeCompile, 2, kPattern, kFlags) \
/* ES #sec-regexp.prototype.exec */ \
TFJ(RegExpPrototypeExec, 1, kString) \
/* ES #sec-get-regexp.prototype.dotAll */ \
TFJ(RegExpPrototypeDotAllGetter, 0) \
/* ES #sec-get-regexp.prototype.flags */ \
TFJ(RegExpPrototypeFlagsGetter, 0) \
/* ES #sec-get-regexp.prototype.global */ \
......
......@@ -781,6 +781,8 @@ Node* RegExpBuiltinsAssembler::FlagsGetter(Node* const context,
Variable var_length(this, MachineType::PointerRepresentation(), int_zero);
Variable var_flags(this, MachineType::PointerRepresentation());
Node* const is_dotall_enabled = IsDotAllEnabled(isolate);
// First, count the number of characters we will need and check which flags
// are set.
......@@ -802,6 +804,13 @@ Node* RegExpBuiltinsAssembler::FlagsGetter(Node* const context,
CASE_FOR_FLAG(JSRegExp::kGlobal);
CASE_FOR_FLAG(JSRegExp::kIgnoreCase);
CASE_FOR_FLAG(JSRegExp::kMultiline);
{
Label next(this);
GotoIfNot(is_dotall_enabled, &next);
CASE_FOR_FLAG(JSRegExp::kDotAll);
Goto(&next);
Bind(&next);
}
CASE_FOR_FLAG(JSRegExp::kUnicode);
CASE_FOR_FLAG(JSRegExp::kSticky);
#undef CASE_FOR_FLAG
......@@ -828,6 +837,13 @@ Node* RegExpBuiltinsAssembler::FlagsGetter(Node* const context,
CASE_FOR_FLAG("global", JSRegExp::kGlobal);
CASE_FOR_FLAG("ignoreCase", JSRegExp::kIgnoreCase);
CASE_FOR_FLAG("multiline", JSRegExp::kMultiline);
{
Label next(this);
GotoIfNot(is_dotall_enabled, &next);
CASE_FOR_FLAG("dotAll", JSRegExp::kDotAll);
Goto(&next);
Bind(&next);
}
CASE_FOR_FLAG("unicode", JSRegExp::kUnicode);
CASE_FOR_FLAG("sticky", JSRegExp::kSticky);
#undef CASE_FOR_FLAG
......@@ -859,6 +875,13 @@ Node* RegExpBuiltinsAssembler::FlagsGetter(Node* const context,
CASE_FOR_FLAG(JSRegExp::kGlobal, 'g');
CASE_FOR_FLAG(JSRegExp::kIgnoreCase, 'i');
CASE_FOR_FLAG(JSRegExp::kMultiline, 'm');
{
Label next(this);
GotoIfNot(is_dotall_enabled, &next);
CASE_FOR_FLAG(JSRegExp::kDotAll, 's');
Goto(&next);
Bind(&next);
}
CASE_FOR_FLAG(JSRegExp::kUnicode, 'u');
CASE_FOR_FLAG(JSRegExp::kSticky, 'y');
#undef CASE_FOR_FLAG
......@@ -1211,6 +1234,9 @@ Node* RegExpBuiltinsAssembler::SlowFlagGetter(Node* const context,
case JSRegExp::kMultiline:
name = factory->multiline_string();
break;
case JSRegExp::kDotAll:
UNREACHABLE(); // Never called for dotAll.
break;
case JSRegExp::kSticky:
name = factory->sticky_string();
break;
......@@ -1251,8 +1277,7 @@ Node* RegExpBuiltinsAssembler::FlagGetter(Node* const context,
}
void RegExpBuiltinsAssembler::FlagGetter(Node* context, Node* receiver,
JSRegExp::Flag flag,
v8::Isolate::UseCounterFeature counter,
JSRegExp::Flag flag, int counter,
const char* method_name) {
Isolate* isolate = this->isolate();
......@@ -1290,8 +1315,10 @@ void RegExpBuiltinsAssembler::FlagGetter(Node* context, Node* receiver,
Bind(&if_isprototype);
{
Node* const counter_smi = SmiConstant(Smi::FromInt(counter));
CallRuntime(Runtime::kIncrementUseCounter, context, counter_smi);
if (counter != -1) {
Node* const counter_smi = SmiConstant(Smi::FromInt(counter));
CallRuntime(Runtime::kIncrementUseCounter, context, counter_smi);
}
Return(UndefinedConstant());
}
......@@ -1338,6 +1365,23 @@ TF_BUILTIN(RegExpPrototypeMultilineGetter, RegExpBuiltinsAssembler) {
"RegExp.prototype.multiline");
}
// Emits code that reads FLAG_harmony_regexp_dotall when the generated code
// runs and returns a node that is true iff the flag is non-zero.
Node* RegExpBuiltinsAssembler::IsDotAllEnabled(Isolate* isolate) {
  Node* const flag_ptr = ExternalConstant(
      ExternalReference::address_of_regexp_dotall_flag(isolate));
  // The flag is a C++ bool, so only a single byte may be read here. A
  // pointer-sized load would pull in whatever is stored next to the flag,
  // making the comparison depend on unrelated memory and on byte order.
  Node* const flag_value = Load(MachineType::Uint8(), flag_ptr);
  return Word32NotEqual(flag_value, Int32Constant(0));
}
// ES #sec-get-regexp.prototype.dotAll
// Getter for RegExp.prototype.dotAll; forwards to the shared FlagGetter
// helper for JSRegExp::kDotAll.
TF_BUILTIN(RegExpPrototypeDotAllGetter, RegExpBuiltinsAssembler) {
  // FlagGetter skips the use-counter increment when the counter is -1.
  constexpr int kNoCounter = -1;

  Node* const ctx = Parameter(Descriptor::kContext);
  Node* const this_value = Parameter(Descriptor::kReceiver);

  CSA_ASSERT(this, IsDotAllEnabled(isolate()));
  FlagGetter(ctx, this_value, JSRegExp::kDotAll, kNoCounter,
             "RegExp.prototype.dotAll");
}
// ES6 21.2.5.12.
// ES #sec-get-regexp.prototype.sticky
TF_BUILTIN(RegExpPrototypeStickyGetter, RegExpBuiltinsAssembler) {
......
......@@ -72,8 +72,10 @@ class RegExpBuiltinsAssembler : public CodeStubAssembler {
Node* FlagGetter(Node* const context, Node* const regexp, JSRegExp::Flag flag,
bool is_fastpath);
void FlagGetter(Node* context, Node* receiver, JSRegExp::Flag flag,
v8::Isolate::UseCounterFeature counter,
const char* method_name);
int counter, const char* method_name);
// Utility method, remove once dotall is unstaged.
Node* IsDotAllEnabled(Isolate* isolate);
Node* IsRegExp(Node* const context, Node* const maybe_receiver);
Node* RegExpInitialize(Node* const context, Node* const regexp,
......
......@@ -273,6 +273,9 @@ void ExternalReferenceTable::AddReferences(Isolate* isolate) {
Add(ExternalReference::debug_restart_fp_address(isolate).address(),
"Debug::restart_fp_address()");
Add(ExternalReference::address_of_regexp_dotall_flag(isolate).address(),
"FLAG_harmony_regexp_dotall");
#ifndef V8_INTERPRETED_REGEXP
Add(ExternalReference::re_case_insensitive_compare_uc16(isolate).address(),
"NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16()");
......
......@@ -204,6 +204,7 @@ DEFINE_IMPLICATION(es_staging, move_object_start)
V(harmony_tailcalls, "harmony tail calls") \
V(harmony_sharedarraybuffer, "harmony sharedarraybuffer") \
V(harmony_do_expressions, "harmony do-expressions") \
V(harmony_regexp_dotall, "harmony regexp dotall flag") \
V(harmony_regexp_named_captures, "harmony regexp named captures") \
V(harmony_regexp_property, "harmony unicode regexp property classes") \
V(harmony_class_fields, "harmony public fields in class literals") \
......
......@@ -54,6 +54,7 @@
V(did_handle_string, "didHandle") \
V(display_name_string, "displayName") \
V(done_string, "done") \
V(dotAll_string, "dotAll") \
V(dot_catch_string, ".catch") \
V(dot_for_string, ".for") \
V(dot_generator_object_string, ".generator_object") \
......
......@@ -16144,6 +16144,13 @@ JSRegExp::Flags RegExpFlagsFromString(Handle<String> flags, bool* success) {
case 'm':
flag = JSRegExp::kMultiline;
break;
case 's':
if (FLAG_harmony_regexp_dotall) {
flag = JSRegExp::kDotAll;
} else {
return JSRegExp::Flags(0);
}
break;
case 'u':
flag = JSRegExp::kUnicode;
break;
......
......@@ -8305,6 +8305,7 @@ class JSRegExp: public JSObject {
kMultiline = 1 << 2,
kSticky = 1 << 3,
kUnicode = 1 << 4,
kDotAll = 1 << 5,
};
typedef base::Flags<Flag> Flags;
......
......@@ -1717,6 +1717,13 @@ Maybe<RegExp::Flags> Scanner::ScanRegExpFlags() {
case 'm':
flag = RegExp::kMultiline;
break;
case 's':
if (FLAG_harmony_regexp_dotall) {
flag = RegExp::kDotAll;
} else {
return Nothing<RegExp::Flags>();
}
break;
case 'u':
flag = RegExp::kUnicode;
break;
......
......@@ -29,6 +29,7 @@ RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error,
named_back_references_(NULL),
in_(in),
current_(kEndMarker),
dotall_(flags & JSRegExp::kDotAll),
ignore_case_(flags & JSRegExp::kIgnoreCase),
multiline_(flags & JSRegExp::kMultiline),
unicode_(flags & JSRegExp::kUnicode),
......@@ -40,6 +41,7 @@ RegExpParser::RegExpParser(FlatStringReader* in, Handle<String>* error,
contains_anchor_(false),
is_scanned_for_captures_(false),
failed_(false) {
DCHECK_IMPLIES(dotall(), FLAG_harmony_regexp_dotall);
Advance();
}
......@@ -270,10 +272,18 @@ RegExpTree* RegExpParser::ParseDisjunction() {
}
case '.': {
Advance();
// everything except \x0a, \x0d, \u2028 and \u2029
ZoneList<CharacterRange>* ranges =
new (zone()) ZoneList<CharacterRange>(2, zone());
CharacterRange::AddClassEscape('.', ranges, false, zone());
if (dotall()) {
// Everything.
DCHECK(FLAG_harmony_regexp_dotall);
CharacterRange::AddClassEscape('*', ranges, false, zone());
} else {
// Everything except \x0a, \x0d, \u2028 and \u2029
CharacterRange::AddClassEscape('.', ranges, false, zone());
}
RegExpCharacterClass* cc =
new (zone()) RegExpCharacterClass(ranges, false);
builder->AddCharacterClass(cc);
......
......@@ -199,6 +199,7 @@ class RegExpParser BASE_EMBEDDED {
int captures_started() { return captures_started_; }
int position() { return next_pos_ - 1; }
bool failed() { return failed_; }
bool dotall() const { return dotall_; }
bool ignore_case() const { return ignore_case_; }
bool multiline() const { return multiline_; }
bool unicode() const { return unicode_; }
......@@ -312,6 +313,7 @@ class RegExpParser BASE_EMBEDDED {
ZoneList<RegExpBackReference*>* named_back_references_;
FlatStringReader* in_;
uc32 current_;
bool dotall_;
bool ignore_case_;
bool multiline_;
bool unicode_;
......
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// This tests that RegExp dotall features are not enabled when
// --harmony-regexp-dotall is not passed.
// Construction with the 's' flag throws a SyntaxError (as does an unknown
// flag string), whether via a literal, RegExp() or new RegExp().
{
assertThrows("/./s", SyntaxError);
assertThrows(() => RegExp(".", "s"), SyntaxError);
assertThrows(() => new RegExp(".", "s"), SyntaxError);
assertThrows(() => new RegExp(".", "wtf"), SyntaxError);
}
// The flags accessors.
{
  let re = /./gimyu;
  assertEquals("gimuy", re.flags);
  assertTrue(re.global);
  assertTrue(re.ignoreCase);
  assertTrue(re.multiline);
  assertTrue(re.sticky);
  assertTrue(re.unicode);

  // There is no dotAll accessor at all in this configuration.
  assertEquals(undefined, re.dotAll);
  assertFalse("dotAll" in re);

  // A user-installed dotAll getter must not be consulted by the flags getter.
  let callCount = 0;
  re.__defineGetter__("dotAll", () => { callCount++; return undefined; });
  assertEquals("gimuy", re.flags);
  assertEquals(0, callCount);
}
// Default '.' behavior.
{
  const dot = /^.$/;
  const accepted = ["a", "3", "π", "\u2027", "\u0085", "\v", "\f", "\u180E"];
  // First entry is a supplementary-plane character; the rest are the line
  // terminators that '.' excludes by default.
  const rejected = ["\u{10300}", "\n", "\r", "\u2028", "\u2029"];

  accepted.forEach((ch) => assertTrue(dot.test(ch)));
  rejected.forEach((ch) => assertFalse(dot.test(ch)));
}
// Default '.' behavior (unicode).
{
  const dot = /^.$/u;
  // The last entry is a supplementary-plane character.
  const accepted = [
    "a", "3", "π", "\u2027", "\u0085", "\v", "\f", "\u180E", "\u{10300}",
  ];
  // The line terminators that '.' excludes by default.
  const rejected = ["\n", "\r", "\u2028", "\u2029"];

  accepted.forEach((ch) => assertTrue(dot.test(ch)));
  rejected.forEach((ch) => assertFalse(dot.test(ch)));
}
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --harmony-regexp-dotall
// Gives `re` an own `exec` property that forwards to the builtin
// RegExp.prototype.exec, then returns the same object.
function toSlowMode(re) {
  re.exec = function(str) {
    return RegExp.prototype.exec.call(re, str);
  };
  return re;
}
// Construction does not throw.
{
  // A literal, a RegExp() call and `new RegExp()` all accept the 's' flag.
  const fromLiteral = /./s;
  const fromCall = RegExp(".", "s");
  const fromNew = new RegExp(".", "s");

  // Unknown flag strings are still rejected.
  assertThrows(() => new RegExp(".", "wtf"), SyntaxError);
}
// The flags accessors.
{
  const accessorNames =
      ["global", "ignoreCase", "multiline", "sticky", "unicode", "dotAll"];

  // Checks `re.flags` first, then each accessor in the order listed above.
  // Accessors named in `enabled` must be true, all others false.
  const expectFlags = (re, flagString, enabled) => {
    assertEquals(flagString, re.flags);
    for (const name of accessorNames) {
      if (enabled.includes(name)) {
        assertTrue(re[name]);
      } else {
        assertFalse(re[name]);
      }
    }
  };

  expectFlags(/./s, "s", ["dotAll"]);
  expectFlags(toSlowMode(/./s), "s", ["dotAll"]);
  expectFlags(/./gimyus, "gimsuy", accessorNames);
  expectFlags(/./gimyu, "gimuy",
              ["global", "ignoreCase", "multiline", "sticky", "unicode"]);
}
// Default '.' behavior.
{
  const dot = /^.$/;
  const accepted = ["a", "3", "π", "\u2027", "\u0085", "\v", "\f", "\u180E"];
  // First entry is a supplementary-plane character; the rest are the line
  // terminators that '.' excludes by default.
  const rejected = ["\u{10300}", "\n", "\r", "\u2028", "\u2029"];

  accepted.forEach((ch) => assertTrue(dot.test(ch)));
  rejected.forEach((ch) => assertFalse(dot.test(ch)));
}
// Default '.' behavior (unicode).
{
  const dot = /^.$/u;
  // The last entry is a supplementary-plane character.
  const accepted = [
    "a", "3", "π", "\u2027", "\u0085", "\v", "\f", "\u180E", "\u{10300}",
  ];
  // The line terminators that '.' excludes by default.
  const rejected = ["\n", "\r", "\u2028", "\u2029"];

  accepted.forEach((ch) => assertTrue(dot.test(ch)));
  rejected.forEach((ch) => assertFalse(dot.test(ch)));
}
// DotAll '.' behavior.
{
  const dot = /^.$/s;
  // With the 's' flag the line terminators at the end of this list match too.
  const accepted = [
    "a", "3", "π", "\u2027", "\u0085", "\v", "\f", "\u180E",
    "\n", "\r", "\u2028", "\u2029",
  ];
  // Supplementary plane.
  const rejected = ["\u{10300}"];

  accepted.forEach((ch) => assertTrue(dot.test(ch)));
  rejected.forEach((ch) => assertFalse(dot.test(ch)));
}
// DotAll '.' behavior (unicode).
{
  const dot = /^.$/su;
  // Every sample matches: ordinary characters, the supplementary-plane
  // character and all four line terminators.
  const accepted = [
    "a", "3", "π", "\u2027", "\u0085", "\v", "\f", "\u180E", "\u{10300}",
    "\n", "\r", "\u2028", "\u2029",
  ];

  accepted.forEach((ch) => assertTrue(dot.test(ch)));
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment