Commit 5720d205 authored by Martin Bidlingmaier, committed by Commit Bot

[regexp] Add 'l' flag to force experimental engine

This commit adds the 'l' (linear) RegExp flag (as in e.g. /asdf|123/l)
that forces execution in linear time.  These regexps are handled by the
experimental engine.  If the experimental engine cannot handle the
pattern, an exception is thrown on creation of the regexp.

The commit also adds a new global V8 flag and changes an existing one:
* --enable-experimental-regexp-engine, which turns on recognition of the RegExp
  'l' flag.  Previously this flag also caused all supported regexps to
  be executed by the experimental engine; this is not the case anymore.
* --default-to-experimental-regexp-engine takes over the previous
  semantics of --enable-experimental-regexp-engine:  We execute all
  supported regexps with the experimental engine.

Cq-Include-Trybots: luci.v8.try:v8_linux64_fyi_rel_ng
Bug: v8:10765
Change-Id: I5622a89b19404105e8be280d454e9fdd63c003b3
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2461244
Reviewed-by: Ulan Degenbaev <ulan@chromium.org>
Reviewed-by: Georg Neis <neis@chromium.org>
Reviewed-by: Simon Zünd <szuend@chromium.org>
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Commit-Queue: Martin Bidlingmaier <mbid@google.com>
Cr-Commit-Position: refs/heads/master@{#70892}
parent a928f5fc
...@@ -5983,6 +5983,10 @@ class V8_EXPORT RegExp : public Object { ...@@ -5983,6 +5983,10 @@ class V8_EXPORT RegExp : public Object {
/** /**
* Regular expression flag bits. They can be or'ed to enable a set * Regular expression flag bits. They can be or'ed to enable a set
* of flags. * of flags.
* The kLinear value ('l') is experimental and can only be used with
* --enable-experimental-regexp-engine. RegExps with kLinear flag are
* guaranteed to be executed in asymptotic linear time wrt. the length of
* the subject string.
*/ */
enum Flags { enum Flags {
kNone = 0, kNone = 0,
...@@ -5992,9 +5996,10 @@ class V8_EXPORT RegExp : public Object { ...@@ -5992,9 +5996,10 @@ class V8_EXPORT RegExp : public Object {
kSticky = 1 << 3, kSticky = 1 << 3,
kUnicode = 1 << 4, kUnicode = 1 << 4,
kDotAll = 1 << 5, kDotAll = 1 << 5,
kLinear = 1 << 6,
}; };
static constexpr int kFlagCount = 6; static constexpr int kFlagCount = 7;
/** /**
* Creates a regular expression from the given pattern string and * Creates a regular expression from the given pattern string and
......
...@@ -6835,6 +6835,7 @@ REGEXP_FLAG_ASSERT_EQ(kIgnoreCase); ...@@ -6835,6 +6835,7 @@ REGEXP_FLAG_ASSERT_EQ(kIgnoreCase);
REGEXP_FLAG_ASSERT_EQ(kMultiline); REGEXP_FLAG_ASSERT_EQ(kMultiline);
REGEXP_FLAG_ASSERT_EQ(kSticky); REGEXP_FLAG_ASSERT_EQ(kSticky);
REGEXP_FLAG_ASSERT_EQ(kUnicode); REGEXP_FLAG_ASSERT_EQ(kUnicode);
REGEXP_FLAG_ASSERT_EQ(kLinear);
#undef REGEXP_FLAG_ASSERT_EQ #undef REGEXP_FLAG_ASSERT_EQ
v8::RegExp::Flags v8::RegExp::GetFlags() const { v8::RegExp::Flags v8::RegExp::GetFlags() const {
......
...@@ -258,6 +258,7 @@ void CallPrinter::VisitRegExpLiteral(RegExpLiteral* node) { ...@@ -258,6 +258,7 @@ void CallPrinter::VisitRegExpLiteral(RegExpLiteral* node) {
Print("/"); Print("/");
if (node->flags() & RegExp::kGlobal) Print("g"); if (node->flags() & RegExp::kGlobal) Print("g");
if (node->flags() & RegExp::kIgnoreCase) Print("i"); if (node->flags() & RegExp::kIgnoreCase) Print("i");
if (node->flags() & RegExp::kLinear) Print("l");
if (node->flags() & RegExp::kMultiline) Print("m"); if (node->flags() & RegExp::kMultiline) Print("m");
if (node->flags() & RegExp::kUnicode) Print("u"); if (node->flags() & RegExp::kUnicode) Print("u");
if (node->flags() & RegExp::kSticky) Print("y"); if (node->flags() & RegExp::kSticky) Print("y");
...@@ -1163,6 +1164,7 @@ void AstPrinter::VisitRegExpLiteral(RegExpLiteral* node) { ...@@ -1163,6 +1164,7 @@ void AstPrinter::VisitRegExpLiteral(RegExpLiteral* node) {
EmbeddedVector<char, 128> buf; EmbeddedVector<char, 128> buf;
if (node->flags() & RegExp::kGlobal) buf[i++] = 'g'; if (node->flags() & RegExp::kGlobal) buf[i++] = 'g';
if (node->flags() & RegExp::kIgnoreCase) buf[i++] = 'i'; if (node->flags() & RegExp::kIgnoreCase) buf[i++] = 'i';
if (node->flags() & RegExp::kLinear) buf[i++] = 'l';
if (node->flags() & RegExp::kMultiline) buf[i++] = 'm'; if (node->flags() & RegExp::kMultiline) buf[i++] = 'm';
if (node->flags() & RegExp::kUnicode) buf[i++] = 'u'; if (node->flags() & RegExp::kUnicode) buf[i++] = 'u';
if (node->flags() & RegExp::kSticky) buf[i++] = 'y'; if (node->flags() & RegExp::kSticky) buf[i++] = 'y';
......
...@@ -929,6 +929,7 @@ TNode<String> RegExpBuiltinsAssembler::FlagsGetter(TNode<Context> context, ...@@ -929,6 +929,7 @@ TNode<String> RegExpBuiltinsAssembler::FlagsGetter(TNode<Context> context,
CASE_FOR_FLAG(JSRegExp::kGlobal); CASE_FOR_FLAG(JSRegExp::kGlobal);
CASE_FOR_FLAG(JSRegExp::kIgnoreCase); CASE_FOR_FLAG(JSRegExp::kIgnoreCase);
CASE_FOR_FLAG(JSRegExp::kLinear);
CASE_FOR_FLAG(JSRegExp::kMultiline); CASE_FOR_FLAG(JSRegExp::kMultiline);
CASE_FOR_FLAG(JSRegExp::kDotAll); CASE_FOR_FLAG(JSRegExp::kDotAll);
CASE_FOR_FLAG(JSRegExp::kUnicode); CASE_FOR_FLAG(JSRegExp::kUnicode);
...@@ -956,6 +957,7 @@ TNode<String> RegExpBuiltinsAssembler::FlagsGetter(TNode<Context> context, ...@@ -956,6 +957,7 @@ TNode<String> RegExpBuiltinsAssembler::FlagsGetter(TNode<Context> context,
CASE_FOR_FLAG("global", JSRegExp::kGlobal); CASE_FOR_FLAG("global", JSRegExp::kGlobal);
CASE_FOR_FLAG("ignoreCase", JSRegExp::kIgnoreCase); CASE_FOR_FLAG("ignoreCase", JSRegExp::kIgnoreCase);
CASE_FOR_FLAG("linear", JSRegExp::kLinear);
CASE_FOR_FLAG("multiline", JSRegExp::kMultiline); CASE_FOR_FLAG("multiline", JSRegExp::kMultiline);
CASE_FOR_FLAG("dotAll", JSRegExp::kDotAll); CASE_FOR_FLAG("dotAll", JSRegExp::kDotAll);
CASE_FOR_FLAG("unicode", JSRegExp::kUnicode); CASE_FOR_FLAG("unicode", JSRegExp::kUnicode);
...@@ -986,6 +988,7 @@ TNode<String> RegExpBuiltinsAssembler::FlagsGetter(TNode<Context> context, ...@@ -986,6 +988,7 @@ TNode<String> RegExpBuiltinsAssembler::FlagsGetter(TNode<Context> context,
CASE_FOR_FLAG(JSRegExp::kGlobal, 'g'); CASE_FOR_FLAG(JSRegExp::kGlobal, 'g');
CASE_FOR_FLAG(JSRegExp::kIgnoreCase, 'i'); CASE_FOR_FLAG(JSRegExp::kIgnoreCase, 'i');
CASE_FOR_FLAG(JSRegExp::kLinear, 'l');
CASE_FOR_FLAG(JSRegExp::kMultiline, 'm'); CASE_FOR_FLAG(JSRegExp::kMultiline, 'm');
CASE_FOR_FLAG(JSRegExp::kDotAll, 's'); CASE_FOR_FLAG(JSRegExp::kDotAll, 's');
CASE_FOR_FLAG(JSRegExp::kUnicode, 'u'); CASE_FOR_FLAG(JSRegExp::kUnicode, 'u');
...@@ -1206,6 +1209,8 @@ TNode<BoolT> RegExpBuiltinsAssembler::SlowFlagGetter(TNode<Context> context, ...@@ -1206,6 +1209,8 @@ TNode<BoolT> RegExpBuiltinsAssembler::SlowFlagGetter(TNode<Context> context,
Handle<String> name; Handle<String> name;
switch (flag) { switch (flag) {
case JSRegExp::kNone:
UNREACHABLE();
case JSRegExp::kGlobal: case JSRegExp::kGlobal:
name = isolate()->factory()->global_string(); name = isolate()->factory()->global_string();
break; break;
...@@ -1224,8 +1229,9 @@ TNode<BoolT> RegExpBuiltinsAssembler::SlowFlagGetter(TNode<Context> context, ...@@ -1224,8 +1229,9 @@ TNode<BoolT> RegExpBuiltinsAssembler::SlowFlagGetter(TNode<Context> context,
case JSRegExp::kUnicode: case JSRegExp::kUnicode:
name = isolate()->factory()->unicode_string(); name = isolate()->factory()->unicode_string();
break; break;
default: case JSRegExp::kLinear:
UNREACHABLE(); name = isolate()->factory()->linear_string();
break;
} }
TNode<Object> value = GetProperty(context, regexp, name); TNode<Object> value = GetProperty(context, regexp, name);
......
...@@ -184,7 +184,8 @@ extern enum Flag constexpr 'JSRegExp::Flag' { ...@@ -184,7 +184,8 @@ extern enum Flag constexpr 'JSRegExp::Flag' {
kMultiline, kMultiline,
kSticky, kSticky,
kUnicode, kUnicode,
kDotAll kDotAll,
kLinear
} }
const kRegExpPrototypeOldFlagGetter: constexpr int31 const kRegExpPrototypeOldFlagGetter: constexpr int31
...@@ -244,6 +245,13 @@ transitioning javascript builtin RegExpPrototypeMultilineGetter( ...@@ -244,6 +245,13 @@ transitioning javascript builtin RegExpPrototypeMultilineGetter(
'RegExp.prototype.multiline'); 'RegExp.prototype.multiline');
} }
// Getter for the experimental RegExp.prototype.linear property.
// Delegates to FlagGetter with Flag::kLinear, passing
// kRegExpPrototypeOldFlagGetter and the method name
// 'RegExp.prototype.linear', in the same way as the other flag getters
// in this file.
transitioning javascript builtin RegExpPrototypeLinearGetter(
js-implicit context: NativeContext, receiver: JSAny)(): JSAny {
return FlagGetter(
receiver, Flag::kLinear, kRegExpPrototypeOldFlagGetter,
'RegExp.prototype.linear');
}
// ES #sec-get-regexp.prototype.dotAll // ES #sec-get-regexp.prototype.dotAll
transitioning javascript builtin RegExpPrototypeDotAllGetter( transitioning javascript builtin RegExpPrototypeDotAllGetter(
js-implicit context: NativeContext, receiver: JSAny)(): JSAny { js-implicit context: NativeContext, receiver: JSAny)(): JSAny {
......
...@@ -1491,7 +1491,11 @@ DEFINE_BOOL(trace_regexp_parser, false, "trace regexp parsing") ...@@ -1491,7 +1491,11 @@ DEFINE_BOOL(trace_regexp_parser, false, "trace regexp parsing")
DEFINE_BOOL(trace_regexp_tier_up, false, "trace regexp tiering up execution") DEFINE_BOOL(trace_regexp_tier_up, false, "trace regexp tiering up execution")
DEFINE_BOOL(enable_experimental_regexp_engine, false, DEFINE_BOOL(enable_experimental_regexp_engine, false,
"enable experimental linear time regexp engine") "recognize regexps with 'l' flag, run them on experimental engine")
DEFINE_BOOL(default_to_experimental_regexp_engine, false,
"run regexps with the experimental engine where possible")
DEFINE_IMPLICATION(default_to_experimental_regexp_engine,
enable_experimental_regexp_engine)
DEFINE_BOOL(trace_experimental_regexp_engine, false, DEFINE_BOOL(trace_experimental_regexp_engine, false,
"trace execution of experimental regexp engine") "trace execution of experimental regexp engine")
......
...@@ -225,6 +225,7 @@ class Genesis { ...@@ -225,6 +225,7 @@ class Genesis {
HARMONY_STAGED(DECLARE_FEATURE_INITIALIZATION) HARMONY_STAGED(DECLARE_FEATURE_INITIALIZATION)
HARMONY_SHIPPING(DECLARE_FEATURE_INITIALIZATION) HARMONY_SHIPPING(DECLARE_FEATURE_INITIALIZATION)
#undef DECLARE_FEATURE_INITIALIZATION #undef DECLARE_FEATURE_INITIALIZATION
void InitializeGlobal_regexp_linear_flag();
enum ArrayBufferKind { enum ArrayBufferKind {
ARRAY_BUFFER, ARRAY_BUFFER,
...@@ -3849,6 +3850,7 @@ void Genesis::InitializeExperimentalGlobal() { ...@@ -3849,6 +3850,7 @@ void Genesis::InitializeExperimentalGlobal() {
HARMONY_STAGED(FEATURE_INITIALIZE_GLOBAL) HARMONY_STAGED(FEATURE_INITIALIZE_GLOBAL)
HARMONY_INPROGRESS(FEATURE_INITIALIZE_GLOBAL) HARMONY_INPROGRESS(FEATURE_INITIALIZE_GLOBAL)
#undef FEATURE_INITIALIZE_GLOBAL #undef FEATURE_INITIALIZE_GLOBAL
InitializeGlobal_regexp_linear_flag();
} }
bool Genesis::CompileExtension(Isolate* isolate, v8::Extension* extension) { bool Genesis::CompileExtension(Isolate* isolate, v8::Extension* extension) {
...@@ -4304,6 +4306,20 @@ void Genesis::InitializeGlobal_harmony_string_replaceall() { ...@@ -4304,6 +4306,20 @@ void Genesis::InitializeGlobal_harmony_string_replaceall() {
Builtins::kStringPrototypeReplaceAll, 2, true); Builtins::kStringPrototypeReplaceAll, 2, true);
} }
void Genesis::InitializeGlobal_regexp_linear_flag() {
if (!FLAG_enable_experimental_regexp_engine) return;
Handle<JSFunction> regexp_fun(native_context()->regexp_function(), isolate());
Handle<JSObject> regexp_prototype(
JSObject::cast(regexp_fun->instance_prototype()), isolate());
SimpleInstallGetter(isolate(), regexp_prototype,
isolate()->factory()->linear_string(),
Builtins::kRegExpPrototypeLinearGetter, true);
// Store regexp prototype map again after change.
native_context()->set_regexp_prototype_map(regexp_prototype->map());
}
#ifdef V8_INTL_SUPPORT #ifdef V8_INTL_SUPPORT
void Genesis::InitializeGlobal_harmony_intl_segmenter() { void Genesis::InitializeGlobal_harmony_intl_segmenter() {
......
...@@ -226,6 +226,7 @@ ...@@ -226,6 +226,7 @@
V(_, length_string, "length") \ V(_, length_string, "length") \
V(_, let_string, "let") \ V(_, let_string, "let") \
V(_, line_string, "line") \ V(_, line_string, "line") \
V(_, linear_string, "linear") \
V(_, LinkError_string, "LinkError") \ V(_, LinkError_string, "LinkError") \
V(_, long_string, "long") \ V(_, long_string, "long") \
V(_, Map_string, "Map") \ V(_, Map_string, "Map") \
......
...@@ -272,6 +272,7 @@ String16 descriptionForRegExp(v8::Isolate* isolate, ...@@ -272,6 +272,7 @@ String16 descriptionForRegExp(v8::Isolate* isolate,
v8::RegExp::Flags flags = value->GetFlags(); v8::RegExp::Flags flags = value->GetFlags();
if (flags & v8::RegExp::Flags::kGlobal) description.append('g'); if (flags & v8::RegExp::Flags::kGlobal) description.append('g');
if (flags & v8::RegExp::Flags::kIgnoreCase) description.append('i'); if (flags & v8::RegExp::Flags::kIgnoreCase) description.append('i');
if (flags & v8::RegExp::Flags::kLinear) description.append('l');
if (flags & v8::RegExp::Flags::kMultiline) description.append('m'); if (flags & v8::RegExp::Flags::kMultiline) description.append('m');
if (flags & v8::RegExp::Flags::kDotAll) description.append('s'); if (flags & v8::RegExp::Flags::kDotAll) description.append('s');
if (flags & v8::RegExp::Flags::kUnicode) description.append('u'); if (flags & v8::RegExp::Flags::kUnicode) description.append('u');
......
...@@ -171,13 +171,6 @@ uint32_t JSRegExp::BacktrackLimit() const { ...@@ -171,13 +171,6 @@ uint32_t JSRegExp::BacktrackLimit() const {
// static // static
JSRegExp::Flags JSRegExp::FlagsFromString(Isolate* isolate, JSRegExp::Flags JSRegExp::FlagsFromString(Isolate* isolate,
Handle<String> flags, bool* success) { Handle<String> flags, bool* success) {
STATIC_ASSERT(*JSRegExp::FlagFromChar('g') == JSRegExp::kGlobal);
STATIC_ASSERT(*JSRegExp::FlagFromChar('i') == JSRegExp::kIgnoreCase);
STATIC_ASSERT(*JSRegExp::FlagFromChar('m') == JSRegExp::kMultiline);
STATIC_ASSERT(*JSRegExp::FlagFromChar('s') == JSRegExp::kDotAll);
STATIC_ASSERT(*JSRegExp::FlagFromChar('u') == JSRegExp::kUnicode);
STATIC_ASSERT(*JSRegExp::FlagFromChar('y') == JSRegExp::kSticky);
int length = flags->length(); int length = flags->length();
if (length == 0) { if (length == 0) {
*success = true; *success = true;
......
...@@ -42,8 +42,8 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> { ...@@ -42,8 +42,8 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> {
enum Type { NOT_COMPILED, ATOM, IRREGEXP, EXPERIMENTAL }; enum Type { NOT_COMPILED, ATOM, IRREGEXP, EXPERIMENTAL };
DEFINE_TORQUE_GENERATED_JS_REG_EXP_FLAGS() DEFINE_TORQUE_GENERATED_JS_REG_EXP_FLAGS()
static constexpr base::Optional<Flag> FlagFromChar(char c) { static base::Optional<Flag> FlagFromChar(char c) {
STATIC_ASSERT(kFlagCount == 6); STATIC_ASSERT(kFlagCount == 7);
// clang-format off // clang-format off
return c == 'g' ? base::Optional<Flag>(kGlobal) return c == 'g' ? base::Optional<Flag>(kGlobal)
: c == 'i' ? base::Optional<Flag>(kIgnoreCase) : c == 'i' ? base::Optional<Flag>(kIgnoreCase)
...@@ -51,6 +51,8 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> { ...@@ -51,6 +51,8 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> {
: c == 'y' ? base::Optional<Flag>(kSticky) : c == 'y' ? base::Optional<Flag>(kSticky)
: c == 'u' ? base::Optional<Flag>(kUnicode) : c == 'u' ? base::Optional<Flag>(kUnicode)
: c == 's' ? base::Optional<Flag>(kDotAll) : c == 's' ? base::Optional<Flag>(kDotAll)
: (FLAG_enable_experimental_regexp_engine && c == 'l')
? base::Optional<Flag>(kLinear)
: base::Optional<Flag>(); : base::Optional<Flag>();
// clang-format on // clang-format on
} }
...@@ -62,6 +64,7 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> { ...@@ -62,6 +64,7 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> {
STATIC_ASSERT(static_cast<int>(kSticky) == v8::RegExp::kSticky); STATIC_ASSERT(static_cast<int>(kSticky) == v8::RegExp::kSticky);
STATIC_ASSERT(static_cast<int>(kUnicode) == v8::RegExp::kUnicode); STATIC_ASSERT(static_cast<int>(kUnicode) == v8::RegExp::kUnicode);
STATIC_ASSERT(static_cast<int>(kDotAll) == v8::RegExp::kDotAll); STATIC_ASSERT(static_cast<int>(kDotAll) == v8::RegExp::kDotAll);
STATIC_ASSERT(static_cast<int>(kLinear) == v8::RegExp::kLinear);
STATIC_ASSERT(kFlagCount == v8::RegExp::kFlagCount); STATIC_ASSERT(kFlagCount == v8::RegExp::kFlagCount);
DECL_ACCESSORS(last_index, Object) DECL_ACCESSORS(last_index, Object)
......
...@@ -9,6 +9,7 @@ bitfield struct JSRegExpFlags extends uint31 { ...@@ -9,6 +9,7 @@ bitfield struct JSRegExpFlags extends uint31 {
sticky: bool: 1 bit; sticky: bool: 1 bit;
unicode: bool: 1 bit; unicode: bool: 1 bit;
dot_all: bool: 1 bit; dot_all: bool: 1 bit;
linear: bool: 1 bit;
} }
@generateCppClass @generateCppClass
......
...@@ -1642,8 +1642,12 @@ MaybeHandle<JSRegExp> ValueDeserializer::ReadJSRegExp() { ...@@ -1642,8 +1642,12 @@ MaybeHandle<JSRegExp> ValueDeserializer::ReadJSRegExp() {
} }
// Ensure the deserialized flags are valid. // Ensure the deserialized flags are valid.
uint32_t flags_mask = static_cast<uint32_t>(-1) << JSRegExp::kFlagCount; uint32_t bad_flags_mask = static_cast<uint32_t>(-1) << JSRegExp::kFlagCount;
if ((raw_flags & flags_mask) || // kLinear is accepted only with the appropriate flag.
if (!FLAG_enable_experimental_regexp_engine) {
bad_flags_mask |= JSRegExp::kLinear;
}
if ((raw_flags & bad_flags_mask) ||
!JSRegExp::New(isolate_, pattern, static_cast<JSRegExp::Flags>(raw_flags)) !JSRegExp::New(isolate_, pattern, static_cast<JSRegExp::Flags>(raw_flags))
.ToHandle(&regexp)) { .ToHandle(&regexp)) {
return MaybeHandle<JSRegExp>(); return MaybeHandle<JSRegExp>();
......
...@@ -35,7 +35,7 @@ class CanBeHandledVisitor final : private RegExpVisitor { ...@@ -35,7 +35,7 @@ class CanBeHandledVisitor final : private RegExpVisitor {
// future. // future.
static constexpr JSRegExp::Flags kAllowedFlags = static constexpr JSRegExp::Flags kAllowedFlags =
JSRegExp::kGlobal | JSRegExp::kSticky | JSRegExp::kMultiline | JSRegExp::kGlobal | JSRegExp::kSticky | JSRegExp::kMultiline |
JSRegExp::kDotAll; JSRegExp::kDotAll | JSRegExp::kLinear;
// We support Unicode iff kUnicode is among the supported flags. // We support Unicode iff kUnicode is among the supported flags.
STATIC_ASSERT(ExperimentalRegExp::kSupportsUnicode == STATIC_ASSERT(ExperimentalRegExp::kSupportsUnicode ==
((kAllowedFlags & JSRegExp::kUnicode) != 0)); ((kAllowedFlags & JSRegExp::kUnicode) != 0));
......
...@@ -98,6 +98,7 @@ base::Optional<CompilationResult> CompileImpl(Isolate* isolate, ...@@ -98,6 +98,7 @@ base::Optional<CompilationResult> CompileImpl(Isolate* isolate,
} // namespace } // namespace
bool ExperimentalRegExp::Compile(Isolate* isolate, Handle<JSRegExp> re) { bool ExperimentalRegExp::Compile(Isolate* isolate, Handle<JSRegExp> re) {
DCHECK(FLAG_enable_experimental_regexp_engine);
DCHECK_EQ(re->TypeTag(), JSRegExp::EXPERIMENTAL); DCHECK_EQ(re->TypeTag(), JSRegExp::EXPERIMENTAL);
#ifdef VERIFY_HEAP #ifdef VERIFY_HEAP
re->JSRegExpVerify(isolate); re->JSRegExpVerify(isolate);
......
...@@ -30,6 +30,7 @@ namespace internal { ...@@ -30,6 +30,7 @@ namespace internal {
T(InvalidQuantifier, "Invalid quantifier") \ T(InvalidQuantifier, "Invalid quantifier") \
T(InvalidGroup, "Invalid group") \ T(InvalidGroup, "Invalid group") \
T(MultipleFlagDashes, "Multiple dashes in flag group") \ T(MultipleFlagDashes, "Multiple dashes in flag group") \
T(NotLinear, "Cannot be executed in linear time") \
T(RepeatedFlag, "Repeated flag in flag group") \ T(RepeatedFlag, "Repeated flag in flag group") \
T(InvalidFlagGroup, "Invalid flag group") \ T(InvalidFlagGroup, "Invalid flag group") \
T(TooManyCaptures, "Too many captures") \ T(TooManyCaptures, "Too many captures") \
......
...@@ -17,6 +17,7 @@ ...@@ -17,6 +17,7 @@
#include "src/regexp/regexp-macro-assembler-arch.h" #include "src/regexp/regexp-macro-assembler-arch.h"
#include "src/regexp/regexp-macro-assembler-tracer.h" #include "src/regexp/regexp-macro-assembler-tracer.h"
#include "src/regexp/regexp-parser.h" #include "src/regexp/regexp-parser.h"
#include "src/regexp/regexp-utils.h"
#include "src/strings/string-search.h" #include "src/strings/string-search.h"
#include "src/utils/ostreams.h" #include "src/utils/ostreams.h"
...@@ -119,6 +120,10 @@ void RegExp::ThrowRegExpException(Isolate* isolate, Handle<JSRegExp> re, ...@@ -119,6 +120,10 @@ void RegExp::ThrowRegExpException(Isolate* isolate, Handle<JSRegExp> re,
error_text)); error_text));
} }
// Thin forwarding wrapper: returns the result of
// RegExpUtils::IsUnmodifiedRegExp for the given regexp, so callers that only
// include regexp.h can perform the check.
bool RegExp::IsUnmodifiedRegExp(Isolate* isolate, Handle<JSRegExp> regexp) {
return RegExpUtils::IsUnmodifiedRegExp(isolate, regexp);
}
// Identifies the sort of regexps where the regexp engine is faster // Identifies the sort of regexps where the regexp engine is faster
// than the code used for atom matches. // than the code used for atom matches.
static bool HasFewDifferentCharacters(Handle<String> pattern) { static bool HasFewDifferentCharacters(Handle<String> pattern) {
...@@ -182,9 +187,22 @@ MaybeHandle<Object> RegExp::Compile(Isolate* isolate, Handle<JSRegExp> re, ...@@ -182,9 +187,22 @@ MaybeHandle<Object> RegExp::Compile(Isolate* isolate, Handle<JSRegExp> re,
bool has_been_compiled = false; bool has_been_compiled = false;
if (FLAG_enable_experimental_regexp_engine && if (FLAG_default_to_experimental_regexp_engine &&
ExperimentalRegExp::CanBeHandled(parse_result.tree, flags, ExperimentalRegExp::CanBeHandled(parse_result.tree, flags,
parse_result.capture_count)) { parse_result.capture_count)) {
DCHECK(FLAG_enable_experimental_regexp_engine);
ExperimentalRegExp::Initialize(isolate, re, pattern, flags,
parse_result.capture_count);
has_been_compiled = true;
} else if (flags & JSRegExp::kLinear) {
DCHECK(FLAG_enable_experimental_regexp_engine);
if (!ExperimentalRegExp::CanBeHandled(parse_result.tree, flags,
parse_result.capture_count)) {
// TODO(mbid): The error could provide a reason for why the regexp can't
// be executed in linear time (e.g. due to back references).
return RegExp::ThrowRegExpException(isolate, re, pattern,
RegExpError::kNotLinear);
}
ExperimentalRegExp::Initialize(isolate, re, pattern, flags, ExperimentalRegExp::Initialize(isolate, re, pattern, flags,
parse_result.capture_count); parse_result.capture_count);
has_been_compiled = true; has_been_compiled = true;
......
...@@ -138,6 +138,8 @@ class RegExp final : public AllStatic { ...@@ -138,6 +138,8 @@ class RegExp final : public AllStatic {
RegExpError error); RegExpError error);
static void ThrowRegExpException(Isolate* isolate, Handle<JSRegExp> re, static void ThrowRegExpException(Isolate* isolate, Handle<JSRegExp> re,
RegExpError error_text); RegExpError error_text);
static bool IsUnmodifiedRegExp(Isolate* isolate, Handle<JSRegExp> regexp);
}; };
// Uses a special global mode of irregexp-generated code to perform a global // Uses a special global mode of irregexp-generated code to perform a global
......
...@@ -2,8 +2,6 @@ ...@@ -2,8 +2,6 @@
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
#include "src/runtime/runtime-utils.h"
#include <memory> #include <memory>
#include <sstream> #include <sstream>
...@@ -29,6 +27,8 @@ ...@@ -29,6 +27,8 @@
#include "src/objects/js-function-inl.h" #include "src/objects/js-function-inl.h"
#include "src/objects/js-regexp-inl.h" #include "src/objects/js-regexp-inl.h"
#include "src/objects/smi.h" #include "src/objects/smi.h"
#include "src/regexp/regexp.h"
#include "src/runtime/runtime-utils.h"
#include "src/snapshot/snapshot.h" #include "src/snapshot/snapshot.h"
#include "src/trap-handler/trap-handler.h" #include "src/trap-handler/trap-handler.h"
#include "src/utils/ostreams.h" #include "src/utils/ostreams.h"
...@@ -1289,6 +1289,14 @@ RUNTIME_FUNCTION(Runtime_RegexpTypeTag) { ...@@ -1289,6 +1289,14 @@ RUNTIME_FUNCTION(Runtime_RegexpTypeTag) {
return *isolate->factory()->NewStringFromAsciiChecked(type_str); return *isolate->factory()->NewStringFromAsciiChecked(type_str);
} }
// Runtime function behind %RegexpIsUnmodified(regexp). Takes exactly one
// JSRegExp argument and returns the heap boolean corresponding to
// RegExp::IsUnmodifiedRegExp for it.
RUNTIME_FUNCTION(Runtime_RegexpIsUnmodified) {
  HandleScope scope(isolate);
  DCHECK_EQ(1, args.length());
  CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 0);
  const bool is_unmodified = RegExp::IsUnmodifiedRegExp(isolate, regexp);
  return isolate->heap()->ToBoolean(is_unmodified);
}
#define ELEMENTS_KIND_CHECK_RUNTIME_FUNCTION(Name) \ #define ELEMENTS_KIND_CHECK_RUNTIME_FUNCTION(Name) \
RUNTIME_FUNCTION(Runtime_Has##Name) { \ RUNTIME_FUNCTION(Runtime_Has##Name) { \
CONVERT_ARG_CHECKED(JSObject, obj, 0); \ CONVERT_ARG_CHECKED(JSObject, obj, 0); \
......
...@@ -519,6 +519,7 @@ namespace internal { ...@@ -519,6 +519,7 @@ namespace internal {
F(RegexpHasBytecode, 2, 1) \ F(RegexpHasBytecode, 2, 1) \
F(RegexpHasNativeCode, 2, 1) \ F(RegexpHasNativeCode, 2, 1) \
F(RegexpTypeTag, 1, 1) \ F(RegexpTypeTag, 1, 1) \
F(RegexpIsUnmodified, 1, 1) \
F(MapIteratorProtector, 0, 1) \ F(MapIteratorProtector, 0, 1) \
F(NeverOptimizeFunction, 1, 1) \ F(NeverOptimizeFunction, 1, 1) \
F(NotifyContextDisposed, 0, 1) \ F(NotifyContextDisposed, 0, 1) \
......
...@@ -240,9 +240,12 @@ std::string PickLimitForSplit(FuzzerArgs* args) { ...@@ -240,9 +240,12 @@ std::string PickLimitForSplit(FuzzerArgs* args) {
} }
std::string GenerateRandomFlags(FuzzerArgs* args) { std::string GenerateRandomFlags(FuzzerArgs* args) {
// TODO(mbid,v8:10765): Find a way to generate the kLinear flag sometimes,
// but only for patterns that are supported by the experimental engine.
constexpr size_t kFlagCount = JSRegExp::kFlagCount; constexpr size_t kFlagCount = JSRegExp::kFlagCount;
CHECK_EQ(JSRegExp::kDotAll, 1 << (kFlagCount - 1)); CHECK_EQ(JSRegExp::kLinear, 1 << (kFlagCount - 1));
STATIC_ASSERT((1 << kFlagCount) - 1 < 0xFF); CHECK_EQ(JSRegExp::kDotAll, 1 << (kFlagCount - 2));
STATIC_ASSERT((1 << kFlagCount) - 1 <= 0xFF);
const size_t flags = RandomByte(args) & ((1 << kFlagCount) - 1); const size_t flags = RandomByte(args) & ((1 << kFlagCount) - 1);
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
// Use of this source code is governed by a BSD-style license that can be // Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. // found in the LICENSE file.
// Flags: --allow-natives-syntax --enable-experimental-regexp-engine // Flags: --allow-natives-syntax --default-to-experimental-regexp-engine
function Test(regexp, subject, expectedResult, expectedLastIndex) { function Test(regexp, subject, expectedResult, expectedLastIndex) {
assertEquals(%RegexpTypeTag(regexp), "EXPERIMENTAL"); assertEquals(%RegexpTypeTag(regexp), "EXPERIMENTAL");
......
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --allow-natives-syntax
// Flags: --enable-experimental-regexp-engine
// Flags: --no-default-to-experimental-regexp-engine
// Flags: --no-force-slow-path
// We shouldn't assign the experimental engine to regexps without 'l' flag.
// (This file runs with --no-default-to-experimental-regexp-engine, so only
// the 'l' flag may select the experimental engine.)
assertNotEquals("EXPERIMENTAL", %RegexpTypeTag(/asdf/));
assertNotEquals("EXPERIMENTAL", %RegexpTypeTag(/123|asdf/));
assertNotEquals("EXPERIMENTAL", %RegexpTypeTag(/(a*)*x/));
assertNotEquals("EXPERIMENTAL", %RegexpTypeTag(/(a*)\1/));
// We should assign the experimental engine to regexps with 'l' flag.
assertEquals("EXPERIMENTAL", %RegexpTypeTag(/asdf/l));
assertEquals("EXPERIMENTAL", %RegexpTypeTag(/123|asdf/l));
assertEquals("EXPERIMENTAL", %RegexpTypeTag(/(a*)*x/l));
// We should throw if a regexp with 'l' flag can't be handled by the
// experimental engine (presumably because of the back reference \1 here).
assertThrows(() => /(a*)\1/l, SyntaxError);
// The flags field of a regexp should be sorted: 'l' is expected between
// 'g' and 'm' regardless of the order the flags were written in.
assertEquals("glmsy", (/asdf/lymsg).flags);
// The 'linear' member should be set according to the linear flag.
assertTrue((/asdf/lymsg).linear);
assertFalse((/asdf/ymsg).linear);
// The new fields installed on the regexp prototype map shouldn't make
// unmodified regexps slow.
assertTrue(%RegexpIsUnmodified(/asdf/));
assertTrue(%RegexpIsUnmodified(/asdf/l));
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --allow-natives-syntax
// Flags: --no-enable-experimental-regexp-engine
// We shouldn't recognize the 'l' flag: constructing a RegExp with it must
// throw a SyntaxError while the experimental engine is disabled.
assertThrows(() => new RegExp("asdf", "l"), SyntaxError);
assertThrows(() => new RegExp("123|xyz", "l"), SyntaxError);
assertThrows(() => new RegExp("((a*)*)*", "yls"), SyntaxError);
// The backslash is doubled so that the pattern really contains the back
// reference \1; a single backslash in a string literal would be read as the
// legacy octal escape for U+0001 instead.
assertThrows(() => new RegExp("((a*)*)*\\1", "l"), SyntaxError);
// RegExps shouldn't have a 'linear' property.
assertFalse(RegExp.prototype.hasOwnProperty('linear'));
assertFalse(/123/.hasOwnProperty('linear'));
...@@ -1487,7 +1487,26 @@ TEST_F(ValueSerializerTest, DecodeRegExpDotAll) { ...@@ -1487,7 +1487,26 @@ TEST_F(ValueSerializerTest, DecodeRegExpDotAll) {
ExpectScriptTrue("result.toString() === '/foo/gimsuy'"); ExpectScriptTrue("result.toString() === '/foo/gimsuy'");
InvalidDecodeTest( InvalidDecodeTest(
{0xFF, 0x09, 0x3F, 0x00, 0x52, 0x03, 0x66, 0x6F, 0x6F, 0x7F}); {0xFF, 0x09, 0x3F, 0x00, 0x52, 0x03, 0x66, 0x6F, 0x6F, 0xFF});
}
// A serialized regexp whose flag byte includes kLinear must decode only while
// --enable-experimental-regexp-engine is on, and must be rejected otherwise.
TEST_F(ValueSerializerTest, DecodeLinearRegExp) {
  // Restores the global V8 flag when the test body is left by any path.
  // ASSERT_* macros return from the test on failure, so a manual restore at
  // the end of the body would be skipped and the mutated flag would leak
  // into subsequent tests.
  struct FlagRestorer {
    const bool saved = i::FLAG_enable_experimental_regexp_engine;
    ~FlagRestorer() { i::FLAG_enable_experimental_regexp_engine = saved; }
  } restore_flag;

  // The last byte encodes the regexp flags
  // (0x6D = global | multiline | sticky | dotAll | linear).
  std::vector<uint8_t> regexp_encoding = {0xFF, 0x09, 0x3F, 0x00, 0x52,
                                          0x03, 0x66, 0x6F, 0x6F, 0x6D};

  // With the engine enabled, the linear flag is accepted.
  i::FLAG_enable_experimental_regexp_engine = true;
  Local<Value> value = DecodeTest(regexp_encoding);
  ASSERT_TRUE(value->IsRegExp());
  ExpectScriptTrue("Object.getPrototypeOf(result) === RegExp.prototype");
  ExpectScriptTrue("result.toString() === '/foo/glmsy'");

  // With the engine disabled, the same bytes must fail to decode.
  i::FLAG_enable_experimental_regexp_engine = false;
  InvalidDecodeTest(regexp_encoding);
}
TEST_F(ValueSerializerTest, RoundTripMap) { TEST_F(ValueSerializerTest, RoundTripMap) {
......
...@@ -12,7 +12,7 @@ ALL_VARIANT_FLAGS = { ...@@ -12,7 +12,7 @@ ALL_VARIANT_FLAGS = {
# Alias of exhaustive variants, but triggering new test framework features. # Alias of exhaustive variants, but triggering new test framework features.
"infra_staging": [[]], "infra_staging": [[]],
"interpreted_regexp": [["--regexp-interpret-all"]], "interpreted_regexp": [["--regexp-interpret-all"]],
"experimental_regexp": [["--enable-experimental-regexp-engine"]], "experimental_regexp": [["--default-to-experimental-regexp-engine"]],
"jitless": [["--jitless"]], "jitless": [["--jitless"]],
"minor_mc": [["--minor-mc"]], "minor_mc": [["--minor-mc"]],
"nci": [["--turbo-nci"]], "nci": [["--turbo-nci"]],
...@@ -64,7 +64,7 @@ INCOMPATIBLE_FLAGS_PER_VARIANT = { ...@@ -64,7 +64,7 @@ INCOMPATIBLE_FLAGS_PER_VARIANT = {
"turboprop": ["--turbo-inlining", "--interrupt-budget=*", "--no-turboprop"], "turboprop": ["--turbo-inlining", "--interrupt-budget=*", "--no-turboprop"],
"code_serializer": ["--cache=after-execute", "--cache=full-code-cache", "--cache=none"], "code_serializer": ["--cache=after-execute", "--cache=full-code-cache", "--cache=none"],
"no_local_heaps": ["--concurrent-inlining", "--turboprop"], "no_local_heaps": ["--concurrent-inlining", "--turboprop"],
"experimental_regexp": ["--no-enable-experimental-regexp-engine"], "experimental_regexp": ["--no-enable-experimental-regexp-engine", "--no-default-to-experimental-regexp-engine"],
} }
# Flags that lead to a contradiction under certain build variables. # Flags that lead to a contradiction under certain build variables.
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment