Commit d586518a authored by Jakob Gruber, committed by V8 LUCI CQ

[regexp] Break dependency on JSRegExp::Flags

The JSRegExp heap object should not be the source of truth for regexp
flags, which are also relevant in places that don't need or want to
care about the heap object layout (e.g.: the regexp parser).

Introduce RegExpFlags as a new source of truth, and base everything
else on these flags.

As a first change, remove the js-regexp.h dependency from the regexp
parser. Other files in src/regexp/ should be updated in follow-up
work.

Change-Id: Id9a6706c7f09e93f743b08b647b211d0cb0b9c76
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3103306
Reviewed-by: Leszek Swirski <leszeks@chromium.org>
Reviewed-by: Patrick Thier <pthier@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/main@{#76379}
parent 2c70eb76
...@@ -1754,6 +1754,7 @@ filegroup( ...@@ -1754,6 +1754,7 @@ filegroup(
"src/regexp/regexp-dotprinter.h", "src/regexp/regexp-dotprinter.h",
"src/regexp/regexp-error.cc", "src/regexp/regexp-error.cc",
"src/regexp/regexp-error.h", "src/regexp/regexp-error.h",
"src/regexp/regexp-flags.h",
"src/regexp/regexp-interpreter.cc", "src/regexp/regexp-interpreter.cc",
"src/regexp/regexp-interpreter.h", "src/regexp/regexp-interpreter.h",
"src/regexp/regexp-macro-assembler-arch.h", "src/regexp/regexp-macro-assembler-arch.h",
......
...@@ -3088,6 +3088,7 @@ v8_header_set("v8_internal_headers") { ...@@ -3088,6 +3088,7 @@ v8_header_set("v8_internal_headers") {
"src/regexp/regexp-compiler.h", "src/regexp/regexp-compiler.h",
"src/regexp/regexp-dotprinter.h", "src/regexp/regexp-dotprinter.h",
"src/regexp/regexp-error.h", "src/regexp/regexp-error.h",
"src/regexp/regexp-flags.h",
"src/regexp/regexp-interpreter.h", "src/regexp/regexp-interpreter.h",
"src/regexp/regexp-macro-assembler-arch.h", "src/regexp/regexp-macro-assembler-arch.h",
"src/regexp/regexp-macro-assembler-tracer.h", "src/regexp/regexp-macro-assembler-tracer.h",
......
...@@ -52,6 +52,7 @@ include_rules = [ ...@@ -52,6 +52,7 @@ include_rules = [
"+src/interpreter/setup-interpreter.h", "+src/interpreter/setup-interpreter.h",
"-src/regexp", "-src/regexp",
"+src/regexp/regexp.h", "+src/regexp/regexp.h",
"+src/regexp/regexp-flags.h",
"+src/regexp/regexp-stack.h", "+src/regexp/regexp-stack.h",
"+src/regexp/regexp-utils.h", "+src/regexp/regexp-utils.h",
"-src/trap-handler", "-src/trap-handler",
......
...@@ -13,6 +13,7 @@ ...@@ -13,6 +13,7 @@
#include "src/base/vector.h" #include "src/base/vector.h"
#include "src/common/globals.h" #include "src/common/globals.h"
#include "src/objects/objects-inl.h" #include "src/objects/objects-inl.h"
#include "src/regexp/regexp-flags.h"
#include "src/strings/string-builder-inl.h" #include "src/strings/string-builder-inl.h"
namespace v8 { namespace v8 {
...@@ -72,6 +73,12 @@ void CallPrinter::Find(AstNode* node, bool print) { ...@@ -72,6 +73,12 @@ void CallPrinter::Find(AstNode* node, bool print) {
} }
} }
// Single-character overload of Print: appends |c| to builder_ and bumps
// num_prints_. Returns without doing anything when found_ is false or done_
// is true.
void CallPrinter::Print(char c) {
if (!found_ || done_) return;
num_prints_++;
builder_->AppendCharacter(c);
}
void CallPrinter::Print(const char* str) { void CallPrinter::Print(const char* str) {
if (!found_ || done_) return; if (!found_ || done_) return;
num_prints_++; num_prints_++;
...@@ -269,13 +276,10 @@ void CallPrinter::VisitRegExpLiteral(RegExpLiteral* node) { ...@@ -269,13 +276,10 @@ void CallPrinter::VisitRegExpLiteral(RegExpLiteral* node) {
Print("/"); Print("/");
PrintLiteral(node->pattern(), false); PrintLiteral(node->pattern(), false);
Print("/"); Print("/");
if (node->flags() & RegExp::kHasIndices) Print("d"); #define V(Lower, Camel, LowerCamel, Char, Bit) \
if (node->flags() & RegExp::kGlobal) Print("g"); if (node->flags() & RegExp::k##Camel) Print(Char);
if (node->flags() & RegExp::kIgnoreCase) Print("i"); REGEXP_FLAG_LIST(V)
if (node->flags() & RegExp::kLinear) Print("l"); #undef V
if (node->flags() & RegExp::kMultiline) Print("m");
if (node->flags() & RegExp::kUnicode) Print("u");
if (node->flags() & RegExp::kSticky) Print("y");
} }
...@@ -1189,13 +1193,10 @@ void AstPrinter::VisitRegExpLiteral(RegExpLiteral* node) { ...@@ -1189,13 +1193,10 @@ void AstPrinter::VisitRegExpLiteral(RegExpLiteral* node) {
PrintLiteralIndented("PATTERN", node->raw_pattern(), false); PrintLiteralIndented("PATTERN", node->raw_pattern(), false);
int i = 0; int i = 0;
base::EmbeddedVector<char, 128> buf; base::EmbeddedVector<char, 128> buf;
if (node->flags() & RegExp::kHasIndices) buf[i++] = 'd'; #define V(Lower, Camel, LowerCamel, Char, Bit) \
if (node->flags() & RegExp::kGlobal) buf[i++] = 'g'; if (node->flags() & RegExp::k##Camel) buf[i++] = Char;
if (node->flags() & RegExp::kIgnoreCase) buf[i++] = 'i'; REGEXP_FLAG_LIST(V)
if (node->flags() & RegExp::kLinear) buf[i++] = 'l'; #undef V
if (node->flags() & RegExp::kMultiline) buf[i++] = 'm';
if (node->flags() & RegExp::kUnicode) buf[i++] = 'u';
if (node->flags() & RegExp::kSticky) buf[i++] = 'y';
buf[i] = '\0'; buf[i] = '\0';
PrintIndented("FLAGS "); PrintIndented("FLAGS ");
Print("%s", buf.begin()); Print("%s", buf.begin());
......
...@@ -52,6 +52,7 @@ class CallPrinter final : public AstVisitor<CallPrinter> { ...@@ -52,6 +52,7 @@ class CallPrinter final : public AstVisitor<CallPrinter> {
#undef DECLARE_VISIT #undef DECLARE_VISIT
private: private:
void Print(char c);
void Print(const char* str); void Print(const char* str);
void Print(Handle<String> str); void Print(Handle<String> str);
......
...@@ -17,7 +17,6 @@ ...@@ -17,7 +17,6 @@
#include "src/objects/contexts.h" #include "src/objects/contexts.h"
#include "src/objects/field-index-inl.h" #include "src/objects/field-index-inl.h"
#include "src/objects/js-array-inl.h" #include "src/objects/js-array-inl.h"
#include "src/objects/js-regexp-inl.h"
#include "src/objects/module-inl.h" #include "src/objects/module-inl.h"
#include "src/objects/property-details.h" #include "src/objects/property-details.h"
#include "src/objects/prototype.h" #include "src/objects/prototype.h"
......
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include "src/objects/js-regexp-string-iterator.h" #include "src/objects/js-regexp-string-iterator.h"
#include "src/objects/js-regexp.h" #include "src/objects/js-regexp.h"
#include "src/objects/regexp-match-info.h" #include "src/objects/regexp-match-info.h"
#include "src/regexp/regexp-flags.h"
namespace v8 { namespace v8 {
namespace internal { namespace internal {
...@@ -1041,23 +1042,16 @@ TNode<String> RegExpBuiltinsAssembler::FlagsGetter(TNode<Context> context, ...@@ -1041,23 +1042,16 @@ TNode<String> RegExpBuiltinsAssembler::FlagsGetter(TNode<Context> context,
CAST(LoadObjectField(CAST(regexp), JSRegExp::kFlagsOffset)); CAST(LoadObjectField(CAST(regexp), JSRegExp::kFlagsOffset));
var_flags = SmiUntag(flags_smi); var_flags = SmiUntag(flags_smi);
#define CASE_FOR_FLAG(FLAG) \ #define CASE_FOR_FLAG(Lower, Camel, ...) \
do { \ do { \
Label next(this); \ Label next(this); \
GotoIfNot(IsSetWord(var_flags.value(), FLAG), &next); \ GotoIfNot(IsSetWord(var_flags.value(), JSRegExp::k##Camel), &next); \
var_length = Uint32Add(var_length.value(), Uint32Constant(1)); \ var_length = Uint32Add(var_length.value(), Uint32Constant(1)); \
Goto(&next); \ Goto(&next); \
BIND(&next); \ BIND(&next); \
} while (false) } while (false);
CASE_FOR_FLAG(JSRegExp::kHasIndices); REGEXP_FLAG_LIST(CASE_FOR_FLAG)
CASE_FOR_FLAG(JSRegExp::kGlobal);
CASE_FOR_FLAG(JSRegExp::kIgnoreCase);
CASE_FOR_FLAG(JSRegExp::kLinear);
CASE_FOR_FLAG(JSRegExp::kMultiline);
CASE_FOR_FLAG(JSRegExp::kDotAll);
CASE_FOR_FLAG(JSRegExp::kUnicode);
CASE_FOR_FLAG(JSRegExp::kSticky);
#undef CASE_FOR_FLAG #undef CASE_FOR_FLAG
} else { } else {
DCHECK(!is_fastpath); DCHECK(!is_fastpath);
...@@ -1123,26 +1117,19 @@ TNode<String> RegExpBuiltinsAssembler::FlagsGetter(TNode<Context> context, ...@@ -1123,26 +1117,19 @@ TNode<String> RegExpBuiltinsAssembler::FlagsGetter(TNode<Context> context,
TVARIABLE(IntPtrT, var_offset, TVARIABLE(IntPtrT, var_offset,
IntPtrConstant(SeqOneByteString::kHeaderSize - kHeapObjectTag)); IntPtrConstant(SeqOneByteString::kHeaderSize - kHeapObjectTag));
#define CASE_FOR_FLAG(FLAG, CHAR) \ #define CASE_FOR_FLAG(Lower, Camel, LowerCamel, Char, ...) \
do { \ do { \
Label next(this); \ Label next(this); \
GotoIfNot(IsSetWord(var_flags.value(), FLAG), &next); \ GotoIfNot(IsSetWord(var_flags.value(), JSRegExp::k##Camel), &next); \
const TNode<Int32T> value = Int32Constant(CHAR); \ const TNode<Int32T> value = Int32Constant(Char); \
StoreNoWriteBarrier(MachineRepresentation::kWord8, string, \ StoreNoWriteBarrier(MachineRepresentation::kWord8, string, \
var_offset.value(), value); \ var_offset.value(), value); \
var_offset = IntPtrAdd(var_offset.value(), int_one); \ var_offset = IntPtrAdd(var_offset.value(), int_one); \
Goto(&next); \ Goto(&next); \
BIND(&next); \ BIND(&next); \
} while (false) } while (false);
CASE_FOR_FLAG(JSRegExp::kHasIndices, 'd'); REGEXP_FLAG_LIST(CASE_FOR_FLAG)
CASE_FOR_FLAG(JSRegExp::kGlobal, 'g');
CASE_FOR_FLAG(JSRegExp::kIgnoreCase, 'i');
CASE_FOR_FLAG(JSRegExp::kLinear, 'l');
CASE_FOR_FLAG(JSRegExp::kMultiline, 'm');
CASE_FOR_FLAG(JSRegExp::kDotAll, 's');
CASE_FOR_FLAG(JSRegExp::kUnicode, 'u');
CASE_FOR_FLAG(JSRegExp::kSticky, 'y');
#undef CASE_FOR_FLAG #undef CASE_FOR_FLAG
if (is_fastpath) { if (is_fastpath) {
...@@ -1391,29 +1378,12 @@ TNode<BoolT> RegExpBuiltinsAssembler::SlowFlagGetter(TNode<Context> context, ...@@ -1391,29 +1378,12 @@ TNode<BoolT> RegExpBuiltinsAssembler::SlowFlagGetter(TNode<Context> context,
switch (flag) { switch (flag) {
case JSRegExp::kNone: case JSRegExp::kNone:
UNREACHABLE(); UNREACHABLE();
case JSRegExp::kGlobal: #define V(Lower, Camel, LowerCamel, Char, Bit) \
name = isolate()->factory()->global_string(); case JSRegExp::k##Camel: \
break; name = isolate()->factory()->LowerCamel##_string(); \
case JSRegExp::kIgnoreCase: break;
name = isolate()->factory()->ignoreCase_string(); REGEXP_FLAG_LIST(V)
break; #undef V
case JSRegExp::kMultiline:
name = isolate()->factory()->multiline_string();
break;
case JSRegExp::kDotAll:
UNREACHABLE(); // Never called for dotAll.
case JSRegExp::kSticky:
name = isolate()->factory()->sticky_string();
break;
case JSRegExp::kUnicode:
name = isolate()->factory()->unicode_string();
break;
case JSRegExp::kHasIndices:
name = isolate()->factory()->has_indices_string();
break;
case JSRegExp::kLinear:
name = isolate()->factory()->linear_string();
break;
} }
TNode<Object> value = GetProperty(context, regexp, name); TNode<Object> value = GetProperty(context, regexp, name);
......
...@@ -2462,7 +2462,7 @@ void Genesis::InitializeGlobal(Handle<JSGlobalObject> global_object, ...@@ -2462,7 +2462,7 @@ void Genesis::InitializeGlobal(Handle<JSGlobalObject> global_object,
Builtin::kRegExpPrototypeFlagsGetter, true); Builtin::kRegExpPrototypeFlagsGetter, true);
SimpleInstallGetter(isolate_, prototype, factory->global_string(), SimpleInstallGetter(isolate_, prototype, factory->global_string(),
Builtin::kRegExpPrototypeGlobalGetter, true); Builtin::kRegExpPrototypeGlobalGetter, true);
SimpleInstallGetter(isolate(), prototype, factory->has_indices_string(), SimpleInstallGetter(isolate(), prototype, factory->hasIndices_string(),
Builtin::kRegExpPrototypeHasIndicesGetter, true); Builtin::kRegExpPrototypeHasIndicesGetter, true);
SimpleInstallGetter(isolate_, prototype, factory->ignoreCase_string(), SimpleInstallGetter(isolate_, prototype, factory->ignoreCase_string(),
Builtin::kRegExpPrototypeIgnoreCaseGetter, true); Builtin::kRegExpPrototypeIgnoreCaseGetter, true);
......
...@@ -227,7 +227,7 @@ ...@@ -227,7 +227,7 @@
V(_, groups_string, "groups") \ V(_, groups_string, "groups") \
V(_, growable_string, "growable") \ V(_, growable_string, "growable") \
V(_, has_string, "has") \ V(_, has_string, "has") \
V(_, has_indices_string, "hasIndices") \ V(_, hasIndices_string, "hasIndices") \
V(_, ignoreCase_string, "ignoreCase") \ V(_, ignoreCase_string, "ignoreCase") \
V(_, illegal_access_string, "illegal access") \ V(_, illegal_access_string, "illegal access") \
V(_, illegal_argument_string, "illegal argument") \ V(_, illegal_argument_string, "illegal argument") \
......
...@@ -59,7 +59,7 @@ int JSRegExp::MaxRegisterCount() const { ...@@ -59,7 +59,7 @@ int JSRegExp::MaxRegisterCount() const {
return Smi::ToInt(DataAt(kIrregexpMaxRegisterCountIndex)); return Smi::ToInt(DataAt(kIrregexpMaxRegisterCountIndex));
} }
JSRegExp::Flags JSRegExp::GetFlags() { JSRegExp::Flags JSRegExp::GetFlags() const {
DCHECK(this->data().IsFixedArray()); DCHECK(this->data().IsFixedArray());
Object data = this->data(); Object data = this->data();
Smi smi = Smi::cast(FixedArray::cast(data).get(kFlagsIndex)); Smi smi = Smi::cast(FixedArray::cast(data).get(kFlagsIndex));
......
...@@ -111,64 +111,38 @@ uint32_t JSRegExp::BacktrackLimit() const { ...@@ -111,64 +111,38 @@ uint32_t JSRegExp::BacktrackLimit() const {
} }
// static // static
JSRegExp::Flags JSRegExp::FlagsFromString(Isolate* isolate, base::Optional<JSRegExp::Flags> JSRegExp::FlagsFromString(
Handle<String> flags, bool* success) { Isolate* isolate, Handle<String> flags) {
int length = flags->length(); const int length = flags->length();
if (length == 0) {
*success = true;
return JSRegExp::kNone;
}
// A longer flags string cannot be valid. // A longer flags string cannot be valid.
if (length > JSRegExp::kFlagCount) return JSRegExp::Flags(0); if (length > JSRegExp::kFlagCount) return {};
JSRegExp::Flags value(0);
if (flags->IsSeqOneByteString()) { JSRegExp::Flags value;
DisallowGarbageCollection no_gc; FlatStringReader reader(isolate, String::Flatten(isolate, flags));
SeqOneByteString seq_flags = SeqOneByteString::cast(*flags);
for (int i = 0; i < length; i++) { for (int i = 0; i < length; i++) {
base::Optional<JSRegExp::Flag> maybe_flag = base::Optional<JSRegExp::Flag> flag = JSRegExp::FlagFromChar(reader.Get(i));
JSRegExp::FlagFromChar(seq_flags.Get(i)); if (!flag.has_value()) return {};
if (!maybe_flag.has_value()) return JSRegExp::Flags(0); if (value & flag.value()) return {}; // Duplicate.
JSRegExp::Flag flag = *maybe_flag; value |= flag.value();
// Duplicate flag.
if (value & flag) return JSRegExp::Flags(0);
value |= flag;
}
} else {
flags = String::Flatten(isolate, flags);
DisallowGarbageCollection no_gc;
String::FlatContent flags_content = flags->GetFlatContent(no_gc);
for (int i = 0; i < length; i++) {
base::Optional<JSRegExp::Flag> maybe_flag =
JSRegExp::FlagFromChar(flags_content.Get(i));
if (!maybe_flag.has_value()) return JSRegExp::Flags(0);
JSRegExp::Flag flag = *maybe_flag;
// Duplicate flag.
if (value & flag) return JSRegExp::Flags(0);
value |= flag;
}
} }
*success = true;
return value; return value;
} }
// static // static
Handle<String> JSRegExp::StringFromFlags(Isolate* isolate, Handle<String> JSRegExp::StringFromFlags(Isolate* isolate,
JSRegExp::Flags flags) { JSRegExp::Flags flags) {
// Ensure that this function is up-to-date with the supported flag options. static constexpr int kStringTerminator = 1;
constexpr size_t kFlagCount = JSRegExp::kFlagCount;
STATIC_ASSERT(kFlagCount == 8);
// Translate to the lexicographically smaller string.
int cursor = 0; int cursor = 0;
char buffer[kFlagCount] = {'\0'}; char buffer[kFlagCount + kStringTerminator];
if (flags & JSRegExp::kHasIndices) buffer[cursor++] = 'd'; #define V(Lower, Camel, LowerCamel, Char, Bit) \
if (flags & JSRegExp::kGlobal) buffer[cursor++] = 'g'; if (flags & JSRegExp::k##Camel) buffer[cursor++] = Char;
if (flags & JSRegExp::kIgnoreCase) buffer[cursor++] = 'i'; REGEXP_FLAG_LIST(V)
if (flags & JSRegExp::kLinear) buffer[cursor++] = 'l'; #undef V
if (flags & JSRegExp::kMultiline) buffer[cursor++] = 'm'; buffer[cursor++] = '\0';
if (flags & JSRegExp::kDotAll) buffer[cursor++] = 's'; DCHECK_LE(cursor, kFlagCount + kStringTerminator);
if (flags & JSRegExp::kUnicode) buffer[cursor++] = 'u';
if (flags & JSRegExp::kSticky) buffer[cursor++] = 'y';
return isolate->factory()->NewStringFromAsciiChecked(buffer); return isolate->factory()->NewStringFromAsciiChecked(buffer);
} }
...@@ -247,15 +221,15 @@ MaybeHandle<JSRegExp> JSRegExp::Initialize(Handle<JSRegExp> regexp, ...@@ -247,15 +221,15 @@ MaybeHandle<JSRegExp> JSRegExp::Initialize(Handle<JSRegExp> regexp,
Handle<String> source, Handle<String> source,
Handle<String> flags_string) { Handle<String> flags_string) {
Isolate* isolate = regexp->GetIsolate(); Isolate* isolate = regexp->GetIsolate();
bool success = false; base::Optional<Flags> flags =
Flags flags = JSRegExp::FlagsFromString(isolate, flags_string, &success); JSRegExp::FlagsFromString(isolate, flags_string);
if (!success) { if (!flags.has_value()) {
THROW_NEW_ERROR( THROW_NEW_ERROR(
isolate, isolate,
NewSyntaxError(MessageTemplate::kInvalidRegExpFlags, flags_string), NewSyntaxError(MessageTemplate::kInvalidRegExpFlags, flags_string),
JSRegExp); JSRegExp);
} }
return Initialize(regexp, source, flags); return Initialize(regexp, source, flags.value());
} }
namespace { namespace {
......
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
#include "src/objects/contexts.h" #include "src/objects/contexts.h"
#include "src/objects/js-array.h" #include "src/objects/js-array.h"
#include "src/regexp/regexp-flags.h"
#include "torque-generated/bit-fields.h" #include "torque-generated/bit-fields.h"
// Has to be the last include (doesn't have include guards): // Has to be the last include (doesn't have include guards):
...@@ -43,32 +44,39 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> { ...@@ -43,32 +44,39 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> {
enum Type { NOT_COMPILED, ATOM, IRREGEXP, EXPERIMENTAL }; enum Type { NOT_COMPILED, ATOM, IRREGEXP, EXPERIMENTAL };
DEFINE_TORQUE_GENERATED_JS_REG_EXP_FLAGS() DEFINE_TORQUE_GENERATED_JS_REG_EXP_FLAGS()
// Converts a single RegExpFlag into the JSRegExp Flag enum. This is a plain
// static_cast of the value; no range or validity check is performed here.
static constexpr Flag AsJSRegExpFlag(RegExpFlag f) {
return static_cast<Flag>(f);
}
// Optional-preserving variant of AsJSRegExpFlag: an empty input yields an
// empty result, otherwise the contained flag is converted with
// AsJSRegExpFlag.
static constexpr base::Optional<Flag> AsOptionalJSRegExpFlag(
base::Optional<RegExpFlag> f) {
return f.has_value() ? base::Optional<Flag>{AsJSRegExpFlag(f.value())}
: base::Optional<Flag>{};
}
// Converts a RegExpFlags set into JSRegExp Flags by casting it to int and
// constructing a Flags value from that integer.
static constexpr Flags AsJSRegExpFlags(RegExpFlags f) {
return Flags{static_cast<int>(f)};
}
// Converts JSRegExp Flags into a RegExpFlags set by casting it to int and
// constructing a RegExpFlags value from that integer (the reverse direction
// of AsJSRegExpFlags).
static constexpr RegExpFlags AsRegExpFlags(Flags f) {
return RegExpFlags{static_cast<int>(f)};
}
static base::Optional<Flag> FlagFromChar(char c) { static base::Optional<Flag> FlagFromChar(char c) {
STATIC_ASSERT(kFlagCount == 8); base::Optional<Flag> f = AsOptionalJSRegExpFlag(TryRegExpFlagFromChar(c));
// clang-format off if (!f.has_value()) return f;
return c == 'g' ? base::Optional<Flag>(kGlobal) if (f.value() == kLinear && !FLAG_enable_experimental_regexp_engine) {
: c == 'i' ? base::Optional<Flag>(kIgnoreCase) return {};
: c == 'm' ? base::Optional<Flag>(kMultiline) }
: c == 'y' ? base::Optional<Flag>(kSticky) return f;
: c == 'u' ? base::Optional<Flag>(kUnicode)
: c == 's' ? base::Optional<Flag>(kDotAll)
: c == 'd' ? base::Optional<Flag>(kHasIndices)
: (FLAG_enable_experimental_regexp_engine && c == 'l')
? base::Optional<Flag>(kLinear)
: base::Optional<Flag>();
// clang-format on
} }
STATIC_ASSERT(static_cast<int>(kNone) == v8::RegExp::kNone); STATIC_ASSERT(static_cast<int>(kNone) == v8::RegExp::kNone);
STATIC_ASSERT(static_cast<int>(kGlobal) == v8::RegExp::kGlobal); #define V(_, Camel, ...) \
STATIC_ASSERT(static_cast<int>(kIgnoreCase) == v8::RegExp::kIgnoreCase); STATIC_ASSERT(static_cast<int>(k##Camel) == v8::RegExp::k##Camel); \
STATIC_ASSERT(static_cast<int>(kMultiline) == v8::RegExp::kMultiline); STATIC_ASSERT(static_cast<int>(k##Camel) == \
STATIC_ASSERT(static_cast<int>(kSticky) == v8::RegExp::kSticky); static_cast<int>(RegExpFlag::k##Camel));
STATIC_ASSERT(static_cast<int>(kUnicode) == v8::RegExp::kUnicode); REGEXP_FLAG_LIST(V)
STATIC_ASSERT(static_cast<int>(kDotAll) == v8::RegExp::kDotAll); #undef V
STATIC_ASSERT(static_cast<int>(kLinear) == v8::RegExp::kLinear);
STATIC_ASSERT(static_cast<int>(kHasIndices) == v8::RegExp::kHasIndices);
STATIC_ASSERT(kFlagCount == v8::RegExp::kFlagCount); STATIC_ASSERT(kFlagCount == v8::RegExp::kFlagCount);
STATIC_ASSERT(kFlagCount == kRegExpFlagCount);
DECL_ACCESSORS(last_index, Object) DECL_ACCESSORS(last_index, Object)
...@@ -86,8 +94,8 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> { ...@@ -86,8 +94,8 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> {
Handle<String> source, Handle<String> source,
Handle<String> flags_string); Handle<String> flags_string);
static Flags FlagsFromString(Isolate* isolate, Handle<String> flags, static base::Optional<Flags> FlagsFromString(Isolate* isolate,
bool* success); Handle<String> flags);
V8_EXPORT_PRIVATE static Handle<String> StringFromFlags(Isolate* isolate, V8_EXPORT_PRIVATE static Handle<String> StringFromFlags(Isolate* isolate,
Flags flags); Flags flags);
...@@ -112,7 +120,7 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> { ...@@ -112,7 +120,7 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> {
static int RegistersForCaptureCount(int count) { return (count + 1) * 2; } static int RegistersForCaptureCount(int count) { return (count + 1) * 2; }
inline int MaxRegisterCount() const; inline int MaxRegisterCount() const;
inline Flags GetFlags(); inline Flags GetFlags() const;
inline String Pattern(); inline String Pattern();
inline String EscapedPattern(); inline String EscapedPattern();
inline Object CaptureNameMap(); inline Object CaptureNameMap();
......
...@@ -69,14 +69,14 @@ base::Optional<CompilationResult> CompileImpl(Isolate* isolate, ...@@ -69,14 +69,14 @@ base::Optional<CompilationResult> CompileImpl(Isolate* isolate,
Zone zone(isolate->allocator(), ZONE_NAME); Zone zone(isolate->allocator(), ZONE_NAME);
Handle<String> source(regexp->Pattern(), isolate); Handle<String> source(regexp->Pattern(), isolate);
JSRegExp::Flags flags = regexp->GetFlags();
// Parse and compile the regexp source. // Parse and compile the regexp source.
RegExpCompileData parse_result; RegExpCompileData parse_result;
DCHECK(!isolate->has_pending_exception()); DCHECK(!isolate->has_pending_exception());
bool parse_success = RegExpParser::ParseRegExpFromHeapString( bool parse_success = RegExpParser::ParseRegExpFromHeapString(
isolate, &zone, source, flags, &parse_result); isolate, &zone, source, JSRegExp::AsRegExpFlags(regexp->GetFlags()),
&parse_result);
if (!parse_success) { if (!parse_success) {
// The pattern was already parsed successfully during initialization, so // The pattern was already parsed successfully during initialization, so
// the only way parsing can fail now is because of stack overflow. // the only way parsing can fail now is because of stack overflow.
...@@ -86,8 +86,8 @@ base::Optional<CompilationResult> CompileImpl(Isolate* isolate, ...@@ -86,8 +86,8 @@ base::Optional<CompilationResult> CompileImpl(Isolate* isolate,
return base::nullopt; return base::nullopt;
} }
ZoneList<RegExpInstruction> bytecode = ZoneList<RegExpInstruction> bytecode = ExperimentalRegExpCompiler::Compile(
ExperimentalRegExpCompiler::Compile(parse_result.tree, flags, &zone); parse_result.tree, regexp->GetFlags(), &zone);
CompilationResult result; CompilationResult result;
result.bytecode = VectorToByteArray(isolate, bytecode.ToVector()); result.bytecode = VectorToByteArray(isolate, bytecode.ToVector());
......
...@@ -518,7 +518,7 @@ bool RegExpDisjunction::SortConsecutiveAtoms(RegExpCompiler* compiler) { ...@@ -518,7 +518,7 @@ bool RegExpDisjunction::SortConsecutiveAtoms(RegExpCompiler* compiler) {
DCHECK_LT(first_atom, alternatives->length()); DCHECK_LT(first_atom, alternatives->length());
DCHECK_LE(i, alternatives->length()); DCHECK_LE(i, alternatives->length());
DCHECK_LE(first_atom, i); DCHECK_LE(first_atom, i);
if (IgnoreCase(compiler->flags())) { if (IsIgnoreCase(compiler->flags())) {
#ifdef V8_INTL_SUPPORT #ifdef V8_INTL_SUPPORT
alternatives->StableSort(CompareFirstCharCaseInsensitve, first_atom, alternatives->StableSort(CompareFirstCharCaseInsensitve, first_atom,
i - first_atom); i - first_atom);
...@@ -570,14 +570,14 @@ void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) { ...@@ -570,14 +570,14 @@ void RegExpDisjunction::RationalizeConsecutiveAtoms(RegExpCompiler* compiler) {
#ifdef V8_INTL_SUPPORT #ifdef V8_INTL_SUPPORT
icu::UnicodeString new_prefix(atom->data().at(0)); icu::UnicodeString new_prefix(atom->data().at(0));
if (new_prefix != common_prefix) { if (new_prefix != common_prefix) {
if (!IgnoreCase(compiler->flags())) break; if (!IsIgnoreCase(compiler->flags())) break;
if (common_prefix.caseCompare(new_prefix, U_FOLD_CASE_DEFAULT) != 0) if (common_prefix.caseCompare(new_prefix, U_FOLD_CASE_DEFAULT) != 0)
break; break;
} }
#else #else
unibrow::uchar new_prefix = atom->data().at(0); unibrow::uchar new_prefix = atom->data().at(0);
if (new_prefix != common_prefix) { if (new_prefix != common_prefix) {
if (!IgnoreCase(compiler->flags())) break; if (!IsIgnoreCase(compiler->flags())) break;
unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize = unibrow::Mapping<unibrow::Ecma262Canonicalize>* canonicalize =
compiler->isolate()->regexp_macro_assembler_canonicalize(); compiler->isolate()->regexp_macro_assembler_canonicalize();
new_prefix = Canonical(canonicalize, new_prefix); new_prefix = Canonical(canonicalize, new_prefix);
......
...@@ -1589,7 +1589,7 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details, ...@@ -1589,7 +1589,7 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
QuickCheckDetails::Position* pos = QuickCheckDetails::Position* pos =
details->positions(characters_filled_in); details->positions(characters_filled_in);
base::uc16 c = quarks[i]; base::uc16 c = quarks[i];
if (IgnoreCase(compiler->flags())) { if (IsIgnoreCase(compiler->flags())) {
unibrow::uchar chars[4]; unibrow::uchar chars[4];
int length = GetCaseIndependentLetters( int length = GetCaseIndependentLetters(
isolate, c, compiler->one_byte(), chars, 4); isolate, c, compiler->one_byte(), chars, 4);
...@@ -1861,7 +1861,7 @@ RegExpNode* TextNode::FilterOneByte(int depth, JSRegExp::Flags flags) { ...@@ -1861,7 +1861,7 @@ RegExpNode* TextNode::FilterOneByte(int depth, JSRegExp::Flags flags) {
base::Vector<const base::uc16> quarks = elm.atom()->data(); base::Vector<const base::uc16> quarks = elm.atom()->data();
for (int j = 0; j < quarks.length(); j++) { for (int j = 0; j < quarks.length(); j++) {
base::uc16 c = quarks[j]; base::uc16 c = quarks[j];
if (IgnoreCase(flags)) { if (IsIgnoreCase(flags)) {
c = unibrow::Latin1::TryConvertToLatin1(c); c = unibrow::Latin1::TryConvertToLatin1(c);
} }
if (c > unibrow::Latin1::kMaxChar) return set_replacement(nullptr); if (c > unibrow::Latin1::kMaxChar) return set_replacement(nullptr);
...@@ -1880,7 +1880,7 @@ RegExpNode* TextNode::FilterOneByte(int depth, JSRegExp::Flags flags) { ...@@ -1880,7 +1880,7 @@ RegExpNode* TextNode::FilterOneByte(int depth, JSRegExp::Flags flags) {
if (range_count != 0 && ranges->at(0).from() == 0 && if (range_count != 0 && ranges->at(0).from() == 0 &&
ranges->at(0).to() >= String::kMaxOneByteCharCode) { ranges->at(0).to() >= String::kMaxOneByteCharCode) {
// This will be handled in a later filter. // This will be handled in a later filter.
if (IgnoreCase(flags) && RangesContainLatin1Equivalents(ranges)) { if (IsIgnoreCase(flags) && RangesContainLatin1Equivalents(ranges)) {
continue; continue;
} }
return set_replacement(nullptr); return set_replacement(nullptr);
...@@ -1889,7 +1889,7 @@ RegExpNode* TextNode::FilterOneByte(int depth, JSRegExp::Flags flags) { ...@@ -1889,7 +1889,7 @@ RegExpNode* TextNode::FilterOneByte(int depth, JSRegExp::Flags flags) {
if (range_count == 0 || if (range_count == 0 ||
ranges->at(0).from() > String::kMaxOneByteCharCode) { ranges->at(0).from() > String::kMaxOneByteCharCode) {
// This will be handled in a later filter. // This will be handled in a later filter.
if (IgnoreCase(flags) && RangesContainLatin1Equivalents(ranges)) { if (IsIgnoreCase(flags) && RangesContainLatin1Equivalents(ranges)) {
continue; continue;
} }
return set_replacement(nullptr); return set_replacement(nullptr);
...@@ -2321,13 +2321,13 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler, TextEmitPassType pass, ...@@ -2321,13 +2321,13 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler, TextEmitPassType pass,
TextElement elm = elements()->at(i); TextElement elm = elements()->at(i);
int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset; int cp_offset = trace->cp_offset() + elm.cp_offset() + backward_offset;
if (elm.text_type() == TextElement::ATOM) { if (elm.text_type() == TextElement::ATOM) {
if (SkipPass(pass, IgnoreCase(compiler->flags()))) continue; if (SkipPass(pass, IsIgnoreCase(compiler->flags()))) continue;
base::Vector<const base::uc16> quarks = elm.atom()->data(); base::Vector<const base::uc16> quarks = elm.atom()->data();
for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) { for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
if (first_element_checked && i == 0 && j == 0) continue; if (first_element_checked && i == 0 && j == 0) continue;
if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue; if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;
base::uc16 quark = quarks[j]; base::uc16 quark = quarks[j];
if (IgnoreCase(compiler->flags())) { if (IsIgnoreCase(compiler->flags())) {
// Everywhere else we assume that a non-Latin-1 character cannot match // Everywhere else we assume that a non-Latin-1 character cannot match
// a Latin-1 character. Avoid the cases where this is assumption is // a Latin-1 character. Avoid the cases where this is assumption is
// invalid by using the Latin1 equivalent instead. // invalid by using the Latin1 equivalent instead.
...@@ -2492,7 +2492,7 @@ void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) { ...@@ -2492,7 +2492,7 @@ void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {
void TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte, void TextNode::MakeCaseIndependent(Isolate* isolate, bool is_one_byte,
JSRegExp::Flags flags) { JSRegExp::Flags flags) {
if (!IgnoreCase(flags)) return; if (!IsIgnoreCase(flags)) return;
#ifdef V8_INTL_SUPPORT #ifdef V8_INTL_SUPPORT
if (NeedsUnicodeCaseEquivalents(flags)) return; if (NeedsUnicodeCaseEquivalents(flags)) return;
#endif #endif
...@@ -3444,7 +3444,7 @@ void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) { ...@@ -3444,7 +3444,7 @@ void BackReferenceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
RecursionCheck rc(compiler); RecursionCheck rc(compiler);
DCHECK_EQ(start_reg_ + 1, end_reg_); DCHECK_EQ(start_reg_ + 1, end_reg_);
if (IgnoreCase(flags_)) { if (IsIgnoreCase(flags_)) {
bool unicode = IsUnicode(flags_); bool unicode = IsUnicode(flags_);
assembler->CheckNotBackReferenceIgnoreCase(start_reg_, read_backward(), assembler->CheckNotBackReferenceIgnoreCase(start_reg_, read_backward(),
unicode, trace->backtrack()); unicode, trace->backtrack());
...@@ -3809,7 +3809,7 @@ void TextNode::FillInBMInfo(Isolate* isolate, int initial_offset, int budget, ...@@ -3809,7 +3809,7 @@ void TextNode::FillInBMInfo(Isolate* isolate, int initial_offset, int budget,
return; return;
} }
base::uc16 character = atom->data()[j]; base::uc16 character = atom->data()[j];
if (IgnoreCase(bm->compiler()->flags())) { if (IsIgnoreCase(bm->compiler()->flags())) {
unibrow::uchar chars[4]; unibrow::uchar chars[4];
int length = GetCaseIndependentLetters( int length = GetCaseIndependentLetters(
isolate, character, bm->max_char() == String::kMaxOneByteCharCode, isolate, character, bm->max_char() == String::kMaxOneByteCharCode,
......
...@@ -9,6 +9,7 @@ ...@@ -9,6 +9,7 @@
#include "src/base/small-vector.h" #include "src/base/small-vector.h"
#include "src/base/strings.h" #include "src/base/strings.h"
#include "src/regexp/regexp-flags.h"
#include "src/regexp/regexp-nodes.h" #include "src/regexp/regexp-nodes.h"
namespace v8 { namespace v8 {
...@@ -49,34 +50,17 @@ constexpr int kPatternTooShortForBoyerMoore = 2; ...@@ -49,34 +50,17 @@ constexpr int kPatternTooShortForBoyerMoore = 2;
} // namespace regexp_compiler_constants } // namespace regexp_compiler_constants
inline bool IgnoreCase(JSRegExp::Flags flags) { #define V(Lower, Camel, LowerCamel, Char, Bit) \
return (flags & JSRegExp::kIgnoreCase) != 0; inline bool Is##Camel(JSRegExp::Flags flags) { \
} return Is##Camel(JSRegExp::AsRegExpFlags(flags)); \
}
inline bool IsUnicode(JSRegExp::Flags flags) { REGEXP_FLAG_LIST(V)
return (flags & JSRegExp::kUnicode) != 0; #undef V
}
inline bool IsSticky(JSRegExp::Flags flags) {
return (flags & JSRegExp::kSticky) != 0;
}
inline bool IsGlobal(JSRegExp::Flags flags) {
return (flags & JSRegExp::kGlobal) != 0;
}
inline bool DotAll(JSRegExp::Flags flags) {
return (flags & JSRegExp::kDotAll) != 0;
}
inline bool Multiline(JSRegExp::Flags flags) {
return (flags & JSRegExp::kMultiline) != 0;
}
inline bool NeedsUnicodeCaseEquivalents(JSRegExp::Flags flags) { inline bool NeedsUnicodeCaseEquivalents(JSRegExp::Flags flags) {
// Both unicode and ignore_case flags are set. We need to use ICU to find // Both unicode and ignore_case flags are set. We need to use ICU to find
// the closure over case equivalents. // the closure over case equivalents.
return IsUnicode(flags) && IgnoreCase(flags); return IsUnicode(flags) && IsIgnoreCase(flags);
} }
// Details of a quick mask-compare check that can look ahead in the // Details of a quick mask-compare check that can look ahead in the
......
// Copyright 2021 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_REGEXP_REGEXP_FLAGS_H_
#define V8_REGEXP_REGEXP_FLAGS_H_
#include "src/base/flags.h"
#include "src/base/optional.h"
namespace v8 {
namespace internal {
// Master list of regexp flags (X-macro). Each entry is
//   V(Lower, Camel, LowerCamel, Char, Bit)
// i.e. the flag name spelled in snake_case, CamelCase and lowerCamelCase,
// followed by the flag's character and the index of its bit.
//
// Order is important! Sorted in alphabetic order by the flag char. Note this
// means that flag bits are shuffled. Take care to keep them contiguous when
// adding/removing flags. Both properties (alpha-sorted chars, contiguous bits)
// are enforced by the static_asserts further down in this file.
#define REGEXP_FLAG_LIST(V) \
V(has_indices, HasIndices, hasIndices, 'd', 7) \
V(global, Global, global, 'g', 0) \
V(ignore_case, IgnoreCase, ignoreCase, 'i', 1) \
V(linear, Linear, linear, 'l', 6) \
V(multiline, Multiline, multiline, 'm', 2) \
V(dot_all, DotAll, dotAll, 's', 5) \
V(unicode, Unicode, unicode, 'u', 4) \
V(sticky, Sticky, sticky, 'y', 3)
// Turns every REGEXP_FLAG_LIST entry into an enumerator named k<Camel> whose
// value is the single bit selected by the entry's Bit column.
#define DEFINE_REGEXP_FLAG(Lower, Camel, LowerCamel, Char, Bit) \
  k##Camel = (1 << (Bit)),
enum class RegExpFlag {
  REGEXP_FLAG_LIST(DEFINE_REGEXP_FLAG)
};
#undef DEFINE_REGEXP_FLAG
#define V(...) +1
constexpr int kRegExpFlagCount = REGEXP_FLAG_LIST(V);
#undef V
// Assert alpha-sorted chars.
// Each list entry expands to the (intentionally unbalanced) fragment
// "< Char) && (Char", so the whole condition becomes a chain of comparisons:
//   (('a' - 1) < 'd') && ('d' < 'g') && ... && ('y' <= 'z')
// Neighbouring flag chars must therefore be strictly increasing, and the
// first/last chars are bounded by 'a' and 'z'.
#define V(Lower, Camel, LowerCamel, Char, Bit) < Char) && (Char
static_assert((('a' - 1) REGEXP_FLAG_LIST(V) <= 'z'), "alpha-sort chars");
#undef V
// Assert contiguous indices.
// Each list entry expands to "| (1 << Bit)", so the right-hand side is the
// bitwise OR of every flag's bit. It must equal a mask with exactly the low
// kRegExpFlagCount bits set; a gap in the bit indices or two flags sharing
// one bit would make the comparison fail.
#define V(Lower, Camel, LowerCamel, Char, Bit) | (1 << Bit)
static_assert(((1 << kRegExpFlagCount) - 1) == (0 REGEXP_FLAG_LIST(V)),
"contiguous bits");
#undef V
using RegExpFlags = base::Flags<RegExpFlag>;
// Generates one constexpr predicate per flag, Is<Camel>(RegExpFlags), that
// tests the given flag set against RegExpFlag::k<Camel>.
#define DEFINE_REGEXP_FLAG_PREDICATE(Lower, Camel, ...) \
  constexpr bool Is##Camel(RegExpFlags flags) {         \
    return (flags & RegExpFlag::k##Camel) != 0;         \
  }
REGEXP_FLAG_LIST(DEFINE_REGEXP_FLAG_PREDICATE)
#undef DEFINE_REGEXP_FLAG_PREDICATE
// Maps a flag character to its RegExpFlag. Returns an empty optional when `c`
// is not the character of any entry in REGEXP_FLAG_LIST.
// clang-format off
constexpr base::Optional<RegExpFlag> TryRegExpFlagFromChar(char c) {
  switch (c) {
#define FLAG_CHAR_CASE(Lower, Camel, LowerCamel, Char, Bit) \
    case Char:                                              \
      return RegExpFlag::k##Camel;
    REGEXP_FLAG_LIST(FLAG_CHAR_CASE)
#undef FLAG_CHAR_CASE
    default:
      return base::Optional<RegExpFlag>{};
  }
}
// clang-format on
} // namespace internal
} // namespace v8
#endif // V8_REGEXP_REGEXP_FLAGS_H_
...@@ -106,7 +106,7 @@ class BufferedZoneList { ...@@ -106,7 +106,7 @@ class BufferedZoneList {
// Accumulates RegExp atoms and assertions into lists of terms and alternatives. // Accumulates RegExp atoms and assertions into lists of terms and alternatives.
class RegExpBuilder : public ZoneObject { class RegExpBuilder : public ZoneObject {
public: public:
RegExpBuilder(Zone* zone, JSRegExp::Flags flags); RegExpBuilder(Zone* zone, RegExpFlags flags);
void AddCharacter(base::uc16 character); void AddCharacter(base::uc16 character);
void AddUnicodeCharacter(base::uc32 character); void AddUnicodeCharacter(base::uc32 character);
void AddEscapedUnicodeCharacter(base::uc32 character); void AddEscapedUnicodeCharacter(base::uc32 character);
...@@ -123,12 +123,11 @@ class RegExpBuilder : public ZoneObject { ...@@ -123,12 +123,11 @@ class RegExpBuilder : public ZoneObject {
RegExpQuantifier::QuantifierType type); RegExpQuantifier::QuantifierType type);
void FlushText(); void FlushText();
RegExpTree* ToRegExp(); RegExpTree* ToRegExp();
JSRegExp::Flags flags() const { return flags_; } RegExpFlags flags() const { return flags_; }
void set_flags(JSRegExp::Flags flags) { flags_ = flags; }
bool ignore_case() const { return (flags_ & JSRegExp::kIgnoreCase) != 0; } bool ignore_case() const { return IsIgnoreCase(flags_); }
bool multiline() const { return (flags_ & JSRegExp::kMultiline) != 0; } bool multiline() const { return IsMultiline(flags_); }
bool dotall() const { return (flags_ & JSRegExp::kDotAll) != 0; } bool dotall() const { return IsDotAll(flags_); }
private: private:
static const base::uc16 kNoPendingSurrogate = 0; static const base::uc16 kNoPendingSurrogate = 0;
...@@ -142,9 +141,9 @@ class RegExpBuilder : public ZoneObject { ...@@ -142,9 +141,9 @@ class RegExpBuilder : public ZoneObject {
Zone* zone() const { return zone_; } Zone* zone() const { return zone_; }
bool unicode() const { return (flags_ & JSRegExp::kUnicode) != 0; } bool unicode() const { return (flags_ & JSRegExp::kUnicode) != 0; }
Zone* zone_; Zone* const zone_;
bool pending_empty_; bool pending_empty_;
JSRegExp::Flags flags_; const RegExpFlags flags_;
ZoneList<base::uc16>* characters_; ZoneList<base::uc16>* characters_;
base::uc16 pending_surrogate_; base::uc16 pending_surrogate_;
BufferedZoneList<RegExpTree, 2> terms_; BufferedZoneList<RegExpTree, 2> terms_;
...@@ -174,7 +173,7 @@ class RegExpParserState : public ZoneObject { ...@@ -174,7 +173,7 @@ class RegExpParserState : public ZoneObject {
RegExpLookaround::Type lookaround_type, RegExpLookaround::Type lookaround_type,
int disjunction_capture_index, int disjunction_capture_index,
const ZoneVector<base::uc16>* capture_name, const ZoneVector<base::uc16>* capture_name,
JSRegExp::Flags flags, Zone* zone) RegExpFlags flags, Zone* zone)
: previous_state_(previous_state), : previous_state_(previous_state),
builder_(zone->New<RegExpBuilder>(zone, flags)), builder_(zone->New<RegExpBuilder>(zone, flags)),
group_type_(group_type), group_type_(group_type),
...@@ -242,7 +241,7 @@ class RegExpParserState : public ZoneObject { ...@@ -242,7 +241,7 @@ class RegExpParserState : public ZoneObject {
template <class CharT> template <class CharT>
class RegExpParserImpl final { class RegExpParserImpl final {
private: private:
RegExpParserImpl(const CharT* input, int input_length, JSRegExp::Flags flags, RegExpParserImpl(const CharT* input, int input_length, RegExpFlags flags,
Isolate* isolate, Zone* zone, Isolate* isolate, Zone* zone,
const DisallowGarbageCollection& no_gc); const DisallowGarbageCollection& no_gc);
...@@ -378,7 +377,7 @@ class RegExpParserImpl final { ...@@ -378,7 +377,7 @@ class RegExpParserImpl final {
// These are the flags specified outside the regexp syntax ie after the // These are the flags specified outside the regexp syntax ie after the
// terminating '/' or in the second argument to the constructor. The current // terminating '/' or in the second argument to the constructor. The current
// flags are stored on the RegExpBuilder. // flags are stored on the RegExpBuilder.
const JSRegExp::Flags top_level_flags_; const RegExpFlags top_level_flags_;
int next_pos_; int next_pos_;
int captures_started_; int captures_started_;
int capture_count_; // Only valid after we have scanned for captures. int capture_count_; // Only valid after we have scanned for captures.
...@@ -391,14 +390,14 @@ class RegExpParserImpl final { ...@@ -391,14 +390,14 @@ class RegExpParserImpl final {
friend bool RegExpParser::ParseRegExpFromHeapString(Isolate*, Zone*, friend bool RegExpParser::ParseRegExpFromHeapString(Isolate*, Zone*,
Handle<String>, Handle<String>,
JSRegExp::Flags, RegExpFlags,
RegExpCompileData*); RegExpCompileData*);
}; };
template <class CharT> template <class CharT>
RegExpParserImpl<CharT>::RegExpParserImpl( RegExpParserImpl<CharT>::RegExpParserImpl(
const CharT* input, int input_length, JSRegExp::Flags flags, const CharT* input, int input_length, RegExpFlags flags, Isolate* isolate,
Isolate* isolate, Zone* zone, const DisallowGarbageCollection& no_gc) Zone* zone, const DisallowGarbageCollection& no_gc)
: isolate_(isolate), : isolate_(isolate),
zone_(zone), zone_(zone),
captures_(nullptr), captures_(nullptr),
...@@ -778,7 +777,7 @@ RegExpTree* RegExpParserImpl<CharT>::ParseDisjunction() { ...@@ -778,7 +777,7 @@ RegExpTree* RegExpParserImpl<CharT>::ParseDisjunction() {
} else { } else {
RegExpCapture* capture = GetCapture(index); RegExpCapture* capture = GetCapture(index);
RegExpTree* atom = zone()->template New<RegExpBackReference>( RegExpTree* atom = zone()->template New<RegExpBackReference>(
capture, builder->flags()); capture, JSRegExp::AsJSRegExpFlags(builder->flags()));
builder->AddAtom(atom); builder->AddAtom(atom);
} }
break; break;
...@@ -976,8 +975,6 @@ RegExpParserState* RegExpParserImpl<CharT>::ParseOpenParenthesis( ...@@ -976,8 +975,6 @@ RegExpParserState* RegExpParserImpl<CharT>::ParseOpenParenthesis(
RegExpParserState* state) { RegExpParserState* state) {
RegExpLookaround::Type lookaround_type = state->lookaround_type(); RegExpLookaround::Type lookaround_type = state->lookaround_type();
bool is_named_capture = false; bool is_named_capture = false;
JSRegExp::Flags switch_on = JSRegExp::kNone;
JSRegExp::Flags switch_off = JSRegExp::kNone;
const ZoneVector<base::uc16>* capture_name = nullptr; const ZoneVector<base::uc16>* capture_name = nullptr;
SubexpressionType subexpr_type = CAPTURE; SubexpressionType subexpr_type = CAPTURE;
Advance(); Advance();
...@@ -1030,11 +1027,10 @@ RegExpParserState* RegExpParserImpl<CharT>::ParseOpenParenthesis( ...@@ -1030,11 +1027,10 @@ RegExpParserState* RegExpParserImpl<CharT>::ParseOpenParenthesis(
capture_name = ParseCaptureGroupName(CHECK_FAILED); capture_name = ParseCaptureGroupName(CHECK_FAILED);
} }
} }
JSRegExp::Flags flags = (state->builder()->flags() | switch_on) & ~switch_off;
// Store current state and begin new disjunction parsing. // Store current state and begin new disjunction parsing.
return zone()->template New<RegExpParserState>( return zone()->template New<RegExpParserState>(
state, subexpr_type, lookaround_type, captures_started_, capture_name, state, subexpr_type, lookaround_type, captures_started_, capture_name,
flags, zone()); state->builder()->flags(), zone());
} }
#ifdef DEBUG #ifdef DEBUG
...@@ -1256,8 +1252,8 @@ bool RegExpParserImpl<CharT>::ParseNamedBackReference( ...@@ -1256,8 +1252,8 @@ bool RegExpParserImpl<CharT>::ParseNamedBackReference(
if (state->IsInsideCaptureGroup(name)) { if (state->IsInsideCaptureGroup(name)) {
builder->AddEmpty(); builder->AddEmpty();
} else { } else {
RegExpBackReference* atom = RegExpBackReference* atom = zone()->template New<RegExpBackReference>(
zone()->template New<RegExpBackReference>(builder->flags()); JSRegExp::AsJSRegExpFlags(builder->flags()));
atom->set_name(name); atom->set_name(name);
builder->AddAtom(atom); builder->AddAtom(atom);
...@@ -1753,7 +1749,7 @@ RegExpTree* RegExpParserImpl<CharT>::GetPropertySequence( ...@@ -1753,7 +1749,7 @@ RegExpTree* RegExpParserImpl<CharT>::GetPropertySequence(
if (!FLAG_harmony_regexp_sequence) return nullptr; if (!FLAG_harmony_regexp_sequence) return nullptr;
const char* name = name_1.data(); const char* name = name_1.data();
const base::uc32* sequence_list = nullptr; const base::uc32* sequence_list = nullptr;
JSRegExp::Flags flags = JSRegExp::kUnicode; RegExpFlags flags = RegExpFlag::kUnicode;
if (NameEquals(name, "Emoji_Flag_Sequence")) { if (NameEquals(name, "Emoji_Flag_Sequence")) {
sequence_list = UnicodePropertySequences::kEmojiFlagSequences; sequence_list = UnicodePropertySequences::kEmojiFlagSequences;
} else if (NameEquals(name, "Emoji_Tag_Sequence")) { } else if (NameEquals(name, "Emoji_Tag_Sequence")) {
...@@ -2114,7 +2110,7 @@ bool RegExpParserImpl<CharT>::Parse(RegExpCompileData* result) { ...@@ -2114,7 +2110,7 @@ bool RegExpParserImpl<CharT>::Parse(RegExpCompileData* result) {
return !failed(); return !failed();
} }
RegExpBuilder::RegExpBuilder(Zone* zone, JSRegExp::Flags flags) RegExpBuilder::RegExpBuilder(Zone* zone, RegExpFlags flags)
: zone_(zone), : zone_(zone),
pending_empty_(false), pending_empty_(false),
flags_(flags), flags_(flags),
...@@ -2406,7 +2402,7 @@ template class RegExpParserImpl<base::uc16>; ...@@ -2406,7 +2402,7 @@ template class RegExpParserImpl<base::uc16>;
// static // static
bool RegExpParser::ParseRegExpFromHeapString(Isolate* isolate, Zone* zone, bool RegExpParser::ParseRegExpFromHeapString(Isolate* isolate, Zone* zone,
Handle<String> input, Handle<String> input,
JSRegExp::Flags flags, RegExpFlags flags,
RegExpCompileData* result) { RegExpCompileData* result) {
DisallowGarbageCollection no_gc; DisallowGarbageCollection no_gc;
String::FlatContent content = input->GetFlatContent(no_gc); String::FlatContent content = input->GetFlatContent(no_gc);
...@@ -2425,8 +2421,7 @@ bool RegExpParser::ParseRegExpFromHeapString(Isolate* isolate, Zone* zone, ...@@ -2425,8 +2421,7 @@ bool RegExpParser::ParseRegExpFromHeapString(Isolate* isolate, Zone* zone,
// static // static
bool RegExpParser::VerifyRegExpSyntax(Isolate* isolate, Zone* zone, bool RegExpParser::VerifyRegExpSyntax(Isolate* isolate, Zone* zone,
Handle<String> input, Handle<String> input, RegExpFlags flags,
JSRegExp::Flags flags,
RegExpCompileData* result, RegExpCompileData* result,
const DisallowGarbageCollection&) { const DisallowGarbageCollection&) {
return ParseRegExpFromHeapString(isolate, zone, input, flags, result); return ParseRegExpFromHeapString(isolate, zone, input, flags, result);
......
...@@ -7,7 +7,7 @@ ...@@ -7,7 +7,7 @@
#include "src/common/assert-scope.h" #include "src/common/assert-scope.h"
#include "src/handles/handles.h" #include "src/handles/handles.h"
#include "src/objects/js-regexp.h" // Move the Flags definition elsewhere. #include "src/regexp/regexp-flags.h"
namespace v8 { namespace v8 {
namespace internal { namespace internal {
...@@ -20,13 +20,12 @@ struct RegExpCompileData; ...@@ -20,13 +20,12 @@ struct RegExpCompileData;
class V8_EXPORT_PRIVATE RegExpParser : public AllStatic { class V8_EXPORT_PRIVATE RegExpParser : public AllStatic {
public: public:
static bool ParseRegExpFromHeapString(Isolate* isolate, Zone* zone, static bool ParseRegExpFromHeapString(Isolate* isolate, Zone* zone,
Handle<String> input, Handle<String> input, RegExpFlags flags,
JSRegExp::Flags flags,
RegExpCompileData* result); RegExpCompileData* result);
// Used by the SpiderMonkey embedding of irregexp. // Used by the SpiderMonkey embedding of irregexp.
static bool VerifyRegExpSyntax(Isolate* isolate, Zone* zone, static bool VerifyRegExpSyntax(Isolate* isolate, Zone* zone,
Handle<String> input, JSRegExp::Flags flags, Handle<String> input, RegExpFlags flags,
RegExpCompileData* result, RegExpCompileData* result,
const DisallowGarbageCollection& no_gc); const DisallowGarbageCollection& no_gc);
}; };
......
...@@ -181,7 +181,8 @@ MaybeHandle<Object> RegExp::Compile(Isolate* isolate, Handle<JSRegExp> re, ...@@ -181,7 +181,8 @@ MaybeHandle<Object> RegExp::Compile(Isolate* isolate, Handle<JSRegExp> re,
PostponeInterruptsScope postpone(isolate); PostponeInterruptsScope postpone(isolate);
RegExpCompileData parse_result; RegExpCompileData parse_result;
DCHECK(!isolate->has_pending_exception()); DCHECK(!isolate->has_pending_exception());
if (!RegExpParser::ParseRegExpFromHeapString(isolate, &zone, pattern, flags, if (!RegExpParser::ParseRegExpFromHeapString(isolate, &zone, pattern,
JSRegExp::AsRegExpFlags(flags),
&parse_result)) { &parse_result)) {
// Throw an exception if we fail to parse the pattern. // Throw an exception if we fail to parse the pattern.
return RegExp::ThrowRegExpException(isolate, re, pattern, return RegExp::ThrowRegExpException(isolate, re, pattern,
...@@ -209,7 +210,7 @@ MaybeHandle<Object> RegExp::Compile(Isolate* isolate, Handle<JSRegExp> re, ...@@ -209,7 +210,7 @@ MaybeHandle<Object> RegExp::Compile(Isolate* isolate, Handle<JSRegExp> re,
ExperimentalRegExp::Initialize(isolate, re, pattern, flags, ExperimentalRegExp::Initialize(isolate, re, pattern, flags,
parse_result.capture_count); parse_result.capture_count);
has_been_compiled = true; has_been_compiled = true;
} else if (parse_result.simple && !IgnoreCase(flags) && !IsSticky(flags) && } else if (parse_result.simple && !IsIgnoreCase(flags) && !IsSticky(flags) &&
!HasFewDifferentCharacters(pattern)) { !HasFewDifferentCharacters(pattern)) {
// Parse-tree is a single atom that is equal to the pattern. // Parse-tree is a single atom that is equal to the pattern.
RegExpImpl::AtomCompile(isolate, re, pattern, flags, pattern); RegExpImpl::AtomCompile(isolate, re, pattern, flags, pattern);
...@@ -224,7 +225,7 @@ MaybeHandle<Object> RegExp::Compile(Isolate* isolate, Handle<JSRegExp> re, ...@@ -224,7 +225,7 @@ MaybeHandle<Object> RegExp::Compile(Isolate* isolate, Handle<JSRegExp> re,
ASSIGN_RETURN_ON_EXCEPTION( ASSIGN_RETURN_ON_EXCEPTION(
isolate, atom_string, isolate, atom_string,
isolate->factory()->NewStringFromTwoByte(atom_pattern), Object); isolate->factory()->NewStringFromTwoByte(atom_pattern), Object);
if (!IgnoreCase(flags) && !HasFewDifferentCharacters(atom_string)) { if (!IsIgnoreCase(flags) && !HasFewDifferentCharacters(atom_string)) {
RegExpImpl::AtomCompile(isolate, re, pattern, flags, atom_string); RegExpImpl::AtomCompile(isolate, re, pattern, flags, atom_string);
has_been_compiled = true; has_been_compiled = true;
} }
...@@ -506,7 +507,8 @@ bool RegExpImpl::CompileIrregexp(Isolate* isolate, Handle<JSRegExp> re, ...@@ -506,7 +507,8 @@ bool RegExpImpl::CompileIrregexp(Isolate* isolate, Handle<JSRegExp> re,
Handle<String> pattern(re->Pattern(), isolate); Handle<String> pattern(re->Pattern(), isolate);
pattern = String::Flatten(isolate, pattern); pattern = String::Flatten(isolate, pattern);
RegExpCompileData compile_data; RegExpCompileData compile_data;
if (!RegExpParser::ParseRegExpFromHeapString(isolate, &zone, pattern, flags, if (!RegExpParser::ParseRegExpFromHeapString(isolate, &zone, pattern,
JSRegExp::AsRegExpFlags(flags),
&compile_data)) { &compile_data)) {
// Throw an exception if we fail to parse the pattern. // Throw an exception if we fail to parse the pattern.
// THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once. // THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once.
......
...@@ -1390,10 +1390,8 @@ RUNTIME_FUNCTION(Runtime_NewRegExpWithBacktrackLimit) { ...@@ -1390,10 +1390,8 @@ RUNTIME_FUNCTION(Runtime_NewRegExpWithBacktrackLimit) {
CONVERT_ARG_HANDLE_CHECKED(String, flags_string, 1); CONVERT_ARG_HANDLE_CHECKED(String, flags_string, 1);
CONVERT_UINT32_ARG_CHECKED(backtrack_limit, 2); CONVERT_UINT32_ARG_CHECKED(backtrack_limit, 2);
bool success = false;
JSRegExp::Flags flags = JSRegExp::Flags flags =
JSRegExp::FlagsFromString(isolate, flags_string, &success); JSRegExp::FlagsFromString(isolate, flags_string).value();
CHECK(success);
RETURN_RESULT_OR_FAILURE( RETURN_RESULT_OR_FAILURE(
isolate, JSRegExp::New(isolate, pattern, flags, backtrack_limit)); isolate, JSRegExp::New(isolate, pattern, flags, backtrack_limit));
......
...@@ -1513,15 +1513,14 @@ void WebSnapshotDeserializer::ReadValue( ...@@ -1513,15 +1513,14 @@ void WebSnapshotDeserializer::ReadValue(
case ValueType::REGEXP: { case ValueType::REGEXP: {
Handle<String> pattern = ReadString(false); Handle<String> pattern = ReadString(false);
Handle<String> flags_string = ReadString(false); Handle<String> flags_string = ReadString(false);
bool success = false; base::Optional<JSRegExp::Flags> flags =
JSRegExp::Flags flags = JSRegExp::FlagsFromString(isolate_, flags_string);
JSRegExp::FlagsFromString(isolate_, flags_string, &success); if (!flags.has_value()) {
if (!success) {
Throw("Web snapshot: Malformed flags in regular expression"); Throw("Web snapshot: Malformed flags in regular expression");
return; return;
} }
MaybeHandle<JSRegExp> maybe_regexp = MaybeHandle<JSRegExp> maybe_regexp =
JSRegExp::New(isolate_, pattern, flags); JSRegExp::New(isolate_, pattern, flags.value());
if (!maybe_regexp.ToHandle(&value)) { if (!maybe_regexp.ToHandle(&value)) {
Throw("Web snapshot: Malformed RegExp"); Throw("Web snapshot: Malformed RegExp");
return; return;
......
...@@ -64,8 +64,8 @@ static bool CheckParse(const char* input) { ...@@ -64,8 +64,8 @@ static bool CheckParse(const char* input) {
Zone zone(isolate->allocator(), ZONE_NAME); Zone zone(isolate->allocator(), ZONE_NAME);
Handle<String> str = isolate->factory()->NewStringFromAsciiChecked(input); Handle<String> str = isolate->factory()->NewStringFromAsciiChecked(input);
RegExpCompileData result; RegExpCompileData result;
return RegExpParser::ParseRegExpFromHeapString(isolate, &zone, str, return RegExpParser::ParseRegExpFromHeapString(isolate, &zone, str, {},
JSRegExp::kNone, &result); &result);
} }
static void CheckParseEq(const char* input, const char* expected, static void CheckParseEq(const char* input, const char* expected,
...@@ -76,8 +76,8 @@ static void CheckParseEq(const char* input, const char* expected, ...@@ -76,8 +76,8 @@ static void CheckParseEq(const char* input, const char* expected,
Zone zone(isolate->allocator(), ZONE_NAME); Zone zone(isolate->allocator(), ZONE_NAME);
Handle<String> str = isolate->factory()->NewStringFromAsciiChecked(input); Handle<String> str = isolate->factory()->NewStringFromAsciiChecked(input);
RegExpCompileData result; RegExpCompileData result;
JSRegExp::Flags flags = JSRegExp::kNone; RegExpFlags flags;
if (unicode) flags |= JSRegExp::kUnicode; if (unicode) flags |= RegExpFlag::kUnicode;
CHECK(RegExpParser::ParseRegExpFromHeapString(isolate, &zone, str, flags, CHECK(RegExpParser::ParseRegExpFromHeapString(isolate, &zone, str, flags,
&result)); &result));
CHECK_NOT_NULL(result.tree); CHECK_NOT_NULL(result.tree);
...@@ -97,8 +97,8 @@ static bool CheckSimple(const char* input) { ...@@ -97,8 +97,8 @@ static bool CheckSimple(const char* input) {
Zone zone(isolate->allocator(), ZONE_NAME); Zone zone(isolate->allocator(), ZONE_NAME);
Handle<String> str = isolate->factory()->NewStringFromAsciiChecked(input); Handle<String> str = isolate->factory()->NewStringFromAsciiChecked(input);
RegExpCompileData result; RegExpCompileData result;
CHECK(RegExpParser::ParseRegExpFromHeapString(isolate, &zone, str, CHECK(RegExpParser::ParseRegExpFromHeapString(isolate, &zone, str, {},
JSRegExp::kNone, &result)); &result));
CHECK_NOT_NULL(result.tree); CHECK_NOT_NULL(result.tree);
CHECK(result.error == RegExpError::kNone); CHECK(result.error == RegExpError::kNone);
return result.simple; return result.simple;
...@@ -116,8 +116,8 @@ static MinMaxPair CheckMinMaxMatch(const char* input) { ...@@ -116,8 +116,8 @@ static MinMaxPair CheckMinMaxMatch(const char* input) {
Zone zone(isolate->allocator(), ZONE_NAME); Zone zone(isolate->allocator(), ZONE_NAME);
Handle<String> str = isolate->factory()->NewStringFromAsciiChecked(input); Handle<String> str = isolate->factory()->NewStringFromAsciiChecked(input);
RegExpCompileData result; RegExpCompileData result;
CHECK(RegExpParser::ParseRegExpFromHeapString(isolate, &zone, str, CHECK(RegExpParser::ParseRegExpFromHeapString(isolate, &zone, str, {},
JSRegExp::kNone, &result)); &result));
CHECK_NOT_NULL(result.tree); CHECK_NOT_NULL(result.tree);
CHECK(result.error == RegExpError::kNone); CHECK(result.error == RegExpError::kNone);
int min_match = result.tree->min_match(); int min_match = result.tree->min_match();
...@@ -430,8 +430,8 @@ static void ExpectError(const char* input, const char* expected, ...@@ -430,8 +430,8 @@ static void ExpectError(const char* input, const char* expected,
Zone zone(isolate->allocator(), ZONE_NAME); Zone zone(isolate->allocator(), ZONE_NAME);
Handle<String> str = isolate->factory()->NewStringFromAsciiChecked(input); Handle<String> str = isolate->factory()->NewStringFromAsciiChecked(input);
RegExpCompileData result; RegExpCompileData result;
JSRegExp::Flags flags = JSRegExp::kNone; RegExpFlags flags;
if (unicode) flags |= JSRegExp::kUnicode; if (unicode) flags |= RegExpFlag::kUnicode;
CHECK(!RegExpParser::ParseRegExpFromHeapString(isolate, &zone, str, flags, CHECK(!RegExpParser::ParseRegExpFromHeapString(isolate, &zone, str, flags,
&result)); &result));
CHECK_NULL(result.tree); CHECK_NULL(result.tree);
...@@ -533,9 +533,9 @@ static RegExpNode* Compile(const char* input, bool multiline, bool unicode, ...@@ -533,9 +533,9 @@ static RegExpNode* Compile(const char* input, bool multiline, bool unicode,
Handle<String> str = isolate->factory()->NewStringFromAsciiChecked(input); Handle<String> str = isolate->factory()->NewStringFromAsciiChecked(input);
RegExpCompileData compile_data; RegExpCompileData compile_data;
compile_data.compilation_target = RegExpCompilationTarget::kNative; compile_data.compilation_target = RegExpCompilationTarget::kNative;
JSRegExp::Flags flags = JSRegExp::kNone; RegExpFlags flags;
if (multiline) flags = JSRegExp::kMultiline; if (multiline) flags |= RegExpFlag::kMultiline;
if (unicode) flags = JSRegExp::kUnicode; if (unicode) flags |= RegExpFlag::kUnicode;
if (!RegExpParser::ParseRegExpFromHeapString(isolate, zone, str, flags, if (!RegExpParser::ParseRegExpFromHeapString(isolate, zone, str, flags,
&compile_data)) { &compile_data)) {
return nullptr; return nullptr;
...@@ -546,7 +546,8 @@ static RegExpNode* Compile(const char* input, bool multiline, bool unicode, ...@@ -546,7 +546,8 @@ static RegExpNode* Compile(const char* input, bool multiline, bool unicode,
Handle<String> sample_subject = isolate->factory() Handle<String> sample_subject = isolate->factory()
->NewStringFromUtf8(base::CStrVector("")) ->NewStringFromUtf8(base::CStrVector(""))
.ToHandleChecked(); .ToHandleChecked();
RegExp::CompileForTesting(isolate, zone, &compile_data, flags, pattern, RegExp::CompileForTesting(isolate, zone, &compile_data,
JSRegExp::AsJSRegExpFlags(flags), pattern,
sample_subject, is_one_byte); sample_subject, is_one_byte);
return compile_data.node; return compile_data.node;
} }
...@@ -638,7 +639,7 @@ static Handle<JSRegExp> CreateJSRegExp(Handle<String> source, Handle<Code> code, ...@@ -638,7 +639,7 @@ static Handle<JSRegExp> CreateJSRegExp(Handle<String> source, Handle<Code> code,
Handle<JSRegExp> regexp = Handle<JSRegExp> regexp =
Handle<JSRegExp>::cast(factory->NewJSObject(constructor)); Handle<JSRegExp>::cast(factory->NewJSObject(constructor));
factory->SetRegExpIrregexpData(regexp, source, JSRegExp::kNone, 0, factory->SetRegExpIrregexpData(regexp, source, {}, 0,
JSRegExp::kNoBacktrackLimit); JSRegExp::kNoBacktrackLimit);
regexp->SetDataAt(is_unicode ? JSRegExp::kIrregexpUC16CodeIndex regexp->SetDataAt(is_unicode ? JSRegExp::kIrregexpUC16CodeIndex
: JSRegExp::kIrregexpLatin1CodeIndex, : JSRegExp::kIrregexpLatin1CodeIndex,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment