Commit 5d456727 authored by Patrick Thier, committed by V8 LUCI CQ

[regexp] Add v-Flag for Unicode Sets

- Add v-flag and corresponding prototype getters.
- Update RegExp builtins fuzzer to handle two-byte flags.
- Update test262 status.

Bug: v8:11935
Change-Id: If649ebfacf1f933f3ae5c770c2240470a8b460ee
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3868952
Reviewed-by: Leszek Swirski <leszeks@chromium.org>
Reviewed-by: Jakob Linke <jgruber@chromium.org>
Commit-Queue: Patrick Thier <pthier@chromium.org>
Cr-Commit-Position: refs/heads/main@{#83003}
parent 71a2887f
...@@ -37,9 +37,10 @@ class V8_EXPORT RegExp : public Object { ...@@ -37,9 +37,10 @@ class V8_EXPORT RegExp : public Object {
kDotAll = 1 << 5, kDotAll = 1 << 5,
kLinear = 1 << 6, kLinear = 1 << 6,
kHasIndices = 1 << 7, kHasIndices = 1 << 7,
kUnicodeSets = 1 << 8,
}; };
static constexpr int kFlagCount = 8; static constexpr int kFlagCount = 9;
/** /**
* Creates a regular expression from the given pattern string and * Creates a regular expression from the given pattern string and
......
...@@ -7308,6 +7308,7 @@ REGEXP_FLAG_ASSERT_EQ(kSticky); ...@@ -7308,6 +7308,7 @@ REGEXP_FLAG_ASSERT_EQ(kSticky);
REGEXP_FLAG_ASSERT_EQ(kUnicode); REGEXP_FLAG_ASSERT_EQ(kUnicode);
REGEXP_FLAG_ASSERT_EQ(kHasIndices); REGEXP_FLAG_ASSERT_EQ(kHasIndices);
REGEXP_FLAG_ASSERT_EQ(kLinear); REGEXP_FLAG_ASSERT_EQ(kLinear);
REGEXP_FLAG_ASSERT_EQ(kUnicodeSets);
#undef REGEXP_FLAG_ASSERT_EQ #undef REGEXP_FLAG_ASSERT_EQ
v8::RegExp::Flags v8::RegExp::GetFlags() const { v8::RegExp::Flags v8::RegExp::GetFlags() const {
......
...@@ -1100,6 +1100,10 @@ TNode<String> RegExpBuiltinsAssembler::FlagsGetter(TNode<Context> context, ...@@ -1100,6 +1100,10 @@ TNode<String> RegExpBuiltinsAssembler::FlagsGetter(TNode<Context> context,
"linear", "linear",
ExternalReference::address_of_enable_experimental_regexp_engine(), ExternalReference::address_of_enable_experimental_regexp_engine(),
JSRegExp::kLinear); JSRegExp::kLinear);
CASE_FOR_FLAG(
"unicodeSets",
ExternalReference::address_of_FLAG_harmony_regexp_unicode_sets(),
JSRegExp::kUnicodeSets);
#undef CASE_FOR_FLAG #undef CASE_FOR_FLAG
} }
......
...@@ -200,7 +200,8 @@ extern enum Flag constexpr 'JSRegExp::Flag' { ...@@ -200,7 +200,8 @@ extern enum Flag constexpr 'JSRegExp::Flag' {
kUnicode, kUnicode,
kDotAll, kDotAll,
kHasIndices, kHasIndices,
kLinear kLinear,
kUnicodeSets
} }
const kNoCounterFlagGetter: constexpr int31 = -1; const kNoCounterFlagGetter: constexpr int31 = -1;
...@@ -296,6 +297,15 @@ transitioning javascript builtin RegExpPrototypeUnicodeGetter( ...@@ -296,6 +297,15 @@ transitioning javascript builtin RegExpPrototypeUnicodeGetter(
'RegExp.prototype.unicode'); 'RegExp.prototype.unicode');
} }
// ES2023 22.2.5.14
// ES #sec-get-regexp.prototype.unicodeSets
// Getter builtin for `RegExp.prototype.unicodeSets`. It does no work of its
// own: it forwards `receiver` to the shared FlagGetter helper together with
// the flag to query (Flag::kUnicodeSets) and returns whatever FlagGetter
// returns.
transitioning javascript builtin RegExpPrototypeUnicodeSetsGetter(
js-implicit context: NativeContext, receiver: JSAny)(): JSAny {
// kNoCounterFlagGetter (-1) fills the counter argument; presumably this means
// no use counter is recorded for this getter -- confirm against FlagGetter.
// The trailing string names the accessor, presumably for error messages --
// confirm against FlagGetter.
return FlagGetter(
receiver, Flag::kUnicodeSets, kNoCounterFlagGetter,
'RegExp.prototype.unicodeSets');
}
extern transitioning macro extern transitioning macro
RegExpBuiltinsAssembler::FlagsGetter(implicit context: Context)( RegExpBuiltinsAssembler::FlagsGetter(implicit context: Context)(
Object, constexpr bool): String; Object, constexpr bool): String;
......
...@@ -575,6 +575,11 @@ ExternalReference::address_of_mock_arraybuffer_allocator_flag() { ...@@ -575,6 +575,11 @@ ExternalReference::address_of_mock_arraybuffer_allocator_flag() {
return ExternalReference(&v8_flags.mock_arraybuffer_allocator); return ExternalReference(&v8_flags.mock_arraybuffer_allocator);
} }
// Returns an ExternalReference wrapping the address of the
// `harmony_regexp_unicode_sets` member of `v8_flags`.
ExternalReference
ExternalReference::address_of_FLAG_harmony_regexp_unicode_sets() {
  auto* const flag_location = &v8_flags.harmony_regexp_unicode_sets;
  return ExternalReference(flag_location);
}
// TODO(jgruber): Update the other extrefs pointing at v8_flags. addresses to be // TODO(jgruber): Update the other extrefs pointing at v8_flags. addresses to be
// called address_of_FLAG_foo (easier grep-ability). // called address_of_FLAG_foo (easier grep-ability).
ExternalReference ExternalReference::address_of_FLAG_trace_osr() { ExternalReference ExternalReference::address_of_FLAG_trace_osr() {
......
...@@ -96,6 +96,8 @@ class StatsCounter; ...@@ -96,6 +96,8 @@ class StatsCounter;
#define EXTERNAL_REFERENCE_LIST(V) \ #define EXTERNAL_REFERENCE_LIST(V) \
V(abort_with_reason, "abort_with_reason") \ V(abort_with_reason, "abort_with_reason") \
/* Name string must spell the flag exactly: harmony_regexp_unicode_sets. */ \
V(address_of_FLAG_harmony_regexp_unicode_sets, \
"v8_flags.harmony_regexp_unicode_sets") \
V(address_of_FLAG_trace_osr, "v8_flags.trace_osr") \ V(address_of_FLAG_trace_osr, "v8_flags.trace_osr") \
V(address_of_builtin_subclassing_flag, "v8_flags.builtin_subclassing") \ V(address_of_builtin_subclassing_flag, "v8_flags.builtin_subclassing") \
V(address_of_double_abs_constant, "double_absolute_constant") \ V(address_of_double_abs_constant, "double_absolute_constant") \
......
...@@ -229,7 +229,8 @@ DEFINE_BOOL(harmony_shipping, true, "enable all shipped harmony features") ...@@ -229,7 +229,8 @@ DEFINE_BOOL(harmony_shipping, true, "enable all shipped harmony features")
V(harmony_temporal, "Temporal") \ V(harmony_temporal, "Temporal") \
V(harmony_shadow_realm, "harmony ShadowRealm") \ V(harmony_shadow_realm, "harmony ShadowRealm") \
V(harmony_struct, "harmony structs, shared structs, and shared arrays") \ V(harmony_struct, "harmony structs, shared structs, and shared arrays") \
V(harmony_change_array_by_copy, "harmony change-Array-by-copy") V(harmony_change_array_by_copy, "harmony change-Array-by-copy") \
V(harmony_regexp_unicode_sets, "harmony RegExp Unicode Sets")
#ifdef V8_INTL_SUPPORT #ifdef V8_INTL_SUPPORT
#define HARMONY_INPROGRESS(V) \ #define HARMONY_INPROGRESS(V) \
......
...@@ -4551,6 +4551,20 @@ void Genesis::InitializeGlobal_harmony_change_array_by_copy() { ...@@ -4551,6 +4551,20 @@ void Genesis::InitializeGlobal_harmony_change_array_by_copy() {
} }
} }
void Genesis::InitializeGlobal_harmony_regexp_unicode_sets() {
if (!FLAG_harmony_regexp_unicode_sets) return;
Handle<JSFunction> regexp_fun(native_context()->regexp_function(), isolate());
Handle<JSObject> regexp_prototype(
JSObject::cast(regexp_fun->instance_prototype()), isolate());
SimpleInstallGetter(isolate(), regexp_prototype,
factory()->unicodeSets_string(),
Builtin::kRegExpPrototypeUnicodeSetsGetter, true);
// Store regexp prototype map again after change.
native_context()->set_regexp_prototype_map(regexp_prototype->map());
}
void Genesis::InitializeGlobal_harmony_shadow_realm() { void Genesis::InitializeGlobal_harmony_shadow_realm() {
if (!FLAG_harmony_shadow_realm) return; if (!FLAG_harmony_shadow_realm) return;
Factory* factory = isolate()->factory(); Factory* factory = isolate()->factory();
......
...@@ -420,6 +420,7 @@ ...@@ -420,6 +420,7 @@
V(_, undefined_string, "undefined") \ V(_, undefined_string, "undefined") \
V(_, undefined_to_string, "[object Undefined]") \ V(_, undefined_to_string, "[object Undefined]") \
V(_, unicode_string, "unicode") \ V(_, unicode_string, "unicode") \
V(_, unicodeSets_string, "unicodeSets") \
V(_, unit_string, "unit") \ V(_, unit_string, "unit") \
V(_, URIError_string, "URIError") \ V(_, URIError_string, "URIError") \
V(_, UTC_string, "UTC") \ V(_, UTC_string, "UTC") \
......
...@@ -233,7 +233,8 @@ MaybeHandle<JSRegExp> JSRegExp::Initialize(Handle<JSRegExp> regexp, ...@@ -233,7 +233,8 @@ MaybeHandle<JSRegExp> JSRegExp::Initialize(Handle<JSRegExp> regexp,
Isolate* isolate = regexp->GetIsolate(); Isolate* isolate = regexp->GetIsolate();
base::Optional<Flags> flags = base::Optional<Flags> flags =
JSRegExp::FlagsFromString(isolate, flags_string); JSRegExp::FlagsFromString(isolate, flags_string);
if (!flags.has_value()) { if (!flags.has_value() ||
!RegExp::VerifyFlags(JSRegExp::AsRegExpFlags(flags.value()))) {
THROW_NEW_ERROR( THROW_NEW_ERROR(
isolate, isolate,
NewSyntaxError(MessageTemplate::kInvalidRegExpFlags, flags_string), NewSyntaxError(MessageTemplate::kInvalidRegExpFlags, flags_string),
......
...@@ -99,6 +99,10 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> { ...@@ -99,6 +99,10 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> {
!v8_flags.enable_experimental_regexp_engine) { !v8_flags.enable_experimental_regexp_engine) {
return {}; return {};
} }
if (f.value() == RegExpFlag::kUnicodeSets &&
!FLAG_harmony_regexp_unicode_sets) {
return {};
}
return f; return f;
} }
......
...@@ -11,6 +11,7 @@ bitfield struct JSRegExpFlags extends uint31 { ...@@ -11,6 +11,7 @@ bitfield struct JSRegExpFlags extends uint31 {
dot_all: bool: 1 bit; dot_all: bool: 1 bit;
linear: bool: 1 bit; linear: bool: 1 bit;
has_indices: bool: 1 bit; has_indices: bool: 1 bit;
unicode_sets: bool: 1 bit;
} }
extern class JSRegExp extends JSObject { extern class JSRegExp extends JSObject {
......
...@@ -1156,6 +1156,7 @@ class ParserBase { ...@@ -1156,6 +1156,7 @@ class ParserBase {
const AstRawString* GetNextSymbolForRegExpLiteral() const { const AstRawString* GetNextSymbolForRegExpLiteral() const {
return scanner()->NextSymbol(ast_value_factory()); return scanner()->NextSymbol(ast_value_factory());
} }
bool ValidateRegExpFlags(RegExpFlags flags);
bool ValidateRegExpLiteral(const AstRawString* pattern, RegExpFlags flags, bool ValidateRegExpLiteral(const AstRawString* pattern, RegExpFlags flags,
RegExpError* regexp_error); RegExpError* regexp_error);
ExpressionT ParseRegExpLiteral(); ExpressionT ParseRegExpLiteral();
...@@ -1796,6 +1797,11 @@ ParserBase<Impl>::ParsePropertyOrPrivatePropertyName() { ...@@ -1796,6 +1797,11 @@ ParserBase<Impl>::ParsePropertyOrPrivatePropertyName() {
return key; return key;
} }
// Returns whether `flags` is an acceptable flag combination for a regexp
// literal, as decided by RegExp::VerifyFlags.
template <typename Impl>
bool ParserBase<Impl>::ValidateRegExpFlags(RegExpFlags flags) {
  const bool combination_is_valid = RegExp::VerifyFlags(flags);
  return combination_is_valid;
}
template <typename Impl> template <typename Impl>
bool ParserBase<Impl>::ValidateRegExpLiteral(const AstRawString* pattern, bool ParserBase<Impl>::ValidateRegExpLiteral(const AstRawString* pattern,
RegExpFlags flags, RegExpFlags flags,
...@@ -1827,7 +1833,7 @@ typename ParserBase<Impl>::ExpressionT ParserBase<Impl>::ParseRegExpLiteral() { ...@@ -1827,7 +1833,7 @@ typename ParserBase<Impl>::ExpressionT ParserBase<Impl>::ParseRegExpLiteral() {
const AstRawString* js_pattern = GetNextSymbolForRegExpLiteral(); const AstRawString* js_pattern = GetNextSymbolForRegExpLiteral();
base::Optional<RegExpFlags> flags = scanner()->ScanRegExpFlags(); base::Optional<RegExpFlags> flags = scanner()->ScanRegExpFlags();
if (!flags.has_value()) { if (!flags.has_value() || !ValidateRegExpFlags(flags.value())) {
Next(); Next();
ReportMessage(MessageTemplate::kMalformedRegExpFlags); ReportMessage(MessageTemplate::kMalformedRegExpFlags);
return impl()->FailureExpression(); return impl()->FailureExpression();
......
...@@ -17,14 +17,15 @@ namespace internal { ...@@ -17,14 +17,15 @@ namespace internal {
// Order is important! Sorted in alphabetic order by the flag char. Note this // Order is important! Sorted in alphabetic order by the flag char. Note this
// means that flag bits are shuffled. Take care to keep them contiguous when // means that flag bits are shuffled. Take care to keep them contiguous when
// adding/removing flags. // adding/removing flags.
#define REGEXP_FLAG_LIST(V) \ #define REGEXP_FLAG_LIST(V) \
V(has_indices, HasIndices, hasIndices, 'd', 7) \ V(has_indices, HasIndices, hasIndices, 'd', 7) \
V(global, Global, global, 'g', 0) \ V(global, Global, global, 'g', 0) \
V(ignore_case, IgnoreCase, ignoreCase, 'i', 1) \ V(ignore_case, IgnoreCase, ignoreCase, 'i', 1) \
V(linear, Linear, linear, 'l', 6) \ V(linear, Linear, linear, 'l', 6) \
V(multiline, Multiline, multiline, 'm', 2) \ V(multiline, Multiline, multiline, 'm', 2) \
V(dot_all, DotAll, dotAll, 's', 5) \ V(dot_all, DotAll, dotAll, 's', 5) \
V(unicode, Unicode, unicode, 'u', 4) \ V(unicode, Unicode, unicode, 'u', 4) \
V(unicode_sets, UnicodeSets, unicodeSets, 'v', 8) \
V(sticky, Sticky, sticky, 'y', 3) V(sticky, Sticky, sticky, 'y', 3)
#define V(Lower, Camel, LowerCamel, Char, Bit) k##Camel = 1 << Bit, #define V(Lower, Camel, LowerCamel, Char, Bit) k##Camel = 1 << Bit,
......
...@@ -107,6 +107,12 @@ bool RegExp::CanGenerateBytecode() { ...@@ -107,6 +107,12 @@ bool RegExp::CanGenerateBytecode() {
return v8_flags.regexp_interpret_all || v8_flags.regexp_tier_up; return v8_flags.regexp_interpret_all || v8_flags.regexp_tier_up;
} }
// static
// Returns true when `flags` is a valid combination. The only combination
// rejected here is having both the unicode and the unicodeSets flag set.
bool RegExp::VerifyFlags(RegExpFlags flags) {
  const bool has_conflicting_unicode_modes =
      IsUnicode(flags) && IsUnicodeSets(flags);
  return !has_conflicting_unicode_modes;
}
// static // static
template <class CharT> template <class CharT>
bool RegExp::VerifySyntax(Zone* zone, uintptr_t stack_limit, const CharT* input, bool RegExp::VerifySyntax(Zone* zone, uintptr_t stack_limit, const CharT* input,
......
...@@ -70,6 +70,9 @@ class RegExp final : public AllStatic { ...@@ -70,6 +70,9 @@ class RegExp final : public AllStatic {
// Whether the irregexp engine generates interpreter bytecode. // Whether the irregexp engine generates interpreter bytecode.
static bool CanGenerateBytecode(); static bool CanGenerateBytecode();
// Verify that the given flags combination is valid.
V8_EXPORT_PRIVATE static bool VerifyFlags(RegExpFlags flags);
// Verify the given pattern, i.e. check that parsing succeeds. If // Verify the given pattern, i.e. check that parsing succeeds. If
// verification fails, `regexp_error_out` is set. // verification fails, `regexp_error_out` is set.
template <class CharT> template <class CharT>
......
...@@ -15,6 +15,7 @@ ...@@ -15,6 +15,7 @@
#include "include/v8-primitive.h" #include "include/v8-primitive.h"
#include "include/v8-script.h" #include "include/v8-script.h"
#include "src/objects/objects-inl.h" #include "src/objects/objects-inl.h"
#include "src/regexp/regexp.h"
#include "test/fuzzer/fuzzer-support.h" #include "test/fuzzer/fuzzer-support.h"
// This is a hexdump of test/fuzzer/regexp_builtins/mjsunit.js generated using // This is a hexdump of test/fuzzer/regexp_builtins/mjsunit.js generated using
...@@ -242,18 +243,31 @@ std::string PickLimitForSplit(FuzzerArgs* args) { ...@@ -242,18 +243,31 @@ std::string PickLimitForSplit(FuzzerArgs* args) {
} }
std::string GenerateRandomFlags(FuzzerArgs* args) { std::string GenerateRandomFlags(FuzzerArgs* args) {
constexpr int kFlagCount = JSRegExp::kFlagCount;
static_assert((1 << kFlagCount) - 1 <= 0xFFFF);
// TODO(mbid,v8:10765): Find a way to generate the kLinear flag sometimes, // TODO(mbid,v8:10765): Find a way to generate the kLinear flag sometimes,
// but only for patterns that are supported by the experimental engine. // but only for patterns that are supported by the experimental engine.
constexpr size_t kFlagCount = JSRegExp::kFlagCount; constexpr int kFuzzableFlagCount = kFlagCount - 1;
CHECK_EQ(JSRegExp::kHasIndices, 1 << (kFlagCount - 1)); constexpr uint32_t kFuzzableFlagsMask =
CHECK_EQ(JSRegExp::kLinear, 1 << (kFlagCount - 2)); ((1 << kFlagCount) - 1) & (~JSRegExp::kLinear);
CHECK_EQ(JSRegExp::kDotAll, 1 << (kFlagCount - 3));
static_assert((1 << kFlagCount) - 1 <= 0xFF); const uint8_t byte1 = RandomByte(args);
const uint8_t byte2 = RandomByte(args);
const uint16_t random_two_byte = (byte1 << 8) | byte2;
const size_t flags = RandomByte(args) & ((1 << kFlagCount) - 1); uint32_t flags = random_two_byte & kFuzzableFlagsMask;
int cursor = 0; int cursor = 0;
char buffer[kFlagCount] = {'\0'}; char buffer[kFuzzableFlagCount] = {'\0'};
// 'u' and 'v' are incompatible. If both are set randomly, clear
// one based on the random bit of the (unused) JSRegExp::kLinear flag.
if ((flags & JSRegExp::kUnicode) && (flags & JSRegExp::kUnicodeSets)) {
const bool rand_bit = random_two_byte & JSRegExp::kLinear;
flags &= rand_bit ? ~JSRegExp::kUnicode : ~JSRegExp::kUnicodeSets;
}
DCHECK(RegExp::VerifyFlags(RegExpFlags{static_cast<int>(flags)}));
if (flags & JSRegExp::kGlobal) buffer[cursor++] = 'g'; if (flags & JSRegExp::kGlobal) buffer[cursor++] = 'g';
if (flags & JSRegExp::kIgnoreCase) buffer[cursor++] = 'i'; if (flags & JSRegExp::kIgnoreCase) buffer[cursor++] = 'i';
...@@ -261,7 +275,9 @@ std::string GenerateRandomFlags(FuzzerArgs* args) { ...@@ -261,7 +275,9 @@ std::string GenerateRandomFlags(FuzzerArgs* args) {
if (flags & JSRegExp::kSticky) buffer[cursor++] = 'y'; if (flags & JSRegExp::kSticky) buffer[cursor++] = 'y';
if (flags & JSRegExp::kUnicode) buffer[cursor++] = 'u'; if (flags & JSRegExp::kUnicode) buffer[cursor++] = 'u';
if (flags & JSRegExp::kDotAll) buffer[cursor++] = 's'; if (flags & JSRegExp::kDotAll) buffer[cursor++] = 's';
CHECK_EQ(flags & JSRegExp::kLinear, 0);
if (flags & JSRegExp::kHasIndices) buffer[cursor++] = 'd'; if (flags & JSRegExp::kHasIndices) buffer[cursor++] = 'd';
if (flags & JSRegExp::kUnicodeSets) buffer[cursor++] = 'v';
return std::string(buffer, cursor); return std::string(buffer, cursor);
} }
......
This diff is collapsed.
...@@ -41,17 +41,18 @@ from testrunner.outproc import test262 ...@@ -41,17 +41,18 @@ from testrunner.outproc import test262
# TODO(littledan): move the flag mapping into the status file # TODO(littledan): move the flag mapping into the status file
FEATURE_FLAGS = { FEATURE_FLAGS = {
'Intl.NumberFormat-v3': '--harmony_intl_number_format_v3', 'Intl.NumberFormat-v3': '--harmony_intl_number_format_v3',
'Symbol.prototype.description': '--harmony-symbol-description', 'Symbol.prototype.description': '--harmony-symbol-description',
'FinalizationRegistry': '--harmony-weak-refs-with-cleanup-some', 'FinalizationRegistry': '--harmony-weak-refs-with-cleanup-some',
'WeakRef': '--harmony-weak-refs-with-cleanup-some', 'WeakRef': '--harmony-weak-refs-with-cleanup-some',
'host-gc-required': '--expose-gc-as=v8GC', 'host-gc-required': '--expose-gc-as=v8GC',
'IsHTMLDDA': '--allow-natives-syntax', 'IsHTMLDDA': '--allow-natives-syntax',
'import-assertions': '--harmony-import-assertions', 'import-assertions': '--harmony-import-assertions',
'resizable-arraybuffer': '--harmony-rab-gsab', 'resizable-arraybuffer': '--harmony-rab-gsab',
'Temporal': '--harmony-temporal', 'Temporal': '--harmony-temporal',
'array-find-from-last': '--harmony_array_find_last', 'array-find-from-last': '--harmony_array_find_last',
'ShadowRealm': '--harmony-shadow-realm', 'ShadowRealm': '--harmony-shadow-realm',
'regexp-v-flag': '--harmony-regexp-unicode-sets',
} }
SKIPPED_FEATURES = set([]) SKIPPED_FEATURES = set([])
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment