Commit 5d456727 authored by Patrick Thier, committed by V8 LUCI CQ

[regexp] Add v-Flag for Unicode Sets

- Add v-flag and corresponding prototype getters.
- Update RegExp builtins fuzzer to handle two-byte flags.
- Update test262 status.

Bug: v8:11935
Change-Id: If649ebfacf1f933f3ae5c770c2240470a8b460ee
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3868952
Reviewed-by: Leszek Swirski <leszeks@chromium.org>
Reviewed-by: Jakob Linke <jgruber@chromium.org>
Commit-Queue: Patrick Thier <pthier@chromium.org>
Cr-Commit-Position: refs/heads/main@{#83003}
parent 71a2887f
......@@ -37,9 +37,10 @@ class V8_EXPORT RegExp : public Object {
kDotAll = 1 << 5,
kLinear = 1 << 6,
kHasIndices = 1 << 7,
kUnicodeSets = 1 << 8,
};
static constexpr int kFlagCount = 8;
static constexpr int kFlagCount = 9;
/**
* Creates a regular expression from the given pattern string and
......
......@@ -7308,6 +7308,7 @@ REGEXP_FLAG_ASSERT_EQ(kSticky);
REGEXP_FLAG_ASSERT_EQ(kUnicode);
REGEXP_FLAG_ASSERT_EQ(kHasIndices);
REGEXP_FLAG_ASSERT_EQ(kLinear);
REGEXP_FLAG_ASSERT_EQ(kUnicodeSets);
#undef REGEXP_FLAG_ASSERT_EQ
v8::RegExp::Flags v8::RegExp::GetFlags() const {
......
......@@ -1100,6 +1100,10 @@ TNode<String> RegExpBuiltinsAssembler::FlagsGetter(TNode<Context> context,
"linear",
ExternalReference::address_of_enable_experimental_regexp_engine(),
JSRegExp::kLinear);
CASE_FOR_FLAG(
"unicodeSets",
ExternalReference::address_of_FLAG_harmony_regexp_unicode_sets(),
JSRegExp::kUnicodeSets);
#undef CASE_FOR_FLAG
}
......
......@@ -200,7 +200,8 @@ extern enum Flag constexpr 'JSRegExp::Flag' {
kUnicode,
kDotAll,
kHasIndices,
kLinear
kLinear,
kUnicodeSets
}
const kNoCounterFlagGetter: constexpr int31 = -1;
......@@ -296,6 +297,15 @@ transitioning javascript builtin RegExpPrototypeUnicodeGetter(
'RegExp.prototype.unicode');
}
// ES2023 22.2.5.14
// ES #sec-get-regexp.prototype.unicodeSets
// Accessor builtin backing 'RegExp.prototype.unicodeSets'. It forwards the
// receiver to FlagGetter for Flag::kUnicodeSets and passes
// kNoCounterFlagGetter as the counter argument; the final string names the
// accessor.
transitioning javascript builtin RegExpPrototypeUnicodeSetsGetter(
js-implicit context: NativeContext, receiver: JSAny)(): JSAny {
return FlagGetter(
receiver, Flag::kUnicodeSets, kNoCounterFlagGetter,
'RegExp.prototype.unicodeSets');
}
extern transitioning macro
RegExpBuiltinsAssembler::FlagsGetter(implicit context: Context)(
Object, constexpr bool): String;
......
......@@ -575,6 +575,11 @@ ExternalReference::address_of_mock_arraybuffer_allocator_flag() {
return ExternalReference(&v8_flags.mock_arraybuffer_allocator);
}
// Returns an ExternalReference constructed from the address of
// v8_flags.harmony_regexp_unicode_sets.
ExternalReference
ExternalReference::address_of_FLAG_harmony_regexp_unicode_sets() {
return ExternalReference(&v8_flags.harmony_regexp_unicode_sets);
}
// TODO(jgruber): Update the other extrefs pointing at v8_flags. addresses to be
// called address_of_FLAG_foo (easier grep-ability).
ExternalReference ExternalReference::address_of_FLAG_trace_osr() {
......
......@@ -96,6 +96,8 @@ class StatsCounter;
#define EXTERNAL_REFERENCE_LIST(V) \
V(abort_with_reason, "abort_with_reason") \
V(address_of_FLAG_harmony_regexp_unicode_sets, \
"v8_flags.harmony_regexp_unicode_sets") \
V(address_of_FLAG_trace_osr, "v8_flags.trace_osr") \
V(address_of_builtin_subclassing_flag, "v8_flags.builtin_subclassing") \
V(address_of_double_abs_constant, "double_absolute_constant") \
......
......@@ -229,7 +229,8 @@ DEFINE_BOOL(harmony_shipping, true, "enable all shipped harmony features")
V(harmony_temporal, "Temporal") \
V(harmony_shadow_realm, "harmony ShadowRealm") \
V(harmony_struct, "harmony structs, shared structs, and shared arrays") \
V(harmony_change_array_by_copy, "harmony change-Array-by-copy")
V(harmony_change_array_by_copy, "harmony change-Array-by-copy") \
V(harmony_regexp_unicode_sets, "harmony RegExp Unicode Sets")
#ifdef V8_INTL_SUPPORT
#define HARMONY_INPROGRESS(V) \
......
......@@ -4551,6 +4551,20 @@ void Genesis::InitializeGlobal_harmony_change_array_by_copy() {
}
}
void Genesis::InitializeGlobal_harmony_regexp_unicode_sets() {
if (!FLAG_harmony_regexp_unicode_sets) return;
Handle<JSFunction> regexp_fun(native_context()->regexp_function(), isolate());
Handle<JSObject> regexp_prototype(
JSObject::cast(regexp_fun->instance_prototype()), isolate());
SimpleInstallGetter(isolate(), regexp_prototype,
factory()->unicodeSets_string(),
Builtin::kRegExpPrototypeUnicodeSetsGetter, true);
// Store regexp prototype map again after change.
native_context()->set_regexp_prototype_map(regexp_prototype->map());
}
void Genesis::InitializeGlobal_harmony_shadow_realm() {
if (!FLAG_harmony_shadow_realm) return;
Factory* factory = isolate()->factory();
......
......@@ -420,6 +420,7 @@
V(_, undefined_string, "undefined") \
V(_, undefined_to_string, "[object Undefined]") \
V(_, unicode_string, "unicode") \
V(_, unicodeSets_string, "unicodeSets") \
V(_, unit_string, "unit") \
V(_, URIError_string, "URIError") \
V(_, UTC_string, "UTC") \
......
......@@ -233,7 +233,8 @@ MaybeHandle<JSRegExp> JSRegExp::Initialize(Handle<JSRegExp> regexp,
Isolate* isolate = regexp->GetIsolate();
base::Optional<Flags> flags =
JSRegExp::FlagsFromString(isolate, flags_string);
if (!flags.has_value()) {
if (!flags.has_value() ||
!RegExp::VerifyFlags(JSRegExp::AsRegExpFlags(flags.value()))) {
THROW_NEW_ERROR(
isolate,
NewSyntaxError(MessageTemplate::kInvalidRegExpFlags, flags_string),
......
......@@ -99,6 +99,10 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> {
!v8_flags.enable_experimental_regexp_engine) {
return {};
}
if (f.value() == RegExpFlag::kUnicodeSets &&
!FLAG_harmony_regexp_unicode_sets) {
return {};
}
return f;
}
......
......@@ -11,6 +11,7 @@ bitfield struct JSRegExpFlags extends uint31 {
dot_all: bool: 1 bit;
linear: bool: 1 bit;
has_indices: bool: 1 bit;
unicode_sets: bool: 1 bit;
}
extern class JSRegExp extends JSObject {
......
......@@ -1156,6 +1156,7 @@ class ParserBase {
// Returns the result of the scanner's NextSymbol() call made with
// ast_value_factory().
const AstRawString* GetNextSymbolForRegExpLiteral() const {
return scanner()->NextSymbol(ast_value_factory());
}
bool ValidateRegExpFlags(RegExpFlags flags);
bool ValidateRegExpLiteral(const AstRawString* pattern, RegExpFlags flags,
RegExpError* regexp_error);
ExpressionT ParseRegExpLiteral();
......@@ -1796,6 +1797,11 @@ ParserBase<Impl>::ParsePropertyOrPrivatePropertyName() {
return key;
}
// Returns true when `flags` passes RegExp::VerifyFlags; this is a direct
// forward to that check.
template <typename Impl>
bool ParserBase<Impl>::ValidateRegExpFlags(RegExpFlags flags) {
return RegExp::VerifyFlags(flags);
}
template <typename Impl>
bool ParserBase<Impl>::ValidateRegExpLiteral(const AstRawString* pattern,
RegExpFlags flags,
......@@ -1827,7 +1833,7 @@ typename ParserBase<Impl>::ExpressionT ParserBase<Impl>::ParseRegExpLiteral() {
const AstRawString* js_pattern = GetNextSymbolForRegExpLiteral();
base::Optional<RegExpFlags> flags = scanner()->ScanRegExpFlags();
if (!flags.has_value()) {
if (!flags.has_value() || !ValidateRegExpFlags(flags.value())) {
Next();
ReportMessage(MessageTemplate::kMalformedRegExpFlags);
return impl()->FailureExpression();
......
......@@ -17,14 +17,15 @@ namespace internal {
// Order is important! Sorted in alphabetic order by the flag char. Note this
// means that flag bits are shuffled. Take care to keep them contiguous when
// adding/removing flags.
#define REGEXP_FLAG_LIST(V) \
V(has_indices, HasIndices, hasIndices, 'd', 7) \
V(global, Global, global, 'g', 0) \
V(ignore_case, IgnoreCase, ignoreCase, 'i', 1) \
V(linear, Linear, linear, 'l', 6) \
V(multiline, Multiline, multiline, 'm', 2) \
V(dot_all, DotAll, dotAll, 's', 5) \
V(unicode, Unicode, unicode, 'u', 4) \
#define REGEXP_FLAG_LIST(V) \
V(has_indices, HasIndices, hasIndices, 'd', 7) \
V(global, Global, global, 'g', 0) \
V(ignore_case, IgnoreCase, ignoreCase, 'i', 1) \
V(linear, Linear, linear, 'l', 6) \
V(multiline, Multiline, multiline, 'm', 2) \
V(dot_all, DotAll, dotAll, 's', 5) \
V(unicode, Unicode, unicode, 'u', 4) \
V(unicode_sets, UnicodeSets, unicodeSets, 'v', 8) \
V(sticky, Sticky, sticky, 'y', 3)
#define V(Lower, Camel, LowerCamel, Char, Bit) k##Camel = 1 << Bit,
......
......@@ -107,6 +107,12 @@ bool RegExp::CanGenerateBytecode() {
return v8_flags.regexp_interpret_all || v8_flags.regexp_tier_up;
}
// static
bool RegExp::VerifyFlags(RegExpFlags flags) {
  // The only combination rejected here is having both the unicode and the
  // unicodeSets flag set at once.
  const bool has_both_unicode_modes = IsUnicode(flags) && IsUnicodeSets(flags);
  return !has_both_unicode_modes;
}
// static
template <class CharT>
bool RegExp::VerifySyntax(Zone* zone, uintptr_t stack_limit, const CharT* input,
......
......@@ -70,6 +70,9 @@ class RegExp final : public AllStatic {
// Whether the irregexp engine generates interpreter bytecode.
static bool CanGenerateBytecode();
// Verify that the given flags combination is valid.
V8_EXPORT_PRIVATE static bool VerifyFlags(RegExpFlags flags);
// Verify the given pattern, i.e. check that parsing succeeds. If
// verification fails, `regexp_error_out` is set.
template <class CharT>
......
......@@ -15,6 +15,7 @@
#include "include/v8-primitive.h"
#include "include/v8-script.h"
#include "src/objects/objects-inl.h"
#include "src/regexp/regexp.h"
#include "test/fuzzer/fuzzer-support.h"
// This is a hexdump of test/fuzzer/regexp_builtins/mjsunit.js generated using
......@@ -242,18 +243,31 @@ std::string PickLimitForSplit(FuzzerArgs* args) {
}
std::string GenerateRandomFlags(FuzzerArgs* args) {
constexpr int kFlagCount = JSRegExp::kFlagCount;
static_assert((1 << kFlagCount) - 1 <= 0xFFFF);
// TODO(mbid,v8:10765): Find a way to generate the kLinear flag sometimes,
// but only for patterns that are supported by the experimental engine.
constexpr size_t kFlagCount = JSRegExp::kFlagCount;
CHECK_EQ(JSRegExp::kHasIndices, 1 << (kFlagCount - 1));
CHECK_EQ(JSRegExp::kLinear, 1 << (kFlagCount - 2));
CHECK_EQ(JSRegExp::kDotAll, 1 << (kFlagCount - 3));
static_assert((1 << kFlagCount) - 1 <= 0xFF);
constexpr int kFuzzableFlagCount = kFlagCount - 1;
constexpr uint32_t kFuzzableFlagsMask =
((1 << kFlagCount) - 1) & (~JSRegExp::kLinear);
const uint8_t byte1 = RandomByte(args);
const uint8_t byte2 = RandomByte(args);
const uint16_t random_two_byte = (byte1 << 8) | byte2;
const size_t flags = RandomByte(args) & ((1 << kFlagCount) - 1);
uint32_t flags = random_two_byte & kFuzzableFlagsMask;
int cursor = 0;
char buffer[kFlagCount] = {'\0'};
char buffer[kFuzzableFlagCount] = {'\0'};
// 'u' and 'v' are incompatible. If both are set randomly, clear
// one based on the random bit of the (unused) JSRegExp::kLinear flag.
if ((flags & JSRegExp::kUnicode) && (flags & JSRegExp::kUnicodeSets)) {
const bool rand_bit = random_two_byte & JSRegExp::kLinear;
flags &= rand_bit ? ~JSRegExp::kUnicode : ~JSRegExp::kUnicodeSets;
}
DCHECK(RegExp::VerifyFlags(RegExpFlags{static_cast<int>(flags)}));
if (flags & JSRegExp::kGlobal) buffer[cursor++] = 'g';
if (flags & JSRegExp::kIgnoreCase) buffer[cursor++] = 'i';
......@@ -261,7 +275,9 @@ std::string GenerateRandomFlags(FuzzerArgs* args) {
if (flags & JSRegExp::kSticky) buffer[cursor++] = 'y';
if (flags & JSRegExp::kUnicode) buffer[cursor++] = 'u';
if (flags & JSRegExp::kDotAll) buffer[cursor++] = 's';
CHECK_EQ(flags & JSRegExp::kLinear, 0);
if (flags & JSRegExp::kHasIndices) buffer[cursor++] = 'd';
if (flags & JSRegExp::kUnicodeSets) buffer[cursor++] = 'v';
return std::string(buffer, cursor);
}
......
This diff is collapsed.
......@@ -41,17 +41,18 @@ from testrunner.outproc import test262
# TODO(littledan): move the flag mapping into the status file
FEATURE_FLAGS = {
'Intl.NumberFormat-v3': '--harmony_intl_number_format_v3',
'Symbol.prototype.description': '--harmony-symbol-description',
'FinalizationRegistry': '--harmony-weak-refs-with-cleanup-some',
'WeakRef': '--harmony-weak-refs-with-cleanup-some',
'host-gc-required': '--expose-gc-as=v8GC',
'IsHTMLDDA': '--allow-natives-syntax',
'import-assertions': '--harmony-import-assertions',
'resizable-arraybuffer': '--harmony-rab-gsab',
'Temporal': '--harmony-temporal',
'array-find-from-last': '--harmony_array_find_last',
'ShadowRealm': '--harmony-shadow-realm',
'Intl.NumberFormat-v3': '--harmony_intl_number_format_v3',
'Symbol.prototype.description': '--harmony-symbol-description',
'FinalizationRegistry': '--harmony-weak-refs-with-cleanup-some',
'WeakRef': '--harmony-weak-refs-with-cleanup-some',
'host-gc-required': '--expose-gc-as=v8GC',
'IsHTMLDDA': '--allow-natives-syntax',
'import-assertions': '--harmony-import-assertions',
'resizable-arraybuffer': '--harmony-rab-gsab',
'Temporal': '--harmony-temporal',
'array-find-from-last': '--harmony_array_find_last',
'ShadowRealm': '--harmony-shadow-realm',
'regexp-v-flag': '--harmony-regexp-unicode-sets',
}
SKIPPED_FEATURES = set([])
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment