Commit ff5a3e3d, authored by Jakob Gruber, committed by Commit Bot

[scanner] Use JSRegExp::Flags instead of v8::RegExp::Flags

There's no reason to use the API RegExp type instead of the internal
JSRegExp type. In fact, the parsed flags end up in
Runtime_CreateRegExpLiteral, which assumes them to be of type
JSRegExp::Flags.

Drive-by: Additional asserts and helper functions in JSRegExp.

Bug: v8:9359
Change-Id: I5c12aba7d4e39a4891fb23d8b47c55fc480a28d9
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1667004
Reviewed-by: Leszek Swirski <leszeks@chromium.org>
Reviewed-by: Yang Guo <yangguo@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#62376}
parent 2a440bb2
......@@ -5313,6 +5313,8 @@ class V8_EXPORT RegExp : public Object {
kDotAll = 1 << 5,
};
static constexpr int kFlagCount = 6;
/**
* Creates a regular expression from the given pattern string and
* the flags bit field. May throw a JavaScript exception as
......
......@@ -1540,7 +1540,8 @@ TNode<Int32T> RegExpBuiltinsAssembler::FastFlagGetter(TNode<JSRegExp> regexp,
JSRegExp::Flag flag) {
TNode<Smi> flags = CAST(LoadObjectField(regexp, JSRegExp::kFlagsOffset));
TNode<Smi> mask = SmiConstant(flag);
return SmiToInt32(SmiShr(SmiAnd(flags, mask), JSRegExp::FlagShiftBits(flag)));
return SmiToInt32(SmiShr(SmiAnd(flags, mask), base::bits::CountTrailingZeros(
static_cast<int>(flag))));
}
// Load through the GetProperty stub.
......
......@@ -37,13 +37,13 @@ class JSRegExp : public JSObject {
// IRREGEXP: Compiled with Irregexp.
enum Type { NOT_COMPILED, ATOM, IRREGEXP };
struct FlagShiftBit {
static const int kGlobal = 0;
static const int kIgnoreCase = 1;
static const int kMultiline = 2;
static const int kSticky = 3;
static const int kUnicode = 4;
static const int kDotAll = 5;
static const int kInvalid = 7;
static constexpr int kGlobal = 0;
static constexpr int kIgnoreCase = 1;
static constexpr int kMultiline = 2;
static constexpr int kSticky = 3;
static constexpr int kUnicode = 4;
static constexpr int kDotAll = 5;
static constexpr int kInvalid = 6;
};
enum Flag : uint8_t {
kNone = 0,
......@@ -57,28 +57,31 @@ class JSRegExp : public JSObject {
kInvalid = 1 << FlagShiftBit::kInvalid, // Not included in FlagCount.
};
using Flags = base::Flags<Flag>;
static constexpr int FlagCount() { return 6; }
static int FlagShiftBits(Flag flag) {
switch (flag) {
case kGlobal:
return FlagShiftBit::kGlobal;
case kIgnoreCase:
return FlagShiftBit::kIgnoreCase;
case kMultiline:
return FlagShiftBit::kMultiline;
case kSticky:
return FlagShiftBit::kSticky;
case kUnicode:
return FlagShiftBit::kUnicode;
case kDotAll:
return FlagShiftBit::kDotAll;
default:
STATIC_ASSERT(FlagCount() == 6);
UNREACHABLE();
}
static constexpr int kFlagCount = 6;
// Maps a single regexp flag character to the corresponding Flag value.
// Returns kInvalid for any character that is not one of the six known flags
// ('g', 'i', 'm', 'y', 'u', 's').
//
// The parameter is intentionally wider than `char`. Callers pass code units
// read from flat string content (previously handled by a helper that took a
// 16-bit code unit), and the scanner passes its current input character. With
// a `char` parameter such values would be narrowed implicitly before the
// comparison, so a code unit whose low byte happens to equal a flag letter
// (e.g. U+0167, low byte 0x67 == 'g') would be accepted as that flag. Taking
// an `int` compares the full value; existing `char` callers are unaffected.
static constexpr Flag FlagFromChar(int c) {
  // Keep this mapping in sync with the number of flags.
  STATIC_ASSERT(kFlagCount == 6);
  // clang-format off
  return c == 'g' ? kGlobal
       : c == 'i' ? kIgnoreCase
       : c == 'm' ? kMultiline
       : c == 'y' ? kSticky
       : c == 'u' ? kUnicode
       : c == 's' ? kDotAll
       : kInvalid;
  // clang-format on
}
STATIC_ASSERT(static_cast<int>(kNone) == v8::RegExp::kNone);
STATIC_ASSERT(static_cast<int>(kGlobal) == v8::RegExp::kGlobal);
STATIC_ASSERT(static_cast<int>(kIgnoreCase) == v8::RegExp::kIgnoreCase);
STATIC_ASSERT(static_cast<int>(kMultiline) == v8::RegExp::kMultiline);
STATIC_ASSERT(static_cast<int>(kSticky) == v8::RegExp::kSticky);
STATIC_ASSERT(static_cast<int>(kUnicode) == v8::RegExp::kUnicode);
STATIC_ASSERT(static_cast<int>(kDotAll) == v8::RegExp::kDotAll);
STATIC_ASSERT(kFlagCount == v8::RegExp::kFlagCount);
DECL_ACCESSORS(data, Object)
DECL_ACCESSORS(flags, Object)
DECL_ACCESSORS(last_index, Object)
......
......@@ -6072,42 +6072,14 @@ Handle<Object> JSPromise::TriggerPromiseReactions(Isolate* isolate,
namespace {
constexpr JSRegExp::Flag kCharFlagValues[] = {
JSRegExp::kGlobal, // g
JSRegExp::kInvalid, // h
JSRegExp::kIgnoreCase, // i
JSRegExp::kInvalid, // j
JSRegExp::kInvalid, // k
JSRegExp::kInvalid, // l
JSRegExp::kMultiline, // m
JSRegExp::kInvalid, // n
JSRegExp::kInvalid, // o
JSRegExp::kInvalid, // p
JSRegExp::kInvalid, // q
JSRegExp::kInvalid, // r
JSRegExp::kDotAll, // s
JSRegExp::kInvalid, // t
JSRegExp::kUnicode, // u
JSRegExp::kInvalid, // v
JSRegExp::kInvalid, // w
JSRegExp::kInvalid, // x
JSRegExp::kSticky, // y
};
constexpr JSRegExp::Flag CharToFlag(uc16 flag_char) {
return (flag_char < 'g' || flag_char > 'y')
? JSRegExp::kInvalid
: kCharFlagValues[flag_char - 'g'];
}
JSRegExp::Flags RegExpFlagsFromString(Isolate* isolate, Handle<String> flags,
bool* success) {
STATIC_ASSERT(CharToFlag('g') == JSRegExp::kGlobal);
STATIC_ASSERT(CharToFlag('i') == JSRegExp::kIgnoreCase);
STATIC_ASSERT(CharToFlag('m') == JSRegExp::kMultiline);
STATIC_ASSERT(CharToFlag('s') == JSRegExp::kDotAll);
STATIC_ASSERT(CharToFlag('u') == JSRegExp::kUnicode);
STATIC_ASSERT(CharToFlag('y') == JSRegExp::kSticky);
STATIC_ASSERT(JSRegExp::FlagFromChar('g') == JSRegExp::kGlobal);
STATIC_ASSERT(JSRegExp::FlagFromChar('i') == JSRegExp::kIgnoreCase);
STATIC_ASSERT(JSRegExp::FlagFromChar('m') == JSRegExp::kMultiline);
STATIC_ASSERT(JSRegExp::FlagFromChar('s') == JSRegExp::kDotAll);
STATIC_ASSERT(JSRegExp::FlagFromChar('u') == JSRegExp::kUnicode);
STATIC_ASSERT(JSRegExp::FlagFromChar('y') == JSRegExp::kSticky);
int length = flags->length();
if (length == 0) {
......@@ -6115,14 +6087,14 @@ JSRegExp::Flags RegExpFlagsFromString(Isolate* isolate, Handle<String> flags,
return JSRegExp::kNone;
}
// A longer flags string cannot be valid.
if (length > JSRegExp::FlagCount()) return JSRegExp::Flags(0);
if (length > JSRegExp::kFlagCount) return JSRegExp::Flags(0);
// Initialize {value} to {kInvalid} to allow 2-in-1 duplicate/invalid check.
JSRegExp::Flags value = JSRegExp::kInvalid;
if (flags->IsSeqOneByteString()) {
DisallowHeapAllocation no_gc;
SeqOneByteString seq_flags = SeqOneByteString::cast(*flags);
for (int i = 0; i < length; i++) {
JSRegExp::Flag flag = CharToFlag(seq_flags.Get(i));
JSRegExp::Flag flag = JSRegExp::FlagFromChar(seq_flags.Get(i));
// Duplicate or invalid flag.
if (value & flag) return JSRegExp::Flags(0);
value |= flag;
......@@ -6132,7 +6104,7 @@ JSRegExp::Flags RegExpFlagsFromString(Isolate* isolate, Handle<String> flags,
DisallowHeapAllocation no_gc;
String::FlatContent flags_content = flags->GetFlatContent(no_gc);
for (int i = 0; i < length; i++) {
JSRegExp::Flag flag = CharToFlag(flags_content.Get(i));
JSRegExp::Flag flag = JSRegExp::FlagFromChar(flags_content.Get(i));
// Duplicate or invalid flag.
if (value & flag) return JSRegExp::Flags(0);
value |= flag;
......
......@@ -1583,7 +1583,7 @@ MaybeHandle<JSRegExp> ValueDeserializer::ReadJSRegExp() {
// Ensure the deserialized flags are valid.
// TODO(adamk): Can we remove this check now that dotAll is always-on?
uint32_t flags_mask = static_cast<uint32_t>(-1) << JSRegExp::FlagCount();
uint32_t flags_mask = static_cast<uint32_t>(-1) << JSRegExp::kFlagCount;
if ((raw_flags & flags_mask) ||
!JSRegExp::New(isolate_, pattern, static_cast<JSRegExp::Flags>(raw_flags))
.ToHandle(&regexp)) {
......
......@@ -1595,15 +1595,14 @@ typename ParserBase<Impl>::ExpressionT ParserBase<Impl>::ParseRegExpLiteral() {
}
IdentifierT js_pattern = impl()->GetNextSymbol();
Maybe<v8::RegExp::Flags> flags = scanner()->ScanRegExpFlags();
Maybe<int> flags = scanner()->ScanRegExpFlags();
if (flags.IsNothing()) {
Next();
ReportMessage(MessageTemplate::kMalformedRegExpFlags);
return impl()->FailureExpression();
}
int js_flags = flags.FromJust();
Next();
return factory()->NewRegExpLiteral(js_pattern, js_flags, pos);
return factory()->NewRegExpLiteral(js_pattern, flags.FromJust(), pos);
}
template <typename Impl>
......
......@@ -1004,44 +1004,21 @@ bool Scanner::ScanRegExpPattern() {
return true;
}
Maybe<v8::RegExp::Flags> Scanner::ScanRegExpFlags() {
Maybe<int> Scanner::ScanRegExpFlags() {
DCHECK_EQ(Token::REGEXP_LITERAL, next().token);
// Scan regular expression flags.
int flags = 0;
JSRegExp::Flags flags;
while (IsIdentifierPart(c0_)) {
v8::RegExp::Flags flag = v8::RegExp::kNone;
switch (c0_) {
case 'g':
flag = v8::RegExp::kGlobal;
break;
case 'i':
flag = v8::RegExp::kIgnoreCase;
break;
case 'm':
flag = v8::RegExp::kMultiline;
break;
case 's':
flag = v8::RegExp::kDotAll;
break;
case 'u':
flag = v8::RegExp::kUnicode;
break;
case 'y':
flag = v8::RegExp::kSticky;
break;
default:
return Nothing<v8::RegExp::Flags>();
}
if (flags & flag) {
return Nothing<v8::RegExp::Flags>();
}
JSRegExp::Flags flag = JSRegExp::FlagFromChar(c0_);
if (flag == JSRegExp::kInvalid) return Nothing<int>();
if (flags & flag) return Nothing<int>();
Advance();
flags |= flag;
}
next().location.end_pos = source_pos();
return Just(v8::RegExp::Flags(flags));
return Just<int>(flags);
}
const AstRawString* Scanner::CurrentSymbol(
......
......@@ -392,7 +392,7 @@ class V8_EXPORT_PRIVATE Scanner {
// Returns true if a pattern is scanned.
bool ScanRegExpPattern();
// Scans the input as regular expression flags. Returns the flags on success.
Maybe<v8::RegExp::Flags> ScanRegExpFlags();
Maybe<int> ScanRegExpFlags();
// Scans the input as a template literal
Token::Value ScanTemplateContinuation() {
......
......@@ -242,7 +242,7 @@ std::string PickLimitForSplit(FuzzerArgs* args) {
}
std::string GenerateRandomFlags(FuzzerArgs* args) {
constexpr size_t kFlagCount = JSRegExp::FlagCount();
constexpr size_t kFlagCount = JSRegExp::kFlagCount;
CHECK_EQ(JSRegExp::kDotAll, 1 << (kFlagCount - 1));
STATIC_ASSERT((1 << kFlagCount) - 1 < 0xFF);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment