Commit add69092, authored by Vicky Kontoura, committed by V8 LUCI CQ

[web snapshot] Support RegExp

This CL adds support for regular expressions. To serialize a regular
expression, the pattern and flags are extracted and serialized as
strings.

Also, JSRegExp::StringFromFlags() is introduced to allow for
transforming back from JSRegExp::Flags to the corresponding flag string.
To verify that this implementation is on par with
RegExp.prototype.flags, unittests are introduced under
regexp/regexp-unittest.cc and RegExpBuiltinsAssembler::FlagsGetter()
is updated to include a slow path that calls JSRegExp::StringFromFlags()
through a runtime function.

Bug: v8:11525, v8:11706
Change-Id: I9cad4c464129ba1cbf64672130d8410730d7d679
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2878751
Commit-Queue: Vicky Kontoura <vkont@google.com>
Reviewed-by: Camillo Bruni <cbruni@chromium.org>
Reviewed-by: Marja Hölttä <marja@chromium.org>
Cr-Commit-Position: refs/heads/master@{#74566}
parent cecf349b
......@@ -1014,6 +1014,12 @@ TF_BUILTIN(RegExpExecInternal, RegExpBuiltinsAssembler) {
TNode<String> RegExpBuiltinsAssembler::FlagsGetter(TNode<Context> context,
TNode<Object> regexp,
bool is_fastpath) {
TVARIABLE(String, result);
Label runtime(this, Label::kDeferred), done(this, &result);
if (is_fastpath) {
GotoIfForceSlowPath(&runtime);
}
Isolate* isolate = this->isolate();
const TNode<IntPtrT> int_one = IntPtrConstant(1);
......@@ -1110,7 +1116,7 @@ TNode<String> RegExpBuiltinsAssembler::FlagsGetter(TNode<Context> context,
// corresponding char for each set flag.
{
const TNode<String> result = AllocateSeqOneByteString(var_length.value());
const TNode<String> string = AllocateSeqOneByteString(var_length.value());
TVARIABLE(IntPtrT, var_offset,
IntPtrConstant(SeqOneByteString::kHeaderSize - kHeapObjectTag));
......@@ -1120,7 +1126,7 @@ TNode<String> RegExpBuiltinsAssembler::FlagsGetter(TNode<Context> context,
Label next(this); \
GotoIfNot(IsSetWord(var_flags.value(), FLAG), &next); \
const TNode<Int32T> value = Int32Constant(CHAR); \
StoreNoWriteBarrier(MachineRepresentation::kWord8, result, \
StoreNoWriteBarrier(MachineRepresentation::kWord8, string, \
var_offset.value(), value); \
var_offset = IntPtrAdd(var_offset.value(), int_one); \
Goto(&next); \
......@@ -1137,7 +1143,26 @@ TNode<String> RegExpBuiltinsAssembler::FlagsGetter(TNode<Context> context,
CASE_FOR_FLAG(JSRegExp::kSticky, 'y');
#undef CASE_FOR_FLAG
return result;
if (is_fastpath) {
#ifdef V8_ENABLE_FORCE_SLOW_PATH
result = string;
Goto(&done);
BIND(&runtime);
{
result =
CAST(CallRuntime(Runtime::kRegExpStringFromFlags, context, regexp));
Goto(&done);
}
BIND(&done);
return result.value();
#else
return string;
#endif
} else {
return string;
}
}
}
......
......@@ -150,6 +150,27 @@ JSRegExp::Flags JSRegExp::FlagsFromString(Isolate* isolate,
return value;
}
// Converts a JSRegExp::Flags bit set back into its flag string. One character
// is emitted per set flag, always in the fixed order d, g, i, l, m, s, u, y;
// an empty flag set yields the empty string.
// static
Handle<String> JSRegExp::StringFromFlags(Isolate* isolate,
                                         JSRegExp::Flags flags) {
  // Ensure that this function is up-to-date with the supported flag options.
  constexpr size_t kFlagCount = JSRegExp::kFlagCount;
  STATIC_ASSERT(kFlagCount == 8);

  // Translate to the lexicographically smallest string.
  //
  // The buffer is handed to NewStringFromAsciiChecked() without a length, so
  // it has to be NUL-terminated. Reserve one slot beyond kFlagCount so that
  // the terminator survives even when every single flag is set; with exactly
  // kFlagCount slots that case would fill the whole array and the string
  // would be read past its end.
  int cursor = 0;
  char buffer[kFlagCount + 1] = {'\0'};
  if (flags & JSRegExp::kHasIndices) buffer[cursor++] = 'd';
  if (flags & JSRegExp::kGlobal) buffer[cursor++] = 'g';
  if (flags & JSRegExp::kIgnoreCase) buffer[cursor++] = 'i';
  if (flags & JSRegExp::kLinear) buffer[cursor++] = 'l';
  if (flags & JSRegExp::kMultiline) buffer[cursor++] = 'm';
  if (flags & JSRegExp::kDotAll) buffer[cursor++] = 's';
  if (flags & JSRegExp::kUnicode) buffer[cursor++] = 'u';
  if (flags & JSRegExp::kSticky) buffer[cursor++] = 'y';
  return isolate->factory()->NewStringFromAsciiChecked(buffer);
}
// static
MaybeHandle<JSRegExp> JSRegExp::New(Isolate* isolate, Handle<String> pattern,
Flags flags, uint32_t backtrack_limit) {
......
......@@ -90,6 +90,9 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> {
static Flags FlagsFromString(Isolate* isolate, Handle<String> flags,
bool* success);
// Inverse direction of FlagsFromString(): returns a String holding one
// character for each flag set in |flags|, emitted in the fixed order
// d, g, i, l, m, s, u, y.
V8_EXPORT_PRIVATE static Handle<String> StringFromFlags(Isolate* isolate,
Flags flags);
bool CanTierUp();
bool MarkedForTierUp();
void ResetLastTierUpTick();
......
......@@ -2004,5 +2004,13 @@ RUNTIME_FUNCTION(Runtime_IsRegExp) {
return isolate->heap()->ToBoolean(obj.IsJSRegExp());
}
// Runtime entry point taking a single JSRegExp argument. Returns the flag
// string that JSRegExp::StringFromFlags() builds from that regexp's flags.
RUNTIME_FUNCTION(Runtime_RegExpStringFromFlags) {
  HandleScope scope(isolate);
  DCHECK_EQ(1, args.length());
  CONVERT_ARG_CHECKED(JSRegExp, regexp, 0);
  // Read the flag bits first, then let the helper allocate the string.
  const JSRegExp::Flags flag_bits = regexp.GetFlags();
  return *JSRegExp::StringFromFlags(isolate, flag_bits);
}
} // namespace internal
} // namespace v8
......@@ -407,6 +407,7 @@ namespace internal {
F(RegExpInitializeAndCompile, 3, 1) \
F(RegExpReplaceRT, 3, 1) \
F(RegExpSplit, 3, 1) \
F(RegExpStringFromFlags, 1, 1) \
F(StringReplaceNonGlobalRegExpWithFunction, 3, 1) \
F(StringSplit, 3, 1)
......
......@@ -11,6 +11,7 @@
#include "src/base/platform/wrappers.h"
#include "src/handles/handles.h"
#include "src/objects/contexts.h"
#include "src/objects/js-regexp-inl.h"
#include "src/objects/script.h"
namespace v8 {
......@@ -429,6 +430,23 @@ void WebSnapshotSerializer::WriteValue(Handle<Object> object,
serializer.WriteUint32(ValueType::OBJECT_ID);
serializer.WriteUint32(id);
break;
case JS_REG_EXP_TYPE: {
Handle<JSRegExp> regexp = Handle<JSRegExp>::cast(object);
if (regexp->map() != isolate_->regexp_function()->initial_map()) {
Throw("Web snapshot: Unsupported RegExp map");
return;
}
uint32_t pattern_id, flags_id;
Handle<String> pattern = handle(regexp->Pattern(), isolate_);
Handle<String> flags_string =
JSRegExp::StringFromFlags(isolate_, regexp->GetFlags());
SerializeString(pattern, pattern_id);
SerializeString(flags_string, flags_id);
serializer.WriteUint32(ValueType::REGEXP);
serializer.WriteUint32(pattern_id);
serializer.WriteUint32(flags_id);
break;
}
default:
if (object->IsString()) {
SerializeString(Handle<String>::cast(object), id);
......@@ -928,6 +946,25 @@ void WebSnapshotDeserializer::ReadValue(Handle<Object>& value,
value = handle(functions_->get(function_id), isolate_);
representation = Representation::Tagged();
break;
case ValueType::REGEXP: {
Handle<String> pattern = ReadString(false);
Handle<String> flags_string = ReadString(false);
bool success = false;
JSRegExp::Flags flags =
JSRegExp::FlagsFromString(isolate_, flags_string, &success);
if (!success) {
Throw("Web snapshot: Malformed flags in regular expression");
return;
}
MaybeHandle<JSRegExp> maybe_regexp =
JSRegExp::New(isolate_, pattern, flags);
if (!maybe_regexp.ToHandle(&value)) {
Throw("Web snapshot: Malformed RegExp");
return;
}
representation = Representation::Tagged();
break;
}
default:
// TODO(v8:11525): Handle other value types.
Throw("Web snapshot: Unsupported value type");
......
......@@ -47,7 +47,8 @@ class WebSnapshotSerializerDeserializer {
DOUBLE,
STRING_ID,
OBJECT_ID,
FUNCTION_ID
FUNCTION_ID,
REGEXP
};
// The maximum count of items for each value type (strings, objects etc.)
......
......@@ -229,6 +229,64 @@ TEST(InnerFunctionWithContextAndParentContext) {
kMapCount, kContextCount, kFunctionCount, kObjectCount);
}
// Snapshots an object whose 're' property is /ab+c/gi and verifies that the
// RegExp found in the new context has the same source and flags, matches
// "aBc", and does not match "ac".
TEST(RegExp) {
  const char* snapshot_source = "var foo = {'re': /ab+c/gi}";
  const char* test_source = "foo";
  // Strings expected in the snapshot: 'foo', 're', RegExp pattern, RegExp
  // flags.
  uint32_t kStringCount = 4;
  uint32_t kMapCount = 1;
  uint32_t kContextCount = 0;
  uint32_t kFunctionCount = 0;
  uint32_t kObjectCount = 1;
  std::function<void(v8::Isolate*, v8::Local<v8::Context>)> tester =
      [test_source](v8::Isolate* isolate, v8::Local<v8::Context> new_context) {
        v8::Local<v8::Object> foo_object =
            CompileRun(test_source).As<v8::Object>();
        v8::Local<v8::Value> re_value =
            foo_object->Get(new_context, v8_str("re")).ToLocalChecked();
        v8::Local<v8::RegExp> regexp = re_value.As<v8::RegExp>();
        CHECK(regexp->IsRegExp());

        // Pattern and flags must survive the round trip unchanged.
        CHECK(regexp->GetSource()
                  ->Equals(new_context, v8_str("ab+c"))
                  .FromJust());
        CHECK_EQ(v8::RegExp::kGlobal | v8::RegExp::kIgnoreCase,
                 regexp->GetFlags());

        // The restored RegExp must also be usable for matching.
        v8::Local<v8::Object> hit =
            regexp->Exec(new_context, v8_str("aBc")).ToLocalChecked();
        CHECK(hit->IsArray());
        v8::Local<v8::Object> miss =
            regexp->Exec(new_context, v8_str("ac")).ToLocalChecked();
        CHECK(miss->IsNull());
      };
  TestWebSnapshotExtensive(snapshot_source, test_source, tester, kStringCount,
                           kMapCount, kContextCount, kFunctionCount,
                           kObjectCount);
}
// Same round trip as the flagged RegExp test, but for /ab+c/ without flags:
// the restored RegExp must report kNone, match "abc", and not match "ac".
TEST(RegExpNoFlags) {
  const char* snapshot_source = "var foo = {'re': /ab+c/}";
  const char* test_source = "foo";
  // Strings expected in the snapshot: 'foo', 're', RegExp pattern, RegExp
  // flags.
  uint32_t kStringCount = 4;
  uint32_t kMapCount = 1;
  uint32_t kContextCount = 0;
  uint32_t kFunctionCount = 0;
  uint32_t kObjectCount = 1;
  std::function<void(v8::Isolate*, v8::Local<v8::Context>)> tester =
      [test_source](v8::Isolate* isolate, v8::Local<v8::Context> new_context) {
        v8::Local<v8::Object> foo_object =
            CompileRun(test_source).As<v8::Object>();
        v8::Local<v8::Value> re_value =
            foo_object->Get(new_context, v8_str("re")).ToLocalChecked();
        v8::Local<v8::RegExp> regexp = re_value.As<v8::RegExp>();
        CHECK(regexp->IsRegExp());

        // Pattern must be preserved and no flag may appear out of nowhere.
        CHECK(regexp->GetSource()
                  ->Equals(new_context, v8_str("ab+c"))
                  .FromJust());
        CHECK_EQ(v8::RegExp::kNone, regexp->GetFlags());

        // The restored RegExp must also be usable for matching.
        v8::Local<v8::Object> hit =
            regexp->Exec(new_context, v8_str("abc")).ToLocalChecked();
        CHECK(hit->IsArray());
        v8::Local<v8::Object> miss =
            regexp->Exec(new_context, v8_str("ac")).ToLocalChecked();
        CHECK(miss->IsNull());
      };
  TestWebSnapshotExtensive(snapshot_source, test_source, tester, kStringCount,
                           kMapCount, kContextCount, kFunctionCount,
                           kObjectCount);
}
TEST(SFIDeduplication) {
CcTest::InitializeVM();
v8::Isolate* isolate = CcTest::isolate();
......
......@@ -362,6 +362,7 @@ v8_source_set("unittests_sources") {
"parser/ast-value-unittest.cc",
"parser/preparser-unittest.cc",
"profiler/strings-storage-unittest.cc",
"regexp/regexp-unittest.cc",
"regress/regress-crbug-1041240-unittest.cc",
"regress/regress-crbug-1056054-unittest.cc",
"regress/regress-crbug-938251-unittest.cc",
......
// Copyright 2021 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/objects/js-regexp-inl.h"
#include "test/unittests/test-utils.h"
namespace v8 {
namespace internal {
// Checks that JSRegExp::StringFromFlags() yields the same string as the
// JavaScript `flags` accessor for a RegExp created with the flags "ig".
TEST_F(TestWithNativeContext, ConvertRegExpFlagsToString) {
  RunJS("let regexp = new RegExp(/ab+c/ig);");
  Handle<JSRegExp> regexp_object = RunJS<JSRegExp>("regexp");
  // Reference value as computed by JavaScript itself.
  Handle<String> expected = RunJS<String>("regexp.flags");
  const JSRegExp::Flags flag_bits = regexp_object->GetFlags();
  Handle<String> actual = JSRegExp::StringFromFlags(isolate(), flag_bits);
  EXPECT_TRUE(String::Equals(isolate(), expected, actual));
}
// Checks that JSRegExp::StringFromFlags() yields the same string as the
// JavaScript `flags` accessor for a RegExp created without any flags.
TEST_F(TestWithNativeContext, ConvertRegExpFlagsToStringNoFlags) {
  RunJS("let regexp = new RegExp(/ab+c/);");
  Handle<JSRegExp> regexp_object = RunJS<JSRegExp>("regexp");
  // Reference value as computed by JavaScript itself.
  Handle<String> expected = RunJS<String>("regexp.flags");
  const JSRegExp::Flags flag_bits = regexp_object->GetFlags();
  Handle<String> actual = JSRegExp::StringFromFlags(isolate(), flag_bits);
  EXPECT_TRUE(String::Equals(isolate(), expected, actual));
}
// Checks that JSRegExp::StringFromFlags() yields the same string as the
// JavaScript `flags` accessor for a RegExp created with the flags "dgimsuy".
// Note that the literal below does not include the 'l' flag.
TEST_F(TestWithNativeContext, ConvertRegExpFlagsToStringAllFlags) {
  RunJS("let regexp = new RegExp(/ab+c/dgimsuy);");
  Handle<JSRegExp> regexp_object = RunJS<JSRegExp>("regexp");
  // Reference value as computed by JavaScript itself.
  Handle<String> expected = RunJS<String>("regexp.flags");
  const JSRegExp::Flags flag_bits = regexp_object->GetFlags();
  Handle<String> actual = JSRegExp::StringFromFlags(isolate(), flag_bits);
  EXPECT_TRUE(String::Equals(isolate(), expected, actual));
}
} // namespace internal
} // namespace v8
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment