Commit 4bf5e119 authored by Jakob Gruber's avatar Jakob Gruber Committed by Commit Bot

[regexp] Implement interrupt support in the interpreter

... similar to how we do this in native irregexp code, i.e. handle
interrupts on each backtrack. Unhandlified references into the code
ByteArray and the subject String object are updated after a potential
GC.

Since interrupts may change the subject string's representation, the
interpreter is now called in a loop to handle retries.

Bug: v8:8724
Change-Id: Ic34de8d69ccc56d4656b8ed080c2c168c212ebfc
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1511477
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: 's avatarPeter Marshall <petermarshall@chromium.org>
Cr-Commit-Position: refs/heads/master@{#60187}
parent d5f08e4e
......@@ -147,13 +147,76 @@ class BacktrackStack {
DISALLOW_COPY_AND_ASSIGN(BacktrackStack);
};
namespace {
// Runs all pending interrupts. Callers must update unhandlified object
// references after this function completes.
IrregexpInterpreter::Result HandleInterrupts(Isolate* isolate,
Handle<String> subject_string) {
DisallowHeapAllocation no_gc;
StackLimitCheck check(isolate);
if (check.JsHasOverflowed()) {
// A real stack overflow.
AllowHeapAllocation yes_gc;
isolate->StackOverflow();
return IrregexpInterpreter::EXCEPTION;
}
const bool was_one_byte =
String::IsOneByteRepresentationUnderneath(*subject_string);
Object result;
{
AllowHeapAllocation yes_gc;
result = isolate->stack_guard()->HandleInterrupts();
}
if (result->IsException(isolate)) {
return IrregexpInterpreter::EXCEPTION;
}
// If we changed between a LATIN1 and a UC16 string, we need to restart
// regexp matching with the appropriate template instantiation of RawMatch.
if (String::IsOneByteRepresentationUnderneath(*subject_string) !=
was_one_byte) {
return IrregexpInterpreter::RETRY;
}
return IrregexpInterpreter::SUCCESS;
}
template <typename Char>
static IrregexpInterpreter::Result RawMatch(Isolate* isolate,
const byte* code_base,
Vector<const Char> subject,
int* registers, int current,
uint32_t current_char) {
const byte* pc = code_base;
void UpdateCodeAndSubjectReferences(Isolate* isolate,
Handle<ByteArray> code_array,
Handle<String> subject_string,
const byte** code_base_out,
const byte** pc_out,
Vector<const Char>* subject_string_out) {
DisallowHeapAllocation no_gc;
if (*code_base_out != code_array->GetDataStartAddress()) {
const intptr_t pc_offset = *pc_out - *code_base_out;
DCHECK_GT(pc_offset, 0);
*code_base_out = code_array->GetDataStartAddress();
*pc_out = *code_base_out + pc_offset;
}
DCHECK(subject_string->IsFlat());
*subject_string_out = subject_string->GetCharVector<Char>(no_gc);
}
template <typename Char>
IrregexpInterpreter::Result RawMatch(Isolate* isolate,
Handle<ByteArray> code_array,
Handle<String> subject_string,
Vector<const Char> subject, int* registers,
int current, uint32_t current_char) {
DisallowHeapAllocation no_gc;
const byte* pc = code_array->GetDataStartAddress();
const byte* code_base = pc;
// BacktrackStack ensures that the memory allocated for the backtracking stack
// is returned to the system or cached if there is no stack being cached at
// the moment.
......@@ -228,12 +291,21 @@ static IrregexpInterpreter::Result RawMatch(Isolate* isolate,
current = *backtrack_sp;
pc += BC_POP_CP_LENGTH;
break;
BYTECODE(POP_BT)
// clang-format off
BYTECODE(POP_BT) {
IrregexpInterpreter::Result return_code = HandleInterrupts(
isolate, subject_string);
if (return_code != IrregexpInterpreter::SUCCESS) return return_code;
UpdateCodeAndSubjectReferences(isolate, code_array, subject_string,
&code_base, &pc, &subject);
backtrack_stack_space++;
--backtrack_sp;
pc = code_base + *backtrack_sp;
break;
BYTECODE(POP_REGISTER)
}
BYTECODE(POP_REGISTER) // clang-format on
backtrack_stack_space++;
--backtrack_sp;
registers[insn >> BYTECODE_SHIFT] = *backtrack_sp;
......@@ -585,34 +657,28 @@ static IrregexpInterpreter::Result RawMatch(Isolate* isolate,
}
}
} // namespace
// static
IrregexpInterpreter::Result IrregexpInterpreter::Match(
Isolate* isolate, Handle<ByteArray> code_array, Handle<String> subject,
int* registers, int start_position) {
DCHECK(subject->IsFlat());
Isolate* isolate, Handle<ByteArray> code_array,
Handle<String> subject_string, int* registers, int start_position) {
DCHECK(subject_string->IsFlat());
DisallowHeapAllocation no_gc;
const byte* code_base = code_array->GetDataStartAddress();
uc16 previous_char = '\n';
String::FlatContent subject_content = subject->GetFlatContent(no_gc);
String::FlatContent subject_content = subject_string->GetFlatContent(no_gc);
if (subject_content.IsOneByte()) {
Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
if (start_position != 0) previous_char = subject_vector[start_position - 1];
return RawMatch(isolate,
code_base,
subject_vector,
registers,
start_position,
previous_char);
return RawMatch(isolate, code_array, subject_string, subject_vector,
registers, start_position, previous_char);
} else {
DCHECK(subject_content.IsTwoByte());
Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
if (start_position != 0) previous_char = subject_vector[start_position - 1];
return RawMatch(isolate,
code_base,
subject_vector,
registers,
start_position,
previous_char);
return RawMatch(isolate, code_array, subject_string, subject_vector,
registers, start_position, previous_char);
}
}
......
......@@ -19,8 +19,9 @@ class IrregexpInterpreter {
STATIC_ASSERT(FAILURE == static_cast<int>(RegExpImpl::RE_FAILURE));
STATIC_ASSERT(SUCCESS == static_cast<int>(RegExpImpl::RE_SUCCESS));
// The caller is responsible for initializing registers before each call.
static Result Match(Isolate* isolate, Handle<ByteArray> code_array,
Handle<String> subject, int* registers,
Handle<String> subject_string, int* registers,
int start_position);
};
......
......@@ -477,26 +477,40 @@ int RegExpImpl::IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
int number_of_capture_registers =
(IrregexpNumberOfCaptures(*irregexp) + 1) * 2;
int32_t* raw_output = &output[number_of_capture_registers];
// We do not touch the actual capture result registers until we know there
// has been a match so that we can use those capture results to set the
// last match info.
for (int i = number_of_capture_registers - 1; i >= 0; i--) {
raw_output[i] = -1;
}
Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_one_byte),
isolate);
IrregexpInterpreter::Result result = IrregexpInterpreter::Match(
isolate, byte_codes, subject, raw_output, index);
DCHECK_IMPLIES(result == IrregexpInterpreter::EXCEPTION,
isolate->has_pending_exception());
if (result == IrregexpInterpreter::SUCCESS) {
// Copy capture results to the start of the registers array.
MemCopy(output, raw_output,
number_of_capture_registers * sizeof(int32_t));
}
return result;
do {
// We do not touch the actual capture result registers until we know there
// has been a match so that we can use those capture results to set the
// last match info.
for (int i = number_of_capture_registers - 1; i >= 0; i--) {
raw_output[i] = -1;
}
Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_one_byte),
isolate);
IrregexpInterpreter::Result result = IrregexpInterpreter::Match(
isolate, byte_codes, subject, raw_output, index);
DCHECK_IMPLIES(result == IrregexpInterpreter::EXCEPTION,
isolate->has_pending_exception());
switch (result) {
case IrregexpInterpreter::SUCCESS:
// Copy capture results to the start of the registers array.
MemCopy(output, raw_output,
number_of_capture_registers * sizeof(int32_t));
return result;
case IrregexpInterpreter::EXCEPTION:
case IrregexpInterpreter::FAILURE:
return result;
case IrregexpInterpreter::RETRY:
// The string has changed representation, and we must restart the
// match.
is_one_byte = String::IsOneByteRepresentationUnderneath(*subject);
EnsureCompiledIrregexp(isolate, regexp, subject, is_one_byte);
break;
}
} while (true);
UNREACHABLE();
}
}
......
......@@ -412,12 +412,6 @@
'test-compiler/DecideToPretenureDuringCompilation': [SKIP],
}], # variant == stress_background_compile
##############################################################################
['variant == interpreted_regexp', {
# Times out: https://crbug.com/v8/8678
'test-api/RegExpInterruption': [SKIP],
}], # variant == interpreted_regexp
##############################################################################
['variant == no_wasm_traps', {
'test-accessors/*': [SKIP],
......@@ -503,7 +497,6 @@
# Tests that generate code at runtime.
'codegen-tester/*': [SKIP],
'test-accessor-assembler/*': [SKIP],
'test-api/RegExpInterruption': [SKIP],
'test-assembler-*': [SKIP],
'test-basic-block-profiler/*': [SKIP],
'test-branch-combine/*': [SKIP],
......
......@@ -15102,81 +15102,6 @@ TEST(CompileExternalTwoByteSource) {
}
}
struct RegExpInterruptionData {
v8::base::Atomic32 loop_count;
UC16VectorResource* string_resource;
v8::Persistent<v8::String> string;
} regexp_interruption_data;
class RegExpInterruptionThread : public v8::base::Thread {
public:
explicit RegExpInterruptionThread(v8::Isolate* isolate)
: Thread(Options("TimeoutThread")), isolate_(isolate) {}
void Run() override {
for (v8::base::Relaxed_Store(&regexp_interruption_data.loop_count, 0);
v8::base::Relaxed_Load(&regexp_interruption_data.loop_count) < 7;
v8::base::Relaxed_AtomicIncrement(&regexp_interruption_data.loop_count,
1)) {
// Wait a bit before requesting GC.
v8::base::OS::Sleep(v8::base::TimeDelta::FromMilliseconds(50));
reinterpret_cast<i::Isolate*>(isolate_)->stack_guard()->RequestGC();
}
// Wait a bit before terminating.
v8::base::OS::Sleep(v8::base::TimeDelta::FromMilliseconds(50));
isolate_->TerminateExecution();
}
private:
v8::Isolate* isolate_;
};
void RunBeforeGC(v8::Isolate* isolate, v8::GCType type,
v8::GCCallbackFlags flags) {
if (v8::base::Relaxed_Load(&regexp_interruption_data.loop_count) != 2) {
return;
}
v8::HandleScope scope(isolate);
v8::Local<v8::String> string = v8::Local<v8::String>::New(
CcTest::isolate(), regexp_interruption_data.string);
string->MakeExternal(regexp_interruption_data.string_resource);
}
// Test that RegExp execution can be interrupted. Specifically, we test
// * interrupting with GC
// * turn the subject string from one-byte internal to two-byte external string
// * force termination
TEST(RegExpInterruption) {
LocalContext env;
v8::HandleScope scope(env->GetIsolate());
RegExpInterruptionThread timeout_thread(env->GetIsolate());
env->GetIsolate()->AddGCPrologueCallback(RunBeforeGC);
static const char* one_byte_content = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa";
i::uc16* uc16_content = AsciiToTwoByteString(one_byte_content);
v8::Local<v8::String> string = v8_str(one_byte_content);
env->Global()->Set(env.local(), v8_str("a"), string).FromJust();
regexp_interruption_data.string.Reset(env->GetIsolate(), string);
regexp_interruption_data.string_resource = new UC16VectorResource(
i::Vector<const i::uc16>(uc16_content, i::StrLength(one_byte_content)));
v8::TryCatch try_catch(env->GetIsolate());
timeout_thread.Start();
CompileRun("/((a*)*)*b/.exec(a)");
CHECK(try_catch.HasTerminated());
timeout_thread.Join();
regexp_interruption_data.string.Reset();
i::DeleteArray(uc16_content);
}
// Test that we cannot set a property on the global object if there
// is a read-only property in the prototype chain.
TEST(ReadOnlyPropertyInGlobalProto) {
......@@ -22843,6 +22768,179 @@ TEST(AccessCheckThrows) {
isolate->SetFailedAccessCheckCallbackFunction(nullptr);
}
namespace {
const char kOneByteSubjectString[] = {
'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a',
'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a',
'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', '\0'};
const uint16_t kTwoByteSubjectString[] = {
'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a',
'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a',
'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', 'a', '\0'};
const int kSubjectStringLength = arraysize(kOneByteSubjectString) - 1;
STATIC_ASSERT(arraysize(kOneByteSubjectString) ==
arraysize(kTwoByteSubjectString));
OneByteVectorResource one_byte_string_resource(
i::Vector<const char>(&kOneByteSubjectString[0], kSubjectStringLength));
UC16VectorResource two_byte_string_resource(
i::Vector<const i::uc16>(&kTwoByteSubjectString[0], kSubjectStringLength));
class RegExpInterruptTest {
public:
RegExpInterruptTest()
: i_thread(this),
env_(),
isolate_(env_->GetIsolate()),
sem_(0),
ran_test_body_(false),
ran_to_completion_(false) {}
void RunTest(v8::InterruptCallback test_body_fn) {
v8::HandleScope handle_scope(isolate_);
i_thread.SetTestBody(test_body_fn);
i_thread.Start();
TestBody();
i_thread.Join();
}
static void CollectAllGarbage(v8::Isolate* isolate, void* data) {
i::Isolate* i_isolate = reinterpret_cast<i::Isolate*>(isolate);
i_isolate->heap()->PreciseCollectAllGarbage(
i::Heap::kNoGCFlags, i::GarbageCollectionReason::kRuntime);
}
static void MakeSubjectOneByteExternal(v8::Isolate* isolate, void* data) {
auto instance = reinterpret_cast<RegExpInterruptTest*>(data);
v8::HandleScope scope(isolate);
v8::Local<v8::String> string =
v8::Local<v8::String>::New(isolate, instance->string_handle_);
CHECK(string->CanMakeExternal());
string->MakeExternal(&one_byte_string_resource);
}
static void MakeSubjectTwoByteExternal(v8::Isolate* isolate, void* data) {
auto instance = reinterpret_cast<RegExpInterruptTest*>(data);
v8::HandleScope scope(isolate);
v8::Local<v8::String> string =
v8::Local<v8::String>::New(isolate, instance->string_handle_);
CHECK(string->CanMakeExternal());
string->MakeExternal(&two_byte_string_resource);
}
private:
static void SignalSemaphore(v8::Isolate* isolate, void* data) {
reinterpret_cast<RegExpInterruptTest*>(data)->sem_.Signal();
}
void CreateTestStrings() {
i::Isolate* i_isolate = reinterpret_cast<i::Isolate*>(isolate_);
// The string must be in old space to support externalization.
i::Handle<i::String> i_string =
i_isolate->factory()->NewStringFromAsciiChecked(
&kOneByteSubjectString[0], i::AllocationType::kOld);
v8::Local<v8::String> string = v8::Utils::ToLocal(i_string);
env_->Global()->Set(env_.local(), v8_str("a"), string).FromJust();
string_handle_.Reset(env_->GetIsolate(), string);
}
void TestBody() {
CHECK(!ran_test_body_.load());
CHECK(!ran_to_completion_.load());
CreateTestStrings();
v8::TryCatch try_catch(env_->GetIsolate());
isolate_->RequestInterrupt(&SignalSemaphore, this);
CompileRun("/((a*)*)*b/.exec(a)");
CHECK(try_catch.HasTerminated());
CHECK(ran_test_body_.load());
CHECK(ran_to_completion_.load());
}
class InterruptThread : public v8::base::Thread {
public:
explicit InterruptThread(RegExpInterruptTest* test)
: Thread(Options("RegExpInterruptTest")), test_(test) {}
void Run() override {
CHECK_NOT_NULL(test_body_fn_);
// Wait for JS execution to start.
test_->sem_.Wait();
// Sleep for a bit to allow irregexp execution to start up, then run the
// test body.
v8::base::OS::Sleep(v8::base::TimeDelta::FromMilliseconds(50));
test_->isolate_->RequestInterrupt(&RunTestBody, test_);
test_->isolate_->RequestInterrupt(&SignalSemaphore, test_);
// Wait for the scheduled interrupt to signal.
test_->sem_.Wait();
// Sleep again to resume irregexp execution, then terminate.
v8::base::OS::Sleep(v8::base::TimeDelta::FromMilliseconds(50));
test_->ran_to_completion_.store(true);
test_->isolate_->TerminateExecution();
}
static void RunTestBody(v8::Isolate* isolate, void* data) {
auto instance = reinterpret_cast<RegExpInterruptTest*>(data);
instance->i_thread.test_body_fn_(isolate, data);
instance->ran_test_body_.store(true);
}
void SetTestBody(v8::InterruptCallback callback) {
test_body_fn_ = callback;
}
private:
v8::InterruptCallback test_body_fn_;
RegExpInterruptTest* test_;
};
InterruptThread i_thread;
LocalContext env_;
v8::Isolate* isolate_;
v8::base::Semaphore sem_; // Coordinates between main and interrupt threads.
v8::Persistent<v8::String> string_handle_;
std::atomic<bool> ran_test_body_;
std::atomic<bool> ran_to_completion_;
};
} // namespace
TEST(RegExpInterruptAndCollectAllGarbage) {
i::FLAG_always_compact = true; // Move all movable objects on GC.
RegExpInterruptTest test;
test.RunTest(RegExpInterruptTest::CollectAllGarbage);
}
TEST(RegExpInterruptAndMakeSubjectOneByteExternal) {
RegExpInterruptTest test;
test.RunTest(RegExpInterruptTest::MakeSubjectOneByteExternal);
}
TEST(RegExpInterruptAndMakeSubjectTwoByteExternal) {
RegExpInterruptTest test;
test.RunTest(RegExpInterruptTest::MakeSubjectTwoByteExternal);
}
class RequestInterruptTestBase {
public:
RequestInterruptTestBase()
......
......@@ -866,26 +866,23 @@ class TerminatorSleeperThread : public v8::base::Thread {
};
TEST(TerminateRegExp) {
// The regexp interpreter does not support preemption.
if (!i::FLAG_regexp_interpret_all) {
i::FLAG_allow_natives_syntax = true;
v8::Isolate* isolate = CcTest::isolate();
v8::HandleScope scope(isolate);
v8::Local<v8::ObjectTemplate> global = CreateGlobalTemplate(
isolate, TerminateCurrentThread, DoLoopCancelTerminate);
v8::Local<v8::Context> context = v8::Context::New(isolate, nullptr, global);
v8::Context::Scope context_scope(context);
CHECK(!isolate->IsExecutionTerminating());
v8::TryCatch try_catch(isolate);
CHECK(!isolate->IsExecutionTerminating());
CHECK(!CompileRun("var re = /(x+)+y$/; re.test('x');").IsEmpty());
TerminatorSleeperThread terminator(isolate, 100);
terminator.Start();
CHECK(CompileRun("re.test('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'); fail();")
.IsEmpty());
CHECK(try_catch.HasCaught());
CHECK(isolate->IsExecutionTerminating());
}
i::FLAG_allow_natives_syntax = true;
v8::Isolate* isolate = CcTest::isolate();
v8::HandleScope scope(isolate);
v8::Local<v8::ObjectTemplate> global = CreateGlobalTemplate(
isolate, TerminateCurrentThread, DoLoopCancelTerminate);
v8::Local<v8::Context> context = v8::Context::New(isolate, nullptr, global);
v8::Context::Scope context_scope(context);
CHECK(!isolate->IsExecutionTerminating());
v8::TryCatch try_catch(isolate);
CHECK(!isolate->IsExecutionTerminating());
CHECK(!CompileRun("var re = /(x+)+y$/; re.test('x');").IsEmpty());
TerminatorSleeperThread terminator(isolate, 100);
terminator.Start();
CHECK(CompileRun("re.test('xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'); fail();")
.IsEmpty());
CHECK(try_catch.HasCaught());
CHECK(isolate->IsExecutionTerminating());
}
TEST(TerminateInMicrotask) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment