Commit f3a666d6 authored by Martin Bidlingmaier, committed by Commit Bot

[regexp] Handle interrupts in experimental interpreter

No surprises; very much based on interrupt handling in the irregexp
interpreter.

Cq-Include-Trybots: luci.v8.try:v8_linux64_fyi_rel_ng
Bug: v8:10765
Change-Id: I2353cac4639a494362b8dfdf9507985fb6298c0e
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2452710
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Commit-Queue: Martin Bidlingmaier <mbid@google.com>
Cr-Commit-Position: refs/heads/master@{#70370}
parent 7be8692e
......@@ -5,6 +5,8 @@
#include "src/regexp/experimental/experimental-interpreter.h"
#include "src/base/optional.h"
#include "src/objects/fixed-array-inl.h"
#include "src/objects/string-inl.h"
#include "src/regexp/experimental/experimental.h"
#include "src/strings/char-predicates-inl.h"
#include "src/zone/zone-allocator.h"
......@@ -50,6 +52,37 @@ bool SatisfiesAssertion(RegExpAssertion::AssertionType type,
}
}
// Builds a Vector view over the payload of `raw_bytes`, interpreting it as a
// sequence of RegExpInstruction values. The byte length is expected to be an
// exact multiple of sizeof(RegExpInstruction); this is DCHECKed.
// `no_gc` is not read in the body; presumably it is required so that callers
// demonstrate heap allocation is disallowed while the returned view is in use
// (TODO confirm).
Vector<RegExpInstruction> ToInstructionVector(
    ByteArray raw_bytes, const DisallowHeapAllocation& no_gc) {
  int instruction_count = raw_bytes.length() / sizeof(RegExpInstruction);
  // Guard against a trailing partial instruction.
  DCHECK_EQ(sizeof(RegExpInstruction) * instruction_count, raw_bytes.length());
  RegExpInstruction* first_instruction =
      reinterpret_cast<RegExpInstruction*>(raw_bytes.GetDataStartAddress());
  return Vector<RegExpInstruction>(first_instruction, instruction_count);
}
// Returns the characters of `str` as a Vector of `Character`. Only declared
// here; the explicit specializations for uint8_t and uc16 provide the
// definitions.
template <class Character>
Vector<const Character> ToCharacterVector(String str,
const DisallowHeapAllocation& no_gc);
// One-byte specialization: returns the one-byte character vector of `str`.
// `str` must be flat and its flat content must be one-byte (both DCHECKed).
template <>
Vector<const uint8_t> ToCharacterVector<uint8_t>(
    String str, const DisallowHeapAllocation& no_gc) {
  DCHECK(str.IsFlat());
  String::FlatContent flat = str.GetFlatContent(no_gc);
  DCHECK(flat.IsOneByte());
  return flat.ToOneByteVector();
}
// Two-byte specialization: returns the UC16 character vector of `str`.
// `str` must be flat and its flat content must be two-byte (both DCHECKed).
template <>
Vector<const uc16> ToCharacterVector<uc16>(
    String str, const DisallowHeapAllocation& no_gc) {
  DCHECK(str.IsFlat());
  String::FlatContent flat = str.GetFlatContent(no_gc);
  DCHECK(flat.IsTwoByte());
  return flat.ToUC16Vector();
}
template <class Character>
class NfaInterpreter {
// Executes a bytecode program in breadth-first mode, without backtracking.
......@@ -100,12 +133,16 @@ class NfaInterpreter {
// with high priority are left, we return the match that was produced by the
// ACCEPTing thread with highest priority.
public:
NfaInterpreter(Vector<const RegExpInstruction> bytecode,
int register_count_per_match, Vector<const Character> input,
NfaInterpreter(Isolate* isolate, RegExp::CallOrigin call_origin,
ByteArray bytecode, int register_count_per_match, String input,
int32_t input_index, Zone* zone)
: bytecode_(bytecode),
: isolate_(isolate),
call_origin_(call_origin),
bytecode_object_(bytecode),
bytecode_(ToInstructionVector(bytecode, no_gc_)),
register_count_per_match_(register_count_per_match),
input_(input),
input_object_(input),
input_(ToCharacterVector<Character>(input, no_gc_)),
input_index_(input_index),
pc_last_input_index_(zone->NewArray<int>(bytecode.length()),
bytecode.length()),
......@@ -131,12 +168,15 @@ class NfaInterpreter {
int match_num = 0;
while (match_num != max_match_num) {
FindNextMatch();
int err_code = FindNextMatch();
if (err_code != RegExp::kInternalRegExpSuccess) return err_code;
if (!FoundMatch()) break;
Vector<int> registers = *best_match_registers_;
Vector<int> registers = *best_match_registers_;
output_registers =
std::copy(registers.begin(), registers.end(), output_registers);
++match_num;
const int match_begin = registers[0];
......@@ -177,6 +217,69 @@ class NfaInterpreter {
int* register_array_begin;
};
// Handles pending interrupts if there are any. Returns
// RegExp::kInternalRegExpSuccess if execution can continue, and an error
// code otherwise.
int HandleInterrupts() {
StackLimitCheck check(isolate_);
if (call_origin_ == RegExp::CallOrigin::kFromJs) {
// Direct calls from JavaScript can be interrupted in two ways:
// 1. A real stack overflow, in which case we let the caller throw the
// exception.
// 2. The stack guard was used to interrupt execution for another purpose,
// forcing the call through the runtime system.
if (check.JsHasOverflowed()) {
return RegExp::kInternalRegExpException;
} else if (check.InterruptRequested()) {
return RegExp::kInternalRegExpRetry;
}
} else {
DCHECK(call_origin_ == RegExp::CallOrigin::kFromRuntime);
HandleScope handles(isolate_);
Handle<ByteArray> bytecode_handle(bytecode_object_, isolate_);
Handle<String> input_handle(input_object_, isolate_);
if (check.JsHasOverflowed()) {
// We abort the interpreter now anyway, so gc can't invalidate any
// pointers.
AllowHeapAllocation yes_gc;
isolate_->StackOverflow();
return RegExp::kInternalRegExpException;
} else if (check.InterruptRequested()) {
// TODO(mbid): Is this really equivalent to whether the string is
// one-byte or two-byte? A comment at the declaration of
// IsOneByteRepresentationUnderneath says that this might fail for
// external strings.
const bool was_one_byte =
String::IsOneByteRepresentationUnderneath(input_object_);
Object result;
{
AllowHeapAllocation yes_gc;
result = isolate_->stack_guard()->HandleInterrupts();
}
if (result.IsException(isolate_)) {
return RegExp::kInternalRegExpException;
}
// If we changed between a LATIN1 and a UC16 string, we need to restart
// regexp matching with the appropriate template instantiation of
// RawMatch.
if (String::IsOneByteRepresentationUnderneath(*input_handle) !=
was_one_byte) {
return RegExp::kInternalRegExpRetry;
}
// Update objects and pointers in case they have changed during gc.
bytecode_object_ = *bytecode_handle;
bytecode_ = ToInstructionVector(bytecode_object_, no_gc_);
input_object_ = *input_handle;
input_ = ToCharacterVector<Character>(input_object_, no_gc_);
}
}
return RegExp::kInternalRegExpSuccess;
}
// Change the current input index for future calls to `FindNextMatch`.
void SetInputIndex(int new_input_index) {
DCHECK_GE(input_index_, 0);
......@@ -187,8 +290,10 @@ class NfaInterpreter {
// Find the next match and return the corresponding capture registers and
// write its capture registers to `best_match_registers_`. The search starts
// at the current `input_index_`.
void FindNextMatch() {
// at the current `input_index_`. Returns RegExp::kInternalRegExpSuccess if
// execution could finish regularly (with or without a match) and an error
// code due to interrupt otherwise.
int FindNextMatch() {
DCHECK(active_threads_.is_empty());
// TODO(mbid,v8:10765): Can we get around resetting `pc_last_input_index_`
// here? As long as
......@@ -240,12 +345,20 @@ class NfaInterpreter {
uc16 input_char = input_[input_index_];
++input_index_;
static constexpr int kTicksBetweenInterruptHandling = 64;
if (input_index_ % kTicksBetweenInterruptHandling == 0) {
int err_code = HandleInterrupts();
if (err_code != RegExp::kInternalRegExpSuccess) return err_code;
}
// We unblock all blocked_threads_ by feeding them the input char.
FlushBlockedThreads(input_char);
// Run all threads until they block or accept.
RunActiveThreads();
}
return RegExp::kInternalRegExpSuccess;
}
// Run an active thread `t` until it executes a CONSUME_RANGE or ACCEPT
......@@ -394,12 +507,20 @@ class NfaInterpreter {
pc_last_input_index_[pc] = input_index_;
}
const Vector<const RegExpInstruction> bytecode_;
Isolate* const isolate_;
const RegExp::CallOrigin call_origin_;
const DisallowHeapAllocation no_gc_;
ByteArray bytecode_object_;
Vector<const RegExpInstruction> bytecode_;
// Number of registers used per thread.
const int register_count_per_match_;
const Vector<const Character> input_;
String input_object_;
Vector<const Character> input_;
int input_index_;
// pc_last_input_index_[k] records the value of input_index_ the last
......@@ -432,22 +553,25 @@ class NfaInterpreter {
} // namespace
int ExperimentalRegExpInterpreter::FindMatchesNfaOneByte(
Vector<const RegExpInstruction> bytecode, int register_count_per_match,
Vector<const uint8_t> input, int start_index, int32_t* output_registers,
int output_register_count, Zone* zone) {
NfaInterpreter<uint8_t> interpreter(bytecode, register_count_per_match, input,
start_index, zone);
return interpreter.FindMatches(output_registers, output_register_count);
}
int ExperimentalRegExpInterpreter::FindMatchesNfaTwoByte(
Vector<const RegExpInstruction> bytecode, int register_count_per_match,
Vector<const uc16> input, int start_index, int32_t* output_registers,
int output_register_count, Zone* zone) {
NfaInterpreter<uc16> interpreter(bytecode, register_count_per_match, input,
start_index, zone);
return interpreter.FindMatches(output_registers, output_register_count);
// Runs the NFA interpreter over `input`, picking the NfaInterpreter
// instantiation (uint8_t or uc16) that matches the string's flat content.
// `input` must be flat (DCHECKed). Returns whatever the selected
// interpreter's FindMatches returns.
int ExperimentalRegExpInterpreter::FindMatches(
    Isolate* isolate, RegExp::CallOrigin call_origin, ByteArray bytecode,
    int register_count_per_match, String input, int start_index,
    int32_t* output_registers, int output_register_count, Zone* zone) {
  DCHECK(input.IsFlat());
  DisallowHeapAllocation no_gc;

  const bool is_one_byte = input.GetFlatContent(no_gc).IsOneByte();
  if (!is_one_byte) {
    DCHECK(input.GetFlatContent(no_gc).IsTwoByte());
    NfaInterpreter<uc16> two_byte_interpreter(
        isolate, call_origin, bytecode, register_count_per_match, input,
        start_index, zone);
    return two_byte_interpreter.FindMatches(output_registers,
                                            output_register_count);
  }

  NfaInterpreter<uint8_t> one_byte_interpreter(
      isolate, call_origin, bytecode, register_count_per_match, input,
      start_index, zone);
  return one_byte_interpreter.FindMatches(output_registers,
                                          output_register_count);
}
} // namespace internal
......
......@@ -5,7 +5,10 @@
#ifndef V8_REGEXP_EXPERIMENTAL_EXPERIMENTAL_INTERPRETER_H_
#define V8_REGEXP_EXPERIMENTAL_EXPERIMENTAL_INTERPRETER_H_
#include "src/objects/fixed-array.h"
#include "src/objects/string.h"
#include "src/regexp/experimental/experimental-bytecode.h"
#include "src/regexp/regexp.h"
#include "src/utils/vector.h"
namespace v8 {
......@@ -18,18 +21,13 @@ class ExperimentalRegExpInterpreter final : public AllStatic {
// Executes a bytecode program in breadth-first NFA mode, without
// backtracking, to find matching substrings. Trys to find up to
// `max_match_num` matches in `input`, starting at `start_index`. Returns
// the actual number of matches found. The boundaires of matching subranges
// the actual number of matches found. The boundaries of matching subranges
// are written to `matches_out`. Provided in variants for one-byte and
// two-byte strings.
static int FindMatchesNfaOneByte(Vector<const RegExpInstruction> bytecode,
int capture_count,
Vector<const uint8_t> input, int start_index,
int32_t* output_registers,
int output_register_count, Zone* zone);
static int FindMatchesNfaTwoByte(Vector<const RegExpInstruction> bytecode,
int capture_count, Vector<const uc16> input,
int start_index, int32_t* output_registers,
int output_register_count, Zone* zone);
static int FindMatches(Isolate* isolate, RegExp::CallOrigin call_origin,
ByteArray bytecode, int capture_count, String input,
int start_index, int32_t* output_registers,
int output_register_count, Zone* zone);
};
} // namespace internal
......
......@@ -75,6 +75,11 @@ bool ExperimentalRegExp::Compile(Isolate* isolate, Handle<JSRegExp> re) {
ZoneList<RegExpInstruction> bytecode =
ExperimentalRegExpCompiler::Compile(parse_result.tree, flags, &zone);
if (FLAG_print_regexp_bytecode) {
StdoutStream{} << "Bytecode:" << std::endl;
StdoutStream{} << bytecode.ToVector() << std::endl;
}
int byte_length = sizeof(RegExpInstruction) * bytecode.length();
Handle<ByteArray> bytecode_byte_array =
isolate->factory()->NewByteArray(byte_length);
......@@ -102,8 +107,10 @@ Vector<RegExpInstruction> AsInstructionSequence(ByteArray raw_bytes) {
}
// Returns the number of matches.
int32_t ExperimentalRegExp::ExecRaw(Isolate* isolate, JSRegExp regexp,
String subject, int32_t* output_registers,
int32_t ExperimentalRegExp::ExecRaw(Isolate* isolate,
RegExp::CallOrigin call_origin,
JSRegExp regexp, String subject,
int32_t* output_registers,
int32_t output_register_count,
int32_t subject_index) {
DisallowHeapAllocation no_gc;
......@@ -115,31 +122,22 @@ int32_t ExperimentalRegExp::ExecRaw(Isolate* isolate, JSRegExp regexp,
StdoutStream{} << "Executing experimental regexp " << source << std::endl;
}
Vector<RegExpInstruction> bytecode = AsInstructionSequence(
ByteArray::cast(regexp.DataAt(JSRegExp::kIrregexpLatin1BytecodeIndex)));
if (FLAG_print_regexp_bytecode) {
StdoutStream{} << "Bytecode:" << std::endl;
StdoutStream{} << bytecode << std::endl;
}
ByteArray bytecode =
ByteArray::cast(regexp.DataAt(JSRegExp::kIrregexpLatin1BytecodeIndex));
int register_count_per_match =
JSRegExp::RegistersForCaptureCount(regexp.CaptureCount());
DCHECK(subject.IsFlat());
String::FlatContent subject_content = subject.GetFlatContent(no_gc);
Zone zone(isolate->allocator(), ZONE_NAME);
if (subject_content.IsOneByte()) {
return ExperimentalRegExpInterpreter::FindMatchesNfaOneByte(
bytecode, register_count_per_match, subject_content.ToOneByteVector(),
subject_index, output_registers, output_register_count, &zone);
} else {
return ExperimentalRegExpInterpreter::FindMatchesNfaTwoByte(
bytecode, register_count_per_match, subject_content.ToUC16Vector(),
int32_t result;
do {
DCHECK(subject.IsFlat());
Zone zone(isolate->allocator(), ZONE_NAME);
result = ExperimentalRegExpInterpreter::FindMatches(
isolate, call_origin, bytecode, register_count_per_match, subject,
subject_index, output_registers, output_register_count, &zone);
}
} while (result == RegExp::kInternalRegExpRetry &&
call_origin == RegExp::kFromRuntime);
return result;
}
int32_t ExperimentalRegExp::MatchForCallFromJs(
......@@ -162,8 +160,8 @@ int32_t ExperimentalRegExp::MatchForCallFromJs(
JSRegExp regexp_obj = JSRegExp::cast(Object(regexp));
return ExecRaw(isolate, regexp_obj, subject_string, output_registers,
output_register_count, start_position);
return ExecRaw(isolate, RegExp::kFromJs, regexp_obj, subject_string,
output_registers, output_register_count, start_position);
}
MaybeHandle<Object> ExperimentalRegExp::Exec(
......@@ -197,16 +195,20 @@ MaybeHandle<Object> ExperimentalRegExp::Exec(
output_registers_release.reset(output_registers);
}
int num_matches = ExecRaw(isolate, *regexp, *subject, output_registers,
output_register_count, subject_index);
int num_matches =
ExecRaw(isolate, RegExp::kFromRuntime, *regexp, *subject,
output_registers, output_register_count, subject_index);
if (num_matches == 0) {
return isolate->factory()->null_value();
} else {
if (num_matches > 0) {
DCHECK_EQ(num_matches, 1);
return RegExp::SetLastMatchInfo(isolate, last_match_info, subject,
capture_count, output_registers);
return last_match_info;
} else if (num_matches == 0) {
return isolate->factory()->null_value();
} else {
DCHECK_LT(num_matches, 0);
DCHECK(isolate->has_pending_exception());
return MaybeHandle<Object>();
}
}
......
......@@ -39,7 +39,8 @@ class ExperimentalRegExp final : public AllStatic {
static MaybeHandle<Object> Exec(Isolate* isolate, Handle<JSRegExp> regexp,
Handle<String> subject, int index,
Handle<RegExpMatchInfo> last_match_info);
static int32_t ExecRaw(Isolate* isolate, JSRegExp regexp, String subject,
static int32_t ExecRaw(Isolate* isolate, RegExp::CallOrigin call_origin,
JSRegExp regexp, String subject,
int32_t* output_registers,
int32_t output_register_count, int32_t subject_index);
......
......@@ -1014,8 +1014,8 @@ int32_t* RegExpGlobalCache::FetchNext() {
DCHECK(ExperimentalRegExp::IsCompiled(regexp_, isolate_));
DisallowHeapAllocation no_gc;
num_matches_ = ExperimentalRegExp::ExecRaw(
isolate_, *regexp_, *subject_, register_array_,
register_array_size_, last_end_index);
isolate_, RegExp::kFromRuntime, *regexp_, *subject_,
register_array_, register_array_size_, last_end_index);
break;
}
case JSRegExp::IRREGEXP: {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment