Commit 3a0f407d authored by Patrick Thier, committed by Commit Bot

Reland "Reland "[regexp] Call the regexp interpreter without CEntry overhead""

This is a reland of c2ee4a79

Original change's description:
> Reland "[regexp] Call the regexp interpreter without CEntry overhead"
> 
> This is a reland of d4d28b73
> 
> Original change's description:
> > [regexp] Call the regexp interpreter without CEntry overhead
> > 
> > Previously all RegExp calls went through Runtime_RegExpExec when --regexp-interpret-all was set.
> > 
> > This CL avoids the runtime overhead by calling into the interpreter directly from the RegExpExec Builtin when the regular expression subject was already compiled to ByteCode (i.e. after the first call).
> > 
> > Bug: v8:8954
> > Change-Id: Iae9dfcef3370b772a05b2942305335d592f6f15a
> > Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1698391
> > Commit-Queue: Patrick Thier <pthier@google.com>
> > Reviewed-by: Jakob Gruber <jgruber@chromium.org>
> > Reviewed-by: Peter Marshall <petermarshall@chromium.org>
> > Cr-Commit-Position: refs/heads/master@{#62753}
> 
> Bug: v8:8954
> Change-Id: I1f0b6de9c6da65bcb582ddb41a37419116a5c510
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1706053
> Reviewed-by: Jakob Gruber <jgruber@chromium.org>
> Commit-Queue: Patrick Thier <pthier@google.com>
> Cr-Commit-Position: refs/heads/master@{#62794}

Bug: v8:8954
Change-Id: Ice77c05240f1fabd36bf97b8e789dd4c25a9718f
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1715451
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: Peter Marshall <petermarshall@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#62904}
parent 5bd57783
......@@ -335,7 +335,8 @@ TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal(
ToDirectStringAssembler to_direct(state(), string);
TVARIABLE(HeapObject, var_result);
Label out(this), atom(this), runtime(this, Label::kDeferred);
Label out(this), interpreted(this), atom(this),
runtime(this, Label::kDeferred);
// External constants.
TNode<ExternalReference> isolate_address =
......@@ -398,14 +399,6 @@ TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal(
&runtime);
}
// Ensure that a RegExp stack is allocated. This check is after branching off
// for ATOM regexps to avoid unnecessary trips to runtime.
{
TNode<IntPtrT> stack_size = UncheckedCast<IntPtrT>(
Load(MachineType::IntPtr(), regexp_stack_memory_size_address));
GotoIf(IntPtrEqual(stack_size, IntPtrZero()), &runtime);
}
// Unpack the string if possible.
to_direct.TryToDirect(&runtime);
......@@ -462,9 +455,16 @@ TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal(
#endif
GotoIf(TaggedIsSmi(var_code.value()), &runtime);
GotoIfNot(IsCode(CAST(var_code.value())), &runtime);
GotoIfNot(IsCode(CAST(var_code.value())), &interpreted);
TNode<Code> code = CAST(var_code.value());
// Ensure that a RegExp stack is allocated when using compiled Irregexp.
{
TNode<IntPtrT> stack_size = UncheckedCast<IntPtrT>(
Load(MachineType::IntPtr(), regexp_stack_memory_size_address));
GotoIf(IntPtrEqual(stack_size, IntPtrZero()), &runtime);
}
Label if_success(this), if_exception(this, Label::kDeferred);
{
IncrementCounter(isolate()->counters()->regexp_entry_native(), 1);
......@@ -553,6 +553,74 @@ TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal(
Goto(&runtime);
}
// Interpreter path: reached when the value in var_code is not a Code object.
// The value is then treated as a bytecode ByteArray and matched by calling
// the Irregexp interpreter directly as a C function.
BIND(&interpreted);
{
TNode<ByteArray> byte_code = CAST(var_code.value());
// NOTE(review): this increments the regexp_entry_native counter even though
// the interpreter (not native code) is entered here — confirm that sharing
// the counter is intended.
IncrementCounter(isolate()->counters()->regexp_entry_native(), 1);
// Set up args for the final call into IrregexpInterpreter.
MachineType type_int32 = MachineType::Int32();
MachineType type_tagged = MachineType::AnyTagged();
MachineType type_ptr = MachineType::Pointer();
// Result: An IrregexpInterpreter::Result return code.
MachineType retval_type = type_int32;
// Argument 0: Pass current isolate address.
MachineType arg0_type = type_ptr;
TNode<ExternalReference> arg0 = isolate_address;
// Argument 1: Pattern ByteCode.
MachineType arg1_type = type_tagged;
TNode<ByteArray> arg1 = byte_code;
// Argument 2: Original subject string.
MachineType arg2_type = type_tagged;
TNode<String> arg2 = string;
// Argument 3: Static offsets vector buffer.
MachineType arg3_type = type_ptr;
TNode<ExternalReference> arg3 = static_offsets_vector_address;
// Argument 4: Length of static offsets vector buffer.
// Computed as (capture_count + 1) * 2 from the capture count stored in the
// regexp data.
TNode<Smi> capture_count = CAST(UnsafeLoadFixedArrayElement(
data, JSRegExp::kIrregexpCaptureCountIndex));
TNode<Smi> register_count =
SmiShl(SmiAdd(capture_count, SmiConstant(1)), 1);
MachineType arg4_type = type_int32;
TNode<Int32T> arg4 = SmiToInt32(register_count);
// Argument 5: Previous index.
MachineType arg5_type = type_int32;
TNode<Int32T> arg5 = TruncateIntPtrToInt32(int_last_index);
// The callee is the re_match_for_call_from_js external reference.
TNode<ExternalReference> code_entry = ExternalConstant(
ExternalReference::re_match_for_call_from_js(isolate()));
TNode<Int32T> result = UncheckedCast<Int32T>(CallCFunction(
code_entry, retval_type, std::make_pair(arg0_type, arg0),
std::make_pair(arg1_type, arg1), std::make_pair(arg2_type, arg2),
std::make_pair(arg3_type, arg3), std::make_pair(arg4_type, arg4),
std::make_pair(arg5_type, arg5)));
// Dispatch on the returned code: success, failure and exception each jump to
// their own label.
TNode<IntPtrT> int_result = ChangeInt32ToIntPtr(result);
GotoIf(
IntPtrEqual(int_result, IntPtrConstant(RegExp::kInternalRegExpSuccess)),
&if_success);
GotoIf(
IntPtrEqual(int_result, IntPtrConstant(RegExp::kInternalRegExpFailure)),
&if_failure);
GotoIf(IntPtrEqual(int_result,
IntPtrConstant(RegExp::kInternalRegExpException)),
&if_exception);
// The only remaining expected code is a retry request, which is handled by
// falling back to the runtime.
CSA_ASSERT(this, IntPtrEqual(int_result,
IntPtrConstant(RegExp::kInternalRegExpRetry)));
Goto(&runtime);
}
BIND(&if_success);
{
// Check that the last match info has space for the capture registers and
......
......@@ -4,6 +4,8 @@
specific_include_rules = {
"external-reference.cc": [
# Required to call IrregexpInterpreter::MatchForCallFromJs from builtin.
"+src/regexp/regexp-interpreter.h",
"+src/regexp/regexp-macro-assembler-arch.h",
],
}
......@@ -26,6 +26,7 @@
#include "src/logging/log.h"
#include "src/numbers/math-random.h"
#include "src/objects/objects-inl.h"
#include "src/regexp/regexp-interpreter.h"
#include "src/regexp/regexp-macro-assembler-arch.h"
#include "src/regexp/regexp-stack.h"
#include "src/strings/string-search.h"
......@@ -481,6 +482,9 @@ FUNCTION_REFERENCE_WITH_ISOLATE(re_check_stack_guard_state, re_stack_check_func)
FUNCTION_REFERENCE_WITH_ISOLATE(re_grow_stack,
NativeRegExpMacroAssembler::GrowStack)
FUNCTION_REFERENCE_WITH_ISOLATE(re_match_for_call_from_js,
IrregexpInterpreter::MatchForCallFromJs)
FUNCTION_REFERENCE_WITH_ISOLATE(
re_case_insensitive_compare_uc16,
NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16)
......
......@@ -82,6 +82,7 @@ class StatsCounter;
V(re_check_stack_guard_state, \
"RegExpMacroAssembler*::CheckStackGuardState()") \
V(re_grow_stack, "NativeRegExpMacroAssembler::GrowStack()") \
V(re_match_for_call_from_js, "IrregexpInterpreter::MatchForCallFromJs") \
V(re_word_character_map, "NativeRegExpMacroAssembler::word_character_map")
#define EXTERNAL_REFERENCE_LIST(V) \
......
......@@ -1109,7 +1109,8 @@ int RegExpMacroAssemblerARM::CheckStackGuardState(Address* return_address,
return NativeRegExpMacroAssembler::CheckStackGuardState(
frame_entry<Isolate*>(re_frame, kIsolate),
frame_entry<int>(re_frame, kStartIndex),
frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code,
static_cast<RegExp::CallOrigin>(frame_entry<int>(re_frame, kDirectCall)),
return_address, re_code,
frame_entry_address<Address>(re_frame, kInputString),
frame_entry_address<const byte*>(re_frame, kInputStart),
frame_entry_address<const byte*>(re_frame, kInputEnd));
......
......@@ -1326,8 +1326,9 @@ int RegExpMacroAssemblerARM64::CheckStackGuardState(
Code re_code = Code::cast(Object(raw_code));
return NativeRegExpMacroAssembler::CheckStackGuardState(
frame_entry<Isolate*>(re_frame, kIsolate), start_index,
frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code,
frame_entry_address<Address>(re_frame, kInput), input_start, input_end);
static_cast<RegExp::CallOrigin>(frame_entry<int>(re_frame, kDirectCall)),
return_address, re_code, frame_entry_address<Address>(re_frame, kInput),
input_start, input_end);
}
......
......@@ -1120,7 +1120,8 @@ int RegExpMacroAssemblerIA32::CheckStackGuardState(Address* return_address,
return NativeRegExpMacroAssembler::CheckStackGuardState(
frame_entry<Isolate*>(re_frame, kIsolate),
frame_entry<int>(re_frame, kStartIndex),
frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code,
static_cast<RegExp::CallOrigin>(frame_entry<int>(re_frame, kDirectCall)),
return_address, re_code,
frame_entry_address<Address>(re_frame, kInputString),
frame_entry_address<const byte*>(re_frame, kInputStart),
frame_entry_address<const byte*>(re_frame, kInputEnd));
......
......@@ -1176,7 +1176,8 @@ int RegExpMacroAssemblerMIPS::CheckStackGuardState(Address* return_address,
return NativeRegExpMacroAssembler::CheckStackGuardState(
frame_entry<Isolate*>(re_frame, kIsolate),
frame_entry<int>(re_frame, kStartIndex),
frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code,
static_cast<RegExp::CallOrigin>(frame_entry<int>(re_frame, kDirectCall)),
return_address, re_code,
frame_entry_address<Address>(re_frame, kInputString),
frame_entry_address<const byte*>(re_frame, kInputStart),
frame_entry_address<const byte*>(re_frame, kInputEnd));
......
......@@ -1213,7 +1213,9 @@ int64_t RegExpMacroAssemblerMIPS::CheckStackGuardState(Address* return_address,
return NativeRegExpMacroAssembler::CheckStackGuardState(
frame_entry<Isolate*>(re_frame, kIsolate),
static_cast<int>(frame_entry<int64_t>(re_frame, kStartIndex)),
frame_entry<int64_t>(re_frame, kDirectCall) == 1, return_address, re_code,
static_cast<RegExp::CallOrigin>(
frame_entry<int64_t>(re_frame, kDirectCall)),
return_address, re_code,
frame_entry_address<Address>(re_frame, kInputString),
frame_entry_address<const byte*>(re_frame, kInputStart),
frame_entry_address<const byte*>(re_frame, kInputEnd));
......
......@@ -1177,8 +1177,10 @@ int RegExpMacroAssemblerPPC::CheckStackGuardState(Address* return_address,
return NativeRegExpMacroAssembler::CheckStackGuardState(
frame_entry<Isolate*>(re_frame, kIsolate),
frame_entry<intptr_t>(re_frame, kStartIndex),
frame_entry<intptr_t>(re_frame, kDirectCall) == 1, return_address,
re_code, frame_entry_address<Address>(re_frame, kInputString),
static_cast<RegExp::CallOrigin>(
frame_entry<intptr_t>(re_frame, kDirectCall)),
return_address, re_code,
frame_entry_address<Address>(re_frame, kInputString),
frame_entry_address<const byte*>(re_frame, kInputStart),
frame_entry_address<const byte*>(re_frame, kInputEnd));
}
......
......@@ -141,7 +141,9 @@ class BacktrackStack {
namespace {
IrregexpInterpreter::Result StackOverflow(Isolate* isolate) {
IrregexpInterpreter::Result StackOverflow(Isolate* isolate,
RegExp::CallOrigin call_origin) {
CHECK(call_origin == RegExp::CallOrigin::kFromRuntime);
// We abort interpreter execution after the stack overflow is thrown, and thus
// allow allocation here despite the outer DisallowHeapAllocationScope.
AllowHeapAllocation yes_gc;
......@@ -149,72 +151,97 @@ IrregexpInterpreter::Result StackOverflow(Isolate* isolate) {
return IrregexpInterpreter::EXCEPTION;
}
// Runs all pending interrupts. Callers must update unhandlified object
// references after this function completes.
IrregexpInterpreter::Result HandleInterrupts(Isolate* isolate,
Handle<String> subject_string) {
template <typename Char>
void UpdateCodeAndSubjectReferences(
Isolate* isolate, Handle<ByteArray> code_array,
Handle<String> subject_string, ByteArray* code_array_out,
const byte** code_base_out, const byte** pc_out, String* subject_string_out,
Vector<const Char>* subject_string_vector_out) {
DisallowHeapAllocation no_gc;
StackLimitCheck check(isolate);
if (check.JsHasOverflowed()) {
return StackOverflow(isolate); // A real stack overflow.
if (*code_base_out != code_array->GetDataStartAddress()) {
*code_array_out = *code_array;
const intptr_t pc_offset = *pc_out - *code_base_out;
DCHECK_GT(pc_offset, 0);
*code_base_out = code_array->GetDataStartAddress();
*pc_out = *code_base_out + pc_offset;
}
// Handle interrupts if any exist.
if (check.InterruptRequested()) {
const bool was_one_byte =
String::IsOneByteRepresentationUnderneath(*subject_string);
DCHECK(subject_string->IsFlat());
*subject_string_out = *subject_string;
*subject_string_vector_out = subject_string->GetCharVector<Char>(no_gc);
}
Object result;
{
AllowHeapAllocation yes_gc;
result = isolate->stack_guard()->HandleInterrupts();
}
// Runs all pending interrupts and updates unhandlified object references if
// necessary.
template <typename Char>
IrregexpInterpreter::Result HandleInterrupts(
Isolate* isolate, RegExp::CallOrigin call_origin, ByteArray* code_array_out,
String* subject_string_out, const byte** code_base_out,
Vector<const Char>* subject_string_vector_out, const byte** pc_out) {
DisallowHeapAllocation no_gc;
if (result.IsException(isolate)) {
StackLimitCheck check(isolate);
bool js_has_overflowed = check.JsHasOverflowed();
if (call_origin == RegExp::CallOrigin::kFromJs) {
// Direct calls from JavaScript can be interrupted in two ways:
// 1. A real stack overflow, in which case we let the caller throw the
// exception.
// 2. The stack guard was used to interrupt execution for another purpose,
// forcing the call through the runtime system.
if (js_has_overflowed) {
return IrregexpInterpreter::EXCEPTION;
}
// If we changed between a LATIN1 and a UC16 string, we need to restart
// regexp matching with the appropriate template instantiation of RawMatch.
if (String::IsOneByteRepresentationUnderneath(*subject_string) !=
was_one_byte) {
} else if (check.InterruptRequested()) {
return IrregexpInterpreter::RETRY;
}
} else {
DCHECK(call_origin == RegExp::CallOrigin::kFromRuntime);
// Prepare for possible GC.
HandleScope handles(isolate);
Handle<ByteArray> code_handle(*code_array_out, isolate);
Handle<String> subject_handle(*subject_string_out, isolate);
if (js_has_overflowed) {
return StackOverflow(isolate, call_origin);
} else if (check.InterruptRequested()) {
const bool was_one_byte =
String::IsOneByteRepresentationUnderneath(*subject_string_out);
Object result;
{
AllowHeapAllocation yes_gc;
result = isolate->stack_guard()->HandleInterrupts();
}
if (result.IsException(isolate)) {
return IrregexpInterpreter::EXCEPTION;
}
// If we changed between a LATIN1 and a UC16 string, we need to restart
// regexp matching with the appropriate template instantiation of
// RawMatch.
if (String::IsOneByteRepresentationUnderneath(*subject_handle) !=
was_one_byte) {
return IrregexpInterpreter::RETRY;
}
UpdateCodeAndSubjectReferences(
isolate, code_handle, subject_handle, code_array_out, code_base_out,
pc_out, subject_string_out, subject_string_vector_out);
}
}
return IrregexpInterpreter::SUCCESS;
}
template <typename Char>
void UpdateCodeAndSubjectReferences(Isolate* isolate,
Handle<ByteArray> code_array,
Handle<String> subject_string,
const byte** code_base_out,
const byte** pc_out,
Vector<const Char>* subject_string_out) {
DisallowHeapAllocation no_gc;
if (*code_base_out != code_array->GetDataStartAddress()) {
const intptr_t pc_offset = *pc_out - *code_base_out;
DCHECK_GT(pc_offset, 0);
*code_base_out = code_array->GetDataStartAddress();
*pc_out = *code_base_out + pc_offset;
}
DCHECK(subject_string->IsFlat());
*subject_string_out = subject_string->GetCharVector<Char>(no_gc);
}
template <typename Char>
IrregexpInterpreter::Result RawMatch(Isolate* isolate,
Handle<ByteArray> code_array,
Handle<String> subject_string,
IrregexpInterpreter::Result RawMatch(Isolate* isolate, ByteArray code_array,
String subject_string,
Vector<const Char> subject, int* registers,
int current, uint32_t current_char) {
int current, uint32_t current_char,
RegExp::CallOrigin call_origin) {
DisallowHeapAllocation no_gc;
const byte* pc = code_array->GetDataStartAddress();
const byte* pc = code_array.GetDataStartAddress();
const byte* code_base = pc;
BacktrackStack backtrack_stack;
......@@ -280,12 +307,10 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate,
}
BYTECODE(POP_BT) {
IrregexpInterpreter::Result return_code =
HandleInterrupts(isolate, subject_string);
HandleInterrupts(isolate, call_origin, &code_array, &subject_string,
&code_base, &subject, &pc);
if (return_code != IrregexpInterpreter::SUCCESS) return return_code;
UpdateCodeAndSubjectReferences(isolate, code_array, subject_string,
&code_base, &pc, &subject);
pc = code_base + backtrack_stack.pop();
break;
}
......@@ -649,32 +674,67 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate,
// static
IrregexpInterpreter::Result IrregexpInterpreter::Match(
Isolate* isolate, Handle<ByteArray> code_array,
Handle<String> subject_string, int* registers, int start_position) {
DCHECK(subject_string->IsFlat());
Isolate* isolate, ByteArray code_array, String subject_string,
int* registers, int registers_length, int start_position,
RegExp::CallOrigin call_origin) {
DCHECK(subject_string.IsFlat());
// Note: Heap allocation *is* allowed in two situations:
// Note: Heap allocation *is* allowed in two situations if calling from
// Runtime:
// 1. When creating & throwing a stack overflow exception. The interpreter
// aborts afterwards, and thus possible-moved objects are never used.
// 2. When handling interrupts. We manually relocate unhandlified references
// after interrupts have run.
DisallowHeapAllocation no_gc;
// Reset registers to -1 (=undefined).
// This is necessary because registers are only written when a
// capture group matched.
// Resetting them ensures that previous matches are cleared.
memset(registers, -1, sizeof(registers[0]) * registers_length);
uc16 previous_char = '\n';
String::FlatContent subject_content = subject_string->GetFlatContent(no_gc);
String::FlatContent subject_content = subject_string.GetFlatContent(no_gc);
if (subject_content.IsOneByte()) {
Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
if (start_position != 0) previous_char = subject_vector[start_position - 1];
return RawMatch(isolate, code_array, subject_string, subject_vector,
registers, start_position, previous_char);
registers, start_position, previous_char, call_origin);
} else {
DCHECK(subject_content.IsTwoByte());
Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
if (start_position != 0) previous_char = subject_vector[start_position - 1];
return RawMatch(isolate, code_array, subject_string, subject_vector,
registers, start_position, previous_char);
registers, start_position, previous_char, call_origin);
}
}
// Entry point used by the RegExpExecInternal builtin, which reaches this
// function through an external reference. |code| and |subject| arrive as raw
// addresses and are reinterpreted here as a ByteArray and a String before
// being forwarded to Match() with the kFromJs call origin.
IrregexpInterpreter::Result IrregexpInterpreter::MatchForCallFromJs(
    Isolate* isolate, Address code, Address subject, int* registers,
    int32_t registers_length, int32_t start_position) {
  DCHECK_NOT_NULL(isolate);
  DCHECK_NOT_NULL(registers);

  // Neither heap allocation nor JavaScript execution is permitted for the
  // duration of this call.
  DisallowHeapAllocation no_gc;
  DisallowJavascriptExecution no_js(isolate);

  const Object raw_subject(subject);
  const Object raw_code(code);
  return Match(isolate, ByteArray::cast(raw_code), String::cast(raw_subject),
               registers, registers_length, start_position,
               RegExp::CallOrigin::kFromJs);
}
// Entry point for callers that hold handles. Dereferences |code_array| and
// |subject_string| and forwards to Match() with the kFromRuntime call origin.
IrregexpInterpreter::Result IrregexpInterpreter::MatchForCallFromRuntime(
    Isolate* isolate, Handle<ByteArray> code_array,
    Handle<String> subject_string, int* registers, int registers_length,
    int start_position) {
  const ByteArray bytecode = *code_array;
  const String subject = *subject_string;
  return Match(isolate, bytecode, subject, registers, registers_length,
               start_position, RegExp::CallOrigin::kFromRuntime);
}
} // namespace internal
} // namespace v8
......@@ -12,7 +12,7 @@
namespace v8 {
namespace internal {
class V8_EXPORT_PRIVATE IrregexpInterpreter {
class V8_EXPORT_PRIVATE IrregexpInterpreter : public AllStatic {
public:
enum Result {
FAILURE = RegExp::kInternalRegExpFailure,
......@@ -21,10 +21,26 @@ class V8_EXPORT_PRIVATE IrregexpInterpreter {
RETRY = RegExp::kInternalRegExpRetry,
};
// The caller is responsible for initializing registers before each call.
static Result Match(Isolate* isolate, Handle<ByteArray> code_array,
Handle<String> subject_string, int* registers,
int start_position);
// In case a StackOverflow occurs, a StackOverflowException is created and
// EXCEPTION is returned.
static Result MatchForCallFromRuntime(Isolate* isolate,
Handle<ByteArray> code_array,
Handle<String> subject_string,
int* registers, int registers_length,
int start_position);
// In case a StackOverflow occurs, EXCEPTION is returned. The caller is
// responsible for creating the exception.
static Result MatchForCallFromJs(Isolate* isolate, Address code,
Address subject, int* registers,
int32_t registers_length,
int32_t start_position);
private:
static Result Match(Isolate* isolate, ByteArray code_array,
String subject_string, int* registers,
int registers_length, int start_position,
RegExp::CallOrigin call_origin);
};
} // namespace internal
......
......@@ -129,32 +129,46 @@ const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
}
}
// This method may only be called after an interrupt.
int NativeRegExpMacroAssembler::CheckStackGuardState(
Isolate* isolate, int start_index, bool is_direct_call,
Isolate* isolate, int start_index, RegExp::CallOrigin call_origin,
Address* return_address, Code re_code, Address* subject,
const byte** input_start, const byte** input_end) {
DisallowHeapAllocation no_gc;
DCHECK(re_code.raw_instruction_start() <= *return_address);
DCHECK(*return_address <= re_code.raw_instruction_end());
int return_value = 0;
// Prepare for possible GC.
HandleScope handles(isolate);
Handle<Code> code_handle(re_code, isolate);
Handle<String> subject_handle(String::cast(Object(*subject)), isolate);
bool is_one_byte = String::IsOneByteRepresentationUnderneath(*subject_handle);
StackLimitCheck check(isolate);
bool js_has_overflowed = check.JsHasOverflowed();
if (is_direct_call) {
if (call_origin == RegExp::CallOrigin::kFromJs) {
// Direct calls from JavaScript can be interrupted in two ways:
// 1. A real stack overflow, in which case we let the caller throw the
// exception.
// 2. The stack guard was used to interrupt execution for another purpose,
// forcing the call through the runtime system.
return_value = js_has_overflowed ? EXCEPTION : RETRY;
} else if (js_has_overflowed) {
// Bug(v8:9540) Investigate why this method is called from JS although no
// stackoverflow or interrupt is pending on ARM64. We return 0 in this case
// to continue execution normally.
if (js_has_overflowed) {
return EXCEPTION;
} else if (check.InterruptRequested()) {
return RETRY;
} else {
return 0;
}
}
DCHECK(call_origin == RegExp::CallOrigin::kFromRuntime);
// Prepare for possible GC.
HandleScope handles(isolate);
Handle<Code> code_handle(re_code, isolate);
Handle<String> subject_handle(String::cast(Object(*subject)), isolate);
bool is_one_byte = String::IsOneByteRepresentationUnderneath(*subject_handle);
int return_value = 0;
if (js_has_overflowed) {
AllowHeapAllocation yes_gc;
isolate->StackOverflow();
return_value = EXCEPTION;
......
......@@ -235,9 +235,9 @@ class NativeRegExpMacroAssembler: public RegExpMacroAssembler {
String subject, int start_index, const DisallowHeapAllocation& no_gc);
static int CheckStackGuardState(Isolate* isolate, int start_index,
bool is_direct_call, Address* return_address,
Code re_code, Address* subject,
const byte** input_start,
RegExp::CallOrigin call_origin,
Address* return_address, Code re_code,
Address* subject, const byte** input_start,
const byte** input_end);
// Byte map of one byte characters with a 0xff if the character is a word
......
......@@ -478,17 +478,13 @@ int RegExpImpl::IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
int32_t* raw_output = &output[number_of_capture_registers];
do {
// We do not touch the actual capture result registers until we know there
// has been a match so that we can use those capture results to set the
// last match info.
for (int i = number_of_capture_registers - 1; i >= 0; i--) {
raw_output[i] = -1;
}
Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_one_byte),
isolate);
IrregexpInterpreter::Result result = IrregexpInterpreter::Match(
isolate, byte_codes, subject, raw_output, index);
IrregexpInterpreter::Result result =
IrregexpInterpreter::MatchForCallFromRuntime(
isolate, byte_codes, subject, raw_output,
number_of_capture_registers, index);
DCHECK_IMPLIES(result == IrregexpInterpreter::EXCEPTION,
isolate->has_pending_exception());
......
......@@ -61,6 +61,11 @@ class RegExp final : public AllStatic {
Isolate* isolate, Handle<JSRegExp> re, Handle<String> pattern,
JSRegExp::Flags flags);
// Identifies where a regexp match was entered from.
// The numeric values are significant: the architecture-specific
// CheckStackGuardState implementations static_cast the kDirectCall frame
// entry to this enum.
enum CallOrigin : int {
// Passed by IrregexpInterpreter::MatchForCallFromRuntime.
kFromRuntime = 0,
// Passed by IrregexpInterpreter::MatchForCallFromJs.
kFromJs = 1,
};
// See ECMA-262 section 15.10.6.2.
// This function calls the garbage collector if necessary.
V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Exec(
......@@ -73,7 +78,7 @@ class RegExp final : public AllStatic {
static constexpr int kInternalRegExpException = -1;
static constexpr int kInternalRegExpRetry = -2;
enum IrregexpResult {
enum IrregexpResult : int32_t {
RE_FAILURE = kInternalRegExpFailure,
RE_SUCCESS = kInternalRegExpSuccess,
RE_EXCEPTION = kInternalRegExpException,
......
......@@ -1120,8 +1120,10 @@ int RegExpMacroAssemblerS390::CheckStackGuardState(Address* return_address,
return NativeRegExpMacroAssembler::CheckStackGuardState(
frame_entry<Isolate*>(re_frame, kIsolate),
frame_entry<intptr_t>(re_frame, kStartIndex),
frame_entry<intptr_t>(re_frame, kDirectCall) == 1, return_address,
re_code, frame_entry_address<Address>(re_frame, kInputString),
static_cast<RegExp::CallOrigin>(
frame_entry<intptr_t>(re_frame, kDirectCall)),
return_address, re_code,
frame_entry_address<Address>(re_frame, kInputString),
frame_entry_address<const byte*>(re_frame, kInputStart),
frame_entry_address<const byte*>(re_frame, kInputEnd));
}
......
......@@ -1198,7 +1198,8 @@ int RegExpMacroAssemblerX64::CheckStackGuardState(Address* return_address,
return NativeRegExpMacroAssembler::CheckStackGuardState(
frame_entry<Isolate*>(re_frame, kIsolate),
frame_entry<int>(re_frame, kStartIndex),
frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code,
static_cast<RegExp::CallOrigin>(frame_entry<int>(re_frame, kDirectCall)),
return_address, re_code,
frame_entry_address<Address>(re_frame, kInputString),
frame_entry_address<const byte*>(re_frame, kInputStart),
frame_entry_address<const byte*>(re_frame, kInputEnd));
......
......@@ -1298,7 +1298,8 @@ TEST(MacroAssembler) {
Handle<String> f1_16 = factory->NewStringFromTwoByte(
Vector<const uc16>(str1, 6)).ToHandleChecked();
CHECK(IrregexpInterpreter::Match(isolate, array, f1_16, captures, 0));
CHECK(IrregexpInterpreter::MatchForCallFromRuntime(isolate, array, f1_16,
captures, 5, 0));
CHECK_EQ(0, captures[0]);
CHECK_EQ(3, captures[1]);
CHECK_EQ(1, captures[2]);
......@@ -1309,7 +1310,8 @@ TEST(MacroAssembler) {
Handle<String> f2_16 = factory->NewStringFromTwoByte(
Vector<const uc16>(str2, 6)).ToHandleChecked();
CHECK(!IrregexpInterpreter::Match(isolate, array, f2_16, captures, 0));
CHECK(!IrregexpInterpreter::MatchForCallFromRuntime(isolate, array, f2_16,
captures, 5, 0));
CHECK_EQ(42, captures[0]);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment