Commit 3a0f407d authored by Patrick Thier, committed by Commit Bot

Reland "Reland "[regexp] Call the regexp interpreter without CEntry overhead""

This is a reland of c2ee4a79

Original change's description:
> Reland "[regexp] Call the regexp interpreter without CEntry overhead"
> 
> This is a reland of d4d28b73
> 
> Original change's description:
> > [regexp] Call the regexp interpreter without CEntry overhead
> > 
> > Previously all RegExp calls went through Runtime_RegExpExec when --regexp-interpret-all was set.
> > 
> > This CL avoids the runtime overhead by calling into the interpreter directly from the RegExpExec Builtin when the regular expression subject was already compiled to ByteCode (i.e. after the first call).
> > 
> > Bug: v8:8954
> > Change-Id: Iae9dfcef3370b772a05b2942305335d592f6f15a
> > Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1698391
> > Commit-Queue: Patrick Thier <pthier@google.com>
> > Reviewed-by: Jakob Gruber <jgruber@chromium.org>
> > Reviewed-by: Peter Marshall <petermarshall@chromium.org>
> > Cr-Commit-Position: refs/heads/master@{#62753}
> 
> Bug: v8:8954
> Change-Id: I1f0b6de9c6da65bcb582ddb41a37419116a5c510
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1706053
> Reviewed-by: Jakob Gruber <jgruber@chromium.org>
> Commit-Queue: Patrick Thier <pthier@google.com>
> Cr-Commit-Position: refs/heads/master@{#62794}

Bug: v8:8954
Change-Id: Ice77c05240f1fabd36bf97b8e789dd4c25a9718f
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1715451
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: Peter Marshall <petermarshall@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#62904}
parent 5bd57783
...@@ -335,7 +335,8 @@ TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal( ...@@ -335,7 +335,8 @@ TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal(
ToDirectStringAssembler to_direct(state(), string); ToDirectStringAssembler to_direct(state(), string);
TVARIABLE(HeapObject, var_result); TVARIABLE(HeapObject, var_result);
Label out(this), atom(this), runtime(this, Label::kDeferred); Label out(this), interpreted(this), atom(this),
runtime(this, Label::kDeferred);
// External constants. // External constants.
TNode<ExternalReference> isolate_address = TNode<ExternalReference> isolate_address =
...@@ -398,14 +399,6 @@ TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal( ...@@ -398,14 +399,6 @@ TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal(
&runtime); &runtime);
} }
// Ensure that a RegExp stack is allocated. This check is after branching off
// for ATOM regexps to avoid unnecessary trips to runtime.
{
TNode<IntPtrT> stack_size = UncheckedCast<IntPtrT>(
Load(MachineType::IntPtr(), regexp_stack_memory_size_address));
GotoIf(IntPtrEqual(stack_size, IntPtrZero()), &runtime);
}
// Unpack the string if possible. // Unpack the string if possible.
to_direct.TryToDirect(&runtime); to_direct.TryToDirect(&runtime);
...@@ -462,9 +455,16 @@ TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal( ...@@ -462,9 +455,16 @@ TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal(
#endif #endif
GotoIf(TaggedIsSmi(var_code.value()), &runtime); GotoIf(TaggedIsSmi(var_code.value()), &runtime);
GotoIfNot(IsCode(CAST(var_code.value())), &runtime); GotoIfNot(IsCode(CAST(var_code.value())), &interpreted);
TNode<Code> code = CAST(var_code.value()); TNode<Code> code = CAST(var_code.value());
// Ensure that a RegExp stack is allocated when using compiled Irregexp.
{
TNode<IntPtrT> stack_size = UncheckedCast<IntPtrT>(
Load(MachineType::IntPtr(), regexp_stack_memory_size_address));
GotoIf(IntPtrEqual(stack_size, IntPtrZero()), &runtime);
}
Label if_success(this), if_exception(this, Label::kDeferred); Label if_success(this), if_exception(this, Label::kDeferred);
{ {
IncrementCounter(isolate()->counters()->regexp_entry_native(), 1); IncrementCounter(isolate()->counters()->regexp_entry_native(), 1);
...@@ -553,6 +553,74 @@ TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal( ...@@ -553,6 +553,74 @@ TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal(
Goto(&runtime); Goto(&runtime);
} }
BIND(&interpreted);
{
TNode<ByteArray> byte_code = CAST(var_code.value());
IncrementCounter(isolate()->counters()->regexp_entry_native(), 1);
// Set up args for the final call into IrregexpInterpreter.
MachineType type_int32 = MachineType::Int32();
MachineType type_tagged = MachineType::AnyTagged();
MachineType type_ptr = MachineType::Pointer();
// Result: A IrregexpInterpreter::Result return code.
MachineType retval_type = type_int32;
// Argument 0: Pass current isolate address.
MachineType arg0_type = type_ptr;
TNode<ExternalReference> arg0 = isolate_address;
// Argument 1: Pattern ByteCode.
MachineType arg1_type = type_tagged;
TNode<ByteArray> arg1 = byte_code;
// Argument 2: Original subject string.
MachineType arg2_type = type_tagged;
TNode<String> arg2 = string;
// Argument 3: Static offsets vector buffer.
MachineType arg3_type = type_ptr;
TNode<ExternalReference> arg3 = static_offsets_vector_address;
// Argument 4: Length of static offsets vector buffer.
TNode<Smi> capture_count = CAST(UnsafeLoadFixedArrayElement(
data, JSRegExp::kIrregexpCaptureCountIndex));
TNode<Smi> register_count =
SmiShl(SmiAdd(capture_count, SmiConstant(1)), 1);
MachineType arg4_type = type_int32;
TNode<Int32T> arg4 = SmiToInt32(register_count);
// Argument 5: Previous index.
MachineType arg5_type = type_int32;
TNode<Int32T> arg5 = TruncateIntPtrToInt32(int_last_index);
TNode<ExternalReference> code_entry = ExternalConstant(
ExternalReference::re_match_for_call_from_js(isolate()));
TNode<Int32T> result = UncheckedCast<Int32T>(CallCFunction(
code_entry, retval_type, std::make_pair(arg0_type, arg0),
std::make_pair(arg1_type, arg1), std::make_pair(arg2_type, arg2),
std::make_pair(arg3_type, arg3), std::make_pair(arg4_type, arg4),
std::make_pair(arg5_type, arg5)));
TNode<IntPtrT> int_result = ChangeInt32ToIntPtr(result);
GotoIf(
IntPtrEqual(int_result, IntPtrConstant(RegExp::kInternalRegExpSuccess)),
&if_success);
GotoIf(
IntPtrEqual(int_result, IntPtrConstant(RegExp::kInternalRegExpFailure)),
&if_failure);
GotoIf(IntPtrEqual(int_result,
IntPtrConstant(RegExp::kInternalRegExpException)),
&if_exception);
CSA_ASSERT(this, IntPtrEqual(int_result,
IntPtrConstant(RegExp::kInternalRegExpRetry)));
Goto(&runtime);
}
BIND(&if_success); BIND(&if_success);
{ {
// Check that the last match info has space for the capture registers and // Check that the last match info has space for the capture registers and
......
...@@ -4,6 +4,8 @@ ...@@ -4,6 +4,8 @@
specific_include_rules = { specific_include_rules = {
"external-reference.cc": [ "external-reference.cc": [
# Required to call IrregexpInterpreter::MatchForCallFromJs from builtin.
"+src/regexp/regexp-interpreter.h",
"+src/regexp/regexp-macro-assembler-arch.h", "+src/regexp/regexp-macro-assembler-arch.h",
], ],
} }
...@@ -26,6 +26,7 @@ ...@@ -26,6 +26,7 @@
#include "src/logging/log.h" #include "src/logging/log.h"
#include "src/numbers/math-random.h" #include "src/numbers/math-random.h"
#include "src/objects/objects-inl.h" #include "src/objects/objects-inl.h"
#include "src/regexp/regexp-interpreter.h"
#include "src/regexp/regexp-macro-assembler-arch.h" #include "src/regexp/regexp-macro-assembler-arch.h"
#include "src/regexp/regexp-stack.h" #include "src/regexp/regexp-stack.h"
#include "src/strings/string-search.h" #include "src/strings/string-search.h"
...@@ -481,6 +482,9 @@ FUNCTION_REFERENCE_WITH_ISOLATE(re_check_stack_guard_state, re_stack_check_func) ...@@ -481,6 +482,9 @@ FUNCTION_REFERENCE_WITH_ISOLATE(re_check_stack_guard_state, re_stack_check_func)
FUNCTION_REFERENCE_WITH_ISOLATE(re_grow_stack, FUNCTION_REFERENCE_WITH_ISOLATE(re_grow_stack,
NativeRegExpMacroAssembler::GrowStack) NativeRegExpMacroAssembler::GrowStack)
FUNCTION_REFERENCE_WITH_ISOLATE(re_match_for_call_from_js,
IrregexpInterpreter::MatchForCallFromJs)
FUNCTION_REFERENCE_WITH_ISOLATE( FUNCTION_REFERENCE_WITH_ISOLATE(
re_case_insensitive_compare_uc16, re_case_insensitive_compare_uc16,
NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16) NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16)
......
...@@ -82,6 +82,7 @@ class StatsCounter; ...@@ -82,6 +82,7 @@ class StatsCounter;
V(re_check_stack_guard_state, \ V(re_check_stack_guard_state, \
"RegExpMacroAssembler*::CheckStackGuardState()") \ "RegExpMacroAssembler*::CheckStackGuardState()") \
V(re_grow_stack, "NativeRegExpMacroAssembler::GrowStack()") \ V(re_grow_stack, "NativeRegExpMacroAssembler::GrowStack()") \
V(re_match_for_call_from_js, "IrregexpInterpreter::MatchForCallFromJs") \
V(re_word_character_map, "NativeRegExpMacroAssembler::word_character_map") V(re_word_character_map, "NativeRegExpMacroAssembler::word_character_map")
#define EXTERNAL_REFERENCE_LIST(V) \ #define EXTERNAL_REFERENCE_LIST(V) \
......
...@@ -1109,7 +1109,8 @@ int RegExpMacroAssemblerARM::CheckStackGuardState(Address* return_address, ...@@ -1109,7 +1109,8 @@ int RegExpMacroAssemblerARM::CheckStackGuardState(Address* return_address,
return NativeRegExpMacroAssembler::CheckStackGuardState( return NativeRegExpMacroAssembler::CheckStackGuardState(
frame_entry<Isolate*>(re_frame, kIsolate), frame_entry<Isolate*>(re_frame, kIsolate),
frame_entry<int>(re_frame, kStartIndex), frame_entry<int>(re_frame, kStartIndex),
frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code, static_cast<RegExp::CallOrigin>(frame_entry<int>(re_frame, kDirectCall)),
return_address, re_code,
frame_entry_address<Address>(re_frame, kInputString), frame_entry_address<Address>(re_frame, kInputString),
frame_entry_address<const byte*>(re_frame, kInputStart), frame_entry_address<const byte*>(re_frame, kInputStart),
frame_entry_address<const byte*>(re_frame, kInputEnd)); frame_entry_address<const byte*>(re_frame, kInputEnd));
......
...@@ -1326,8 +1326,9 @@ int RegExpMacroAssemblerARM64::CheckStackGuardState( ...@@ -1326,8 +1326,9 @@ int RegExpMacroAssemblerARM64::CheckStackGuardState(
Code re_code = Code::cast(Object(raw_code)); Code re_code = Code::cast(Object(raw_code));
return NativeRegExpMacroAssembler::CheckStackGuardState( return NativeRegExpMacroAssembler::CheckStackGuardState(
frame_entry<Isolate*>(re_frame, kIsolate), start_index, frame_entry<Isolate*>(re_frame, kIsolate), start_index,
frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code, static_cast<RegExp::CallOrigin>(frame_entry<int>(re_frame, kDirectCall)),
frame_entry_address<Address>(re_frame, kInput), input_start, input_end); return_address, re_code, frame_entry_address<Address>(re_frame, kInput),
input_start, input_end);
} }
......
...@@ -1120,7 +1120,8 @@ int RegExpMacroAssemblerIA32::CheckStackGuardState(Address* return_address, ...@@ -1120,7 +1120,8 @@ int RegExpMacroAssemblerIA32::CheckStackGuardState(Address* return_address,
return NativeRegExpMacroAssembler::CheckStackGuardState( return NativeRegExpMacroAssembler::CheckStackGuardState(
frame_entry<Isolate*>(re_frame, kIsolate), frame_entry<Isolate*>(re_frame, kIsolate),
frame_entry<int>(re_frame, kStartIndex), frame_entry<int>(re_frame, kStartIndex),
frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code, static_cast<RegExp::CallOrigin>(frame_entry<int>(re_frame, kDirectCall)),
return_address, re_code,
frame_entry_address<Address>(re_frame, kInputString), frame_entry_address<Address>(re_frame, kInputString),
frame_entry_address<const byte*>(re_frame, kInputStart), frame_entry_address<const byte*>(re_frame, kInputStart),
frame_entry_address<const byte*>(re_frame, kInputEnd)); frame_entry_address<const byte*>(re_frame, kInputEnd));
......
...@@ -1176,7 +1176,8 @@ int RegExpMacroAssemblerMIPS::CheckStackGuardState(Address* return_address, ...@@ -1176,7 +1176,8 @@ int RegExpMacroAssemblerMIPS::CheckStackGuardState(Address* return_address,
return NativeRegExpMacroAssembler::CheckStackGuardState( return NativeRegExpMacroAssembler::CheckStackGuardState(
frame_entry<Isolate*>(re_frame, kIsolate), frame_entry<Isolate*>(re_frame, kIsolate),
frame_entry<int>(re_frame, kStartIndex), frame_entry<int>(re_frame, kStartIndex),
frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code, static_cast<RegExp::CallOrigin>(frame_entry<int>(re_frame, kDirectCall)),
return_address, re_code,
frame_entry_address<Address>(re_frame, kInputString), frame_entry_address<Address>(re_frame, kInputString),
frame_entry_address<const byte*>(re_frame, kInputStart), frame_entry_address<const byte*>(re_frame, kInputStart),
frame_entry_address<const byte*>(re_frame, kInputEnd)); frame_entry_address<const byte*>(re_frame, kInputEnd));
......
...@@ -1213,7 +1213,9 @@ int64_t RegExpMacroAssemblerMIPS::CheckStackGuardState(Address* return_address, ...@@ -1213,7 +1213,9 @@ int64_t RegExpMacroAssemblerMIPS::CheckStackGuardState(Address* return_address,
return NativeRegExpMacroAssembler::CheckStackGuardState( return NativeRegExpMacroAssembler::CheckStackGuardState(
frame_entry<Isolate*>(re_frame, kIsolate), frame_entry<Isolate*>(re_frame, kIsolate),
static_cast<int>(frame_entry<int64_t>(re_frame, kStartIndex)), static_cast<int>(frame_entry<int64_t>(re_frame, kStartIndex)),
frame_entry<int64_t>(re_frame, kDirectCall) == 1, return_address, re_code, static_cast<RegExp::CallOrigin>(
frame_entry<int64_t>(re_frame, kDirectCall)),
return_address, re_code,
frame_entry_address<Address>(re_frame, kInputString), frame_entry_address<Address>(re_frame, kInputString),
frame_entry_address<const byte*>(re_frame, kInputStart), frame_entry_address<const byte*>(re_frame, kInputStart),
frame_entry_address<const byte*>(re_frame, kInputEnd)); frame_entry_address<const byte*>(re_frame, kInputEnd));
......
...@@ -1177,8 +1177,10 @@ int RegExpMacroAssemblerPPC::CheckStackGuardState(Address* return_address, ...@@ -1177,8 +1177,10 @@ int RegExpMacroAssemblerPPC::CheckStackGuardState(Address* return_address,
return NativeRegExpMacroAssembler::CheckStackGuardState( return NativeRegExpMacroAssembler::CheckStackGuardState(
frame_entry<Isolate*>(re_frame, kIsolate), frame_entry<Isolate*>(re_frame, kIsolate),
frame_entry<intptr_t>(re_frame, kStartIndex), frame_entry<intptr_t>(re_frame, kStartIndex),
frame_entry<intptr_t>(re_frame, kDirectCall) == 1, return_address, static_cast<RegExp::CallOrigin>(
re_code, frame_entry_address<Address>(re_frame, kInputString), frame_entry<intptr_t>(re_frame, kDirectCall)),
return_address, re_code,
frame_entry_address<Address>(re_frame, kInputString),
frame_entry_address<const byte*>(re_frame, kInputStart), frame_entry_address<const byte*>(re_frame, kInputStart),
frame_entry_address<const byte*>(re_frame, kInputEnd)); frame_entry_address<const byte*>(re_frame, kInputEnd));
} }
......
...@@ -141,7 +141,9 @@ class BacktrackStack { ...@@ -141,7 +141,9 @@ class BacktrackStack {
namespace { namespace {
IrregexpInterpreter::Result StackOverflow(Isolate* isolate) { IrregexpInterpreter::Result StackOverflow(Isolate* isolate,
RegExp::CallOrigin call_origin) {
CHECK(call_origin == RegExp::CallOrigin::kFromRuntime);
// We abort interpreter execution after the stack overflow is thrown, and thus // We abort interpreter execution after the stack overflow is thrown, and thus
// allow allocation here despite the outer DisallowHeapAllocationScope. // allow allocation here despite the outer DisallowHeapAllocationScope.
AllowHeapAllocation yes_gc; AllowHeapAllocation yes_gc;
...@@ -149,72 +151,97 @@ IrregexpInterpreter::Result StackOverflow(Isolate* isolate) { ...@@ -149,72 +151,97 @@ IrregexpInterpreter::Result StackOverflow(Isolate* isolate) {
return IrregexpInterpreter::EXCEPTION; return IrregexpInterpreter::EXCEPTION;
} }
// Runs all pending interrupts. Callers must update unhandlified object template <typename Char>
// references after this function completes. void UpdateCodeAndSubjectReferences(
IrregexpInterpreter::Result HandleInterrupts(Isolate* isolate, Isolate* isolate, Handle<ByteArray> code_array,
Handle<String> subject_string) { Handle<String> subject_string, ByteArray* code_array_out,
const byte** code_base_out, const byte** pc_out, String* subject_string_out,
Vector<const Char>* subject_string_vector_out) {
DisallowHeapAllocation no_gc; DisallowHeapAllocation no_gc;
StackLimitCheck check(isolate); if (*code_base_out != code_array->GetDataStartAddress()) {
if (check.JsHasOverflowed()) { *code_array_out = *code_array;
return StackOverflow(isolate); // A real stack overflow. const intptr_t pc_offset = *pc_out - *code_base_out;
DCHECK_GT(pc_offset, 0);
*code_base_out = code_array->GetDataStartAddress();
*pc_out = *code_base_out + pc_offset;
} }
// Handle interrupts if any exist. DCHECK(subject_string->IsFlat());
if (check.InterruptRequested()) { *subject_string_out = *subject_string;
const bool was_one_byte = *subject_string_vector_out = subject_string->GetCharVector<Char>(no_gc);
String::IsOneByteRepresentationUnderneath(*subject_string); }
Object result; // Runs all pending interrupts and updates unhandlified object references if
{ // necessary.
AllowHeapAllocation yes_gc; template <typename Char>
result = isolate->stack_guard()->HandleInterrupts(); IrregexpInterpreter::Result HandleInterrupts(
} Isolate* isolate, RegExp::CallOrigin call_origin, ByteArray* code_array_out,
String* subject_string_out, const byte** code_base_out,
Vector<const Char>* subject_string_vector_out, const byte** pc_out) {
DisallowHeapAllocation no_gc;
if (result.IsException(isolate)) { StackLimitCheck check(isolate);
bool js_has_overflowed = check.JsHasOverflowed();
if (call_origin == RegExp::CallOrigin::kFromJs) {
// Direct calls from JavaScript can be interrupted in two ways:
// 1. A real stack overflow, in which case we let the caller throw the
// exception.
// 2. The stack guard was used to interrupt execution for another purpose,
// forcing the call through the runtime system.
if (js_has_overflowed) {
return IrregexpInterpreter::EXCEPTION; return IrregexpInterpreter::EXCEPTION;
} } else if (check.InterruptRequested()) {
// If we changed between a LATIN1 and a UC16 string, we need to restart
// regexp matching with the appropriate template instantiation of RawMatch.
if (String::IsOneByteRepresentationUnderneath(*subject_string) !=
was_one_byte) {
return IrregexpInterpreter::RETRY; return IrregexpInterpreter::RETRY;
} }
} else {
DCHECK(call_origin == RegExp::CallOrigin::kFromRuntime);
// Prepare for possible GC.
HandleScope handles(isolate);
Handle<ByteArray> code_handle(*code_array_out, isolate);
Handle<String> subject_handle(*subject_string_out, isolate);
if (js_has_overflowed) {
return StackOverflow(isolate, call_origin);
} else if (check.InterruptRequested()) {
const bool was_one_byte =
String::IsOneByteRepresentationUnderneath(*subject_string_out);
Object result;
{
AllowHeapAllocation yes_gc;
result = isolate->stack_guard()->HandleInterrupts();
}
if (result.IsException(isolate)) {
return IrregexpInterpreter::EXCEPTION;
}
// If we changed between a LATIN1 and a UC16 string, we need to restart
// regexp matching with the appropriate template instantiation of
// RawMatch.
if (String::IsOneByteRepresentationUnderneath(*subject_handle) !=
was_one_byte) {
return IrregexpInterpreter::RETRY;
}
UpdateCodeAndSubjectReferences(
isolate, code_handle, subject_handle, code_array_out, code_base_out,
pc_out, subject_string_out, subject_string_vector_out);
}
} }
return IrregexpInterpreter::SUCCESS; return IrregexpInterpreter::SUCCESS;
} }
template <typename Char> template <typename Char>
void UpdateCodeAndSubjectReferences(Isolate* isolate, IrregexpInterpreter::Result RawMatch(Isolate* isolate, ByteArray code_array,
Handle<ByteArray> code_array, String subject_string,
Handle<String> subject_string,
const byte** code_base_out,
const byte** pc_out,
Vector<const Char>* subject_string_out) {
DisallowHeapAllocation no_gc;
if (*code_base_out != code_array->GetDataStartAddress()) {
const intptr_t pc_offset = *pc_out - *code_base_out;
DCHECK_GT(pc_offset, 0);
*code_base_out = code_array->GetDataStartAddress();
*pc_out = *code_base_out + pc_offset;
}
DCHECK(subject_string->IsFlat());
*subject_string_out = subject_string->GetCharVector<Char>(no_gc);
}
template <typename Char>
IrregexpInterpreter::Result RawMatch(Isolate* isolate,
Handle<ByteArray> code_array,
Handle<String> subject_string,
Vector<const Char> subject, int* registers, Vector<const Char> subject, int* registers,
int current, uint32_t current_char) { int current, uint32_t current_char,
RegExp::CallOrigin call_origin) {
DisallowHeapAllocation no_gc; DisallowHeapAllocation no_gc;
const byte* pc = code_array->GetDataStartAddress(); const byte* pc = code_array.GetDataStartAddress();
const byte* code_base = pc; const byte* code_base = pc;
BacktrackStack backtrack_stack; BacktrackStack backtrack_stack;
...@@ -280,12 +307,10 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate, ...@@ -280,12 +307,10 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate,
} }
BYTECODE(POP_BT) { BYTECODE(POP_BT) {
IrregexpInterpreter::Result return_code = IrregexpInterpreter::Result return_code =
HandleInterrupts(isolate, subject_string); HandleInterrupts(isolate, call_origin, &code_array, &subject_string,
&code_base, &subject, &pc);
if (return_code != IrregexpInterpreter::SUCCESS) return return_code; if (return_code != IrregexpInterpreter::SUCCESS) return return_code;
UpdateCodeAndSubjectReferences(isolate, code_array, subject_string,
&code_base, &pc, &subject);
pc = code_base + backtrack_stack.pop(); pc = code_base + backtrack_stack.pop();
break; break;
} }
...@@ -649,32 +674,67 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate, ...@@ -649,32 +674,67 @@ IrregexpInterpreter::Result RawMatch(Isolate* isolate,
// static // static
IrregexpInterpreter::Result IrregexpInterpreter::Match( IrregexpInterpreter::Result IrregexpInterpreter::Match(
Isolate* isolate, Handle<ByteArray> code_array, Isolate* isolate, ByteArray code_array, String subject_string,
Handle<String> subject_string, int* registers, int start_position) { int* registers, int registers_length, int start_position,
DCHECK(subject_string->IsFlat()); RegExp::CallOrigin call_origin) {
DCHECK(subject_string.IsFlat());
// Note: Heap allocation *is* allowed in two situations: // Note: Heap allocation *is* allowed in two situations if calling from
// Runtime:
// 1. When creating & throwing a stack overflow exception. The interpreter // 1. When creating & throwing a stack overflow exception. The interpreter
// aborts afterwards, and thus possible-moved objects are never used. // aborts afterwards, and thus possible-moved objects are never used.
// 2. When handling interrupts. We manually relocate unhandlified references // 2. When handling interrupts. We manually relocate unhandlified references
// after interrupts have run. // after interrupts have run.
DisallowHeapAllocation no_gc; DisallowHeapAllocation no_gc;
// Reset registers to -1 (=undefined).
// This is necessary because registers are only written when a
// capture group matched.
// Resetting them ensures that previous matches are cleared.
memset(registers, -1, sizeof(registers[0]) * registers_length);
uc16 previous_char = '\n'; uc16 previous_char = '\n';
String::FlatContent subject_content = subject_string->GetFlatContent(no_gc); String::FlatContent subject_content = subject_string.GetFlatContent(no_gc);
if (subject_content.IsOneByte()) { if (subject_content.IsOneByte()) {
Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector(); Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
if (start_position != 0) previous_char = subject_vector[start_position - 1]; if (start_position != 0) previous_char = subject_vector[start_position - 1];
return RawMatch(isolate, code_array, subject_string, subject_vector, return RawMatch(isolate, code_array, subject_string, subject_vector,
registers, start_position, previous_char); registers, start_position, previous_char, call_origin);
} else { } else {
DCHECK(subject_content.IsTwoByte()); DCHECK(subject_content.IsTwoByte());
Vector<const uc16> subject_vector = subject_content.ToUC16Vector(); Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
if (start_position != 0) previous_char = subject_vector[start_position - 1]; if (start_position != 0) previous_char = subject_vector[start_position - 1];
return RawMatch(isolate, code_array, subject_string, subject_vector, return RawMatch(isolate, code_array, subject_string, subject_vector,
registers, start_position, previous_char); registers, start_position, previous_char, call_origin);
} }
} }
// Entry point used by the RegExpExecInternal builtin, which reaches this
// method through an external reference. |code| and |subject| arrive as raw
// addresses and are reinterpreted here as a ByteArray and a String before
// being handed to Match() with the kFromJs call origin.
IrregexpInterpreter::Result IrregexpInterpreter::MatchForCallFromJs(
    Isolate* isolate, Address code, Address subject, int* registers,
    int32_t registers_length, int32_t start_position) {
  DCHECK_NOT_NULL(isolate);
  DCHECK_NOT_NULL(registers);

  // Heap allocation and JavaScript execution are both disallowed for the
  // duration of this call.
  DisallowHeapAllocation no_gc;
  DisallowJavascriptExecution no_js(isolate);

  // Recover the typed objects from the raw addresses passed by the builtin.
  String input = String::cast(Object(subject));
  ByteArray bytecode = ByteArray::cast(Object(code));

  const RegExp::CallOrigin origin = RegExp::CallOrigin::kFromJs;
  return Match(isolate, bytecode, input, registers, registers_length,
               start_position, origin);
}
// Handle-based entry point: dereferences the handles and forwards to Match()
// with the kFromRuntime call origin.
IrregexpInterpreter::Result IrregexpInterpreter::MatchForCallFromRuntime(
    Isolate* isolate, Handle<ByteArray> code_array,
    Handle<String> subject_string, int* registers, int registers_length,
    int start_position) {
  const RegExp::CallOrigin origin = RegExp::CallOrigin::kFromRuntime;
  ByteArray raw_code = *code_array;
  String raw_subject = *subject_string;
  return Match(isolate, raw_code, raw_subject, registers, registers_length,
               start_position, origin);
}
} // namespace internal } // namespace internal
} // namespace v8 } // namespace v8
...@@ -12,7 +12,7 @@ ...@@ -12,7 +12,7 @@
namespace v8 { namespace v8 {
namespace internal { namespace internal {
class V8_EXPORT_PRIVATE IrregexpInterpreter { class V8_EXPORT_PRIVATE IrregexpInterpreter : public AllStatic {
public: public:
enum Result { enum Result {
FAILURE = RegExp::kInternalRegExpFailure, FAILURE = RegExp::kInternalRegExpFailure,
...@@ -21,10 +21,26 @@ class V8_EXPORT_PRIVATE IrregexpInterpreter { ...@@ -21,10 +21,26 @@ class V8_EXPORT_PRIVATE IrregexpInterpreter {
RETRY = RegExp::kInternalRegExpRetry, RETRY = RegExp::kInternalRegExpRetry,
}; };
// The caller is responsible for initializing registers before each call. // In case a StackOverflow occurs, a StackOverflowException is created and
static Result Match(Isolate* isolate, Handle<ByteArray> code_array, // EXCEPTION is returned.
Handle<String> subject_string, int* registers, static Result MatchForCallFromRuntime(Isolate* isolate,
int start_position); Handle<ByteArray> code_array,
Handle<String> subject_string,
int* registers, int registers_length,
int start_position);
// In case a StackOverflow occurs, EXCEPTION is returned. The caller is
// responsible for creating the exception.
static Result MatchForCallFromJs(Isolate* isolate, Address code,
Address subject, int* registers,
int32_t registers_length,
int32_t start_position);
private:
static Result Match(Isolate* isolate, ByteArray code_array,
String subject_string, int* registers,
int registers_length, int start_position,
RegExp::CallOrigin call_origin);
}; };
} // namespace internal } // namespace internal
......
...@@ -129,32 +129,46 @@ const byte* NativeRegExpMacroAssembler::StringCharacterPosition( ...@@ -129,32 +129,46 @@ const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
} }
} }
// This method may only be called after an interrupt.
int NativeRegExpMacroAssembler::CheckStackGuardState( int NativeRegExpMacroAssembler::CheckStackGuardState(
Isolate* isolate, int start_index, bool is_direct_call, Isolate* isolate, int start_index, RegExp::CallOrigin call_origin,
Address* return_address, Code re_code, Address* subject, Address* return_address, Code re_code, Address* subject,
const byte** input_start, const byte** input_end) { const byte** input_start, const byte** input_end) {
DisallowHeapAllocation no_gc; DisallowHeapAllocation no_gc;
DCHECK(re_code.raw_instruction_start() <= *return_address); DCHECK(re_code.raw_instruction_start() <= *return_address);
DCHECK(*return_address <= re_code.raw_instruction_end()); DCHECK(*return_address <= re_code.raw_instruction_end());
int return_value = 0;
// Prepare for possible GC.
HandleScope handles(isolate);
Handle<Code> code_handle(re_code, isolate);
Handle<String> subject_handle(String::cast(Object(*subject)), isolate);
bool is_one_byte = String::IsOneByteRepresentationUnderneath(*subject_handle);
StackLimitCheck check(isolate); StackLimitCheck check(isolate);
bool js_has_overflowed = check.JsHasOverflowed(); bool js_has_overflowed = check.JsHasOverflowed();
if (is_direct_call) { if (call_origin == RegExp::CallOrigin::kFromJs) {
// Direct calls from JavaScript can be interrupted in two ways: // Direct calls from JavaScript can be interrupted in two ways:
// 1. A real stack overflow, in which case we let the caller throw the // 1. A real stack overflow, in which case we let the caller throw the
// exception. // exception.
// 2. The stack guard was used to interrupt execution for another purpose, // 2. The stack guard was used to interrupt execution for another purpose,
// forcing the call through the runtime system. // forcing the call through the runtime system.
return_value = js_has_overflowed ? EXCEPTION : RETRY;
} else if (js_has_overflowed) { // Bug(v8:9540) Investigate why this method is called from JS although no
// stackoverflow or interrupt is pending on ARM64. We return 0 in this case
// to continue execution normally.
if (js_has_overflowed) {
return EXCEPTION;
} else if (check.InterruptRequested()) {
return RETRY;
} else {
return 0;
}
}
DCHECK(call_origin == RegExp::CallOrigin::kFromRuntime);
// Prepare for possible GC.
HandleScope handles(isolate);
Handle<Code> code_handle(re_code, isolate);
Handle<String> subject_handle(String::cast(Object(*subject)), isolate);
bool is_one_byte = String::IsOneByteRepresentationUnderneath(*subject_handle);
int return_value = 0;
if (js_has_overflowed) {
AllowHeapAllocation yes_gc; AllowHeapAllocation yes_gc;
isolate->StackOverflow(); isolate->StackOverflow();
return_value = EXCEPTION; return_value = EXCEPTION;
......
...@@ -235,9 +235,9 @@ class NativeRegExpMacroAssembler: public RegExpMacroAssembler { ...@@ -235,9 +235,9 @@ class NativeRegExpMacroAssembler: public RegExpMacroAssembler {
String subject, int start_index, const DisallowHeapAllocation& no_gc); String subject, int start_index, const DisallowHeapAllocation& no_gc);
static int CheckStackGuardState(Isolate* isolate, int start_index, static int CheckStackGuardState(Isolate* isolate, int start_index,
bool is_direct_call, Address* return_address, RegExp::CallOrigin call_origin,
Code re_code, Address* subject, Address* return_address, Code re_code,
const byte** input_start, Address* subject, const byte** input_start,
const byte** input_end); const byte** input_end);
// Byte map of one byte characters with a 0xff if the character is a word // Byte map of one byte characters with a 0xff if the character is a word
......
...@@ -478,17 +478,13 @@ int RegExpImpl::IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp, ...@@ -478,17 +478,13 @@ int RegExpImpl::IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
int32_t* raw_output = &output[number_of_capture_registers]; int32_t* raw_output = &output[number_of_capture_registers];
do { do {
// We do not touch the actual capture result registers until we know there
// has been a match so that we can use those capture results to set the
// last match info.
for (int i = number_of_capture_registers - 1; i >= 0; i--) {
raw_output[i] = -1;
}
Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_one_byte), Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_one_byte),
isolate); isolate);
IrregexpInterpreter::Result result = IrregexpInterpreter::Match( IrregexpInterpreter::Result result =
isolate, byte_codes, subject, raw_output, index); IrregexpInterpreter::MatchForCallFromRuntime(
isolate, byte_codes, subject, raw_output,
number_of_capture_registers, index);
DCHECK_IMPLIES(result == IrregexpInterpreter::EXCEPTION, DCHECK_IMPLIES(result == IrregexpInterpreter::EXCEPTION,
isolate->has_pending_exception()); isolate->has_pending_exception());
......
...@@ -61,6 +61,11 @@ class RegExp final : public AllStatic { ...@@ -61,6 +61,11 @@ class RegExp final : public AllStatic {
Isolate* isolate, Handle<JSRegExp> re, Handle<String> pattern, Isolate* isolate, Handle<JSRegExp> re, Handle<String> pattern,
JSRegExp::Flags flags); JSRegExp::Flags flags);
// Identifies where a regexp execution was invoked from. The value is passed
// to stack-guard handling (CheckStackGuardState) as `call_origin`.
// The underlying integer values are significant: callers obtain a CallOrigin
// by static_cast-ing the integer stored in the regexp frame's kDirectCall
// slot, so the enumerators must keep matching what is written to that slot.
enum CallOrigin : int {
// Value 0 of the frame slot. NOTE(review): presumably the call came through
// the runtime (C++) entry point — confirm against the callers.
kFromRuntime = 0,
// Value 1 of the frame slot; this is the value the previous boolean
// `is_direct_call` check (`== 1`) tested for.
kFromJs = 1,
};
// See ECMA-262 section 15.10.6.2. // See ECMA-262 section 15.10.6.2.
// This function calls the garbage collector if necessary. // This function calls the garbage collector if necessary.
V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Exec( V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Exec(
...@@ -73,7 +78,7 @@ class RegExp final : public AllStatic { ...@@ -73,7 +78,7 @@ class RegExp final : public AllStatic {
static constexpr int kInternalRegExpException = -1; static constexpr int kInternalRegExpException = -1;
static constexpr int kInternalRegExpRetry = -2; static constexpr int kInternalRegExpRetry = -2;
enum IrregexpResult { enum IrregexpResult : int32_t {
RE_FAILURE = kInternalRegExpFailure, RE_FAILURE = kInternalRegExpFailure,
RE_SUCCESS = kInternalRegExpSuccess, RE_SUCCESS = kInternalRegExpSuccess,
RE_EXCEPTION = kInternalRegExpException, RE_EXCEPTION = kInternalRegExpException,
......
...@@ -1120,8 +1120,10 @@ int RegExpMacroAssemblerS390::CheckStackGuardState(Address* return_address, ...@@ -1120,8 +1120,10 @@ int RegExpMacroAssemblerS390::CheckStackGuardState(Address* return_address,
return NativeRegExpMacroAssembler::CheckStackGuardState( return NativeRegExpMacroAssembler::CheckStackGuardState(
frame_entry<Isolate*>(re_frame, kIsolate), frame_entry<Isolate*>(re_frame, kIsolate),
frame_entry<intptr_t>(re_frame, kStartIndex), frame_entry<intptr_t>(re_frame, kStartIndex),
frame_entry<intptr_t>(re_frame, kDirectCall) == 1, return_address, static_cast<RegExp::CallOrigin>(
re_code, frame_entry_address<Address>(re_frame, kInputString), frame_entry<intptr_t>(re_frame, kDirectCall)),
return_address, re_code,
frame_entry_address<Address>(re_frame, kInputString),
frame_entry_address<const byte*>(re_frame, kInputStart), frame_entry_address<const byte*>(re_frame, kInputStart),
frame_entry_address<const byte*>(re_frame, kInputEnd)); frame_entry_address<const byte*>(re_frame, kInputEnd));
} }
......
...@@ -1198,7 +1198,8 @@ int RegExpMacroAssemblerX64::CheckStackGuardState(Address* return_address, ...@@ -1198,7 +1198,8 @@ int RegExpMacroAssemblerX64::CheckStackGuardState(Address* return_address,
return NativeRegExpMacroAssembler::CheckStackGuardState( return NativeRegExpMacroAssembler::CheckStackGuardState(
frame_entry<Isolate*>(re_frame, kIsolate), frame_entry<Isolate*>(re_frame, kIsolate),
frame_entry<int>(re_frame, kStartIndex), frame_entry<int>(re_frame, kStartIndex),
frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code, static_cast<RegExp::CallOrigin>(frame_entry<int>(re_frame, kDirectCall)),
return_address, re_code,
frame_entry_address<Address>(re_frame, kInputString), frame_entry_address<Address>(re_frame, kInputString),
frame_entry_address<const byte*>(re_frame, kInputStart), frame_entry_address<const byte*>(re_frame, kInputStart),
frame_entry_address<const byte*>(re_frame, kInputEnd)); frame_entry_address<const byte*>(re_frame, kInputEnd));
......
...@@ -1298,7 +1298,8 @@ TEST(MacroAssembler) { ...@@ -1298,7 +1298,8 @@ TEST(MacroAssembler) {
Handle<String> f1_16 = factory->NewStringFromTwoByte( Handle<String> f1_16 = factory->NewStringFromTwoByte(
Vector<const uc16>(str1, 6)).ToHandleChecked(); Vector<const uc16>(str1, 6)).ToHandleChecked();
CHECK(IrregexpInterpreter::Match(isolate, array, f1_16, captures, 0)); CHECK(IrregexpInterpreter::MatchForCallFromRuntime(isolate, array, f1_16,
captures, 5, 0));
CHECK_EQ(0, captures[0]); CHECK_EQ(0, captures[0]);
CHECK_EQ(3, captures[1]); CHECK_EQ(3, captures[1]);
CHECK_EQ(1, captures[2]); CHECK_EQ(1, captures[2]);
...@@ -1309,7 +1310,8 @@ TEST(MacroAssembler) { ...@@ -1309,7 +1310,8 @@ TEST(MacroAssembler) {
Handle<String> f2_16 = factory->NewStringFromTwoByte( Handle<String> f2_16 = factory->NewStringFromTwoByte(
Vector<const uc16>(str2, 6)).ToHandleChecked(); Vector<const uc16>(str2, 6)).ToHandleChecked();
CHECK(!IrregexpInterpreter::Match(isolate, array, f2_16, captures, 0)); CHECK(!IrregexpInterpreter::MatchForCallFromRuntime(isolate, array, f2_16,
captures, 5, 0));
CHECK_EQ(42, captures[0]); CHECK_EQ(42, captures[0]);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment