Commit c2ee4a79 authored by Patrick Thier's avatar Patrick Thier Committed by Commit Bot

Reland "[regexp] Call the regexp interpreter without CEntry overhead"

This is a reland of d4d28b73

Original change's description:
> [regexp] Call the regexp interpreter without CEntry overhead
> 
> Previously all RegExp calls went through Runtime_RegExpExec when --regexp-interpret-all was set.
> 
> This CL avoids the runtime overhead by calling into the interpreter directly from the RegExpExec Builtin when the regular expression subject was already compiled to ByteCode (i.e. after the first call).
> 
> Bug: v8:8954
> Change-Id: Iae9dfcef3370b772a05b2942305335d592f6f15a
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1698391
> Commit-Queue: Patrick Thier <pthier@google.com>
> Reviewed-by: Jakob Gruber <jgruber@chromium.org>
> Reviewed-by: Peter Marshall <petermarshall@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#62753}

Bug: v8:8954
Change-Id: I1f0b6de9c6da65bcb582ddb41a37419116a5c510
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1706053Reviewed-by: 's avatarJakob Gruber <jgruber@chromium.org>
Commit-Queue: Patrick Thier <pthier@google.com>
Cr-Commit-Position: refs/heads/master@{#62794}
parent 9165e22c
......@@ -335,7 +335,8 @@ TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal(
ToDirectStringAssembler to_direct(state(), string);
TVARIABLE(HeapObject, var_result);
Label out(this), atom(this), runtime(this, Label::kDeferred);
Label out(this), interpreted(this), atom(this),
runtime(this, Label::kDeferred);
// External constants.
TNode<ExternalReference> isolate_address =
......@@ -398,14 +399,6 @@ TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal(
&runtime);
}
// Ensure that a RegExp stack is allocated. This check is after branching off
// for ATOM regexps to avoid unnecessary trips to runtime.
{
TNode<IntPtrT> stack_size = UncheckedCast<IntPtrT>(
Load(MachineType::IntPtr(), regexp_stack_memory_size_address));
GotoIf(IntPtrEqual(stack_size, IntPtrZero()), &runtime);
}
// Unpack the string if possible.
to_direct.TryToDirect(&runtime);
......@@ -462,9 +455,16 @@ TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal(
#endif
GotoIf(TaggedIsSmi(var_code.value()), &runtime);
GotoIfNot(IsCode(CAST(var_code.value())), &runtime);
GotoIfNot(IsCode(CAST(var_code.value())), &interpreted);
TNode<Code> code = CAST(var_code.value());
// Ensure that a RegExp stack is allocated when using compiled Irregexp.
{
TNode<IntPtrT> stack_size = UncheckedCast<IntPtrT>(
Load(MachineType::IntPtr(), regexp_stack_memory_size_address));
GotoIf(IntPtrEqual(stack_size, IntPtrZero()), &runtime);
}
Label if_success(this), if_exception(this, Label::kDeferred);
{
IncrementCounter(isolate()->counters()->regexp_entry_native(), 1);
......@@ -553,6 +553,74 @@ TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal(
Goto(&runtime);
}
BIND(&interpreted);
{
TNode<ByteArray> byte_code = CAST(var_code.value());
IncrementCounter(isolate()->counters()->regexp_entry_native(), 1);
// Set up args for the final call into IrregexpInterpreter.
MachineType type_int32 = MachineType::Int32();
MachineType type_tagged = MachineType::AnyTagged();
MachineType type_ptr = MachineType::Pointer();
// Result: A IrregexpInterpreter::Result return code.
MachineType retval_type = type_int32;
// Argument 0: Pass current isolate address.
MachineType arg0_type = type_ptr;
TNode<ExternalReference> arg0 = isolate_address;
// Argument 1: Pattern ByteCode.
MachineType arg1_type = type_tagged;
TNode<ByteArray> arg1 = byte_code;
// Argument 2: Original subject string.
MachineType arg2_type = type_tagged;
TNode<String> arg2 = string;
// Argument 3: Static offsets vector buffer.
MachineType arg3_type = type_ptr;
TNode<ExternalReference> arg3 = static_offsets_vector_address;
// Argument 4: Length of static offsets vector buffer.
TNode<Smi> capture_count = CAST(UnsafeLoadFixedArrayElement(
data, JSRegExp::kIrregexpCaptureCountIndex));
TNode<Smi> register_count =
SmiShl(SmiAdd(capture_count, SmiConstant(1)), 1);
MachineType arg4_type = type_int32;
TNode<Int32T> arg4 = SmiToInt32(register_count);
// Argument 5: Previous index.
MachineType arg5_type = type_int32;
TNode<Int32T> arg5 = TruncateIntPtrToInt32(int_last_index);
TNode<ExternalReference> code_entry = ExternalConstant(
ExternalReference::re_match_for_call_from_js(isolate()));
TNode<Int32T> result = UncheckedCast<Int32T>(CallCFunction(
code_entry, retval_type, std::make_pair(arg0_type, arg0),
std::make_pair(arg1_type, arg1), std::make_pair(arg2_type, arg2),
std::make_pair(arg3_type, arg3), std::make_pair(arg4_type, arg4),
std::make_pair(arg5_type, arg5)));
TNode<IntPtrT> int_result = ChangeInt32ToIntPtr(result);
GotoIf(
IntPtrEqual(int_result, IntPtrConstant(RegExp::kInternalRegExpSuccess)),
&if_success);
GotoIf(
IntPtrEqual(int_result, IntPtrConstant(RegExp::kInternalRegExpFailure)),
&if_failure);
GotoIf(IntPtrEqual(int_result,
IntPtrConstant(RegExp::kInternalRegExpException)),
&if_exception);
CSA_ASSERT(this, IntPtrEqual(int_result,
IntPtrConstant(RegExp::kInternalRegExpRetry)));
Goto(&runtime);
}
BIND(&if_success);
{
// Check that the last match info has space for the capture registers and
......
......@@ -4,6 +4,8 @@
specific_include_rules = {
"external-reference.cc": [
# Required to call IrregexpInterpreter::NativeMatch from builtin.
"+src/regexp/regexp-interpreter.h",
"+src/regexp/regexp-macro-assembler-arch.h",
],
}
......@@ -26,6 +26,7 @@
#include "src/logging/log.h"
#include "src/numbers/math-random.h"
#include "src/objects/objects-inl.h"
#include "src/regexp/regexp-interpreter.h"
#include "src/regexp/regexp-macro-assembler-arch.h"
#include "src/regexp/regexp-stack.h"
#include "src/strings/string-search.h"
......@@ -481,6 +482,9 @@ FUNCTION_REFERENCE_WITH_ISOLATE(re_check_stack_guard_state, re_stack_check_func)
FUNCTION_REFERENCE_WITH_ISOLATE(re_grow_stack,
NativeRegExpMacroAssembler::GrowStack)
FUNCTION_REFERENCE_WITH_ISOLATE(re_match_for_call_from_js,
IrregexpInterpreter::MatchForCallFromJs)
FUNCTION_REFERENCE_WITH_ISOLATE(
re_case_insensitive_compare_uc16,
NativeRegExpMacroAssembler::CaseInsensitiveCompareUC16)
......
......@@ -82,6 +82,7 @@ class StatsCounter;
V(re_check_stack_guard_state, \
"RegExpMacroAssembler*::CheckStackGuardState()") \
V(re_grow_stack, "NativeRegExpMacroAssembler::GrowStack()") \
V(re_match_for_call_from_js, "IrregexpInterpreter::MatchForCallFromJs") \
V(re_word_character_map, "NativeRegExpMacroAssembler::word_character_map")
#define EXTERNAL_REFERENCE_LIST(V) \
......
......@@ -1109,7 +1109,8 @@ int RegExpMacroAssemblerARM::CheckStackGuardState(Address* return_address,
return NativeRegExpMacroAssembler::CheckStackGuardState(
frame_entry<Isolate*>(re_frame, kIsolate),
frame_entry<int>(re_frame, kStartIndex),
frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code,
static_cast<RegExp::CallOrigin>(frame_entry<int>(re_frame, kDirectCall)),
return_address, re_code,
frame_entry_address<Address>(re_frame, kInputString),
frame_entry_address<const byte*>(re_frame, kInputStart),
frame_entry_address<const byte*>(re_frame, kInputEnd));
......
......@@ -1326,8 +1326,9 @@ int RegExpMacroAssemblerARM64::CheckStackGuardState(
Code re_code = Code::cast(Object(raw_code));
return NativeRegExpMacroAssembler::CheckStackGuardState(
frame_entry<Isolate*>(re_frame, kIsolate), start_index,
frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code,
frame_entry_address<Address>(re_frame, kInput), input_start, input_end);
static_cast<RegExp::CallOrigin>(frame_entry<int>(re_frame, kDirectCall)),
return_address, re_code, frame_entry_address<Address>(re_frame, kInput),
input_start, input_end);
}
......
......@@ -1120,7 +1120,8 @@ int RegExpMacroAssemblerIA32::CheckStackGuardState(Address* return_address,
return NativeRegExpMacroAssembler::CheckStackGuardState(
frame_entry<Isolate*>(re_frame, kIsolate),
frame_entry<int>(re_frame, kStartIndex),
frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code,
static_cast<RegExp::CallOrigin>(frame_entry<int>(re_frame, kDirectCall)),
return_address, re_code,
frame_entry_address<Address>(re_frame, kInputString),
frame_entry_address<const byte*>(re_frame, kInputStart),
frame_entry_address<const byte*>(re_frame, kInputEnd));
......
......@@ -1176,7 +1176,8 @@ int RegExpMacroAssemblerMIPS::CheckStackGuardState(Address* return_address,
return NativeRegExpMacroAssembler::CheckStackGuardState(
frame_entry<Isolate*>(re_frame, kIsolate),
frame_entry<int>(re_frame, kStartIndex),
frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code,
static_cast<RegExp::CallOrigin>(frame_entry<int>(re_frame, kDirectCall)),
return_address, re_code,
frame_entry_address<Address>(re_frame, kInputString),
frame_entry_address<const byte*>(re_frame, kInputStart),
frame_entry_address<const byte*>(re_frame, kInputEnd));
......
......@@ -1213,7 +1213,9 @@ int64_t RegExpMacroAssemblerMIPS::CheckStackGuardState(Address* return_address,
return NativeRegExpMacroAssembler::CheckStackGuardState(
frame_entry<Isolate*>(re_frame, kIsolate),
static_cast<int>(frame_entry<int64_t>(re_frame, kStartIndex)),
frame_entry<int64_t>(re_frame, kDirectCall) == 1, return_address, re_code,
static_cast<RegExp::CallOrigin>(
frame_entry<int64_t>(re_frame, kDirectCall)),
return_address, re_code,
frame_entry_address<Address>(re_frame, kInputString),
frame_entry_address<const byte*>(re_frame, kInputStart),
frame_entry_address<const byte*>(re_frame, kInputEnd));
......
......@@ -1177,8 +1177,10 @@ int RegExpMacroAssemblerPPC::CheckStackGuardState(Address* return_address,
return NativeRegExpMacroAssembler::CheckStackGuardState(
frame_entry<Isolate*>(re_frame, kIsolate),
frame_entry<intptr_t>(re_frame, kStartIndex),
frame_entry<intptr_t>(re_frame, kDirectCall) == 1, return_address,
re_code, frame_entry_address<Address>(re_frame, kInputString),
static_cast<RegExp::CallOrigin>(
frame_entry<intptr_t>(re_frame, kDirectCall)),
return_address, re_code,
frame_entry_address<Address>(re_frame, kInputString),
frame_entry_address<const byte*>(re_frame, kInputStart),
frame_entry_address<const byte*>(re_frame, kInputEnd));
}
......
This diff is collapsed.
......@@ -12,7 +12,7 @@
namespace v8 {
namespace internal {
class V8_EXPORT_PRIVATE IrregexpInterpreter {
class V8_EXPORT_PRIVATE IrregexpInterpreter : public AllStatic {
public:
enum Result {
FAILURE = RegExp::kInternalRegExpFailure,
......@@ -21,10 +21,26 @@ class V8_EXPORT_PRIVATE IrregexpInterpreter {
RETRY = RegExp::kInternalRegExpRetry,
};
// The caller is responsible for initializing registers before each call.
static Result Match(Isolate* isolate, Handle<ByteArray> code_array,
Handle<String> subject_string, int* registers,
// In case a StackOverflow occurs, a StackOverflowException is created and
// EXCEPTION is returned.
static Result MatchForCallFromRuntime(Isolate* isolate,
Handle<ByteArray> code_array,
Handle<String> subject_string,
int* registers, int registers_length,
int start_position);
// In case a StackOverflow occurs, EXCEPTION is returned. The caller is
// responsible for creating the exception.
static Result MatchForCallFromJs(Isolate* isolate, Address code,
Address subject, int* registers,
int32_t registers_length,
int32_t start_position);
private:
static Result Match(Isolate* isolate, ByteArray code_array,
String subject_string, int* registers,
int registers_length, int start_position,
RegExp::CallOrigin call_origin);
};
} // namespace internal
......
......@@ -129,32 +129,37 @@ const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
}
}
// This method may only be called after an interrupt.
int NativeRegExpMacroAssembler::CheckStackGuardState(
Isolate* isolate, int start_index, bool is_direct_call,
Isolate* isolate, int start_index, RegExp::CallOrigin call_origin,
Address* return_address, Code re_code, Address* subject,
const byte** input_start, const byte** input_end) {
DisallowHeapAllocation no_gc;
DCHECK(re_code.raw_instruction_start() <= *return_address);
DCHECK(*return_address <= re_code.raw_instruction_end());
int return_value = 0;
// Prepare for possible GC.
HandleScope handles(isolate);
Handle<Code> code_handle(re_code, isolate);
Handle<String> subject_handle(String::cast(Object(*subject)), isolate);
bool is_one_byte = String::IsOneByteRepresentationUnderneath(*subject_handle);
StackLimitCheck check(isolate);
bool js_has_overflowed = check.JsHasOverflowed();
if (is_direct_call) {
if (call_origin == RegExp::CallOrigin::kFromJs) {
// Direct calls from JavaScript can be interrupted in two ways:
// 1. A real stack overflow, in which case we let the caller throw the
// exception.
// 2. The stack guard was used to interrupt execution for another purpose,
// forcing the call through the runtime system.
return_value = js_has_overflowed ? EXCEPTION : RETRY;
} else if (js_has_overflowed) {
DCHECK(js_has_overflowed || check.InterruptRequested());
return js_has_overflowed ? EXCEPTION : RETRY;
}
DCHECK(call_origin == RegExp::CallOrigin::kFromRuntime);
// Prepare for possible GC.
HandleScope handles(isolate);
Handle<Code> code_handle(re_code, isolate);
Handle<String> subject_handle(String::cast(Object(*subject)), isolate);
bool is_one_byte = String::IsOneByteRepresentationUnderneath(*subject_handle);
int return_value = 0;
if (js_has_overflowed) {
AllowHeapAllocation yes_gc;
isolate->StackOverflow();
return_value = EXCEPTION;
......
......@@ -235,9 +235,9 @@ class NativeRegExpMacroAssembler: public RegExpMacroAssembler {
String subject, int start_index, const DisallowHeapAllocation& no_gc);
static int CheckStackGuardState(Isolate* isolate, int start_index,
bool is_direct_call, Address* return_address,
Code re_code, Address* subject,
const byte** input_start,
RegExp::CallOrigin call_origin,
Address* return_address, Code re_code,
Address* subject, const byte** input_start,
const byte** input_end);
// Byte map of one byte characters with a 0xff if the character is a word
......
......@@ -478,17 +478,13 @@ int RegExpImpl::IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
int32_t* raw_output = &output[number_of_capture_registers];
do {
// We do not touch the actual capture result registers until we know there
// has been a match so that we can use those capture results to set the
// last match info.
for (int i = number_of_capture_registers - 1; i >= 0; i--) {
raw_output[i] = -1;
}
Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_one_byte),
isolate);
IrregexpInterpreter::Result result = IrregexpInterpreter::Match(
isolate, byte_codes, subject, raw_output, index);
IrregexpInterpreter::Result result =
IrregexpInterpreter::MatchForCallFromRuntime(
isolate, byte_codes, subject, raw_output,
number_of_capture_registers, index);
DCHECK_IMPLIES(result == IrregexpInterpreter::EXCEPTION,
isolate->has_pending_exception());
......
......@@ -61,6 +61,11 @@ class RegExp final : public AllStatic {
Isolate* isolate, Handle<JSRegExp> re, Handle<String> pattern,
JSRegExp::Flags flags);
enum CallOrigin : int {
kFromRuntime = 0,
kFromJs = 1,
};
// See ECMA-262 section 15.10.6.2.
// This function calls the garbage collector if necessary.
V8_EXPORT_PRIVATE V8_WARN_UNUSED_RESULT static MaybeHandle<Object> Exec(
......@@ -73,7 +78,7 @@ class RegExp final : public AllStatic {
static constexpr int kInternalRegExpException = -1;
static constexpr int kInternalRegExpRetry = -2;
enum IrregexpResult {
enum IrregexpResult : int32_t {
RE_FAILURE = kInternalRegExpFailure,
RE_SUCCESS = kInternalRegExpSuccess,
RE_EXCEPTION = kInternalRegExpException,
......
......@@ -1120,8 +1120,10 @@ int RegExpMacroAssemblerS390::CheckStackGuardState(Address* return_address,
return NativeRegExpMacroAssembler::CheckStackGuardState(
frame_entry<Isolate*>(re_frame, kIsolate),
frame_entry<intptr_t>(re_frame, kStartIndex),
frame_entry<intptr_t>(re_frame, kDirectCall) == 1, return_address,
re_code, frame_entry_address<Address>(re_frame, kInputString),
static_cast<RegExp::CallOrigin>(
frame_entry<intptr_t>(re_frame, kDirectCall)),
return_address, re_code,
frame_entry_address<Address>(re_frame, kInputString),
frame_entry_address<const byte*>(re_frame, kInputStart),
frame_entry_address<const byte*>(re_frame, kInputEnd));
}
......
......@@ -1198,7 +1198,8 @@ int RegExpMacroAssemblerX64::CheckStackGuardState(Address* return_address,
return NativeRegExpMacroAssembler::CheckStackGuardState(
frame_entry<Isolate*>(re_frame, kIsolate),
frame_entry<int>(re_frame, kStartIndex),
frame_entry<int>(re_frame, kDirectCall) == 1, return_address, re_code,
static_cast<RegExp::CallOrigin>(frame_entry<int>(re_frame, kDirectCall)),
return_address, re_code,
frame_entry_address<Address>(re_frame, kInputString),
frame_entry_address<const byte*>(re_frame, kInputStart),
frame_entry_address<const byte*>(re_frame, kInputEnd));
......
......@@ -1298,7 +1298,8 @@ TEST(MacroAssembler) {
Handle<String> f1_16 = factory->NewStringFromTwoByte(
Vector<const uc16>(str1, 6)).ToHandleChecked();
CHECK(IrregexpInterpreter::Match(isolate, array, f1_16, captures, 0));
CHECK(IrregexpInterpreter::MatchForCallFromRuntime(isolate, array, f1_16,
captures, 5, 0));
CHECK_EQ(0, captures[0]);
CHECK_EQ(3, captures[1]);
CHECK_EQ(1, captures[2]);
......@@ -1309,7 +1310,8 @@ TEST(MacroAssembler) {
Handle<String> f2_16 = factory->NewStringFromTwoByte(
Vector<const uc16>(str2, 6)).ToHandleChecked();
CHECK(!IrregexpInterpreter::Match(isolate, array, f2_16, captures, 0));
CHECK(!IrregexpInterpreter::MatchForCallFromRuntime(isolate, array, f2_16,
captures, 5, 0));
CHECK_EQ(42, captures[0]);
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment