Commit 213504b9 authored by Patrick Thier, committed by Commit Bot

[regexp] Consolidate calls to jitted irregexp and regexp interpreter

The code fields of a JSRegExp object now contain either compiled irregexp
code or a trampoline to the interpreter. This way the code can be
executed without explicitly checking whether the regexp should be
interpreted or executed natively.
For interpreted regexps, the generated bytecode is now stored in
dedicated fields (one each for Latin1 and UC16) instead of in the code
fields.
The signatures of the jitted irregexp match code and the regexp
interpreter have been unified.

Bug: v8:9516
Change-Id: I30e3d86f4702a902d3387bccc1ee91dea501fe4e
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1762513
Commit-Queue: Patrick Thier <pthier@google.com>
Reviewed-by: Peter Marshall <petermarshall@chromium.org>
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Cr-Commit-Position: refs/heads/master@{#63457}
parent 37a4937b
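
The core idea of the change — the per-encoding code slot always holds something callable with a single signature, either natively compiled matcher code or a trampoline that forwards to the interpreter — can be illustrated with a minimal standalone sketch. This is plain C++ with made-up names, not V8 code:

    // Standalone sketch of the dispatch idea: the "code" slot is filled with
    // either native matcher code or an interpreter trampoline, and the call
    // site never branches on "interpreted vs. native".
    #include <cstdio>
    #include <string>

    using MatchFn = int (*)(const std::string& subject, int start);

    // Stand-in for natively compiled irregexp code.
    int NativeMatch(const std::string& subject, int start) {
      return subject.find("abc", start) != std::string::npos ? 1 : 0;
    }

    // Stand-in for the interpreter; in V8 this would walk the bytecode kept
    // in the separate bytecode field.
    int InterpretBytecode(const std::string& subject, int start) {
      return subject.find("abc", start) != std::string::npos ? 1 : 0;
    }

    // Stand-in for the RegExpInterpreterTrampoline builtin: same signature
    // as native code, forwards to the interpreter.
    int InterpreterTrampoline(const std::string& subject, int start) {
      return InterpretBytecode(subject, start);
    }

    int main() {
      // The code slot holds either NativeMatch or the trampoline; the call
      // below is identical in both cases.
      MatchFn code = InterpreterTrampoline;
      std::printf("%d\n", code("xxabcxx", 0));
    }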
......@@ -863,6 +863,7 @@ namespace internal {
/* RegExp helpers */ \
TFS(RegExpExecAtom, kRegExp, kString, kLastIndex, kMatchInfo) \
TFS(RegExpExecInternal, kRegExp, kString, kLastIndex, kMatchInfo) \
ASM(RegExpInterpreterTrampoline, CCall) \
TFS(RegExpMatchFast, kReceiver, kPattern) \
TFS(RegExpPrototypeExecSlow, kReceiver, kString) \
TFS(RegExpSearchFast, kReceiver, kPattern) \
......
......@@ -30,6 +30,8 @@ class RegExpBuiltinsAssembler : public CodeStubAssembler {
TNode<Smi> SmiZero();
TNode<IntPtrT> IntPtrZero();
TNode<RawPtrT> LoadCodeObjectEntry(TNode<Code> code);
// Allocate a RegExpResult with the given length (the number of captures,
// including the match itself), index (the index where the match starts),
// and input string.
......
......@@ -217,6 +217,13 @@ void TurboAssembler::Jump(Handle<Code> code, RelocInfo::Mode rmode,
Jump(static_cast<intptr_t>(code.address()), rmode, cond);
}
void TurboAssembler::Jump(const ExternalReference& reference) {
UseScratchRegisterScope temps(this);
Register scratch = temps.Acquire();
Move(scratch, reference);
Jump(scratch);
}
void TurboAssembler::Call(Register target, Condition cond) {
// Block constant pool for the call instruction sequence.
BlockConstPoolScope block_const_pool(this);
......
......@@ -409,6 +409,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void Jump(Register target, Condition cond = al);
void Jump(Address target, RelocInfo::Mode rmode, Condition cond = al);
void Jump(Handle<Code> code, RelocInfo::Mode rmode, Condition cond = al);
void Jump(const ExternalReference& reference) override;
// Perform a floating-point min or max operation with the
// (IEEE-754-compatible) semantics of ARM64's fmin/fmax. Some cases, typically
......
......@@ -1867,6 +1867,13 @@ void TurboAssembler::Jump(Handle<Code> code, RelocInfo::Mode rmode,
}
}
void TurboAssembler::Jump(const ExternalReference& reference) {
UseScratchRegisterScope temps(this);
Register scratch = temps.AcquireX();
Mov(scratch, reference);
Jump(scratch);
}
void TurboAssembler::Call(Register target) {
BlockPoolsScope scope(this);
Blr(target);
......
......@@ -889,6 +889,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void Jump(Register target, Condition cond = al);
void Jump(Address target, RelocInfo::Mode rmode, Condition cond = al);
void Jump(Handle<Code> code, RelocInfo::Mode rmode, Condition cond = al);
void Jump(const ExternalReference& reference) override;
void Call(Register target);
void Call(Address target, RelocInfo::Mode rmode);
......
......@@ -1957,6 +1957,12 @@ void TurboAssembler::JumpCodeObject(Register code_object) {
jmp(code_object);
}
void TurboAssembler::Jump(const ExternalReference& reference) {
DCHECK(root_array_available());
jmp(Operand(kRootRegister, RootRegisterOffsetForExternalReferenceTableEntry(
isolate(), reference)));
}
void TurboAssembler::Jump(Handle<Code> code_object, RelocInfo::Mode rmode) {
DCHECK_IMPLIES(options().isolate_independent_code,
Builtins::IsIsolateIndependentBuiltin(*code_object));
......
......@@ -96,6 +96,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void LoadCodeObjectEntry(Register destination, Register code_object) override;
void CallCodeObject(Register code_object) override;
void JumpCodeObject(Register code_object) override;
void Jump(const ExternalReference& reference) override;
void RetpolineCall(Register reg);
void RetpolineCall(Address destination, RelocInfo::Mode rmode);
......
......@@ -50,6 +50,8 @@ class V8_EXPORT_PRIVATE TurboAssemblerBase : public Assembler {
void set_has_frame(bool v) { has_frame_ = v; }
bool has_frame() const { return has_frame_; }
virtual void Jump(const ExternalReference& reference) = 0;
// Calls the builtin given by the Smi in |builtin|. If builtins are embedded,
// the trampoline Code object on the heap is not used.
virtual void CallBuiltinByIndex(Register builtin_index) = 0;
......
......@@ -1524,9 +1524,10 @@ void MacroAssembler::Pop(Operand dst) { popq(dst); }
void MacroAssembler::PopQuad(Operand dst) { popq(dst); }
void TurboAssembler::Jump(ExternalReference ext) {
LoadAddress(kScratchRegister, ext);
jmp(kScratchRegister);
void TurboAssembler::Jump(const ExternalReference& reference) {
DCHECK(root_array_available());
jmp(Operand(kRootRegister, RootRegisterOffsetForExternalReferenceTableEntry(
isolate(), reference)));
}
void TurboAssembler::Jump(Operand op) { jmp(op); }
......
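
Note the two implementation strategies above: arm and arm64 materialize the external reference in a scratch register and jump to it, while ia32 and x64 (given root_array_available()) jump through the entry for that reference in a table addressed off kRootRegister. A rough standalone sketch of such a table-indirect jump, with made-up names and plain function pointers standing in for code addresses:

    // Standalone sketch (not V8 code) of a table-based indirect jump: the
    // target is loaded from a table whose base lives in a reserved register,
    // so no scratch register or relocatable immediate is needed.
    #include <cstdio>

    using TargetFn = int (*)(int);

    int InterpreterEntry(int x) { return x + 1; }  // stand-in jump target

    // Stand-in for the external reference table reachable from kRootRegister.
    TargetFn reference_table[] = {InterpreterEntry};

    int JumpViaTable(int offset, int arg) {
      // Corresponds to "jmp [root + offset]": one memory-indirect transfer to
      // the address stored in the table.
      return reference_table[offset](arg);
    }

    int main() {
      std::printf("%d\n", JumpViaTable(0, 41));
    }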
......@@ -354,7 +354,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void RetpolineCall(Address destination, RelocInfo::Mode rmode);
void Jump(Address destination, RelocInfo::Mode rmode);
void Jump(ExternalReference ext);
void Jump(const ExternalReference& reference) override;
void Jump(Operand op);
void Jump(Handle<Code> code_object, RelocInfo::Mode rmode,
Condition cc = always);
......
......@@ -1448,22 +1448,34 @@ void JSRegExp::JSRegExpVerify(Isolate* isolate) {
break;
}
case JSRegExp::IRREGEXP: {
bool can_be_native = RegExp::CanGenerateNativeCode();
bool can_be_interpreted = RegExp::CanGenerateBytecode();
FixedArray arr = FixedArray::cast(data());
Object one_byte_data = arr.get(JSRegExp::kIrregexpLatin1CodeIndex);
// Smi : Not compiled yet (-1).
// Code/ByteArray: Compiled code.
// Code: Compiled irregexp code or trampoline to the interpreter.
CHECK((one_byte_data.IsSmi() &&
Smi::ToInt(one_byte_data) == JSRegExp::kUninitializedValue) ||
(can_be_interpreted && one_byte_data.IsByteArray()) ||
(can_be_native && one_byte_data.IsCode()));
one_byte_data.IsCode());
Object uc16_data = arr.get(JSRegExp::kIrregexpUC16CodeIndex);
CHECK((uc16_data.IsSmi() &&
Smi::ToInt(uc16_data) == JSRegExp::kUninitializedValue) ||
(can_be_interpreted && uc16_data.IsByteArray()) ||
(can_be_native && uc16_data.IsCode()));
uc16_data.IsCode());
Object one_byte_bytecode =
arr.get(JSRegExp::kIrregexpLatin1BytecodeIndex);
// Smi : Not compiled yet (-1).
// ByteArray: Bytecode to interpret regexp.
CHECK((one_byte_bytecode.IsSmi() &&
Smi::ToInt(one_byte_bytecode) == JSRegExp::kUninitializedValue) ||
(can_be_interpreted && one_byte_bytecode.IsByteArray()));
Object uc16_bytecode = arr.get(JSRegExp::kIrregexpUC16BytecodeIndex);
CHECK((uc16_bytecode.IsSmi() &&
Smi::ToInt(uc16_bytecode) == JSRegExp::kUninitializedValue) ||
(can_be_interpreted && uc16_bytecode.IsByteArray()));
CHECK_IMPLIES(one_byte_data.IsSmi(), one_byte_bytecode.IsSmi());
CHECK_IMPLIES(uc16_data.IsSmi(), uc16_bytecode.IsSmi());
CHECK(arr.get(JSRegExp::kIrregexpCaptureCountIndex).IsSmi());
CHECK(arr.get(JSRegExp::kIrregexpMaxRegisterCountIndex).IsSmi());
......
......@@ -3899,6 +3899,8 @@ void Factory::SetRegExpIrregexpData(Handle<JSRegExp> regexp,
store->set(JSRegExp::kFlagsIndex, Smi::FromInt(flags));
store->set(JSRegExp::kIrregexpLatin1CodeIndex, uninitialized);
store->set(JSRegExp::kIrregexpUC16CodeIndex, uninitialized);
store->set(JSRegExp::kIrregexpLatin1BytecodeIndex, uninitialized);
store->set(JSRegExp::kIrregexpUC16BytecodeIndex, uninitialized);
store->set(JSRegExp::kIrregexpMaxRegisterCountIndex, Smi::kZero);
store->set(JSRegExp::kIrregexpCaptureCountIndex, Smi::FromInt(capture_count));
store->set(JSRegExp::kIrregexpCaptureNameMapIndex, uninitialized);
......
......@@ -80,23 +80,28 @@ void JSRegExp::SetDataAt(int index, Object value) {
bool JSRegExp::HasCompiledCode() const {
if (TypeTag() != IRREGEXP) return false;
Smi uninitialized = Smi::FromInt(kUninitializedValue);
#ifdef DEBUG
DCHECK(DataAt(kIrregexpLatin1CodeIndex).IsCode() ||
DataAt(kIrregexpLatin1CodeIndex).IsByteArray() ||
DataAt(kIrregexpLatin1CodeIndex) == Smi::FromInt(kUninitializedValue));
DataAt(kIrregexpLatin1CodeIndex) == uninitialized);
DCHECK(DataAt(kIrregexpUC16CodeIndex).IsCode() ||
DataAt(kIrregexpUC16CodeIndex).IsByteArray() ||
DataAt(kIrregexpUC16CodeIndex) == Smi::FromInt(kUninitializedValue));
DataAt(kIrregexpUC16CodeIndex) == uninitialized);
DCHECK(DataAt(kIrregexpLatin1BytecodeIndex).IsByteArray() ||
DataAt(kIrregexpLatin1BytecodeIndex) == uninitialized);
DCHECK(DataAt(kIrregexpUC16BytecodeIndex).IsByteArray() ||
DataAt(kIrregexpUC16BytecodeIndex) == uninitialized);
#endif // DEBUG
Smi uninitialized = Smi::FromInt(kUninitializedValue);
return (DataAt(kIrregexpLatin1CodeIndex) != uninitialized ||
DataAt(kIrregexpUC16CodeIndex) != uninitialized);
}
void JSRegExp::DiscardCompiledCodeForSerialization() {
DCHECK(HasCompiledCode());
SetDataAt(kIrregexpLatin1CodeIndex, Smi::FromInt(kUninitializedValue));
SetDataAt(kIrregexpUC16CodeIndex, Smi::FromInt(kUninitializedValue));
Smi uninitialized = Smi::FromInt(kUninitializedValue);
SetDataAt(kIrregexpLatin1CodeIndex, uninitialized);
SetDataAt(kIrregexpUC16CodeIndex, uninitialized);
SetDataAt(kIrregexpLatin1BytecodeIndex, uninitialized);
SetDataAt(kIrregexpUC16BytecodeIndex, uninitialized);
}
} // namespace internal
......
......@@ -103,6 +103,10 @@ class JSRegExp : public JSObject {
void MarkTierUpForNextExec();
inline Type TypeTag() const;
// Maximum number of captures allowed.
static constexpr int kMaxCaptures = 1 << 16;
// Number of captures (without the match itself).
inline int CaptureCount();
inline Flags GetFlags();
......@@ -112,16 +116,19 @@ class JSRegExp : public JSObject {
// Set implementation data after the object has been prepared.
inline void SetDataAt(int index, Object value);
static int code_index(bool is_latin1) {
if (is_latin1) {
return kIrregexpLatin1CodeIndex;
} else {
return kIrregexpUC16CodeIndex;
}
static constexpr int code_index(bool is_latin1) {
return is_latin1 ? kIrregexpLatin1CodeIndex : kIrregexpUC16CodeIndex;
}
static constexpr int bytecode_index(bool is_latin1) {
return is_latin1 ? kIrregexpLatin1BytecodeIndex
: kIrregexpUC16BytecodeIndex;
}
// This could be a Smi kUninitializedValue, a ByteArray, or Code.
// This could be a Smi kUninitializedValue or Code.
Object Code(bool is_latin1) const;
// This could be a Smi kUninitializedValue or ByteArray.
Object Bytecode(bool is_latin1) const;
bool ShouldProduceBytecode();
inline bool HasCompiledCode() const;
inline void DiscardCompiledCodeForSerialization();
......@@ -151,23 +158,33 @@ class JSRegExp : public JSObject {
static const int kAtomDataSize = kAtomPatternIndex + 1;
// Irregexp compiled code or bytecode for Latin1. If compilation
// fails, this field holds an exception object that should be
// Irregexp compiled code or trampoline to interpreter for Latin1. If
// compilation fails, this field holds an exception object that should be
// thrown if the regexp is used again.
static const int kIrregexpLatin1CodeIndex = kDataIndex;
// Irregexp compiled code or bytecode for UC16. If compilation
// fails, this field holds an exception object that should be
// Irregexp compiled code or trampoline to interpreter for UC16. If
// compilation fails, this field holds an exception object that should be
// thrown if the regexp is used again.
static const int kIrregexpUC16CodeIndex = kDataIndex + 1;
// Bytecode to interpret the regexp for Latin1. Contains kUninitializedValue
// if we haven't compiled the regexp yet, if regexps are always compiled
// natively, or if tier-up has happened (i.e. when kIrregexpLatin1CodeIndex
// contains native irregexp code).
static const int kIrregexpLatin1BytecodeIndex = kDataIndex + 2;
// Bytecode to interpret the regexp for UC16. Contains kUninitializedValue if
// we haven't compiled the regexp yet, if regexps are always compiled natively,
// or if tier-up has happened (i.e. when kIrregexpUC16CodeIndex contains native
// irregexp code).
static const int kIrregexpUC16BytecodeIndex = kDataIndex + 3;
// Maximal number of registers used by either Latin1 or UC16.
// Only used to check that there is enough stack space
static const int kIrregexpMaxRegisterCountIndex = kDataIndex + 2;
static const int kIrregexpMaxRegisterCountIndex = kDataIndex + 4;
// Number of captures in the compiled regexp.
static const int kIrregexpCaptureCountIndex = kDataIndex + 3;
static const int kIrregexpCaptureCountIndex = kDataIndex + 5;
// Maps names of named capture groups (at indices 2i) to their corresponding
// (1-based) capture group indices (at indices 2i + 1).
static const int kIrregexpCaptureNameMapIndex = kDataIndex + 4;
static const int kIrregexpTierUpTicksIndex = kDataIndex + 5;
static const int kIrregexpCaptureNameMapIndex = kDataIndex + 6;
static const int kIrregexpTierUpTicksIndex = kDataIndex + 7;
static const int kIrregexpDataSize = kIrregexpTierUpTicksIndex + 1;
......
......@@ -6141,6 +6141,10 @@ Object JSRegExp::Code(bool is_latin1) const {
return DataAt(code_index(is_latin1));
}
Object JSRegExp::Bytecode(bool is_latin1) const {
return DataAt(bytecode_index(is_latin1));
}
bool JSRegExp::ShouldProduceBytecode() {
return FLAG_regexp_interpret_all ||
(FLAG_regexp_tier_up && !MarkedForTierUp());
......
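
A standalone model of the per-encoding slot selection that the new constexpr code_index()/bytecode_index() helpers provide. The slot numbers below are made up; the real indices are kDataIndex + 0..3 as listed in js-regexp.h above:

    // Standalone model (not V8 code): Latin1 and UC16 each get a code slot
    // and a bytecode slot in the regexp's data array.
    #include <array>
    #include <cstdio>

    constexpr int kLatin1CodeSlot = 0;
    constexpr int kUC16CodeSlot = 1;
    constexpr int kLatin1BytecodeSlot = 2;
    constexpr int kUC16BytecodeSlot = 3;

    constexpr int code_slot(bool is_latin1) {
      return is_latin1 ? kLatin1CodeSlot : kUC16CodeSlot;
    }
    constexpr int bytecode_slot(bool is_latin1) {
      return is_latin1 ? kLatin1BytecodeSlot : kUC16BytecodeSlot;
    }

    int main() {
      // Per encoding, the code slot holds native code or the interpreter
      // trampoline; the bytecode slot holds a ByteArray or kUninitializedValue.
      std::array<const char*, 4> data = {"code (Latin1)", "code (UC16)",
                                         "bytecode (Latin1)", "bytecode (UC16)"};
      std::printf("%s / %s\n", data[code_slot(true)], data[bytecode_slot(true)]);
    }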
......@@ -40,6 +40,9 @@ namespace internal {
* Each call to a public method should retain this convention.
*
* The stack will have the following structure:
* - fp[56] Address regexp (address of the JSRegExp object; unused in
* native code, passed to match signature of
* the interpreter)
* - fp[52] Isolate* isolate (address of the current isolate)
* - fp[48] direct_call (if 1, direct call from JavaScript code,
* if 0, call through the runtime system).
......@@ -83,7 +86,8 @@ namespace internal {
* int num_capture_registers,
* byte* stack_area_base,
* bool direct_call = false,
* Isolate* isolate);
* Isolate* isolate,
* Address regexp);
* The call is performed by NativeRegExpMacroAssembler::Execute()
* (in regexp-macro-assembler.cc) via the GeneratedCode wrapper.
*/
......
......@@ -55,7 +55,10 @@ namespace internal {
* (as referred to in
* the code)
*
* - fp[96] isolate Address of the current isolate.
* - fp[104] Address regexp Address of the JSRegExp object. Unused in
* native code, passed to match signature of
* the interpreter.
* - fp[96] isolate Address of the current isolate.
* ^^^ sp when called ^^^
* - fp[88] lr Return from the RegExp code.
* - fp[80] r29 Old frame pointer (CalleeSaved).
......@@ -93,7 +96,8 @@ namespace internal {
* int num_capture_registers,
* byte* stack_area_base,
* bool direct_call = false,
* Isolate* isolate);
* Isolate* isolate,
* Address regexp);
* The call is performed by NativeRegExpMacroAssembler::Execute()
* (in regexp-macro-assembler.cc) via the GeneratedCode wrapper.
*/
......
......@@ -34,6 +34,9 @@ namespace internal {
*
* Each call to a public method should retain this convention.
* The stack will have the following structure:
* - Address regexp (address of the JSRegExp object; unused in
* native code, passed to match signature of
* the interpreter)
* - Isolate* isolate (address of the current isolate)
* - direct_call (if 1, direct call from JavaScript code, if 0
* call through the runtime system)
......@@ -73,7 +76,8 @@ namespace internal {
* int num_capture_registers,
* byte* stack_area_base,
* bool direct_call = false,
* Isolate* isolate);
* Isolate* isolate,
* Address regexp);
*/
#define __ ACCESS_MASM(masm_)
......
......@@ -516,6 +516,7 @@ class RegExpCompiler {
const char* const error_message = nullptr;
Object code;
Object bytecode;
int num_registers = 0;
};
......
......@@ -811,7 +811,7 @@ IrregexpInterpreter::Result IrregexpInterpreter::Match(
}
bool is_one_byte = String::IsOneByteRepresentationUnderneath(subject_string);
ByteArray code_array = ByteArray::cast(regexp.Code(is_one_byte));
ByteArray code_array = ByteArray::cast(regexp.Bytecode(is_one_byte));
return MatchInternal(isolate, code_array, subject_string, registers,
registers_length, start_position, call_origin);
......@@ -856,10 +856,12 @@ IrregexpInterpreter::Result IrregexpInterpreter::MatchInternal(
// This method is called through an external reference from RegExpExecInternal
// builtin.
IrregexpInterpreter::Result IrregexpInterpreter::MatchForCallFromJs(
Isolate* isolate, Address regexp, Address subject, int* registers,
int32_t registers_length, int32_t start_position) {
Address subject, int32_t start_position, Address, Address, int* registers,
int32_t registers_length, Address, RegExp::CallOrigin call_origin,
Isolate* isolate, Address regexp) {
DCHECK_NOT_NULL(isolate);
DCHECK_NOT_NULL(registers);
DCHECK(call_origin == RegExp::CallOrigin::kFromJs);
DisallowHeapAllocation no_gc;
DisallowJavascriptExecution no_js(isolate);
......@@ -868,7 +870,7 @@ IrregexpInterpreter::Result IrregexpInterpreter::MatchForCallFromJs(
JSRegExp regexp_obj = JSRegExp::cast(Object(regexp));
return Match(isolate, regexp_obj, subject_string, registers, registers_length,
start_position, RegExp::CallOrigin::kFromJs);
start_position, call_origin);
}
IrregexpInterpreter::Result IrregexpInterpreter::MatchForCallFromRuntime(
......
......@@ -31,10 +31,15 @@ class V8_EXPORT_PRIVATE IrregexpInterpreter : public AllStatic {
// In case a StackOverflow occurs, EXCEPTION is returned. The caller is
// responsible for creating the exception.
static Result MatchForCallFromJs(Isolate* isolate, Address regexp,
Address subject, int* registers,
int32_t registers_length,
int32_t start_position);
// Arguments input_start, input_end and backtrack_stack are
// unused. They are only passed to match the signature of the native irregexp
// code.
static Result MatchForCallFromJs(Address subject, int32_t start_position,
Address input_start, Address input_end,
int* registers, int32_t registers_length,
Address backtrack_stack,
RegExp::CallOrigin call_origin,
Isolate* isolate, Address regexp);
static Result MatchInternal(Isolate* isolate, ByteArray code_array,
String subject_string, int* registers,
......
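
Both the generated native code and IrregexpInterpreter::MatchForCallFromJs are now callable through the same ten-argument signature, which is what lets the RegExpInterpreterTrampoline builtin sit in the code field. Below is a standalone sketch with stand-in types (uintptr_t for Address, a plain int32_t for the call origin); only the parameter list mirrors the diff, everything else is made up:

    // Standalone sketch (not V8 declarations) of the shared calling
    // convention: native irregexp code and the interpreter entry point have
    // the same parameter list, so the trampoline forwards arguments unchanged.
    #include <cstdint>
    #include <cstdio>

    using Address = uintptr_t;
    struct Isolate;  // opaque stand-in

    using RegexpMatcher = int (*)(Address subject, int32_t start_position,
                                  Address input_start, Address input_end,
                                  int* registers, int32_t registers_length,
                                  Address backtrack_stack, int32_t call_origin,
                                  Isolate* isolate, Address regexp);

    // Stand-in for the interpreter entry point: input_start, input_end and
    // backtrack_stack are accepted but ignored, as described above.
    int FakeInterpreterEntry(Address /*subject*/, int32_t /*start_position*/,
                             Address /*input_start*/, Address /*input_end*/,
                             int* registers, int32_t /*registers_length*/,
                             Address /*backtrack_stack*/, int32_t /*call_origin*/,
                             Isolate* /*isolate*/, Address /*regexp*/) {
      registers[0] = 0;  // pretend the match starts at index 0
      return 1;          // one successful match
    }

    int main() {
      RegexpMatcher matcher = FakeInterpreterEntry;  // or natively compiled code
      int registers[2] = {-1, -1};
      std::printf("%d %d\n",
                  matcher(0, 0, 0, 0, registers, 2, 0, 0, nullptr, 0),
                  registers[0]);
    }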
......@@ -219,7 +219,7 @@ int NativeRegExpMacroAssembler::CheckStackGuardState(
}
// Returns a {Result} sentinel, or the number of successful matches.
int NativeRegExpMacroAssembler::Match(Handle<Code> regexp_code,
int NativeRegExpMacroAssembler::Match(Handle<JSRegExp> regexp,
Handle<String> subject,
int* offsets_vector,
int offsets_vector_length,
......@@ -262,31 +262,36 @@ int NativeRegExpMacroAssembler::Match(Handle<Code> regexp_code,
StringCharacterPosition(subject_ptr, start_offset + slice_offset, no_gc);
int byte_length = char_length << char_size_shift;
const byte* input_end = input_start + byte_length;
return Execute(*regexp_code, *subject, start_offset, input_start, input_end,
offsets_vector, offsets_vector_length, isolate);
return Execute(*subject, start_offset, input_start, input_end, offsets_vector,
offsets_vector_length, isolate, *regexp);
}
// Returns a {Result} sentinel, or the number of successful matches.
// TODO(pthier): The JSRegExp object is passed to native irregexp code to match
// the signature of the interpreter. We should get rid of JS objects passed to
// internal methods.
int NativeRegExpMacroAssembler::Execute(
Code code,
String input, // This needs to be the unpacked (sliced, cons) string.
int start_offset, const byte* input_start, const byte* input_end,
int* output, int output_size, Isolate* isolate) {
int* output, int output_size, Isolate* isolate, JSRegExp regexp) {
// Ensure that the minimum stack has been allocated.
RegExpStackScope stack_scope(isolate);
Address stack_base = stack_scope.stack()->stack_base();
int direct_call = 0;
bool is_one_byte = String::IsOneByteRepresentationUnderneath(input);
Code code = Code::cast(regexp.Code(is_one_byte));
RegExp::CallOrigin call_origin = RegExp::CallOrigin::kFromRuntime;
using RegexpMatcherSig = int(
Address input_string, int start_offset, // NOLINT(readability/casting)
const byte* input_start, const byte* input_end, int* output,
int output_size, Address stack_base, int direct_call, Isolate* isolate);
int output_size, Address stack_base, int call_origin, Isolate* isolate,
Address regexp);
auto fn = GeneratedCode<RegexpMatcherSig>::FromCode(code);
int result =
fn.CallIrregexp(input.ptr(), start_offset, input_start, input_end, output,
output_size, stack_base, direct_call, isolate);
int result = fn.CallIrregexp(input.ptr(), start_offset, input_start,
input_end, output, output_size, stack_base,
call_origin, isolate, regexp.ptr());
DCHECK(result >= RETRY);
if (result == EXCEPTION && !isolate->has_pending_exception()) {
......
......@@ -223,7 +223,7 @@ class NativeRegExpMacroAssembler: public RegExpMacroAssembler {
bool CanReadUnaligned() override;
// Returns a {Result} sentinel, or the number of successful matches.
static int Match(Handle<Code> regexp, Handle<String> subject,
static int Match(Handle<JSRegExp> regexp, Handle<String> subject,
int* offsets_vector, int offsets_vector_length,
int previous_index, Isolate* isolate);
......@@ -254,11 +254,11 @@ class NativeRegExpMacroAssembler: public RegExpMacroAssembler {
}
// Returns a {Result} sentinel, or the number of successful matches.
V8_EXPORT_PRIVATE static int Execute(Code code, String input,
int start_offset,
V8_EXPORT_PRIVATE static int Execute(String input, int start_offset,
const byte* input_start,
const byte* input_end, int* output,
int output_size, Isolate* isolate);
int output_size, Isolate* isolate,
JSRegExp regexp);
};
} // namespace internal
......
......@@ -692,7 +692,7 @@ RegExpParser::RegExpParserState* RegExpParser::ParseOpenParenthesis(
}
}
if (subexpr_type == CAPTURE) {
if (captures_started_ >= kMaxCaptures) {
if (captures_started_ >= JSRegExp::kMaxCaptures) {
ReportError(CStrVector("Too many captures"));
return nullptr;
}
......@@ -800,7 +800,7 @@ bool RegExpParser::ParseBackReferenceIndex(int* index_out) {
uc32 c = current();
if (IsDecimalDigit(c)) {
value = 10 * value + (c - '0');
if (value > kMaxCaptures) {
if (value > JSRegExp::kMaxCaptures) {
Reset(start);
return false;
}
......
......@@ -221,7 +221,6 @@ class V8_EXPORT_PRIVATE RegExpParser {
static bool IsSyntaxCharacterOrSlash(uc32 c);
static const int kMaxCaptures = 1 << 16;
static const uc32 kEndMarker = (1 << 21);
private:
......
......@@ -301,13 +301,14 @@ bool RegExpImpl::EnsureCompiledIrregexp(Isolate* isolate, Handle<JSRegExp> re,
Handle<String> sample_subject,
bool is_one_byte) {
Object compiled_code = re->Code(is_one_byte);
Object bytecode = re->Bytecode(is_one_byte);
bool needs_initial_compilation =
compiled_code == Smi::FromInt(JSRegExp::kUninitializedValue);
// Recompile is needed when we're dealing with the first execution of the
// regexp after the decision to tier up has been made. If the tiering up
// strategy is not in use, this value is always false.
bool needs_tier_up_compilation =
re->MarkedForTierUp() && compiled_code.IsByteArray();
re->MarkedForTierUp() && bytecode.IsByteArray();
if (FLAG_trace_regexp_tier_up && needs_tier_up_compilation) {
PrintF("JSRegExp object %p needs tier-up compilation\n",
......@@ -315,14 +316,12 @@ bool RegExpImpl::EnsureCompiledIrregexp(Isolate* isolate, Handle<JSRegExp> re,
}
if (!needs_initial_compilation && !needs_tier_up_compilation) {
// We expect bytecode here only if we're interpreting all regexps. In all
// other cases, we can only expect the compiled code to be native code.
DCHECK(FLAG_regexp_interpret_all ? compiled_code.IsByteArray()
: compiled_code.IsCode());
DCHECK(compiled_code.IsCode());
DCHECK_IMPLIES(FLAG_regexp_interpret_all, bytecode.IsByteArray());
return true;
}
DCHECK_IMPLIES(needs_tier_up_compilation, compiled_code.IsByteArray());
DCHECK_IMPLIES(needs_tier_up_compilation, bytecode.IsByteArray());
return CompileIrregexp(isolate, re, sample_subject, is_one_byte);
}
......@@ -332,20 +331,25 @@ namespace {
bool RegExpCodeIsValidForPreCompilation(Handle<JSRegExp> re, bool is_one_byte) {
Object entry = re->Code(is_one_byte);
Object bytecode = re->Bytecode(is_one_byte);
// If we're not using the tier-up strategy, entry can only be a smi
// representing an uncompiled regexp here. If we're using the tier-up
// strategy, entry can still be a smi representing an uncompiled regexp, when
// compiling the regexp before the tier-up, or it can contain previously
// compiled bytecode, when recompiling the regexp after the tier-up. If the
// compiling the regexp before the tier-up, or it can contain a trampoline to
// the regexp interpreter, in which case the bytecode field contains compiled
// bytecode, when recompiling the regexp after the tier-up. If the
// tier-up was forced, which happens for global replaces, entry is a smi
// representing an uncompiled regexp, even though we're "recompiling" after
// the tier-up.
if (re->ShouldProduceBytecode()) {
DCHECK(entry.IsSmi());
DCHECK(bytecode.IsSmi());
int entry_value = Smi::ToInt(entry);
int bytecode_value = Smi::ToInt(bytecode);
DCHECK_EQ(JSRegExp::kUninitializedValue, entry_value);
DCHECK_EQ(JSRegExp::kUninitializedValue, bytecode_value);
} else {
DCHECK(entry.IsByteArray() || entry.IsSmi());
DCHECK(entry.IsSmi() || (entry.IsCode() && bytecode.IsByteArray()));
}
return true;
......@@ -395,7 +399,22 @@ bool RegExpImpl::CompileIrregexp(Isolate* isolate, Handle<JSRegExp> re,
Handle<FixedArray> data =
Handle<FixedArray>(FixedArray::cast(re->data()), isolate);
data->set(JSRegExp::code_index(is_one_byte), compile_data.code);
if (compile_data.compilation_target == RegExpCompilationTarget::kNative) {
data->set(JSRegExp::code_index(is_one_byte), compile_data.code);
// Reset bytecode to uninitialized. When the tier-up strategy is used, this
// is how we later know that tier-up has already happened.
data->set(JSRegExp::bytecode_index(is_one_byte),
Smi::FromInt(JSRegExp::kUninitializedValue));
} else {
DCHECK_EQ(compile_data.compilation_target,
RegExpCompilationTarget::kBytecode);
// Store the code generated by the compiler in the bytecode field and a
// trampoline to the interpreter in the code field.
data->set(JSRegExp::bytecode_index(is_one_byte), compile_data.code);
Handle<Code> trampoline =
BUILTIN_CODE(isolate, RegExpInterpreterTrampoline);
data->set(JSRegExp::code_index(is_one_byte), *trampoline);
}
SetIrregexpCaptureNameMap(*data, compile_data.capture_name_map);
int register_max = IrregexpMaxRegisterCount(*data);
if (compile_data.register_count > register_max) {
......@@ -440,7 +459,7 @@ int RegExpImpl::IrregexpNumberOfRegisters(FixedArray re) {
}
ByteArray RegExpImpl::IrregexpByteCode(FixedArray re, bool is_one_byte) {
return ByteArray::cast(re.get(JSRegExp::code_index(is_one_byte)));
return ByteArray::cast(re.get(JSRegExp::bytecode_index(is_one_byte)));
}
Code RegExpImpl::IrregexpNativeCode(FixedArray re, bool is_one_byte) {
......@@ -498,12 +517,11 @@ int RegExpImpl::IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
DCHECK(output_size >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
do {
EnsureCompiledIrregexp(isolate, regexp, subject, is_one_byte);
Handle<Code> code(IrregexpNativeCode(*irregexp, is_one_byte), isolate);
// The stack is used to allocate registers for the compiled regexp code.
// This means that in case of failure, the output registers array is left
// untouched and contains the capture results from the previous successful
// match. We can use that to set the last match info lazily.
int res = NativeRegExpMacroAssembler::Match(code, subject, output,
int res = NativeRegExpMacroAssembler::Match(regexp, subject, output,
output_size, index, isolate);
if (res != NativeRegExpMacroAssembler::RETRY) {
DCHECK(res != NativeRegExpMacroAssembler::EXCEPTION ||
......@@ -862,6 +880,7 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
data->error =
isolate->factory()->NewStringFromAsciiChecked(result.error_message);
}
data->code = result.code;
data->register_count = result.num_registers;
......
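
The storage decision in RegExpImpl::CompileIrregexp above can be summarized with a small standalone sketch (made-up types, not V8 code): native compilation fills the code slot and resets the bytecode slot, while bytecode compilation fills the bytecode slot and installs the interpreter trampoline in the code slot.

    #include <cstdio>
    #include <string>

    struct RegExpSlots {
      std::string code = "uninitialized";
      std::string bytecode = "uninitialized";
    };

    enum class CompilationTarget { kNative, kBytecode };

    void StoreCompilationResult(RegExpSlots& slots, CompilationTarget target,
                                const std::string& compiler_output) {
      if (target == CompilationTarget::kNative) {
        slots.code = compiler_output;           // native irregexp code
        slots.bytecode = "uninitialized";       // marks that tier-up happened
      } else {
        slots.bytecode = compiler_output;       // bytecode for the interpreter
        slots.code = "interpreter trampoline";  // same call path as native code
      }
    }

    int main() {
      RegExpSlots slots;
      StoreCompilationResult(slots, CompilationTarget::kBytecode, "bytecode blob");
      std::printf("code=%s bytecode=%s\n", slots.code.c_str(),
                  slots.bytecode.c_str());
    }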
......@@ -23,8 +23,8 @@ struct RegExpCompileData {
// The compiled Node graph as produced by RegExpTree::ToNode methods.
RegExpNode* node = nullptr;
// The generated code as produced by the compiler. Either a Code object (for
// irregexp native code) or a ByteArray (for irregexp bytecode).
// Either the generated code as produced by the compiler or a trampoline
// to the interpreter.
Object code;
// True, iff the pattern is a 'simple' atom with zero captures. In other
......
......@@ -48,6 +48,8 @@ namespace internal {
*
* The stack will have the following content, in some order, indexable from the
* frame pointer (see, e.g., kStackHighEnd):
* - Address regexp (address of the JSRegExp object; unused in native
* code, passed to match signature of interpreter)
* - Isolate* isolate (address of the current isolate)
* - direct_call (if 1, direct call from JavaScript code, if 0 call
* through the runtime system)
......@@ -75,9 +77,8 @@ namespace internal {
* "character -1" in the string (i.e., char_size() bytes before the first
* character of the string). The remaining registers starts out uninitialized.
*
* The first seven values must be provided by the calling code by
* calling the code's entry address cast to a function pointer with the
* following signature:
* The argument values must be provided by the calling code by calling the
* code's entry address cast to a function pointer with the following signature:
* int (*match)(String input_string,
* int start_index,
* Address start,
......@@ -86,7 +87,8 @@ namespace internal {
* int num_capture_registers,
* byte* stack_area_base,
* bool direct_call = false,
* Isolate* isolate);
* Isolate* isolate,
* Address regexp);
*/
#define __ ACCESS_MASM((&masm_))
......