Commit 8bbb44e5 authored by Jakob Gruber, committed by V8 LUCI CQ

[regexp] Compact codegen for large character classes

Large character classes may easily be created when unicode
properties (e.g.: /\p{L}/u and /\P{L}/u) are used - these are
expanded internally into character classes that consist of hundreds
of character ranges. Prior to this CL, we'd emit branching code
for each of these ranges, leading to very large regexp code objects.

This CL adds a new codegen mode for large character classes (where
'large' currently means > 16 ranges). Instead of emitting branching
code inline, the ranges are written into a ByteArray and we call into
the C function IsCharacterInRangeArray for the actual branching logic.
The ByteArray is smaller than emitted code and is deduplicated if the
same character class is matched repeatedly in the same pattern.

Note this mode is *not* implemented for the interpreter, since we
currently don't have a constant pool for irregexp bytecode, and thus
cannot reference ByteArrays.

Bug: v8:11069
Change-Id: I2d728e42d85114b796c637f791848731a104cd54
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3229377
Reviewed-by: Patrick Thier <pthier@chromium.org>
Auto-Submit: Jakob Gruber <jgruber@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/main@{#77463}
parent fec145d1
......@@ -730,6 +730,9 @@ FUNCTION_REFERENCE(re_case_insensitive_compare_unicode,
FUNCTION_REFERENCE(re_case_insensitive_compare_non_unicode,
NativeRegExpMacroAssembler::CaseInsensitiveCompareNonUnicode)
FUNCTION_REFERENCE(re_is_character_in_range_array,
RegExpMacroAssembler::IsCharacterInRangeArray)
ExternalReference ExternalReference::re_word_character_map() {
return ExternalReference(
NativeRegExpMacroAssembler::word_character_map_address());
......
......@@ -288,6 +288,8 @@ class StatsCounter;
"RegExpMacroAssembler::CaseInsensitiveCompareUnicode()") \
V(re_case_insensitive_compare_non_unicode, \
"RegExpMacroAssembler::CaseInsensitiveCompareNonUnicode()") \
V(re_is_character_in_range_array, \
"RegExpMacroAssembler::IsCharacterInRangeArray()") \
V(re_check_stack_guard_state, \
"RegExpMacroAssembler*::CheckStackGuardState()") \
V(re_grow_stack, "NativeRegExpMacroAssembler::GrowStack()") \
......
......@@ -556,12 +556,12 @@ void ArrayList::Clear(int index, Object undefined) {
// Returns length() plus the header size, rounded up to a multiple of
// kTaggedSize.
int ByteArray::Size() {
  const auto unaligned_size = length() + kHeaderSize;
  return RoundUp(unaligned_size, kTaggedSize);
}
// Returns the byte stored at element `index`.
// DCHECKs that `index` lies in [0, length()).
byte ByteArray::get(int index) const {
  // A single bounds assertion suffices; `this->length()` and `length()` are
  // the same call.
  DCHECK(index >= 0 && index < length());
  return ReadField<byte>(kHeaderSize + index * kCharSize);
}
// Stores `value` as the byte at element `index`.
// DCHECKs that `index` lies in [0, length()).
void ByteArray::set(int index, byte value) {
  // A single bounds assertion suffices; `this->length()` and `length()` are
  // the same call.
  DCHECK(index >= 0 && index < length());
  WriteField<byte>(kHeaderSize + index * kCharSize, value);
}
......@@ -580,35 +580,45 @@ void ByteArray::copy_out(int index, byte* buffer, int length) {
}
// Reads the `index`-th int-sized element of the payload (the offset is
// `index * kIntSize` past the header).
// DCHECKs that `index` lies in [0, length() / kIntSize).
int ByteArray::get_int(int index) const {
  // A single bounds assertion suffices; `this->length()` and `length()` are
  // the same call.
  DCHECK(index >= 0 && index < length() / kIntSize);
  return ReadField<int>(kHeaderSize + index * kIntSize);
}
// Writes `value` as the `index`-th int-sized element of the payload (the
// offset is `index * kIntSize` past the header).
// DCHECKs that `index` lies in [0, length() / kIntSize).
void ByteArray::set_int(int index, int value) {
  // A single bounds assertion suffices; `this->length()` and `length()` are
  // the same call.
  DCHECK(index >= 0 && index < length() / kIntSize);
  WriteField<int>(kHeaderSize + index * kIntSize, value);
}
// Reads the `index`-th uint32_t element of the payload (the offset is
// `index * kUInt32Size` past the header).
// DCHECKs that `index` lies in [0, length() / kUInt32Size).
uint32_t ByteArray::get_uint32(int index) const {
  // A single bounds assertion suffices; `this->length()` and `length()` are
  // the same call.
  DCHECK(index >= 0 && index < length() / kUInt32Size);
  return ReadField<uint32_t>(kHeaderSize + index * kUInt32Size);
}
// Writes `value` as the `index`-th uint32_t element of the payload (the
// offset is `index * kUInt32Size` past the header).
// DCHECKs that `index` lies in [0, length() / kUInt32Size).
void ByteArray::set_uint32(int index, uint32_t value) {
  // A single bounds assertion suffices; `this->length()` and `length()` are
  // the same call.
  DCHECK(index >= 0 && index < length() / kUInt32Size);
  WriteField<uint32_t>(kHeaderSize + index * kUInt32Size, value);
}
// Reads the `index`-th uint32_t element of the payload through
// RELAXED_READ_UINT32_FIELD.
// DCHECKs that `index` lies in [0, length() / kUInt32Size).
uint32_t ByteArray::get_uint32_relaxed(int index) const {
  // A single bounds assertion suffices; `this->length()` and `length()` are
  // the same call.
  DCHECK(index >= 0 && index < length() / kUInt32Size);
  return RELAXED_READ_UINT32_FIELD(*this, kHeaderSize + index * kUInt32Size);
}
// Writes `value` as the `index`-th uint32_t element of the payload through
// RELAXED_WRITE_UINT32_FIELD.
// DCHECKs that `index` lies in [0, length() / kUInt32Size).
void ByteArray::set_uint32_relaxed(int index, uint32_t value) {
  // A single bounds assertion suffices; `this->length()` and `length()` are
  // the same call.
  DCHECK(index >= 0 && index < length() / kUInt32Size);
  RELAXED_WRITE_UINT32_FIELD(*this, kHeaderSize + index * kUInt32Size, value);
}
// Reads the `index`-th uint16_t element of the payload.
// DCHECKs that `index` lies in [0, length() / kUInt16Size).
uint16_t ByteArray::get_uint16(int index) const {
  DCHECK(index >= 0 && index < length() / kUInt16Size);
  // Elements are laid out back to back immediately after the header.
  const auto field_offset = kHeaderSize + index * kUInt16Size;
  return ReadField<uint16_t>(field_offset);
}
// Writes `value` as the `index`-th uint16_t element of the payload.
// DCHECKs that `index` lies in [0, length() / kUInt16Size).
void ByteArray::set_uint16(int index, uint16_t value) {
  DCHECK(index >= 0 && index < length() / kUInt16Size);
  // Elements are laid out back to back immediately after the header.
  const auto field_offset = kHeaderSize + index * kUInt16Size;
  WriteField<uint16_t>(field_offset, value);
}
void ByteArray::clear_padding() {
int data_size = length() + kHeaderSize;
memset(reinterpret_cast<void*>(address() + data_size), 0, Size() - data_size);
......@@ -621,7 +631,7 @@ ByteArray ByteArray::FromDataStartAddress(Address address) {
// Returns length() rounded up to a multiple of kTaggedSize.
int ByteArray::DataSize() const {
  const auto payload_length = length();
  return RoundUp(payload_length, kTaggedSize);
}
// Returns SizeFor() applied to this array's length().
// Only one definition is kept; a second, identical definition would be a
// redefinition error.
int ByteArray::ByteArraySize() { return SizeFor(length()); }
byte* ByteArray::GetDataStartAddress() {
return reinterpret_cast<byte*>(address() + kHeaderSize);
......
......@@ -521,6 +521,9 @@ class ByteArray : public TorqueGeneratedByteArray<ByteArray, FixedArrayBase> {
inline uint32_t get_uint32_relaxed(int index) const;
inline void set_uint32_relaxed(int index, uint32_t value);
inline uint16_t get_uint16(int index) const;
inline void set_uint16(int index, uint16_t value);
// Clear uninitialized padding space. This ensures that the snapshot content
// is deterministic.
inline void clear_padding();
......
......@@ -477,6 +477,41 @@ void RegExpMacroAssemblerARM::CheckCharacterNotInRange(base::uc16 from,
BranchOrBacktrack(hi, on_not_in_range); // Unsigned higher condition.
}
// Emits a three-argument call to the C function referenced by
// ExternalReference::re_is_character_in_range_array(). Arguments:
//   r0: the current character,
//   r1: the range array returned by GetOrAddRangeArray(ranges),
//   r2: the isolate address.
// The callers in this file test the result in r0.
void RegExpMacroAssemblerARM::CallIsCharacterInRangeArray(
const ZoneList<CharacterRange>* ranges) {
static const int kNumArguments = 3;
__ PrepareCallCFunction(kNumArguments);
__ mov(r0, current_character());
__ mov(r1, Operand(GetOrAddRangeArray(ranges)));
__ mov(r2, Operand(ExternalReference::isolate_address(isolate())));
{
// We have a frame (set up in GetCode), but the assembler doesn't know.
FrameScope scope(masm_.get(), StackFrame::MANUAL);
__ CallCFunction(ExternalReference::re_is_character_in_range_array(),
kNumArguments);
}
// Re-materialize the code object pointer after the call (presumably because
// the C callee may clobber that register -- TODO confirm).
__ mov(code_pointer(), Operand(masm_->CodeObject()));
}
// Emits a call to the range-array helper for the current character, then
// BranchOrBacktrack(ne, on_in_range), i.e. the branch is taken when the
// helper's result in r0 is non-zero. Always returns true.
bool RegExpMacroAssemblerARM::CheckCharacterInRangeArray(
const ZoneList<CharacterRange>* ranges, Label* on_in_range) {
CallIsCharacterInRangeArray(ranges);
// Compare the helper's return value against zero.
__ cmp(r0, Operand::Zero());
BranchOrBacktrack(ne, on_in_range);
return true;
}
// Emits a call to the range-array helper for the current character, then
// BranchOrBacktrack(eq, on_not_in_range), i.e. the branch is taken when the
// helper's result in r0 is zero. Always returns true.
bool RegExpMacroAssemblerARM::CheckCharacterNotInRangeArray(
const ZoneList<CharacterRange>* ranges, Label* on_not_in_range) {
CallIsCharacterInRangeArray(ranges);
// Compare the helper's return value against zero.
__ cmp(r0, Operand::Zero());
BranchOrBacktrack(eq, on_not_in_range);
return true;
}
void RegExpMacroAssemblerARM::CheckBitInTable(
Handle<ByteArray> table,
Label* on_bit_set) {
......
......@@ -48,6 +48,10 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerARM
Label* on_in_range) override;
void CheckCharacterNotInRange(base::uc16 from, base::uc16 to,
Label* on_not_in_range) override;
bool CheckCharacterInRangeArray(const ZoneList<CharacterRange>* ranges,
Label* on_in_range) override;
bool CheckCharacterNotInRangeArray(const ZoneList<CharacterRange>* ranges,
Label* on_not_in_range) override;
void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set) override;
// Checks whether the given offset from the current position is before
......@@ -131,8 +135,8 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerARM
// Check whether we are exceeding the stack limit on the backtrack stack.
void CheckStackLimit();
// Generate a call to CheckStackGuardState.
void CallCheckStackGuardState();
void CallIsCharacterInRangeArray(const ZoneList<CharacterRange>* ranges);
// The ebp-relative location of a regexp register.
MemOperand register_location(int register_index);
......
......@@ -293,6 +293,18 @@ void RegExpMacroAssemblerARM64::CheckGreedyLoop(Label* on_equal) {
BranchOrBacktrack(eq, on_equal);
}
// Emits a push of the register list built from X registers 0 through 7 (the
// "cached registers"), so they can be restored with PopCachedRegisters().
void RegExpMacroAssemblerARM64::PushCachedRegisters() {
CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7);
// kNumCachedRegisters must be twice the number of X registers in the list
// (presumably two 32-bit regexp registers per X register -- TODO confirm).
DCHECK_EQ(kNumCachedRegisters, cached_registers.Count() * 2);
__ PushCPURegList(cached_registers);
}
// Emits a pop of the register list built from X registers 0 through 7 (the
// "cached registers"); the counterpart of PushCachedRegisters().
void RegExpMacroAssemblerARM64::PopCachedRegisters() {
CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7);
// kNumCachedRegisters must be twice the number of X registers in the list
// (presumably two 32-bit regexp registers per X register -- TODO confirm).
DCHECK_EQ(kNumCachedRegisters, cached_registers.Count() * 2);
__ PopCPURegList(cached_registers);
}
void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase(
int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
Label fallthrough;
......@@ -398,10 +410,7 @@ void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase(
DCHECK(mode_ == UC16);
int argument_count = 4;
// The cached registers need to be retained.
CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7);
DCHECK_EQ(kNumCachedRegisters, cached_registers.Count() * 2);
__ PushCPURegList(cached_registers);
PushCachedRegisters();
// Put arguments into arguments registers.
// Parameters are
......@@ -435,7 +444,7 @@ void RegExpMacroAssemblerARM64::CheckNotBackReferenceIgnoreCase(
// x0 is one of the registers used as a cache so it must be tested before
// the cache is restored.
__ Cmp(x0, 0);
__ PopCPURegList(cached_registers);
PopCachedRegisters();
BranchOrBacktrack(eq, on_no_match);
// On success, advance position by length of capture.
......@@ -574,6 +583,49 @@ void RegExpMacroAssemblerARM64::CheckCharacterNotInRange(
CompareAndBranchOrBacktrack(w10, to - from, hi, on_not_in_range);
}
// Emits a three-argument call to the C function referenced by
// ExternalReference::re_is_character_in_range_array(). Arguments:
//   w0: the current character,
//   x1: the range array returned by GetOrAddRangeArray(ranges),
//   x2: the isolate address.
// This function does not save or restore the cached registers itself; the
// callers in this file wrap it in PushCachedRegisters()/PopCachedRegisters()
// and test the result in x0 before popping.
void RegExpMacroAssemblerARM64::CallIsCharacterInRangeArray(
const ZoneList<CharacterRange>* ranges) {
static const int kNumArguments = 3;
__ Mov(w0, current_character());
__ Mov(x1, GetOrAddRangeArray(ranges));
__ Mov(x2, ExternalReference::isolate_address(isolate()));
{
// We have a frame (set up in GetCode), but the assembler doesn't know.
FrameScope scope(masm_.get(), StackFrame::MANUAL);
__ CallCFunction(ExternalReference::re_is_character_in_range_array(),
kNumArguments);
}
// Re-materialize the code object pointer after the call (presumably because
// the C callee may clobber that register -- TODO confirm).
__ Mov(code_pointer(), Operand(masm_->CodeObject()));
}
// Emits a call to the range-array helper for the current character, then
// BranchOrBacktrack(ne, on_in_range), i.e. the branch is taken when the
// helper's result is non-zero. Always returns true.
bool RegExpMacroAssemblerARM64::CheckCharacterInRangeArray(
const ZoneList<CharacterRange>* ranges, Label* on_in_range) {
// Note: due to the arm64 oddity of x0 being a 'cached register',
// pushing/popping registers must happen outside of
// CallIsCharacterInRangeArray s.t. we can compare the return value to 0
// before popping x0.
PushCachedRegisters();
CallIsCharacterInRangeArray(ranges);
// Set the condition flags from the return value while x0 still holds it.
__ Cmp(x0, 0);
PopCachedRegisters();
BranchOrBacktrack(ne, on_in_range);
return true;
}
// Emits a call to the range-array helper for the current character, then
// BranchOrBacktrack(eq, on_not_in_range), i.e. the branch is taken when the
// helper's result is zero. Always returns true.
bool RegExpMacroAssemblerARM64::CheckCharacterNotInRangeArray(
const ZoneList<CharacterRange>* ranges, Label* on_not_in_range) {
// Note: due to the arm64 oddity of x0 being a 'cached register',
// pushing/popping registers must happen outside of
// CallIsCharacterInRangeArray s.t. we can compare the return value to 0
// before popping x0.
PushCachedRegisters();
CallIsCharacterInRangeArray(ranges);
// Set the condition flags from the return value while x0 still holds it.
__ Cmp(x0, 0);
PopCachedRegisters();
BranchOrBacktrack(eq, on_not_in_range);
return true;
}
void RegExpMacroAssemblerARM64::CheckBitInTable(
Handle<ByteArray> table,
Label* on_bit_set) {
......@@ -1081,25 +1133,19 @@ Handle<HeapObject> RegExpMacroAssemblerARM64::GetCode(Handle<String> source) {
__ Ret();
Label exit_with_exception;
// Registers x0 to x7 are used to store the first captures, they need to be
// retained over calls to C++ code.
CPURegList cached_registers(CPURegister::kRegister, kXRegSizeInBits, 0, 7);
DCHECK_EQ(kNumCachedRegisters, cached_registers.Count() * 2);
if (check_preempt_label_.is_linked()) {
__ Bind(&check_preempt_label_);
StoreRegExpStackPointerToMemory(backtrack_stackpointer(), x10);
SaveLinkRegister();
// The cached registers need to be retained.
__ PushCPURegList(cached_registers);
PushCachedRegisters();
CallCheckStackGuardState(x10);
// Returning from the regexp code restores the stack (sp <- fp)
// so we don't need to drop the link register from it before exiting.
__ Cbnz(w0, &return_w0);
// Reset the cached registers.
__ PopCPURegList(cached_registers);
PopCachedRegisters();
LoadRegExpStackPointerFromMemory(backtrack_stackpointer());
......@@ -1113,9 +1159,8 @@ Handle<HeapObject> RegExpMacroAssemblerARM64::GetCode(Handle<String> source) {
StoreRegExpStackPointerToMemory(backtrack_stackpointer(), x10);
SaveLinkRegister();
// The cached registers need to be retained.
__ PushCPURegList(cached_registers);
// Call GrowStack(isolate)
PushCachedRegisters();
// Call GrowStack(isolate).
static constexpr int kNumArguments = 1;
__ Mov(x0, ExternalReference::isolate_address(isolate()));
__ CallCFunction(ExternalReference::re_grow_stack(), kNumArguments);
......@@ -1126,8 +1171,7 @@ Handle<HeapObject> RegExpMacroAssemblerARM64::GetCode(Handle<String> source) {
__ Cbz(w0, &exit_with_exception);
// Otherwise use return value as new stack pointer.
__ Mov(backtrack_stackpointer(), x0);
// Reset the cached registers.
__ PopCPURegList(cached_registers);
PopCachedRegisters();
RestoreLinkRegister();
__ Ret();
}
......
......@@ -52,6 +52,10 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerARM64
Label* on_in_range) override;
void CheckCharacterNotInRange(base::uc16 from, base::uc16 to,
Label* on_not_in_range) override;
bool CheckCharacterInRangeArray(const ZoneList<CharacterRange>* ranges,
Label* on_in_range) override;
bool CheckCharacterNotInRangeArray(const ZoneList<CharacterRange>* ranges,
Label* on_not_in_range) override;
void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set) override;
// Checks whether the given offset from the current position is before
......@@ -130,6 +134,11 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerARM64
// Initial size of code buffer.
static const int kRegExpCodeSize = 1024;
// Registers x0 to x7 are used to store the first captures, they need to be
// retained over calls to C++ code.
void PushCachedRegisters();
void PopCachedRegisters();
// When initializing registers to a non-position value we can unroll
// the loop. Set the limit of registers to unroll.
static const int kNumRegistersToUnroll = 16;
......@@ -145,8 +154,8 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerARM64
// Check whether we are exceeding the stack limit on the backtrack stack.
void CheckStackLimit();
// Generate a call to CheckStackGuardState.
void CallCheckStackGuardState(Register scratch);
void CallIsCharacterInRangeArray(const ZoneList<CharacterRange>* ranges);
// Location of a 32 bit position register.
MemOperand register_location(int register_index);
......
......@@ -205,6 +205,18 @@ void RegExpMacroAssemblerIA32::CheckGreedyLoop(Label* on_equal) {
__ bind(&fallthrough);
}
// Emits pushes of ecx and then edx. The static assertions pin these to the
// backtrack stack pointer and current-character registers respectively.
// PopCallerSavedRegisters() pops them in the reverse order.
void RegExpMacroAssemblerIA32::PushCallerSavedRegisters() {
STATIC_ASSERT(backtrack_stackpointer() == ecx);
STATIC_ASSERT(current_character() == edx);
__ push(ecx);
__ push(edx);
}
// Emits pops of edx and then ecx, the reverse of the push order used by
// PushCallerSavedRegisters().
void RegExpMacroAssemblerIA32::PopCallerSavedRegisters() {
__ pop(edx);
__ pop(ecx);
}
void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase(
int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
Label fallthrough;
......@@ -502,6 +514,44 @@ void RegExpMacroAssemblerIA32::CheckCharacterNotInRange(
BranchOrBacktrack(above, on_not_in_range);
}
// Emits a three-argument call to the C function referenced by
// ExternalReference::re_is_character_in_range_array(), bracketed by
// PushCallerSavedRegisters()/PopCallerSavedRegisters(). The arguments are
// written to consecutive pointer-sized stack slots starting at esp:
//   slot 0: the current character,
//   slot 1: the range array returned by GetOrAddRangeArray(ranges),
//   slot 2: the isolate address.
// The callers in this file test the result in eax.
void RegExpMacroAssemblerIA32::CallIsCharacterInRangeArray(
const ZoneList<CharacterRange>* ranges) {
PushCallerSavedRegisters();
static const int kNumArguments = 3;
// ecx has just been saved above, so it can be handed to PrepareCallCFunction
// (presumably as a scratch register -- TODO confirm).
__ PrepareCallCFunction(kNumArguments, ecx);
__ mov(Operand(esp, 0 * kSystemPointerSize), current_character());
__ mov(Operand(esp, 1 * kSystemPointerSize), GetOrAddRangeArray(ranges));
__ mov(Operand(esp, 2 * kSystemPointerSize),
Immediate(ExternalReference::isolate_address(isolate())));
{
// We have a frame (set up in GetCode), but the assembler doesn't know.
FrameScope scope(masm_.get(), StackFrame::MANUAL);
__ CallCFunction(ExternalReference::re_is_character_in_range_array(),
kNumArguments);
}
PopCallerSavedRegisters();
}
// Emits a call to the range-array helper for the current character, then
// BranchOrBacktrack(not_zero, on_in_range), i.e. the branch is taken when the
// helper's result in eax is non-zero. Always returns true.
bool RegExpMacroAssemblerIA32::CheckCharacterInRangeArray(
const ZoneList<CharacterRange>* ranges, Label* on_in_range) {
CallIsCharacterInRangeArray(ranges);
// OR-ing eax with itself leaves the value unchanged and sets the zero flag
// according to the result.
__ or_(eax, eax);
BranchOrBacktrack(not_zero, on_in_range);
return true;
}
// Emits a call to the range-array helper for the current character, then
// BranchOrBacktrack(zero, on_not_in_range), i.e. the branch is taken when the
// helper's result in eax is zero. Always returns true.
bool RegExpMacroAssemblerIA32::CheckCharacterNotInRangeArray(
const ZoneList<CharacterRange>* ranges, Label* on_not_in_range) {
CallIsCharacterInRangeArray(ranges);
// OR-ing eax with itself leaves the value unchanged and sets the zero flag
// according to the result.
__ or_(eax, eax);
BranchOrBacktrack(zero, on_not_in_range);
return true;
}
void RegExpMacroAssemblerIA32::CheckBitInTable(
Handle<ByteArray> table,
Label* on_bit_set) {
......
......@@ -49,6 +49,10 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerIA32
Label* on_in_range) override;
void CheckCharacterNotInRange(base::uc16 from, base::uc16 to,
Label* on_not_in_range) override;
bool CheckCharacterInRangeArray(const ZoneList<CharacterRange>* ranges,
Label* on_in_range) override;
bool CheckCharacterNotInRangeArray(const ZoneList<CharacterRange>* ranges,
Label* on_not_in_range) override;
void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set) override;
// Checks whether the given offset from the current position is before
......@@ -130,14 +134,17 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerIA32
// Initial size of code buffer.
static const int kRegExpCodeSize = 1024;
void PushCallerSavedRegisters();
void PopCallerSavedRegisters();
// Check whether preemption has been requested.
void CheckPreemption();
// Check whether we are exceeding the stack limit on the backtrack stack.
void CheckStackLimit();
// Generate a call to CheckStackGuardState.
void CallCheckStackGuardState(Register scratch);
void CallIsCharacterInRangeArray(const ZoneList<CharacterRange>* ranges);
// The ebp-relative location of a regexp register.
Operand register_location(int register_index);
......
......@@ -69,6 +69,21 @@ class V8_EXPORT_PRIVATE RegExpBytecodeGenerator : public RegExpMacroAssembler {
Label* on_in_range) override;
void CheckCharacterNotInRange(base::uc16 from, base::uc16 to,
Label* on_not_in_range) override;
// Range-array checks are not supported by the bytecode generator: this
// override emits nothing and returns false.
bool CheckCharacterInRangeArray(const ZoneList<CharacterRange>* ranges,
Label* on_in_range) override {
// Disabled in the interpreter, because 1) there is no constant pool that
// could store the ByteArray pointer, 2) bytecode size limits are not as
// restrictive as code (e.g. branch distances on arm), 3) bytecode for
// large character classes is already quite compact.
// TODO(jgruber): Consider using BytecodeArrays (with a constant pool)
// instead of plain ByteArrays; then we could implement
// CheckCharacterInRangeArray in the interpreter.
return false;
}
// Range-array checks are not supported by the bytecode generator: this
// override emits nothing and returns false.
bool CheckCharacterNotInRangeArray(const ZoneList<CharacterRange>* ranges,
Label* on_not_in_range) override {
return false;
}
void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set) override;
void CheckNotBackReference(int start_reg, bool read_backward,
Label* on_no_match) override;
......
......@@ -27,6 +27,7 @@ const unsigned int MAX_FIRST_ARG = 0x7fffffu;
const int BYTECODE_SHIFT = 8;
STATIC_ASSERT(1 << BYTECODE_SHIFT > BYTECODE_MASK);
// The list of bytecodes, in format: V(Name, Code, ByteLength).
// TODO(pthier): Argument offsets of bytecodes should be easily accessible by
// name or at least by position.
// TODO(jgruber): More precise types (e.g. int32/uint32 instead of value32).
......
......@@ -215,14 +215,60 @@ void AddBmpCharacters(RegExpCompiler* compiler, ChoiceNode* result,
compiler->zone(), bmp, compiler->read_backward(), on_success)));
}
// A {from, to} pair of 16-bit code units packed into a single 32-bit value:
// `from` occupies the upper 16 bits, `to` the lower 16 bits.
using UC16Range = uint32_t;

// Packs `from` and `to` into a UC16Range.
constexpr UC16Range ToUC16Range(base::uc16 from, base::uc16 to) {
  return static_cast<uint32_t>(to) | (static_cast<uint32_t>(from) << 16);
}

// Returns the `from` half (upper 16 bits) of a packed range.
constexpr base::uc16 ExtractFrom(UC16Range r) {
  return static_cast<base::uc16>((r >> 16) & 0xFFFFu);
}

// Returns the `to` half (lower 16 bits) of a packed range.
constexpr base::uc16 ExtractTo(UC16Range r) {
  return static_cast<base::uc16>(r & 0xFFFFu);
}
void AddNonBmpSurrogatePairs(RegExpCompiler* compiler, ChoiceNode* result,
RegExpNode* on_success,
UnicodeRangeSplitter* splitter) {
DCHECK(!compiler->one_byte());
Zone* const zone = compiler->zone();
ZoneList<CharacterRange>* non_bmp =
ToCanonicalZoneList(splitter->non_bmp(), compiler->zone());
ToCanonicalZoneList(splitter->non_bmp(), zone);
if (non_bmp == nullptr) return;
DCHECK(!compiler->one_byte());
Zone* zone = compiler->zone();
// Translate each 32-bit code point range into the corresponding 16-bit code
// unit representation consisting of the lead- and trail surrogate.
//
// The generated alternatives are grouped by the leading surrogate to avoid
// emitting excessive code. For example, for
//
// { \ud800[\udc00-\udc01]
// , \ud800[\udc05-\udc06]
// }
//
// there's no need to emit matching code for the leading surrogate \ud800
// twice. We also create a dedicated grouping for full trailing ranges, i.e.
// [dc00-dfff].
ZoneUnorderedMap<UC16Range, ZoneList<CharacterRange>*> grouped_by_leading(
zone);
ZoneList<CharacterRange>* leading_with_full_trailing_range =
zone->New<ZoneList<CharacterRange>>(1, zone);
const auto AddRange = [&](base::uc16 from_l, base::uc16 to_l,
base::uc16 from_t, base::uc16 to_t) {
const UC16Range leading_range = ToUC16Range(from_l, to_l);
if (grouped_by_leading.count(leading_range) == 0) {
if (from_t == kTrailSurrogateStart && to_t == kTrailSurrogateEnd) {
leading_with_full_trailing_range->Add(
CharacterRange::Range(from_l, to_l), zone);
return;
}
grouped_by_leading[leading_range] =
zone->New<ZoneList<CharacterRange>>(2, zone);
}
grouped_by_leading[leading_range]->Add(CharacterRange::Range(from_t, to_t),
zone);
};
// First, create the grouped ranges.
CharacterRange::Canonicalize(non_bmp);
for (int i = 0; i < non_bmp->length(); i++) {
// Match surrogate pair.
......@@ -236,41 +282,45 @@ void AddNonBmpSurrogatePairs(RegExpCompiler* compiler, ChoiceNode* result,
base::uc16 from_t = unibrow::Utf16::TrailSurrogate(from);
base::uc16 to_l = unibrow::Utf16::LeadSurrogate(to);
base::uc16 to_t = unibrow::Utf16::TrailSurrogate(to);
if (from_l == to_l) {
// The lead surrogate is the same.
result->AddAlternative(
GuardedAlternative(TextNode::CreateForSurrogatePair(
zone, CharacterRange::Singleton(from_l),
CharacterRange::Range(from_t, to_t), compiler->read_backward(),
on_success)));
} else {
if (from_t != kTrailSurrogateStart) {
// Add [from_l][from_t-\udfff]
result->AddAlternative(
GuardedAlternative(TextNode::CreateForSurrogatePair(
zone, CharacterRange::Singleton(from_l),
CharacterRange::Range(from_t, kTrailSurrogateEnd),
compiler->read_backward(), on_success)));
from_l++;
}
if (to_t != kTrailSurrogateEnd) {
// Add [to_l][\udc00-to_t]
result->AddAlternative(
GuardedAlternative(TextNode::CreateForSurrogatePair(
zone, CharacterRange::Singleton(to_l),
CharacterRange::Range(kTrailSurrogateStart, to_t),
compiler->read_backward(), on_success)));
to_l--;
}
if (from_l <= to_l) {
// Add [from_l-to_l][\udc00-\udfff]
result->AddAlternative(
GuardedAlternative(TextNode::CreateForSurrogatePair(
zone, CharacterRange::Range(from_l, to_l),
CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd),
compiler->read_backward(), on_success)));
}
AddRange(from_l, to_l, from_t, to_t);
continue;
}
if (from_t != kTrailSurrogateStart) {
// Add [from_l][from_t-\udfff].
AddRange(from_l, from_l, from_t, kTrailSurrogateEnd);
from_l++;
}
if (to_t != kTrailSurrogateEnd) {
// Add [to_l][\udc00-to_t].
AddRange(to_l, to_l, kTrailSurrogateStart, to_t);
to_l--;
}
if (from_l <= to_l) {
// Add [from_l-to_l][\udc00-\udfff].
AddRange(from_l, to_l, kTrailSurrogateStart, kTrailSurrogateEnd);
}
}
// Create the actual TextNode now that ranges are fully grouped.
if (!leading_with_full_trailing_range->is_empty()) {
CharacterRange::Canonicalize(leading_with_full_trailing_range);
result->AddAlternative(GuardedAlternative(TextNode::CreateForSurrogatePair(
zone, leading_with_full_trailing_range,
CharacterRange::Range(kTrailSurrogateStart, kTrailSurrogateEnd),
compiler->read_backward(), on_success)));
}
for (const auto& it : grouped_by_leading) {
CharacterRange leading_range =
CharacterRange::Range(ExtractFrom(it.first), ExtractTo(it.first));
ZoneList<CharacterRange>* trailing_ranges = it.second;
CharacterRange::Canonicalize(trailing_ranges);
result->AddAlternative(GuardedAlternative(TextNode::CreateForSurrogatePair(
zone, leading_range, trailing_ranges, compiler->read_backward(),
on_success)));
}
}
......@@ -409,41 +459,52 @@ void AddUnicodeCaseEquivalents(ZoneList<CharacterRange>* ranges, Zone* zone) {
RegExpNode* RegExpCharacterClass::ToNode(RegExpCompiler* compiler,
RegExpNode* on_success) {
set_.Canonicalize();
Zone* zone = compiler->zone();
Zone* const zone = compiler->zone();
ZoneList<CharacterRange>* ranges = this->ranges(zone);
if (NeedsUnicodeCaseEquivalents(compiler->flags())) {
AddUnicodeCaseEquivalents(ranges, zone);
}
if (IsUnicode(compiler->flags()) && !compiler->one_byte() &&
!contains_split_surrogate()) {
if (is_negated()) {
ZoneList<CharacterRange>* negated =
zone->New<ZoneList<CharacterRange>>(2, zone);
CharacterRange::Negate(ranges, negated, zone);
ranges = negated;
}
if (ranges->length() == 0) {
RegExpCharacterClass* fail =
zone->New<RegExpCharacterClass>(zone, ranges);
return zone->New<TextNode>(fail, compiler->read_backward(), on_success);
}
if (set_.is_standard() &&
standard_type() == StandardCharacterSet::kEverything) {
return UnanchoredAdvance(compiler, on_success);
} else {
ChoiceNode* result = zone->New<ChoiceNode>(2, zone);
UnicodeRangeSplitter splitter(ranges);
AddBmpCharacters(compiler, result, on_success, &splitter);
AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter);
AddLoneLeadSurrogates(compiler, result, on_success, &splitter);
AddLoneTrailSurrogates(compiler, result, on_success, &splitter);
static constexpr int kMaxRangesToInline = 32; // Arbitrary.
if (ranges->length() > kMaxRangesToInline) result->SetDoNotInline();
return result;
}
} else {
if (!IsUnicode(compiler->flags()) || compiler->one_byte() ||
contains_split_surrogate()) {
return zone->New<TextNode>(this, compiler->read_backward(), on_success);
}
if (is_negated()) {
ZoneList<CharacterRange>* negated =
zone->New<ZoneList<CharacterRange>>(2, zone);
CharacterRange::Negate(ranges, negated, zone);
ranges = negated;
}
if (ranges->length() == 0) {
// The empty character class is used as a 'fail' node.
RegExpCharacterClass* fail = zone->New<RegExpCharacterClass>(zone, ranges);
return zone->New<TextNode>(fail, compiler->read_backward(), on_success);
}
if (set_.is_standard() &&
standard_type() == StandardCharacterSet::kEverything) {
return UnanchoredAdvance(compiler, on_success);
}
// Split ranges in order to handle surrogates correctly:
// - Surrogate pairs: translate the 32-bit code point into two uc16 code
// units (irregexp operates only on code units).
// - Lone surrogates: these require lookarounds to ensure we don't match in
// the middle of a surrogate pair.
ChoiceNode* result = zone->New<ChoiceNode>(2, zone);
UnicodeRangeSplitter splitter(ranges);
AddBmpCharacters(compiler, result, on_success, &splitter);
AddNonBmpSurrogatePairs(compiler, result, on_success, &splitter);
AddLoneLeadSurrogates(compiler, result, on_success, &splitter);
AddLoneTrailSurrogates(compiler, result, on_success, &splitter);
static constexpr int kMaxRangesToInline = 32; // Arbitrary.
if (ranges->length() > kMaxRangesToInline) result->SetDoNotInline();
return result;
}
namespace {
......
......@@ -1223,16 +1223,18 @@ void EmitCharClass(RegExpMacroAssembler* macro_assembler,
CharacterRange::Canonicalize(ranges);
const base::uc32 max_char = MaxCodeUnit(one_byte);
int range_count = ranges->length();
int last_valid_range = range_count - 1;
while (last_valid_range >= 0) {
CharacterRange& range = ranges->at(last_valid_range);
// Determine the 'interesting' set of ranges; may be a subset of the given
// range set if it contains ranges not representable by the current string
// representation.
int ranges_length = ranges->length();
while (ranges_length > 0) {
CharacterRange& range = ranges->at(ranges_length - 1);
if (range.from() <= max_char) break;
last_valid_range--;
ranges_length--;
}
if (last_valid_range < 0) {
if (ranges_length == 0) {
if (!cc->is_negated()) {
macro_assembler->GoTo(on_failure);
}
......@@ -1242,7 +1244,7 @@ void EmitCharClass(RegExpMacroAssembler* macro_assembler,
return;
}
if (last_valid_range == 0 && ranges->at(0).IsEverything(max_char)) {
if (ranges_length == 1 && ranges->at(0).IsEverything(max_char)) {
if (cc->is_negated()) {
macro_assembler->GoTo(on_failure);
} else {
......@@ -1263,15 +1265,33 @@ void EmitCharClass(RegExpMacroAssembler* macro_assembler,
return;
}
static constexpr int kMaxRangesForInlineBranchGeneration = 16;
if (ranges_length > kMaxRangesForInlineBranchGeneration) {
// For large range sets, emit a more compact instruction sequence to avoid
// a potentially problematic increase in code size.
// Note the flipped logic below (we check InRange if negated, NotInRange if
// not negated); this is necessary since the method falls through on
// failure whereas we want to fall through on success.
if (cc->is_negated()) {
if (macro_assembler->CheckCharacterInRangeArray(ranges, on_failure)) {
return;
}
} else {
if (macro_assembler->CheckCharacterNotInRangeArray(ranges, on_failure)) {
return;
}
}
}
// Generate a flat list of range boundaries for consumption by
// GenerateBranches. See the comment on that function for how the list should
// be structured
ZoneList<base::uc32>* range_boundaries =
zone->New<ZoneList<base::uc32>>(last_valid_range * 2, zone);
zone->New<ZoneList<base::uc32>>(ranges_length * 2, zone);
bool zeroth_entry_is_failure = !cc->is_negated();
for (int i = 0; i <= last_valid_range; i++) {
for (int i = 0; i < ranges_length; i++) {
CharacterRange& range = ranges->at(i);
if (range.from() == 0) {
DCHECK_EQ(i, 0);
......@@ -1280,6 +1300,7 @@ void EmitCharClass(RegExpMacroAssembler* macro_assembler,
range_boundaries->Add(range.from(), zone);
}
// `+ 1` to convert from inclusive to exclusive `to`.
// [from, to] == [from, to+1[.
range_boundaries->Add(range.to() + 1, zone);
}
int end_index = range_boundaries->length() - 1;
......@@ -2410,11 +2431,23 @@ TextNode* TextNode::CreateForCharacterRanges(Zone* zone,
read_backward, on_success);
}
TextNode* TextNode::CreateForSurrogatePair(Zone* zone, CharacterRange lead,
CharacterRange trail,
bool read_backward,
RegExpNode* on_success) {
// Builds a two-element TextNode: a character class holding the single `lead`
// range, followed by a character class holding `trail_ranges`.
TextNode* TextNode::CreateForSurrogatePair(
    Zone* zone, CharacterRange lead, ZoneList<CharacterRange>* trail_ranges,
    bool read_backward, RegExpNode* on_success) {
  // Wrap the single lead range in a list so it can back a character class.
  ZoneList<CharacterRange>* const lead_list = CharacterRange::List(zone, lead);
  ZoneList<TextElement>* const elements =
      zone->New<ZoneList<TextElement>>(2, zone);

  RegExpCharacterClass* const lead_class =
      zone->New<RegExpCharacterClass>(zone, lead_list);
  elements->Add(TextElement::CharClass(lead_class), zone);

  RegExpCharacterClass* const trail_class =
      zone->New<RegExpCharacterClass>(zone, trail_ranges);
  elements->Add(TextElement::CharClass(trail_class), zone);

  return zone->New<TextNode>(elements, read_backward, on_success);
}
TextNode* TextNode::CreateForSurrogatePair(
Zone* zone, ZoneList<CharacterRange>* lead_ranges, CharacterRange trail,
bool read_backward, RegExpNode* on_success) {
ZoneList<CharacterRange>* trail_ranges = CharacterRange::List(zone, trail);
ZoneList<TextElement>* elms = zone->New<ZoneList<TextElement>>(2, zone);
elms->Add(TextElement::CharClass(
......
......@@ -172,6 +172,8 @@ void RegExpMacroAssemblerTracer::LoadCurrentCharacterImpl(
characters, eats_at_least);
}
namespace {
class PrintablePrinter {
public:
explicit PrintablePrinter(base::uc16 character) : character_(character) {}
......@@ -193,6 +195,8 @@ class PrintablePrinter {
char buffer_[4];
};
} // namespace
void RegExpMacroAssemblerTracer::CheckCharacterLT(base::uc16 limit,
Label* on_less) {
PrintablePrinter printable(limit);
......@@ -315,6 +319,41 @@ void RegExpMacroAssemblerTracer::CheckCharacterNotInRange(base::uc16 from,
assembler_->CheckCharacterNotInRange(from, to, on_in_range);
}
namespace {
void PrintRangeArray(const ZoneList<CharacterRange>* ranges) {
for (int i = 0; i < ranges->length(); i++) {
base::uc16 from = ranges->at(i).from();
base::uc16 to = ranges->at(i).to();
PrintablePrinter printable_from(from);
PrintablePrinter printable_to(to);
PrintF(" [from=0x%04x%s, to=%04x%s],\n", from, *printable_from, to,
*printable_to);
}
}
} // namespace
// Traces a CheckCharacterInRangeArray call (target label first, then the
// ranges) and forwards it to the wrapped assembler, passing its result through.
bool RegExpMacroAssemblerTracer::CheckCharacterInRangeArray(
    const ZoneList<CharacterRange>* ranges, Label* on_in_range) {
  const auto label_id = LabelToInt(on_in_range);
  PrintF(
      " CheckCharacterInRangeArray(\n"
      " label[%08x]);\n",
      label_id);
  PrintRangeArray(ranges);

  const bool emitted =
      assembler_->CheckCharacterInRangeArray(ranges, on_in_range);
  return emitted;
}
// Traces a CheckCharacterNotInRangeArray call (target label first, then the
// ranges) and forwards it to the wrapped assembler, passing its result through.
bool RegExpMacroAssemblerTracer::CheckCharacterNotInRangeArray(
    const ZoneList<CharacterRange>* ranges, Label* on_not_in_range) {
  const auto label_id = LabelToInt(on_not_in_range);
  PrintF(
      " CheckCharacterNotInRangeArray(\n"
      " label[%08x]);\n",
      label_id);
  PrintRangeArray(ranges);

  const bool emitted =
      assembler_->CheckCharacterNotInRangeArray(ranges, on_not_in_range);
  return emitted;
}
void RegExpMacroAssemblerTracer::CheckBitInTable(
Handle<ByteArray> table, Label* on_bit_set) {
PrintF(" CheckBitInTable(label[%08x] ", LabelToInt(on_bit_set));
......
......@@ -48,6 +48,10 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
Label* on_in_range) override;
void CheckCharacterNotInRange(base::uc16 from, base::uc16 to,
Label* on_not_in_range) override;
bool CheckCharacterInRangeArray(const ZoneList<CharacterRange>* ranges,
Label* on_in_range) override;
bool CheckCharacterNotInRangeArray(const ZoneList<CharacterRange>* ranges,
Label* on_not_in_range) override;
void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set) override;
void CheckPosition(int cp_offset, Label* on_outside_input) override;
bool CheckSpecialCharacterClass(StandardCharacterSet type,
......
......@@ -104,6 +104,121 @@ int RegExpMacroAssembler::CaseInsensitiveCompareUnicode(Address byte_offset1,
#endif // V8_INTL_SUPPORT
}
namespace {
// Folds the `from` and `to` bounds of every range (in list order) into a
// single hash value, truncated to 32 bits.
uint32_t Hash(const ZoneList<CharacterRange>* ranges) {
  size_t combined = 0;
  const int count = ranges->length();
  int index = 0;
  while (index < count) {
    const CharacterRange& range = ranges->at(index);
    combined = base::hash_combine(combined, range.from(), range.to());
    ++index;
  }
  return static_cast<uint32_t>(combined);
}
// Returns the number of uint16 elements needed to encode `ranges`: two per
// range, minus one when the final range ends at kMaxUInt16 (its upper bound
// is then omitted).
int RangeArrayLengthFor(const ZoneList<CharacterRange>* ranges) {
  const int count = ranges->length();
  const bool last_is_open_ended = ranges->at(count - 1).to() == kMaxUInt16;

  int element_count = count * 2;
  if (last_is_open_ended) {
    element_count -= 1;
  }
  return element_count;
}
bool Equals(const ZoneList<CharacterRange>* lhs, const Handle<ByteArray>& rhs) {
if (rhs->length() != RangeArrayLengthFor(lhs) * kUInt16Size) return false;
for (int i = 0; i < lhs->length(); i++) {
const CharacterRange& r = lhs->at(i);
if (rhs->get_uint16(i * 2 + 0) != r.from()) return false;
if (i == lhs->length() - 1 && r.to() == kMaxUInt16) {
break; // Avoid overflow by leaving the last range open-ended.
}
if (rhs->get_uint16(i * 2 + 1) != r.to() + 1) return false;
}
return true;
}
// Allocates a ByteArray and fills it with the uint16 encoding of `ranges`:
// [from0, to0 + 1, from1, to1 + 1, ...]. When the last range ends at
// kMaxUInt16 its upper bound is left out, making the interval open-ended.
Handle<ByteArray> MakeRangeArray(Isolate* isolate,
                                 const ZoneList<CharacterRange>* ranges) {
  const int range_count = ranges->length();
  const int element_count = RangeArrayLengthFor(ranges);
  Handle<ByteArray> result =
      isolate->factory()->NewByteArray(element_count * kUInt16Size);

  const int last_index = range_count - 1;
  for (int i = 0; i < range_count; ++i) {
    const CharacterRange& range = ranges->at(i);
    const int from_slot = i * 2 + 0;
    const int to_slot = i * 2 + 1;

    DCHECK_NE(range.from(), kMaxUInt16);
    result->set_uint16(from_slot, range.from());

    // `to + 1` would not fit into uint16 here; omit the bound instead.
    const bool open_ended = (i == last_index) && (range.to() == kMaxUInt16);
    if (open_ended) break;

    DCHECK_NE(range.to(), kMaxUInt16);
    result->set_uint16(to_slot, range.to() + 1);  // Stored as exclusive bound.
  }
  return result;
}
} // namespace
// Returns a ByteArray encoding `ranges`, reusing a previously created array
// from `range_array_cache_` when one with the same hash and identical contents
// exists. On a miss (or a hash collision with different contents) a new array
// is created and stored under the hash, replacing any previous entry.
Handle<ByteArray> NativeRegExpMacroAssembler::GetOrAddRangeArray(
    const ZoneList<CharacterRange>* ranges) {
  const uint32_t hash = Hash(ranges);

  // Single lookup instead of count() followed by operator[].
  const auto cached = range_array_cache_.find(hash);
  if (cached != range_array_cache_.end() && Equals(ranges, cached->second)) {
    return cached->second;
  }

  Handle<ByteArray> range_array = MakeRangeArray(isolate(), ranges);
  range_array_cache_[hash] = range_array;
  return range_array;
}
// static
uint32_t RegExpMacroAssembler::IsCharacterInRangeArray(uint32_t current_char,
Address raw_byte_array,
Isolate* isolate) {
// Use uint32_t to avoid complexity around bool return types (which may be
// optimized to use only the least significant byte).
static constexpr uint32_t kTrue = 1;
static constexpr uint32_t kFalse = 0;
ByteArray ranges = ByteArray::cast(Object(raw_byte_array));
DCHECK_EQ(ranges.length() % kUInt16Size, 0); // uc16 elements.
const int length = ranges.length() / kUInt16Size;
DCHECK_GE(length, 1);
// Shortcut for fully out of range chars.
if (current_char < ranges.get_uint16(0)) return kFalse;
if (current_char >= ranges.get_uint16(length - 1)) {
// The last range may be open-ended.
return (length % 2) == 0 ? kFalse : kTrue;
}
// Binary search for the matching range. `ranges` is encoded as
// [from0, to0, from1, to1, ..., fromN, toN], or
// [from0, to0, from1, to1, ..., fromN] (open-ended last interval).
int mid, lower = 0, upper = length;
do {
mid = lower + (upper - lower) / 2;
const base::uc16 elem = ranges.get_uint16(mid);
if (current_char < elem) {
upper = mid;
} else if (current_char > elem) {
lower = mid + 1;
} else {
DCHECK_EQ(current_char, elem);
break;
}
} while (lower < upper);
const bool current_char_ge_last_elem = current_char >= ranges.get_uint16(mid);
const int current_range_start_index =
current_char_ge_last_elem ? mid : mid - 1;
// Ranges start at even indices and end at odd indices.
return (current_range_start_index % 2) == 0 ? kTrue : kFalse;
}
void RegExpMacroAssembler::CheckNotInSurrogatePair(int cp_offset,
Label* on_failure) {
Label ok;
......
......@@ -95,6 +95,11 @@ class RegExpMacroAssembler {
virtual void CheckCharacterNotInRange(base::uc16 from,
base::uc16 to, // Both inclusive.
Label* on_not_in_range) = 0;
// Returns true if the check was emitted, false otherwise.
virtual bool CheckCharacterInRangeArray(
const ZoneList<CharacterRange>* ranges, Label* on_in_range) = 0;
virtual bool CheckCharacterNotInRangeArray(
const ZoneList<CharacterRange>* ranges, Label* on_not_in_range) = 0;
// The current character (modulo kTableSize) is looked up in the byte
// array, and if the found byte is non-zero, we jump to the on_bit_set label.
......@@ -196,6 +201,19 @@ class RegExpMacroAssembler {
size_t byte_length,
Isolate* isolate);
// `raw_byte_array` is a ByteArray containing a set of character ranges,
// where ranges are encoded as uint16_t elements:
//
// [from0, to0, from1, to1, ..., fromN, toN], or
// [from0, to0, from1, to1, ..., fromN] (open-ended last interval).
//
// fromN is inclusive, toN is exclusive. Returns zero if not in a range,
// non-zero otherwise.
//
// Called from generated code.
static uint32_t IsCharacterInRangeArray(uint32_t current_char,
Address raw_byte_array,
Isolate* isolate);
// Controls the generation of large inlined constants in the code.
void set_slow_safe(bool ssc) { slow_safe_compiler_ = ssc; }
......@@ -323,11 +341,15 @@ class NativeRegExpMacroAssembler: public RegExpMacroAssembler {
// Used by generated RegExp code.
static const byte word_character_map[256];
Handle<ByteArray> GetOrAddRangeArray(const ZoneList<CharacterRange>* ranges);
private:
// Returns a {Result} sentinel, or the number of successful matches.
static int Execute(String input, int start_offset, const byte* input_start,
const byte* input_end, int* output, int output_size,
Isolate* isolate, JSRegExp regexp);
std::unordered_map<uint32_t, Handle<ByteArray>> range_array_cache_;
};
} // namespace internal
......
......@@ -409,9 +409,13 @@ class TextNode : public SeqRegExpNode {
ZoneList<CharacterRange>* ranges,
bool read_backward,
RegExpNode* on_success);
// Create TextNode for a surrogate pair with a range given for the
// lead and the trail surrogate each.
static TextNode* CreateForSurrogatePair(Zone* zone, CharacterRange lead,
// Create TextNode for a surrogate pair (i.e. match a sequence of two uc16
// code unit ranges).
static TextNode* CreateForSurrogatePair(
Zone* zone, CharacterRange lead, ZoneList<CharacterRange>* trail_ranges,
bool read_backward, RegExpNode* on_success);
static TextNode* CreateForSurrogatePair(Zone* zone,
ZoneList<CharacterRange>* lead_ranges,
CharacterRange trail,
bool read_backward,
RegExpNode* on_success);
......
......@@ -216,6 +216,24 @@ void RegExpMacroAssemblerX64::CheckGreedyLoop(Label* on_equal) {
__ bind(&fallthrough);
}
// Push (pop) caller-saved registers used by irregexp.
// Emits pushes of rsi and rdi (skipped on Windows targets) followed by rcx.
// PopCallerSavedRegisters() pops the same registers in the reverse order, so
// the two must be used as a matched pair.
void RegExpMacroAssemblerX64::PushCallerSavedRegisters() {
#ifndef V8_TARGET_OS_WIN
// Callee-save in Microsoft 64-bit ABI, but not in AMD64 ABI.
__ pushq(rsi);
__ pushq(rdi);
#endif
// Pushed on every target, after the optional rsi/rdi pair.
__ pushq(rcx);
}
// Emits pops that undo PushCallerSavedRegisters(): rcx first, then (except on
// Windows targets) rdi and rsi, i.e. the reverse of the push order.
void RegExpMacroAssemblerX64::PopCallerSavedRegisters() {
__ popq(rcx);
#ifndef V8_TARGET_OS_WIN
// Only pushed when not targeting Windows, so only popped there as well.
__ popq(rdi);
__ popq(rsi);
#endif
}
void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase(
int start_reg, bool read_backward, bool unicode, Label* on_no_match) {
Label fallthrough;
......@@ -307,13 +325,7 @@ void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase(
}
} else {
DCHECK(mode_ == UC16);
// Save important/volatile registers before calling C function.
#ifndef V8_TARGET_OS_WIN
// Caller save on Linux and callee save in Windows.
__ pushq(rsi);
__ pushq(rdi);
#endif
__ pushq(backtrack_stackpointer());
PushCallerSavedRegisters();
static const int num_arguments = 4;
__ PrepareCallCFunction(num_arguments);
......@@ -363,11 +375,7 @@ void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase(
// Restore original values before reacting on result value.
__ Move(code_object_pointer(), masm_.CodeObject());
__ popq(backtrack_stackpointer());
#ifndef V8_TARGET_OS_WIN
__ popq(rdi);
__ popq(rsi);
#endif
PopCallerSavedRegisters();
// Check if function returned non-zero for success or zero for failure.
__ testq(rax, rax);
......@@ -517,6 +525,44 @@ void RegExpMacroAssemblerX64::CheckCharacterNotInRange(base::uc16 from,
BranchOrBacktrack(above, on_not_in_range);
}
// Emits a call to the C function referenced by
// ExternalReference::re_is_character_in_range_array(), passing three
// arguments: the current character, the range ByteArray obtained from
// GetOrAddRangeArray(ranges), and the isolate address. Caller-saved registers
// are pushed before and popped after the call. The callers in this file test
// rax afterwards to read the result.
void RegExpMacroAssemblerX64::CallIsCharacterInRangeArray(
const ZoneList<CharacterRange>* ranges) {
PushCallerSavedRegisters();
static const int kNumArguments = 3;
__ PrepareCallCFunction(kNumArguments);
// Argument order: (current_char, range array, isolate).
__ Move(arg_reg_1, current_character());
__ Move(arg_reg_2, GetOrAddRangeArray(ranges));
__ LoadAddress(arg_reg_3, ExternalReference::isolate_address(isolate()));
{
// We have a frame (set up in GetCode), but the assembler doesn't know.
FrameScope scope(&masm_, StackFrame::MANUAL);
__ CallCFunction(ExternalReference::re_is_character_in_range_array(),
kNumArguments);
}
PopCallerSavedRegisters();
// Reload the code object pointer register after the C call.
__ Move(code_object_pointer(), masm_.CodeObject());
}
// Emits the range-array membership call for `ranges`, then branches (or
// backtracks) to `on_in_range` when the call's result in rax is non-zero.
// Always returns true.
bool RegExpMacroAssemblerX64::CheckCharacterInRangeArray(
const ZoneList<CharacterRange>* ranges, Label* on_in_range) {
CallIsCharacterInRangeArray(ranges);
// Non-zero rax: take the branch to `on_in_range`.
__ testq(rax, rax);
BranchOrBacktrack(not_zero, on_in_range);
return true;
}
// Emits the range-array membership call for `ranges`, then branches (or
// backtracks) to `on_not_in_range` when the call's result in rax is zero.
// Always returns true.
bool RegExpMacroAssemblerX64::CheckCharacterNotInRangeArray(
const ZoneList<CharacterRange>* ranges, Label* on_not_in_range) {
CallIsCharacterInRangeArray(ranges);
// Zero rax: take the branch to `on_not_in_range`.
__ testq(rax, rax);
BranchOrBacktrack(zero, on_not_in_range);
return true;
}
void RegExpMacroAssemblerX64::CheckBitInTable(
Handle<ByteArray> table,
Label* on_bit_set) {
......@@ -1007,12 +1053,7 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
SafeCallTarget(&stack_overflow_label_);
// Reached if the backtrack-stack limit has been hit.
// Save registers before calling C function
#ifndef V8_TARGET_OS_WIN
// Callee-save in Microsoft 64-bit ABI, but not in AMD64 ABI.
__ pushq(rsi);
__ pushq(rdi);
#endif
PushCallerSavedRegisters();
// Call GrowStack(isolate).
......@@ -1028,14 +1069,11 @@ Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
// with a stack-overflow exception.
__ testq(rax, rax);
__ j(equal, &exit_with_exception);
PopCallerSavedRegisters();
// Otherwise use return value as new stack pointer.
__ movq(backtrack_stackpointer(), rax);
// Restore saved registers and continue.
__ Move(code_object_pointer(), masm_.CodeObject());
#ifndef V8_TARGET_OS_WIN
__ popq(rdi);
__ popq(rsi);
#endif
SafeReturn();
}
......
......@@ -48,6 +48,10 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerX64
Label* on_in_range) override;
void CheckCharacterNotInRange(base::uc16 from, base::uc16 to,
Label* on_not_in_range) override;
bool CheckCharacterInRangeArray(const ZoneList<CharacterRange>* ranges,
Label* on_in_range) override;
bool CheckCharacterNotInRangeArray(const ZoneList<CharacterRange>* ranges,
Label* on_not_in_range) override;
void CheckBitInTable(Handle<ByteArray> table, Label* on_bit_set) override;
// Checks whether the given offset from the current position is before
......@@ -163,14 +167,17 @@ class V8_EXPORT_PRIVATE RegExpMacroAssemblerX64
// Initial size of code buffer.
static const int kRegExpCodeSize = 1024;
void PushCallerSavedRegisters();
void PopCallerSavedRegisters();
// Check whether preemption has been requested.
void CheckPreemption();
// Check whether we are exceeding the stack limit on the backtrack stack.
void CheckStackLimit();
// Generate a call to CheckStackGuardState.
void CallCheckStackGuardState();
void CallIsCharacterInRangeArray(const ZoneList<CharacterRange>* ranges);
// The rbp-relative location of a regexp register.
Operand register_location(int register_index);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment