Commit 40d1b660, authored by Iain Ireland, committed by Commit Bot

[regexp] Refactor to simplify embedding irregexp

Some code at the interface between irregexp and the rest of V8 uses
V8-specific APIs that are not used elsewhere in irregexp. For example,
only a handful of functions in irregexp call or are called from
generated code. When embedding irregexp into SpiderMonkey, these
functions are an obstacle, because they are dead code, but still have
to compile.

To simplify the process of embedding, this patch does two things:

1. It moves StringCharacterPosition out of irregexp and into
objects/string, renaming it "AddressOfCharacterAt".

2. It guards the following set of functions with
'#ifndef COMPILING_IRREGEXP_FOR_EXTERNAL_EMBEDDER':
- IrregexpInterpreter::MatchForCallFromJs
- NativeRegExpMacroAssembler::CheckStackGuardState
- NativeRegExpMacroAssembler::Match
- NativeRegExpMacroAssembler::Execute

This will have no effect in a V8 build, but can be defined by
SpiderMonkey or another embedder to omit the problematic functions.

In the future, if we attempt to make a cleaner separation between V8
and irregexp, these functions will be a good place to start defining
the API boundary.

R=jgruber@chromium.org

Bug: v8:10303
Change-Id: I9f531a36e4f13440cafb0d0ade921f4c09f39c05
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2097220
Reviewed-by: Jakob Gruber <jgruber@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#66750}
parent e71d328f
......@@ -1657,6 +1657,37 @@ String ConsStringIterator::NextLeaf(bool* blew_stack) {
UNREACHABLE();
}
// Translates a character index in this (flat) string into the raw address of
// that character inside the string that actually stores the characters.
//
// start_index: index relative to this string. After any slice offset has been
//              added it must satisfy 0 <= start_index <= length of the
//              storing string (CHECK-enforced), so the one-past-the-end
//              address is permitted.
// no_gc:       forwarded to the sequential-string GetChars() accessors.
const byte* String::AddressOfCharacterAt(int start_index,
                                         const DisallowHeapAllocation& no_gc) {
  DCHECK(IsFlat());

  // Look through at most one cons or sliced wrapper. A cons string is
  // represented by its first part; a sliced string by its parent, with the
  // slice offset folded into the index.
  String backing = *this;
  if (backing.IsConsString()) {
    backing = ConsString::cast(backing).first();
  } else if (backing.IsSlicedString()) {
    SlicedString slice = SlicedString::cast(backing);
    start_index += slice.offset();
    backing = slice.parent();
  }
  // Whatever remains may still be a thin string; resolve it to its target.
  if (backing.IsThinString()) {
    backing = ThinString::cast(backing).actual();
  }

  CHECK_LE(0, start_index);
  CHECK_LE(start_index, backing.length());

  // The index is added to the pointer returned by GetChars() before the cast
  // to byte*, so the offset is applied in units of that pointer's element
  // type.
  if (backing.IsSeqOneByteString()) {
    const auto* chars = SeqOneByteString::cast(backing).GetChars(no_gc);
    return reinterpret_cast<const byte*>(chars + start_index);
  }
  if (backing.IsSeqTwoByteString()) {
    const auto* chars = SeqTwoByteString::cast(backing).GetChars(no_gc);
    return reinterpret_cast<const byte*>(chars + start_index);
  }
  if (backing.IsExternalOneByteString()) {
    const auto* chars = ExternalOneByteString::cast(backing).GetChars();
    return reinterpret_cast<const byte*>(chars + start_index);
  }
  // Only the external two-byte representation is left at this point.
  DCHECK(backing.IsExternalTwoByteString());
  const auto* chars = ExternalTwoByteString::cast(backing).GetChars();
  return reinterpret_cast<const byte*>(chars + start_index);
}
template EXPORT_TEMPLATE_DEFINE(V8_EXPORT_PRIVATE) void String::WriteToFlat(
String source, uint16_t* sink, int from, int to);
......
......@@ -161,6 +161,11 @@ class String : public TorqueGeneratedString<String, Name> {
template <typename Char>
inline const Char* GetChars(const DisallowHeapAllocation& no_gc);
// Returns the address of the character at index |start_index| of this
// string. The implementation looks through cons, sliced and thin wrappers
// and returns a pointer into the characters of the underlying sequential or
// external string.
// Requires: this->IsFlat()
// Requires: 0 <= start_index <= length of the underlying string, after any
// slice offset has been applied (enforced with CHECK in the implementation).
// NOTE(review): the returned pointer presumably must not be used after
// |no_gc| goes out of scope -- confirm.
const byte* AddressOfCharacterAt(int start_index,
const DisallowHeapAllocation& no_gc);
// Get and set the length of the string using acquire loads and release
// stores.
DECL_SYNCHRONIZED_INT_ACCESSORS(length)
......
......@@ -1004,6 +1004,8 @@ IrregexpInterpreter::Result IrregexpInterpreter::MatchInternal(
}
}
#ifndef COMPILING_IRREGEXP_FOR_EXTERNAL_EMBEDDER
// This method is called through an external reference from RegExpExecInternal
// builtin.
IrregexpInterpreter::Result IrregexpInterpreter::MatchForCallFromJs(
......@@ -1030,6 +1032,8 @@ IrregexpInterpreter::Result IrregexpInterpreter::MatchForCallFromJs(
start_position, call_origin);
}
#endif // !COMPILING_IRREGEXP_FOR_EXTERNAL_EMBEDDER
IrregexpInterpreter::Result IrregexpInterpreter::MatchForCallFromRuntime(
Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject_string,
int* registers, int registers_length, int start_position) {
......
......@@ -115,34 +115,7 @@ bool NativeRegExpMacroAssembler::CanReadUnaligned() {
return FLAG_enable_regexp_unaligned_accesses && !slow_safe();
}
// Pre-refactor helper removed by this commit in favour of
// String::AddressOfCharacterAt: maps the character index |start_index| of
// |subject| to the raw address of that character in the string that stores
// the characters.
// NOTE(review): unlike the replacement, this version does not assert that
// |subject| is flat; callers presumably guaranteed it -- confirm.
const byte* NativeRegExpMacroAssembler::StringCharacterPosition(
String subject, int start_index, const DisallowHeapAllocation& no_gc) {
// Look through at most one cons or sliced wrapper: a cons string is replaced
// by its first part, a sliced string by its parent with the slice offset
// added to the index.
if (subject.IsConsString()) {
subject = ConsString::cast(subject).first();
} else if (subject.IsSlicedString()) {
start_index += SlicedString::cast(subject).offset();
subject = SlicedString::cast(subject).parent();
}
// The result of the step above may itself be a thin string; resolve it.
if (subject.IsThinString()) {
subject = ThinString::cast(subject).actual();
}
// Bounds are asserted with DCHECK here (String::AddressOfCharacterAt uses
// CHECK instead). start_index == length is accepted, i.e. the
// one-past-the-end address may be requested.
DCHECK_LE(0, start_index);
DCHECK_LE(start_index, subject.length());
// The index is added to the pointer returned by GetChars() before the cast
// to byte*, so it is applied in units of that pointer's element type.
if (subject.IsSeqOneByteString()) {
return reinterpret_cast<const byte*>(
SeqOneByteString::cast(subject).GetChars(no_gc) + start_index);
} else if (subject.IsSeqTwoByteString()) {
return reinterpret_cast<const byte*>(
SeqTwoByteString::cast(subject).GetChars(no_gc) + start_index);
} else if (subject.IsExternalOneByteString()) {
return reinterpret_cast<const byte*>(
ExternalOneByteString::cast(subject).GetChars() + start_index);
} else {
// Only the external two-byte representation is expected to remain.
DCHECK(subject.IsExternalTwoByteString());
return reinterpret_cast<const byte*>(
ExternalTwoByteString::cast(subject).GetChars() + start_index);
}
}
#ifndef COMPILING_IRREGEXP_FOR_EXTERNAL_EMBEDDER
// This method may only be called after an interrupt.
int NativeRegExpMacroAssembler::CheckStackGuardState(
......@@ -214,8 +187,7 @@ int NativeRegExpMacroAssembler::CheckStackGuardState(
} else {
*subject = subject_handle->ptr();
intptr_t byte_length = *input_end - *input_start;
*input_start =
StringCharacterPosition(*subject_handle, start_index, no_gc);
*input_start = subject_handle->AddressOfCharacterAt(start_index, no_gc);
*input_end = *input_start + byte_length;
}
}
......@@ -263,7 +235,7 @@ int NativeRegExpMacroAssembler::Match(Handle<JSRegExp> regexp,
DisallowHeapAllocation no_gc;
const byte* input_start =
StringCharacterPosition(subject_ptr, start_offset + slice_offset, no_gc);
subject_ptr.AddressOfCharacterAt(start_offset + slice_offset, no_gc);
int byte_length = char_length << char_size_shift;
const byte* input_end = input_start + byte_length;
return Execute(*subject, start_offset, input_start, input_end, offsets_vector,
......@@ -309,6 +281,8 @@ int NativeRegExpMacroAssembler::Execute(
return result;
}
#endif // !COMPILING_IRREGEXP_FOR_EXTERNAL_EMBEDDER
// clang-format off
const byte NativeRegExpMacroAssembler::word_character_map[] = {
0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u, 0x00u,
......
......@@ -246,9 +246,6 @@ class NativeRegExpMacroAssembler: public RegExpMacroAssembler {
static Address GrowStack(Address stack_pointer, Address* stack_top,
Isolate* isolate);
static const byte* StringCharacterPosition(
String subject, int start_index, const DisallowHeapAllocation& no_gc);
static int CheckStackGuardState(Isolate* isolate, int start_index,
RegExp::CallOrigin call_origin,
Address* return_address, Code re_code,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment