Commit fe609139, authored by Jakob Gruber, committed by Commit Bot

[regexp] Consistent expectations for output registers

... between the interpreter and generated code.

Prior to this CL, pre- and post conditions on the output register
array differed between the interpreter and generated code.

Interpreter
Pre: `output` fits captures and temporary registers.
Post: None.

Generated code
Pre:  `output` fits capture registers.
Post: `output` is modified if and only if the match succeeded.

This CL changes the interpreter to match generated code pre- and
post conditions by allocating space for temporary registers inside
the interpreter.

Drive-by: Add MaxRegisterCount, RegistersForCaptureCount helpers.

Bug: chromium:1067270
Change-Id: I2900ef2f31207d817ec7ead3e0e2215b23b398f0
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2135642
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: Leszek Swirski <leszeks@chromium.org>
Cr-Commit-Position: refs/heads/master@{#67268}
parent 3a83b134
......@@ -528,7 +528,7 @@ TNode<HeapObject> RegExpBuiltinsAssembler::RegExpExecInternal(
data, JSRegExp::kIrregexpCaptureCountIndex));
// capture_count is the number of captures without the match itself.
// Required registers = (capture_count + 1) * 2.
STATIC_ASSERT(Internals::IsValidSmi((JSRegExp::kMaxCaptures + 1) << 1));
STATIC_ASSERT(Internals::IsValidSmi((JSRegExp::kMaxCaptures + 1) * 2));
TNode<Smi> register_count =
SmiShl(SmiAdd(capture_count, SmiConstant(1)), 1);
......
......@@ -34,7 +34,7 @@ JSRegExp::Type JSRegExp::TypeTag() const {
return static_cast<JSRegExp::Type>(smi.value());
}
int JSRegExp::CaptureCount() {
int JSRegExp::CaptureCount() const {
switch (TypeTag()) {
case ATOM:
return 0;
......@@ -45,6 +45,11 @@ int JSRegExp::CaptureCount() {
}
}
// Returns the value stored in the kIrregexpMaxRegisterCountIndex data slot,
// converted from a Smi to a plain int. The regexp must be of type IRREGEXP;
// this is enforced by the CHECK below.
int JSRegExp::MaxRegisterCount() const {
  CHECK_EQ(TypeTag(), IRREGEXP);
  const int max_register_count =
      Smi::ToInt(DataAt(kIrregexpMaxRegisterCountIndex));
  return max_register_count;
}
JSRegExp::Flags JSRegExp::GetFlags() {
DCHECK(this->data().IsFixedArray());
Object data = this->data();
......
......@@ -113,7 +113,11 @@ class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> {
static constexpr int kMaxCaptures = 1 << 16;
// Number of captures (without the match itself).
inline int CaptureCount();
inline int CaptureCount() const;
// Returns the number of registers needed to hold `count` captures.
// Each capture (including the match itself, which is not part of `count`)
// needs two registers, hence (count + 1) * 2. Declared constexpr so the
// result can also be used in constant expressions (e.g. static assertions).
static constexpr int RegistersForCaptureCount(int count) {
  return (count + 1) * 2;
}
inline int MaxRegisterCount() const;
inline Flags GetFlags();
inline String Pattern();
inline Object CaptureNameMap();
......
......@@ -4212,7 +4212,8 @@ Handle<RegExpMatchInfo> RegExpMatchInfo::ReserveCaptures(
Isolate* isolate, Handle<RegExpMatchInfo> match_info, int capture_count) {
DCHECK_GE(match_info->length(), kLastMatchOverhead);
int capture_register_count = (capture_count + 1) * 2;
int capture_register_count =
JSRegExp::RegistersForCaptureCount(capture_count);
const int required_length = kFirstCaptureIndex + capture_register_count;
Handle<RegExpMatchInfo> result = Handle<RegExpMatchInfo>::cast(
EnsureSpaceInFixedArray(isolate, match_info, required_length));
......
......@@ -223,7 +223,7 @@ class RecursionCheck {
// a fixed array or a null handle depending on whether it succeeded.
RegExpCompiler::RegExpCompiler(Isolate* isolate, Zone* zone, int capture_count,
bool one_byte)
: next_register_(2 * (capture_count + 1)),
: next_register_(JSRegExp::RegistersForCaptureCount(capture_count)),
unicode_lookaround_stack_register_(kNoRegister),
unicode_lookaround_position_register_(kNoRegister),
work_list_(nullptr),
......
This diff is collapsed.
......@@ -23,36 +23,42 @@ class V8_EXPORT_PRIVATE IrregexpInterpreter : public AllStatic {
// In case a StackOverflow occurs, a StackOverflowException is created and
// EXCEPTION is returned.
static Result MatchForCallFromRuntime(Isolate* isolate,
Handle<JSRegExp> regexp,
Handle<String> subject_string,
int* registers, int registers_length,
int start_position);
static Result MatchForCallFromRuntime(
Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject_string,
int* output_registers, int output_register_count, int start_position);
// In case a StackOverflow occurs, EXCEPTION is returned. The caller is
// responsible for creating the exception.
//
// RETRY is returned if a retry through the runtime is needed (e.g. when
// interrupts have been scheduled or the regexp is marked for tier-up).
//
// Arguments input_start, input_end and backtrack_stack are
// unused. They are only passed to match the signature of the native irregexp
// code.
//
// Arguments output_registers and output_register_count describe the results
// array, which will contain register values of all captures if SUCCESS is
// returned. For all other return codes, the results array remains unmodified.
static Result MatchForCallFromJs(Address subject, int32_t start_position,
Address input_start, Address input_end,
int* registers, int32_t registers_length,
int* output_registers,
int32_t output_register_count,
Address backtrack_stack,
RegExp::CallOrigin call_origin,
Isolate* isolate, Address regexp);
static Result MatchInternal(Isolate* isolate, ByteArray code_array,
String subject_string, int* registers,
int registers_length, int start_position,
String subject_string, int* output_registers,
int output_register_count,
int total_register_count, int start_position,
RegExp::CallOrigin call_origin,
uint32_t backtrack_limit);
private:
static Result Match(Isolate* isolate, JSRegExp regexp, String subject_string,
int* registers, int registers_length, int start_position,
RegExp::CallOrigin call_origin);
int* output_registers, int output_register_count,
int start_position, RegExp::CallOrigin call_origin);
};
} // namespace internal
......
......@@ -28,13 +28,14 @@ struct DisjunctDecisionRow {
class RegExpMacroAssembler {
public:
// The implementation must be able to handle at least:
static const int kMaxRegister = (1 << 16) - 1;
static const int kMaxCPOffset = (1 << 15) - 1;
static const int kMinCPOffset = -(1 << 15);
static const int kTableSizeBits = 7;
static const int kTableSize = 1 << kTableSizeBits;
static const int kTableMask = kTableSize - 1;
static constexpr int kMaxRegisterCount = (1 << 16);
static constexpr int kMaxRegister = kMaxRegisterCount - 1;
static constexpr int kMaxCPOffset = (1 << 15) - 1;
static constexpr int kMinCPOffset = -(1 << 15);
static constexpr int kTableSizeBits = 7;
static constexpr int kTableSize = 1 << kTableSizeBits;
static constexpr int kTableMask = kTableSize - 1;
static constexpr int kUseCharactersValue = -1;
......
......@@ -34,8 +34,7 @@ class RegExpImpl final : public AllStatic {
// Prepares a JSRegExp object with Irregexp-specific data.
static void IrregexpInitialize(Isolate* isolate, Handle<JSRegExp> re,
Handle<String> pattern, JSRegExp::Flags flags,
int capture_register_count,
uint32_t backtrack_limit);
int capture_count, uint32_t backtrack_limit);
static void AtomCompile(Isolate* isolate, Handle<JSRegExp> re,
Handle<String> pattern, JSRegExp::Flags flags,
......@@ -86,7 +85,6 @@ class RegExpImpl final : public AllStatic {
static void SetIrregexpCaptureNameMap(FixedArray re,
Handle<FixedArray> value);
static int IrregexpNumberOfCaptures(FixedArray re);
static int IrregexpNumberOfRegisters(FixedArray re);
static ByteArray IrregexpByteCode(FixedArray re, bool is_one_byte);
static Code IrregexpNativeCode(FixedArray re, bool is_one_byte);
};
......@@ -456,7 +454,7 @@ bool RegExpImpl::CompileIrregexp(Isolate* isolate, Handle<JSRegExp> re,
}
int RegExpImpl::IrregexpMaxRegisterCount(FixedArray re) {
return Smi::cast(re.get(JSRegExp::kIrregexpMaxRegisterCountIndex)).value();
return Smi::ToInt(re.get(JSRegExp::kIrregexpMaxRegisterCountIndex));
}
void RegExpImpl::SetIrregexpMaxRegisterCount(FixedArray re, int value) {
......@@ -476,10 +474,6 @@ int RegExpImpl::IrregexpNumberOfCaptures(FixedArray re) {
return Smi::ToInt(re.get(JSRegExp::kIrregexpCaptureCountIndex));
}
int RegExpImpl::IrregexpNumberOfRegisters(FixedArray re) {
return Smi::ToInt(re.get(JSRegExp::kIrregexpMaxRegisterCountIndex));
}
// Fetches the bytecode array stored in the regexp data array `re`. The slot
// is chosen by JSRegExp::bytecode_index based on `is_one_byte`.
ByteArray RegExpImpl::IrregexpByteCode(FixedArray re, bool is_one_byte) {
  const int bytecode_slot = JSRegExp::bytecode_index(is_one_byte);
  return ByteArray::cast(re.get(bytecode_slot));
}
......@@ -509,35 +503,23 @@ int RegExp::IrregexpPrepare(Isolate* isolate, Handle<JSRegExp> regexp,
return -1;
}
DisallowHeapAllocation no_gc;
FixedArray data = FixedArray::cast(regexp->data());
if (regexp->ShouldProduceBytecode()) {
// Byte-code regexp needs space allocated for all its registers.
// The result captures are copied to the start of the registers array
// if the match succeeds. This way those registers are not clobbered
// when we set the last match info from last successful match.
return RegExpImpl::IrregexpNumberOfRegisters(data) +
(RegExpImpl::IrregexpNumberOfCaptures(data) + 1) * 2;
} else {
// Native regexp only needs room to output captures. Registers are handled
// internally.
return (RegExpImpl::IrregexpNumberOfCaptures(data) + 1) * 2;
}
// Only reserve room for output captures. Internal registers are allocated by
// the engine.
return JSRegExp::RegistersForCaptureCount(regexp->CaptureCount());
}
int RegExpImpl::IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
Handle<String> subject, int index,
int32_t* output, int output_size) {
Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()), isolate);
DCHECK_LE(0, index);
DCHECK_LE(index, subject->length());
DCHECK(subject->IsFlat());
DCHECK_GE(output_size,
JSRegExp::RegistersForCaptureCount(regexp->CaptureCount()));
bool is_one_byte = String::IsOneByteRepresentationUnderneath(*subject);
if (!regexp->ShouldProduceBytecode()) {
DCHECK(output_size >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
do {
EnsureCompiledIrregexp(isolate, regexp, subject, is_one_byte);
// The stack is used to allocate registers for the compiled regexp code.
......@@ -568,27 +550,16 @@ int RegExpImpl::IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
UNREACHABLE();
} else {
DCHECK(regexp->ShouldProduceBytecode());
DCHECK(output_size >= IrregexpNumberOfRegisters(*irregexp));
// We must have done EnsureCompiledIrregexp, so we can get the number of
// registers.
int number_of_capture_registers =
(IrregexpNumberOfCaptures(*irregexp) + 1) * 2;
int32_t* raw_output = &output[number_of_capture_registers];
do {
IrregexpInterpreter::Result result =
IrregexpInterpreter::MatchForCallFromRuntime(
isolate, regexp, subject, raw_output, number_of_capture_registers,
index);
isolate, regexp, subject, output, output_size, index);
DCHECK_IMPLIES(result == IrregexpInterpreter::EXCEPTION,
isolate->has_pending_exception());
switch (result) {
case IrregexpInterpreter::SUCCESS:
// Copy capture results to the start of the registers array.
MemCopy(output, raw_output,
number_of_capture_registers * sizeof(int32_t));
return result;
case IrregexpInterpreter::EXCEPTION:
case IrregexpInterpreter::FAILURE:
return result;
......@@ -596,9 +567,7 @@ int RegExpImpl::IrregexpExecRaw(Isolate* isolate, Handle<JSRegExp> regexp,
// The string has changed representation, and we must restart the
// match.
// We need to reset the tier up to start over with compilation.
if (FLAG_regexp_tier_up) {
regexp->ResetLastTierUpTick();
}
if (FLAG_regexp_tier_up) regexp->ResetLastTierUpTick();
is_one_byte = String::IsOneByteRepresentationUnderneath(*subject);
EnsureCompiledIrregexp(isolate, regexp, subject, is_one_byte);
break;
......@@ -659,8 +628,7 @@ MaybeHandle<Object> RegExpImpl::IrregexpExec(
output_registers, required_registers);
if (res == RegExp::RE_SUCCESS) {
int capture_count =
IrregexpNumberOfCaptures(FixedArray::cast(regexp->data()));
int capture_count = regexp->CaptureCount();
return RegExp::SetLastMatchInfo(isolate, last_match_info, subject,
capture_count, output_registers);
}
......@@ -692,7 +660,8 @@ Handle<RegExpMatchInfo> RegExp::SetLastMatchInfo(
}
}
int capture_register_count = (capture_count + 1) * 2;
int capture_register_count =
JSRegExp::RegistersForCaptureCount(capture_count);
DisallowHeapAllocation no_allocation;
if (match != nullptr) {
for (int i = 0; i < capture_register_count; i += 2) {
......@@ -746,7 +715,8 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
JSRegExp::Flags flags, Handle<String> pattern,
Handle<String> sample_subject, bool is_one_byte,
uint32_t backtrack_limit) {
if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
if (JSRegExp::RegistersForCaptureCount(data->capture_count) >
RegExpMacroAssembler::kMaxRegisterCount) {
data->error = RegExpError::kTooLarge;
return false;
}
......@@ -785,30 +755,32 @@ bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
is_one_byte ? NativeRegExpMacroAssembler::LATIN1
: NativeRegExpMacroAssembler::UC16;
const int output_register_count =
JSRegExp::RegistersForCaptureCount(data->capture_count);
#if V8_TARGET_ARCH_IA32
macro_assembler.reset(new RegExpMacroAssemblerIA32(
isolate, zone, mode, (data->capture_count + 1) * 2));
macro_assembler.reset(new RegExpMacroAssemblerIA32(isolate, zone, mode,
output_register_count));
#elif V8_TARGET_ARCH_X64
macro_assembler.reset(new RegExpMacroAssemblerX64(
isolate, zone, mode, (data->capture_count + 1) * 2));
macro_assembler.reset(new RegExpMacroAssemblerX64(isolate, zone, mode,
output_register_count));
#elif V8_TARGET_ARCH_ARM
macro_assembler.reset(new RegExpMacroAssemblerARM(
isolate, zone, mode, (data->capture_count + 1) * 2));
macro_assembler.reset(new RegExpMacroAssemblerARM(isolate, zone, mode,
output_register_count));
#elif V8_TARGET_ARCH_ARM64
macro_assembler.reset(new RegExpMacroAssemblerARM64(
isolate, zone, mode, (data->capture_count + 1) * 2));
macro_assembler.reset(new RegExpMacroAssemblerARM64(isolate, zone, mode,
output_register_count));
#elif V8_TARGET_ARCH_S390
macro_assembler.reset(new RegExpMacroAssemblerS390(
isolate, zone, mode, (data->capture_count + 1) * 2));
isolate, zone, mode, output_register_count));
#elif V8_TARGET_ARCH_PPC || V8_TARGET_ARCH_PPC64
macro_assembler.reset(new RegExpMacroAssemblerPPC(
isolate, zone, mode, (data->capture_count + 1) * 2));
macro_assembler.reset(new RegExpMacroAssemblerPPC(isolate, zone, mode,
output_register_count));
#elif V8_TARGET_ARCH_MIPS
macro_assembler.reset(new RegExpMacroAssemblerMIPS(
isolate, zone, mode, (data->capture_count + 1) * 2));
macro_assembler.reset(new RegExpMacroAssemblerMIPS(isolate, zone, mode,
output_register_count));
#elif V8_TARGET_ARCH_MIPS64
macro_assembler.reset(new RegExpMacroAssemblerMIPS(
isolate, zone, mode, (data->capture_count + 1) * 2));
macro_assembler.reset(new RegExpMacroAssemblerMIPS(isolate, zone, mode,
output_register_count));
#else
#error "Unsupported architecture"
#endif
......
......@@ -1109,7 +1109,7 @@ static Object SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
isolate->heap(), *subject, regexp->data(), &last_match_cache,
RegExpResultsCache::REGEXP_MULTIPLE_INDICES);
if (cached_answer.IsFixedArray()) {
int capture_registers = (capture_count + 1) * 2;
int capture_registers = JSRegExp::RegistersForCaptureCount(capture_count);
int32_t* last_match = NewArray<int32_t>(capture_registers);
for (int i = 0; i < capture_registers; i++) {
last_match[i] = Smi::ToInt(last_match_cache.get(i));
......@@ -1234,7 +1234,7 @@ static Object SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
if (subject_length > kMinLengthToCache) {
// Store the last successful match into the array for caching.
int capture_registers = (capture_count + 1) * 2;
int capture_registers = JSRegExp::RegistersForCaptureCount(capture_count);
Handle<FixedArray> last_match_cache =
isolate->factory()->NewFixedArray(capture_registers);
int32_t* last_match = global_cache.LastSuccessfulMatch();
......
......@@ -1278,14 +1278,16 @@ TEST(MacroAssembler) {
Handle<String> source = factory->NewStringFromStaticChars("^f(o)o");
Handle<ByteArray> array = Handle<ByteArray>::cast(m.GetCode(source));
int captures[5];
std::memset(captures, 0, sizeof(captures));
const uc16 str1[] = {'f', 'o', 'o', 'b', 'a', 'r'};
Handle<String> f1_16 = factory->NewStringFromTwoByte(
Vector<const uc16>(str1, 6)).ToHandleChecked();
CHECK(IrregexpInterpreter::MatchInternal(isolate, *array, *f1_16, captures, 5,
0, RegExp::CallOrigin::kFromRuntime,
JSRegExp::kNoBacktrackLimit));
CHECK_EQ(IrregexpInterpreter::SUCCESS,
IrregexpInterpreter::MatchInternal(
isolate, *array, *f1_16, captures, 5, 5, 0,
RegExp::CallOrigin::kFromRuntime, JSRegExp::kNoBacktrackLimit));
CHECK_EQ(0, captures[0]);
CHECK_EQ(3, captures[1]);
CHECK_EQ(1, captures[2]);
......@@ -1296,10 +1298,17 @@ TEST(MacroAssembler) {
Handle<String> f2_16 = factory->NewStringFromTwoByte(
Vector<const uc16>(str2, 6)).ToHandleChecked();
CHECK(!IrregexpInterpreter::MatchInternal(
isolate, *array, *f2_16, captures, 5, 0, RegExp::CallOrigin::kFromRuntime,
JSRegExp::kNoBacktrackLimit));
CHECK_EQ(42, captures[0]);
std::memset(captures, 0, sizeof(captures));
CHECK_EQ(IrregexpInterpreter::FAILURE,
IrregexpInterpreter::MatchInternal(
isolate, *array, *f2_16, captures, 5, 5, 0,
RegExp::CallOrigin::kFromRuntime, JSRegExp::kNoBacktrackLimit));
// Failed matches don't alter output registers.
CHECK_EQ(0, captures[0]);
CHECK_EQ(0, captures[1]);
CHECK_EQ(0, captures[2]);
CHECK_EQ(0, captures[3]);
CHECK_EQ(0, captures[4]);
}
#ifndef V8_INTL_SUPPORT
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment