Commit c7d57dd3, authored by Jakob Gruber, committed by Commit Bot

[regexp] Reduce public API surface

This further reduces the number of things declared in the public
regexp API file, currently still named jsregexp.h.

* Move JSRegExp::Flags convenience functions to regexp-compiler.h.
* Set RegExpImpl methods private if possible (these will later be
  moved to a new hidden impl class).
* Merge RegExpEngine::CompilationResult into RegExpCompileData.
* Move remaining RegExpEngine methods to RegExpImpl and delete
  RegExpEngine.
* Extract RegExpGlobalCache.
* Document a few data structures.

Upcoming CLs will rename RegExpImpl to RegExp and jsregexp.h to
regexp.h. This should then be the only header included from other
directories.

Bug: v8:9359
Change-Id: I78c8f4cca495a2b95735a48b6181583bc3310bdf
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1662294
Reviewed-by: Peter Marshall <petermarshall@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#62218}
parent 8e53e4b2
......@@ -2677,7 +2677,6 @@ v8_source_set("v8_base_without_compiler") {
"src/profiler/tick-sample.h",
"src/profiler/tracing-cpu-profiler.cc",
"src/profiler/tracing-cpu-profiler.h",
"src/regexp/jsregexp-inl.h",
"src/regexp/jsregexp.cc",
"src/regexp/jsregexp.h",
"src/regexp/property-sequences.cc",
......
// Copyright 2013 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_REGEXP_JSREGEXP_INL_H_
#define V8_REGEXP_JSREGEXP_INL_H_
#include "src/objects/js-regexp-inl.h"
#include "src/objects/objects.h"
#include "src/regexp/jsregexp.h"
#include "src/utils/allocation.h"
namespace v8 {
namespace internal {
RegExpImpl::GlobalCache::~GlobalCache() {
  // When the register array fits into the isolate's
  // jsregexp_static_offsets_vector, the constructor reuses that vector and
  // there is nothing to release. Only a larger array was allocated by the
  // constructor and has to be freed here.
  const bool fits_static_vector =
      register_array_size_ <= Isolate::kJSRegexpStaticOffsetsVectorSize;
  if (fits_static_vector) return;
  DeleteArray(register_array_);
}
// Moves on to the next match and returns a pointer to its registers (start
// index at [0], end index at [1]). Once the current batch has been consumed
// the regexp is executed again, resuming at the end of the last match.
// Returns nullptr if no further match is available or the execution did not
// produce a positive match count.
int32_t* RegExpImpl::GlobalCache::FetchNext() {
  ++current_match_index_;

  // Common case: the current batch still contains an unread result.
  if (current_match_index_ < num_matches_) {
    return &register_array_[current_match_index_ * registers_per_match_];
  }

  // Current batch of results exhausted.
  // Fail if last batch was not even fully filled.
  if (num_matches_ < max_matches_) {
    num_matches_ = 0;  // Signal failed match.
    return nullptr;
  }

  // The final entry of the consumed batch tells us where to resume.
  int32_t* const prev_match =
      &register_array_[(current_match_index_ - 1) * registers_per_match_];
  int resume_index = prev_match[1];

  if (regexp_->TypeTag() == JSRegExp::ATOM) {
    num_matches_ =
        RegExpImpl::AtomExecRaw(isolate_, regexp_, subject_, resume_index,
                                register_array_, register_array_size_);
  } else {
    const int prev_start = prev_match[0];
    if (prev_start == resume_index) {
      // Zero-length match. Advance by one code point.
      resume_index = AdvanceZeroLength(resume_index);
    }
    if (resume_index > subject_->length()) {
      num_matches_ = 0;  // Signal failed match.
      return nullptr;
    }
    num_matches_ = RegExpImpl::IrregexpExecRaw(
        isolate_, regexp_, subject_, resume_index, register_array_,
        register_array_size_);
  }

  // A non-positive count means there is nothing to hand out.
  if (num_matches_ <= 0) return nullptr;

  current_match_index_ = 0;
  return register_array_;
}
// Returns the registers of the most recent successful match.
int32_t* RegExpImpl::GlobalCache::LastSuccessfulMatch() {
  // After a failed match (num_matches_ == 0) the current index already points
  // one result past the last good one, so step back by one slot.
  const int slot =
      (num_matches_ == 0) ? current_match_index_ - 1 : current_match_index_;
  return &register_array_[slot * registers_per_match_];
}
} // namespace internal
} // namespace v8
#endif // V8_REGEXP_JSREGEXP_INL_H_
......@@ -6,7 +6,7 @@
#include "src/codegen/compilation-cache.h"
#include "src/heap/heap-inl.h"
#include "src/regexp/jsregexp-inl.h"
#include "src/objects/js-regexp-inl.h"
#include "src/regexp/regexp-compiler.h"
#include "src/regexp/regexp-dotprinter.h"
#include "src/regexp/regexp-interpreter.h"
......@@ -290,28 +290,22 @@ bool RegExpImpl::CompileIrregexp(Isolate* isolate, Handle<JSRegExp> re,
USE(ThrowRegExpException(isolate, re, pattern, compile_data.error));
return false;
}
RegExpEngine::CompilationResult result =
RegExpEngine::Compile(isolate, &zone, &compile_data, flags, pattern,
sample_subject, is_one_byte);
if (result.error_message != nullptr) {
// Unable to compile regexp.
if (FLAG_correctness_fuzzer_suppressions &&
strncmp(result.error_message, "Stack overflow", 15) == 0) {
FATAL("Aborting on stack overflow");
}
Handle<String> error_message = isolate->factory()->NewStringFromUtf8(
CStrVector(result.error_message)).ToHandleChecked();
ThrowRegExpException(isolate, re, error_message);
const bool compilation_succeeded =
Compile(isolate, &zone, &compile_data, flags, pattern, sample_subject,
is_one_byte);
if (!compilation_succeeded) {
DCHECK(!compile_data.error.is_null());
ThrowRegExpException(isolate, re, compile_data.error);
return false;
}
Handle<FixedArray> data =
Handle<FixedArray>(FixedArray::cast(re->data()), isolate);
data->set(JSRegExp::code_index(is_one_byte), result.code);
data->set(JSRegExp::code_index(is_one_byte), compile_data.code);
SetIrregexpCaptureNameMap(*data, compile_data.capture_name_map);
int register_max = IrregexpMaxRegisterCount(*data);
if (result.num_registers > register_max) {
SetIrregexpMaxRegisterCount(*data, result.num_registers);
if (compile_data.register_count > register_max) {
SetIrregexpMaxRegisterCount(*data, compile_data.register_count);
}
return true;
......@@ -552,88 +546,59 @@ Handle<RegExpMatchInfo> RegExpImpl::SetLastMatchInfo(
return result;
}
RegExpImpl::GlobalCache::GlobalCache(Handle<JSRegExp> regexp,
Handle<String> subject, Isolate* isolate)
: register_array_(nullptr),
register_array_size_(0),
regexp_(regexp),
subject_(subject),
isolate_(isolate) {
bool interpreted = FLAG_regexp_interpret_all;
void RegExpImpl::DotPrintForTesting(const char* label, RegExpNode* node,
bool ignore_case) {
DotPrinter::DotPrint(label, node, ignore_case);
}
if (regexp_->TypeTag() == JSRegExp::ATOM) {
static const int kAtomRegistersPerMatch = 2;
registers_per_match_ = kAtomRegistersPerMatch;
// There is no distinction between interpreted and native for atom regexps.
interpreted = false;
} else {
registers_per_match_ =
RegExpImpl::IrregexpPrepare(isolate_, regexp_, subject_);
if (registers_per_match_ < 0) {
num_matches_ = -1; // Signal exception.
return;
}
}
namespace {
DCHECK(IsGlobal(regexp->GetFlags()));
if (!interpreted) {
register_array_size_ =
Max(registers_per_match_, Isolate::kJSRegexpStaticOffsetsVectorSize);
max_matches_ = register_array_size_ / registers_per_match_;
} else {
// Global loop in interpreted regexp is not implemented. We choose
// the size of the offsets vector so that it can only store one match.
register_array_size_ = registers_per_match_;
max_matches_ = 1;
}
// Returns true if the pattern string is too long, or if this isolate has both
// generated too much irregexp code and committed too much executable memory.
bool TooMuchRegExpCode(Isolate* isolate, Handle<String> pattern) {
// Limit the space regexps take up on the heap. In order to limit this we
// would like to keep track of the amount of regexp code on the heap. This
// is not tracked, however. As a conservative approximation we track the
// total regexp code compiled including code that has subsequently been freed
// and the total executable memory at any point.
static constexpr size_t kRegExpExecutableMemoryLimit = 16 * MB;
static constexpr size_t kRegExpCompiledLimit = 1 * MB;
if (register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize) {
register_array_ = NewArray<int32_t>(register_array_size_);
} else {
register_array_ = isolate->jsregexp_static_offsets_vector();
}
// Set state so that fetching the results the first time triggers a call
// to the compiled regexp.
current_match_index_ = max_matches_ - 1;
num_matches_ = max_matches_;
DCHECK_LE(2, registers_per_match_); // Each match has at least one capture.
DCHECK_GE(register_array_size_, registers_per_match_);
int32_t* last_match =
&register_array_[current_match_index_ * registers_per_match_];
last_match[0] = -1;
last_match[1] = 0;
Heap* heap = isolate->heap();
if (pattern->length() > RegExpImpl::kRegExpTooLargeToOptimize) return true;
return (isolate->total_regexp_code_generated() > kRegExpCompiledLimit &&
heap->CommittedMemoryExecutable() > kRegExpExecutableMemoryLimit);
}
// Returns the index at which matching should resume after a zero-length
// match at |last_index|.
int RegExpImpl::GlobalCache::AdvanceZeroLength(int last_index) {
  const int next_index = last_index + 1;
  // Without the unicode flag, or with no unit following last_index, advance
  // by exactly one.
  if (!IsUnicode(regexp_->GetFlags())) return next_index;
  if (next_index >= subject_->length()) return next_index;
  const bool at_surrogate_pair =
      unibrow::Utf16::IsLeadSurrogate(subject_->Get(last_index)) &&
      unibrow::Utf16::IsTrailSurrogate(subject_->Get(next_index));
  // Advance over the surrogate pair as a whole.
  return at_surrogate_pair ? last_index + 2 : next_index;
}
} // namespace
void RegExpEngine::DotPrint(const char* label, RegExpNode* node,
bool ignore_case) {
DotPrinter::DotPrint(label, node, ignore_case);
bool RegExpImpl::CompileForTesting(Isolate* isolate, Zone* zone,
RegExpCompileData* data,
JSRegExp::Flags flags,
Handle<String> pattern,
Handle<String> sample_subject,
bool is_one_byte) {
return Compile(isolate, zone, data, flags, pattern, sample_subject,
is_one_byte);
}
RegExpEngine::CompilationResult RegExpEngine::Compile(
Isolate* isolate, Zone* zone, RegExpCompileData* data,
bool RegExpImpl::Compile(Isolate* isolate, Zone* zone, RegExpCompileData* data,
JSRegExp::Flags flags, Handle<String> pattern,
Handle<String> sample_subject, bool is_one_byte) {
if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
return CompilationResult::RegExpTooBig();
data->error =
isolate->factory()->NewStringFromAsciiChecked("RegExp too big");
return false;
}
bool is_sticky = IsSticky(flags);
bool is_global = IsGlobal(flags);
bool is_unicode = IsUnicode(flags);
RegExpCompiler compiler(isolate, zone, data->capture_count, is_one_byte);
if (compiler.optimize())
if (compiler.optimize()) {
compiler.set_optimize(!TooMuchRegExpCode(isolate, pattern));
}
// Sample some characters from the middle of the string.
static const int kSampleSize = 128;
......@@ -693,7 +658,9 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(
Analysis analysis(isolate, is_one_byte);
analysis.EnsureAnalyzed(node);
if (analysis.has_failed()) {
return CompilationResult(analysis.error_message());
data->error =
isolate->factory()->NewStringFromAsciiChecked(analysis.error_message());
return false;
}
// Create the correct assembler for the architecture.
......@@ -763,20 +730,140 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(
RegExpCompiler::CompilationResult result = compiler.Assemble(
isolate, macro_assembler.get(), node, data->capture_count, pattern);
return RegExpEngine::CompilationResult(result.error_message, result.code,
result.num_registers);
if (result.error_message != nullptr) {
  // Only inspect the message when assembling actually failed: on success
  // error_message is nullptr, and passing a null pointer to strncmp is
  // undefined behavior.
  if (FLAG_correctness_fuzzer_suppressions &&
      strncmp(result.error_message, "Stack overflow", 15) == 0) {
    FATAL("Aborting on stack overflow");
  }
  // Surface the failure to the caller through the compile data.
  data->error =
      isolate->factory()->NewStringFromAsciiChecked(result.error_message);
}
data->code = result.code;
data->register_count = result.num_registers;
return result.Succeeded();
}
bool RegExpEngine::TooMuchRegExpCode(Isolate* isolate, Handle<String> pattern) {
Heap* heap = isolate->heap();
bool too_much = pattern->length() > RegExpImpl::kRegExpTooLargeToOptimize;
if (isolate->total_regexp_code_generated() >
RegExpImpl::kRegExpCompiledLimit &&
heap->CommittedMemoryExecutable() >
RegExpImpl::kRegExpExecutableMemoryLimit) {
too_much = true;
}
return too_much;
// Sets up a batch-wise iterator over the matches of |regexp| in |subject|.
// Determines how many registers one match needs, selects the register array
// (the isolate's static offsets vector when it is large enough, otherwise a
// newly allocated array) and primes the state so that the first FetchNext()
// call executes the regexp. If IrregexpPrepare reports failure, num_matches_
// is set to -1 and the constructor returns before max_matches_ and
// current_match_index_ are assigned.
RegExpGlobalCache::RegExpGlobalCache(Handle<JSRegExp> regexp,
Handle<String> subject, Isolate* isolate)
: register_array_(nullptr),
register_array_size_(0),
regexp_(regexp),
subject_(subject),
isolate_(isolate) {
bool interpreted = FLAG_regexp_interpret_all;
if (regexp_->TypeTag() == JSRegExp::ATOM) {
// Atom matches only record a start and an end index.
static const int kAtomRegistersPerMatch = 2;
registers_per_match_ = kAtomRegistersPerMatch;
// There is no distinction between interpreted and native for atom regexps.
interpreted = false;
} else {
registers_per_match_ =
RegExpImpl::IrregexpPrepare(isolate_, regexp_, subject_);
if (registers_per_match_ < 0) {
num_matches_ = -1; // Signal exception.
return;
}
}
DCHECK(IsGlobal(regexp->GetFlags()));
if (!interpreted) {
// Use at least the static vector's size so that several matches can be
// collected per execution.
register_array_size_ =
Max(registers_per_match_, Isolate::kJSRegexpStaticOffsetsVectorSize);
max_matches_ = register_array_size_ / registers_per_match_;
} else {
// Global loop in interpreted regexp is not implemented. We choose
// the size of the offsets vector so that it can only store one match.
register_array_size_ = registers_per_match_;
max_matches_ = 1;
}
// Allocate only when the static vector is too small; the destructor uses the
// same size comparison to decide whether to free the array.
if (register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize) {
register_array_ = NewArray<int32_t>(register_array_size_);
} else {
register_array_ = isolate->jsregexp_static_offsets_vector();
}
// Set state so that fetching the results the first time triggers a call
// to the compiled regexp.
current_match_index_ = max_matches_ - 1;
num_matches_ = max_matches_;
DCHECK_LE(2, registers_per_match_); // Each match has at least one capture.
DCHECK_GE(register_array_size_, registers_per_match_);
// Plant a placeholder "previous match" (start -1, end 0) in the last slot.
// FetchNext() reads the end index of that slot as the position to resume
// from, so the first execution starts at index 0.
int32_t* last_match =
&register_array_[current_match_index_ * registers_per_match_];
last_match[0] = -1;
last_match[1] = 0;
}
RegExpGlobalCache::~RegExpGlobalCache() {
  // An array no larger than the isolate's jsregexp_static_offsets_vector is
  // that shared vector itself and must not be freed. A larger one was
  // allocated in the constructor and is released here.
  const bool allocated_in_constructor =
      register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize;
  if (!allocated_in_constructor) return;
  DeleteArray(register_array_);
}
// Computes where to resume matching after a zero-length match that ended at
// |last_index|.
int RegExpGlobalCache::AdvanceZeroLength(int last_index) {
  const int following = last_index + 1;
  const bool may_skip_pair =
      IsUnicode(regexp_->GetFlags()) && following < subject_->length();
  if (may_skip_pair &&
      unibrow::Utf16::IsLeadSurrogate(subject_->Get(last_index)) &&
      unibrow::Utf16::IsTrailSurrogate(subject_->Get(following))) {
    // Advance over the surrogate pair.
    return last_index + 2;
  }
  return following;
}
// Steps to the next cached match and returns its registers (start index at
// [0], end index at [1]). When the batch has been used up, the regexp is run
// again from the end of the last match to refill the register array. Returns
// nullptr when no match remains or the execution yields a non-positive count.
int32_t* RegExpGlobalCache::FetchNext() {
  current_match_index_ += 1;

  const bool batch_has_more = current_match_index_ < num_matches_;
  if (batch_has_more) {
    return &register_array_[current_match_index_ * registers_per_match_];
  }

  // Current batch of results exhausted.
  // Fail if last batch was not even fully filled.
  if (num_matches_ < max_matches_) {
    num_matches_ = 0;  // Signal failed match.
    return nullptr;
  }

  // Resume from where the last match of the exhausted batch ended.
  const int last_slot = (current_match_index_ - 1) * registers_per_match_;
  int32_t* const last_registers = &register_array_[last_slot];
  int start_from = last_registers[1];

  const bool is_atom = regexp_->TypeTag() == JSRegExp::ATOM;
  if (is_atom) {
    num_matches_ =
        RegExpImpl::AtomExecRaw(isolate_, regexp_, subject_, start_from,
                                register_array_, register_array_size_);
  } else {
    const int last_start = last_registers[0];
    if (last_start == start_from) {
      // Zero-length match. Advance by one code point.
      start_from = AdvanceZeroLength(start_from);
    }
    if (start_from > subject_->length()) {
      num_matches_ = 0;  // Signal failed match.
      return nullptr;
    }
    num_matches_ = RegExpImpl::IrregexpExecRaw(
        isolate_, regexp_, subject_, start_from, register_array_,
        register_array_size_);
  }

  if (num_matches_ <= 0) return nullptr;

  // A fresh batch starts at the front of the register array.
  current_match_index_ = 0;
  return register_array_;
}
// Returns a pointer to the registers of the last match that succeeded.
int32_t* RegExpGlobalCache::LastSuccessfulMatch() {
  int match_index = current_match_index_;
  // After a failed match we shift back by one result.
  if (num_matches_ == 0) --match_index;
  return &register_array_[match_index * registers_per_match_];
}
Object RegExpResultsCache::Lookup(Heap* heap, String key_string,
......
......@@ -13,46 +13,46 @@ namespace internal {
class RegExpNode;
class RegExpTree;
inline bool IgnoreCase(JSRegExp::Flags flags) {
return (flags & JSRegExp::kIgnoreCase) != 0;
}
// TODO(jgruber): Consider splitting between ParseData and CompileData.
struct RegExpCompileData {
// The parsed AST as produced by the RegExpParser.
RegExpTree* tree = nullptr;
// The compiled Node graph as produced by RegExpTree::ToNode methods.
RegExpNode* node = nullptr;
inline bool IsUnicode(JSRegExp::Flags flags) {
return (flags & JSRegExp::kUnicode) != 0;
}
// The generated code as produced by the compiler. Either a Code object (for
// irregexp native code) or a ByteArray (for irregexp bytecode).
Object code;
inline bool IsSticky(JSRegExp::Flags flags) {
return (flags & JSRegExp::kSticky) != 0;
}
// True, iff the pattern is a 'simple' atom with zero captures. In other
// words, the pattern consists of a string with no metacharacters and special
// regexp features, and can be implemented as a standard string search.
bool simple = true;
inline bool IsGlobal(JSRegExp::Flags flags) {
return (flags & JSRegExp::kGlobal) != 0;
}
// True, iff the pattern is anchored at the start of the string with '^'.
bool contains_anchor = false;
// Only use if the pattern contains named captures. If so, this contains a
// mapping of capture names to capture indices.
Handle<FixedArray> capture_name_map;
inline bool DotAll(JSRegExp::Flags flags) {
return (flags & JSRegExp::kDotAll) != 0;
}
// The error message. Only used if an error occurred during parsing or
// compilation.
Handle<String> error;
inline bool Multiline(JSRegExp::Flags flags) {
return (flags & JSRegExp::kMultiline) != 0;
}
// The number of capture groups, without the global capture \0.
int capture_count = 0;
inline bool NeedsUnicodeCaseEquivalents(JSRegExp::Flags flags) {
// Both unicode and ignore_case flags are set. We need to use ICU to find
// the closure over case equivalents.
return IsUnicode(flags) && IgnoreCase(flags);
}
// The number of registers used by the generated code.
int register_count = 0;
};
class RegExpImpl final {
class RegExpImpl final : public AllStatic {
public:
// Whether the irregexp engine generates native code or interpreter bytecode.
static bool UsesNativeRegExp() { return !FLAG_regexp_interpret_all; }
// Returns a string representation of a regular expression.
// Implements RegExp.prototype.toString, see ECMA-262 section 15.10.6.4.
// This function calls the garbage collector if necessary.
static Handle<String> ToString(Handle<Object> value);
// Parses the RegExp pattern and prepares the JSRegExp object with
// generic data and choice of implementation - as well as what
// the implementation wants to store in the data field.
......@@ -67,6 +67,43 @@ class RegExpImpl final {
Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
int index, Handle<RegExpMatchInfo> last_match_info);
enum IrregexpResult { RE_FAILURE = 0, RE_SUCCESS = 1, RE_EXCEPTION = -1 };
// Prepare a RegExp for being executed one or more times (using
// IrregexpExecOnce) on the subject.
// This ensures that the regexp is compiled for the subject, and that
// the subject is flat.
// Returns the number of integer spaces required by IrregexpExecOnce
// as its "registers" argument. If the regexp cannot be compiled,
// an exception is set as pending, and this function returns negative.
static int IrregexpPrepare(Isolate* isolate, Handle<JSRegExp> regexp,
Handle<String> subject);
// Set last match info. If match is nullptr, then setting captures is
// omitted.
static Handle<RegExpMatchInfo> SetLastMatchInfo(
Isolate* isolate, Handle<RegExpMatchInfo> last_match_info,
Handle<String> subject, int capture_count, int32_t* match);
V8_EXPORT_PRIVATE static bool CompileForTesting(Isolate* isolate, Zone* zone,
RegExpCompileData* input,
JSRegExp::Flags flags,
Handle<String> pattern,
Handle<String> sample_subject,
bool is_one_byte);
V8_EXPORT_PRIVATE static void DotPrintForTesting(const char* label,
RegExpNode* node,
bool ignore_case);
static const int kRegExpTooLargeToOptimize = 20 * KB;
private:
// Returns a string representation of a regular expression.
// Implements RegExp.prototype.toString, see ECMA-262 section 15.10.6.4.
// This function calls the garbage collector if necessary.
static Handle<String> ToString(Handle<Object> value);
// Prepares a JSRegExp object with Irregexp-specific data.
static void IrregexpInitialize(Isolate* isolate, Handle<JSRegExp> re,
Handle<String> pattern, JSRegExp::Flags flags,
......@@ -84,18 +121,6 @@ class RegExpImpl final {
Handle<String> subject, int index,
Handle<RegExpMatchInfo> last_match_info);
enum IrregexpResult { RE_FAILURE = 0, RE_SUCCESS = 1, RE_EXCEPTION = -1 };
// Prepare a RegExp for being executed one or more times (using
// IrregexpExecOnce) on the subject.
// This ensures that the regexp is compiled for the subject, and that
// the subject is flat.
// Returns the number of integer spaces required by IrregexpExecOnce
// as its "registers" argument. If the regexp cannot be compiled,
// an exception is set as pending, and this function returns negative.
static int IrregexpPrepare(Isolate* isolate, Handle<JSRegExp> regexp,
Handle<String> subject);
// Execute a regular expression on the subject, starting from index.
// If matching succeeds, return the number of matches. This can be larger
// than one in the case of global regular expressions.
......@@ -114,29 +139,50 @@ class RegExpImpl final {
Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
int index, Handle<RegExpMatchInfo> last_match_info);
// Set last match info. If match is nullptr, then setting captures is
// omitted.
static Handle<RegExpMatchInfo> SetLastMatchInfo(
Isolate* isolate, Handle<RegExpMatchInfo> last_match_info,
Handle<String> subject, int capture_count, int32_t* match);
static bool CompileIrregexp(Isolate* isolate, Handle<JSRegExp> re,
Handle<String> sample_subject, bool is_one_byte);
static inline bool EnsureCompiledIrregexp(Isolate* isolate,
Handle<JSRegExp> re,
Handle<String> sample_subject,
bool is_one_byte);
class GlobalCache {
// Returns true on success, false on failure.
static bool Compile(Isolate* isolate, Zone* zone, RegExpCompileData* input,
JSRegExp::Flags flags, Handle<String> pattern,
Handle<String> sample_subject, bool is_one_byte);
// For acting on the JSRegExp data FixedArray.
static int IrregexpMaxRegisterCount(FixedArray re);
static void SetIrregexpMaxRegisterCount(FixedArray re, int value);
static void SetIrregexpCaptureNameMap(FixedArray re,
Handle<FixedArray> value);
static int IrregexpNumberOfCaptures(FixedArray re);
static int IrregexpNumberOfRegisters(FixedArray re);
static ByteArray IrregexpByteCode(FixedArray re, bool is_one_byte);
static Code IrregexpNativeCode(FixedArray re, bool is_one_byte);
friend class RegExpGlobalCache;
};
// Uses a special global mode of irregexp-generated code to perform a global
// search and return multiple results at once. As such, this is essentially an
// iterator over multiple results (retrieved batch-wise in advance).
class RegExpGlobalCache final {
public:
GlobalCache(Handle<JSRegExp> regexp,
Handle<String> subject,
RegExpGlobalCache(Handle<JSRegExp> regexp, Handle<String> subject,
Isolate* isolate);
V8_INLINE ~GlobalCache();
~RegExpGlobalCache();
// Fetch the next entry in the cache for global regexp match results.
// This does not set the last match info. Upon failure, nullptr is
// returned. The cause can be checked with Result(). The previous result is
// still available in memory when a failure happens.
V8_INLINE int32_t* FetchNext();
int32_t* FetchNext();
V8_INLINE int32_t* LastSuccessfulMatch();
int32_t* LastSuccessfulMatch();
V8_INLINE bool HasException() { return num_matches_ < 0; }
bool HasException() { return num_matches_ < 0; }
private:
int AdvanceZeroLength(int last_index);
......@@ -151,80 +197,11 @@ class RegExpImpl final {
Handle<JSRegExp> regexp_;
Handle<String> subject_;
Isolate* isolate_;
};
// For acting on the JSRegExp data FixedArray.
static int IrregexpMaxRegisterCount(FixedArray re);
static void SetIrregexpMaxRegisterCount(FixedArray re, int value);
static void SetIrregexpCaptureNameMap(FixedArray re,
Handle<FixedArray> value);
static int IrregexpNumberOfCaptures(FixedArray re);
static int IrregexpNumberOfRegisters(FixedArray re);
static ByteArray IrregexpByteCode(FixedArray re, bool is_one_byte);
static Code IrregexpNativeCode(FixedArray re, bool is_one_byte);
// Limit the space regexps take up on the heap. In order to limit this we
// would like to keep track of the amount of regexp code on the heap. This
// is not tracked, however. As a conservative approximation we track the
// total regexp code compiled including code that has subsequently been freed
// and the total executable memory at any point.
static const size_t kRegExpExecutableMemoryLimit = 16 * MB;
static const size_t kRegExpCompiledLimit = 1 * MB;
static const int kRegExpTooLargeToOptimize = 20 * KB;
private:
static bool CompileIrregexp(Isolate* isolate, Handle<JSRegExp> re,
Handle<String> sample_subject, bool is_one_byte);
static inline bool EnsureCompiledIrregexp(Isolate* isolate,
Handle<JSRegExp> re,
Handle<String> sample_subject,
bool is_one_byte);
};
// Data carried between parsing and compiling a regexp pattern. Every field
// starts out with the default given below.
struct RegExpCompileData {
  RegExpTree* tree = nullptr;
  RegExpNode* node = nullptr;
  bool simple = true;
  bool contains_anchor = false;
  Handle<FixedArray> capture_name_map;
  Handle<String> error;
  int capture_count = 0;
};
// Static entry points for compiling a regexp and for printing its node graph.
class RegExpEngine final : public AllStatic {
public:
// Outcome of a compilation. Callers treat a non-null error_message as a
// failed compilation; otherwise code and num_registers carry the result.
struct CompilationResult {
// Failure result carrying only an error message.
explicit CompilationResult(const char* error_message)
: error_message(error_message) {}
// Full result with the generated code and its register count.
CompilationResult(const char* error_message, Object code, int registers)
: error_message(error_message), code(code), num_registers(registers) {}
// Convenience failure result with the fixed message "RegExp too big".
static CompilationResult RegExpTooBig() {
return CompilationResult("RegExp too big");
}
const char* const error_message = nullptr;
Object const code;
int const num_registers = 0;
};
// Compiles the regexp described by |input| and returns the result by value.
V8_EXPORT_PRIVATE static CompilationResult Compile(
Isolate* isolate, Zone* zone, RegExpCompileData* input,
JSRegExp::Flags flags, Handle<String> pattern,
Handle<String> sample_subject, bool is_one_byte);
// Returns true if |pattern| is longer than
// RegExpImpl::kRegExpTooLargeToOptimize, or if both the isolate's total
// generated regexp code and the heap's committed executable memory exceed
// their limits.
static bool TooMuchRegExpCode(Isolate* isolate, Handle<String> pattern);
// Forwards to DotPrinter::DotPrint.
V8_EXPORT_PRIVATE static void DotPrint(const char* label, RegExpNode* node,
bool ignore_case);
};
// Caches results for specific regexp queries on the isolate. At the time of
// writing, this is used during global calls to RegExp.prototype.exec and
// @@split.
class RegExpResultsCache final : public AllStatic {
public:
enum ResultsCacheType { REGEXP_MULTIPLE_INDICES, STRING_SPLIT_SUBSTRINGS };
......@@ -239,14 +216,15 @@ class RegExpResultsCache final : public AllStatic {
Handle<Object> key_pattern, Handle<FixedArray> value_array,
Handle<FixedArray> last_match_cache, ResultsCacheType type);
static void Clear(FixedArray cache);
static const int kRegExpResultsCacheSize = 0x100;
static constexpr int kRegExpResultsCacheSize = 0x100;
private:
static const int kArrayEntriesPerCacheEntry = 4;
static const int kStringOffset = 0;
static const int kPatternOffset = 1;
static const int kArrayOffset = 2;
static const int kLastMatchOffset = 3;
static constexpr int kStringOffset = 0;
static constexpr int kPatternOffset = 1;
static constexpr int kArrayOffset = 2;
static constexpr int kLastMatchOffset = 3;
static constexpr int kArrayEntriesPerCacheEntry = 4;
};
} // namespace internal
......
......@@ -7,7 +7,6 @@
#include "src/diagnostics/code-tracer.h"
#include "src/execution/isolate.h"
#include "src/objects/objects-inl.h"
#include "src/regexp/jsregexp.h" // TODO(jgruber): Only needed for IgnoreCase.
#include "src/regexp/regexp-macro-assembler-arch.h"
#include "src/regexp/regexp-macro-assembler-tracer.h"
#include "src/strings/unicode-inl.h"
......
......@@ -45,6 +45,36 @@ constexpr int kPatternTooShortForBoyerMoore = 2;
} // namespace regexp_compiler_constants
// Convenience predicates over JSRegExp::Flags. Each one reports whether the
// corresponding flag bit is set.

// True if the kIgnoreCase bit is set.
inline bool IgnoreCase(JSRegExp::Flags flags) {
  const bool ignore_case_set = (flags & JSRegExp::kIgnoreCase) != 0;
  return ignore_case_set;
}

// True if the kUnicode bit is set.
inline bool IsUnicode(JSRegExp::Flags flags) {
  const bool unicode_set = (flags & JSRegExp::kUnicode) != 0;
  return unicode_set;
}

// True if the kSticky bit is set.
inline bool IsSticky(JSRegExp::Flags flags) {
  const bool sticky_set = (flags & JSRegExp::kSticky) != 0;
  return sticky_set;
}

// True if the kGlobal bit is set.
inline bool IsGlobal(JSRegExp::Flags flags) {
  const bool global_set = (flags & JSRegExp::kGlobal) != 0;
  return global_set;
}

// True if the kDotAll bit is set.
inline bool DotAll(JSRegExp::Flags flags) {
  const bool dot_all_set = (flags & JSRegExp::kDotAll) != 0;
  return dot_all_set;
}

// True if the kMultiline bit is set.
inline bool Multiline(JSRegExp::Flags flags) {
  const bool multiline_set = (flags & JSRegExp::kMultiline) != 0;
  return multiline_set;
}

// True only when both the unicode and the ignore-case flags are set; in that
// case ICU is needed to find the closure over case equivalents.
inline bool NeedsUnicodeCaseEquivalents(JSRegExp::Flags flags) {
  if (!IsUnicode(flags)) return false;
  return IgnoreCase(flags);
}
// A set of unsigned integers that behaves especially well on small
// integers (< 32). May do zone-allocation.
class OutSet : public ZoneObject {
......@@ -645,6 +675,8 @@ class RegExpCompiler {
return CompilationResult("RegExp too big");
}
bool Succeeded() const { return error_message == nullptr; }
const char* const error_message = nullptr;
Object code;
int num_registers = 0;
......
......@@ -11,7 +11,8 @@
#include "src/logging/counters.h"
#include "src/numbers/conversions-inl.h"
#include "src/objects/js-array-inl.h"
#include "src/regexp/jsregexp-inl.h"
#include "src/objects/js-regexp-inl.h"
#include "src/regexp/jsregexp.h"
#include "src/regexp/regexp-utils.h"
#include "src/runtime/runtime-utils.h"
#include "src/strings/string-builder-inl.h"
......@@ -638,7 +639,7 @@ V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithString(
}
}
RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
RegExpGlobalCache global_cache(regexp, subject, isolate);
if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
int32_t* current_match = global_cache.FetchNext();
......@@ -703,7 +704,7 @@ V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithEmptyString(
}
}
RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
RegExpGlobalCache global_cache(regexp, subject, isolate);
if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
int32_t* current_match = global_cache.FetchNext();
......@@ -1115,7 +1116,7 @@ static Object SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
}
}
RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
RegExpGlobalCache global_cache(regexp, subject, isolate);
if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
// Ensured in Runtime_RegExpExecMultiple.
......
......@@ -10,7 +10,6 @@
#include "src/objects/objects-inl.h"
#include "src/objects/slots.h"
#include "src/objects/smi.h"
#include "src/regexp/jsregexp-inl.h"
#include "src/regexp/regexp-utils.h"
#include "src/runtime/runtime-utils.h"
#include "src/strings/string-builder-inl.h"
......
......@@ -547,7 +547,7 @@ static RegExpNode* Compile(const char* input, bool multiline, bool unicode,
.ToHandleChecked();
Handle<String> sample_subject =
isolate->factory()->NewStringFromUtf8(CStrVector("")).ToHandleChecked();
RegExpEngine::Compile(isolate, zone, &compile_data, flags, pattern,
RegExpImpl::CompileForTesting(isolate, zone, &compile_data, flags, pattern,
sample_subject, is_one_byte);
return compile_data.node;
}
......@@ -561,7 +561,7 @@ static void Execute(const char* input, bool multiline, bool unicode,
USE(node);
#ifdef DEBUG
if (dot_output) {
RegExpEngine::DotPrint(input, node, false);
RegExpImpl::DotPrintForTesting(input, node, false);
}
#endif // DEBUG
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment