Commit c7d57dd3, authored by Jakob Gruber, committed by Commit Bot

[regexp] Reduce public API surface

This further reduces the number of things declared in the public
regexp API file, currently still named jsregexp.h.

* Move JSRegExp::Flags convenience functions to regexp-compiler.h.
* Set RegExpImpl methods private if possible (these will later be
  moved to a new hidden impl class).
* Merge RegExpEngine::CompilationResult into RegExpCompileData.
* Move remaining RegExpEngine methods to RegExpImpl and delete
  RegExpEngine.
* Extract RegExpGlobalCache.
* Document a few data structures.

Upcoming CLs will rename RegExpImpl to RegExp and jsregexp.h to
regexp.h. This should then be the only header included from other
directories.

Bug: v8:9359
Change-Id: I78c8f4cca495a2b95735a48b6181583bc3310bdf
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1662294
Reviewed-by: Peter Marshall <petermarshall@chromium.org>
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Cr-Commit-Position: refs/heads/master@{#62218}
parent 8e53e4b2
......@@ -2677,7 +2677,6 @@ v8_source_set("v8_base_without_compiler") {
"src/profiler/tick-sample.h",
"src/profiler/tracing-cpu-profiler.cc",
"src/profiler/tracing-cpu-profiler.h",
"src/regexp/jsregexp-inl.h",
"src/regexp/jsregexp.cc",
"src/regexp/jsregexp.h",
"src/regexp/property-sequences.cc",
......
// Copyright 2013 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#ifndef V8_REGEXP_JSREGEXP_INL_H_
#define V8_REGEXP_JSREGEXP_INL_H_
#include "src/objects/js-regexp-inl.h"
#include "src/objects/objects.h"
#include "src/regexp/jsregexp.h"
#include "src/utils/allocation.h"
namespace v8 {
namespace internal {
// Releases the register array, but only when this cache allocated it itself.
// An array no larger than Isolate::kJSRegexpStaticOffsetsVectorSize is the
// existing jsregexp_static_offsets_vector and must not be deleted here.
RegExpImpl::GlobalCache::~GlobalCache() {
  const bool allocated_in_constructor =
      register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize;
  if (allocated_in_constructor) DeleteArray(register_array_);
}
// Moves the cursor to the next match and returns a pointer to that match's
// registers inside register_array_, or nullptr when no further match is
// available. Once the current batch of results is used up, the matcher is run
// again, starting at the end of the last match, to produce the next batch.
int32_t* RegExpImpl::GlobalCache::FetchNext() {
  current_match_index_++;

  // Fast path: the current batch still contains an unread result.
  if (current_match_index_ < num_matches_) {
    return &register_array_[current_match_index_ * registers_per_match_];
  }

  // Current batch of results exhausted.
  // Fail if the previous batch was not even completely filled.
  if (num_matches_ < max_matches_) {
    num_matches_ = 0;  // Signal failed match.
    return nullptr;
  }

  // The search resumes where the final match of the previous batch ended.
  int32_t* const previous_match =
      &register_array_[(current_match_index_ - 1) * registers_per_match_];
  int resume_index = previous_match[1];

  if (regexp_->TypeTag() == JSRegExp::ATOM) {
    num_matches_ =
        RegExpImpl::AtomExecRaw(isolate_, regexp_, subject_, resume_index,
                                register_array_, register_array_size_);
  } else {
    const bool previous_match_was_empty = previous_match[0] == resume_index;
    if (previous_match_was_empty) {
      // Zero-length match. Advance by one code point.
      resume_index = AdvanceZeroLength(resume_index);
    }
    if (resume_index > subject_->length()) {
      num_matches_ = 0;  // Signal failed match.
      return nullptr;
    }
    num_matches_ = RegExpImpl::IrregexpExecRaw(isolate_, regexp_, subject_,
                                               resume_index, register_array_,
                                               register_array_size_);
  }

  // A non-positive count means the new batch holds no results.
  // NOTE(review): negative values presumably signal an exception - confirm
  // against the Exec*Raw implementations.
  if (num_matches_ <= 0) return nullptr;

  // Start reading the freshly filled batch from its first entry.
  current_match_index_ = 0;
  return register_array_;
}
// Returns a pointer to the registers of the most recent successful match
// inside register_array_.
int32_t* RegExpImpl::GlobalCache::LastSuccessfulMatch() {
  // After a failed match (num_matches_ == 0) the cursor sits one result past
  // the last success, so shift back by one result.
  const int shift_back = (num_matches_ == 0) ? registers_per_match_ : 0;
  return &register_array_[current_match_index_ * registers_per_match_ -
                          shift_back];
}
} // namespace internal
} // namespace v8
#endif // V8_REGEXP_JSREGEXP_INL_H_
This diff is collapsed.
This diff is collapsed.
......@@ -7,7 +7,6 @@
#include "src/diagnostics/code-tracer.h"
#include "src/execution/isolate.h"
#include "src/objects/objects-inl.h"
#include "src/regexp/jsregexp.h" // TODO(jgruber): Only needed for IgnoreCase.
#include "src/regexp/regexp-macro-assembler-arch.h"
#include "src/regexp/regexp-macro-assembler-tracer.h"
#include "src/strings/unicode-inl.h"
......
......@@ -45,6 +45,36 @@ constexpr int kPatternTooShortForBoyerMoore = 2;
} // namespace regexp_compiler_constants
// Returns true if the JSRegExp::kIgnoreCase bit is set in |flags|.
inline bool IgnoreCase(JSRegExp::Flags flags) {
return (flags & JSRegExp::kIgnoreCase) != 0;
}
// Returns true if the JSRegExp::kUnicode bit is set in |flags|.
inline bool IsUnicode(JSRegExp::Flags flags) {
return (flags & JSRegExp::kUnicode) != 0;
}
// Returns true if the JSRegExp::kSticky bit is set in |flags|.
inline bool IsSticky(JSRegExp::Flags flags) {
return (flags & JSRegExp::kSticky) != 0;
}
// Returns true if the JSRegExp::kGlobal bit is set in |flags|.
inline bool IsGlobal(JSRegExp::Flags flags) {
return (flags & JSRegExp::kGlobal) != 0;
}
// Returns true if the JSRegExp::kDotAll bit is set in |flags|.
inline bool DotAll(JSRegExp::Flags flags) {
return (flags & JSRegExp::kDotAll) != 0;
}
// Returns true if the JSRegExp::kMultiline bit is set in |flags|.
inline bool Multiline(JSRegExp::Flags flags) {
return (flags & JSRegExp::kMultiline) != 0;
}
// Reports whether both the unicode and the ignore_case flags are set in
// |flags|. In that case ICU has to be used to find the closure over case
// equivalents.
inline bool NeedsUnicodeCaseEquivalents(JSRegExp::Flags flags) {
  if (!IsUnicode(flags)) return false;
  return IgnoreCase(flags);
}
// A set of unsigned integers that behaves especially well on small
// integers (< 32). May do zone-allocation.
class OutSet : public ZoneObject {
......@@ -645,6 +675,8 @@ class RegExpCompiler {
return CompilationResult("RegExp too big");
}
bool Succeeded() const { return error_message == nullptr; }
const char* const error_message = nullptr;
Object code;
int num_registers = 0;
......
......@@ -11,7 +11,8 @@
#include "src/logging/counters.h"
#include "src/numbers/conversions-inl.h"
#include "src/objects/js-array-inl.h"
#include "src/regexp/jsregexp-inl.h"
#include "src/objects/js-regexp-inl.h"
#include "src/regexp/jsregexp.h"
#include "src/regexp/regexp-utils.h"
#include "src/runtime/runtime-utils.h"
#include "src/strings/string-builder-inl.h"
......@@ -638,7 +639,7 @@ V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithString(
}
}
RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
RegExpGlobalCache global_cache(regexp, subject, isolate);
if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
int32_t* current_match = global_cache.FetchNext();
......@@ -703,7 +704,7 @@ V8_WARN_UNUSED_RESULT static Object StringReplaceGlobalRegExpWithEmptyString(
}
}
RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
RegExpGlobalCache global_cache(regexp, subject, isolate);
if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
int32_t* current_match = global_cache.FetchNext();
......@@ -1115,7 +1116,7 @@ static Object SearchRegExpMultiple(Isolate* isolate, Handle<String> subject,
}
}
RegExpImpl::GlobalCache global_cache(regexp, subject, isolate);
RegExpGlobalCache global_cache(regexp, subject, isolate);
if (global_cache.HasException()) return ReadOnlyRoots(isolate).exception();
// Ensured in Runtime_RegExpExecMultiple.
......
......@@ -10,7 +10,6 @@
#include "src/objects/objects-inl.h"
#include "src/objects/slots.h"
#include "src/objects/smi.h"
#include "src/regexp/jsregexp-inl.h"
#include "src/regexp/regexp-utils.h"
#include "src/runtime/runtime-utils.h"
#include "src/strings/string-builder-inl.h"
......
......@@ -547,8 +547,8 @@ static RegExpNode* Compile(const char* input, bool multiline, bool unicode,
.ToHandleChecked();
Handle<String> sample_subject =
isolate->factory()->NewStringFromUtf8(CStrVector("")).ToHandleChecked();
RegExpEngine::Compile(isolate, zone, &compile_data, flags, pattern,
sample_subject, is_one_byte);
RegExpImpl::CompileForTesting(isolate, zone, &compile_data, flags, pattern,
sample_subject, is_one_byte);
return compile_data.node;
}
......@@ -561,7 +561,7 @@ static void Execute(const char* input, bool multiline, bool unicode,
USE(node);
#ifdef DEBUG
if (dot_output) {
RegExpEngine::DotPrint(input, node, false);
RegExpImpl::DotPrintForTesting(input, node, false);
}
#endif // DEBUG
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment