// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_OBJECTS_JS_REGEXP_H_
#define V8_OBJECTS_JS_REGEXP_H_

#include "include/v8-regexp.h"
#include "src/objects/contexts.h"
#include "src/objects/js-array.h"
#include "src/regexp/regexp-flags.h"
#include "torque-generated/bit-fields.h"

// Has to be the last include (doesn't have include guards):
#include "src/objects/object-macros.h"

namespace v8 {
namespace internal {

#include "torque-generated/src/objects/js-regexp-tq.inc"

// Regular expressions
// The regular expression holds a single reference to a FixedArray in
// the kDataOffset field.
// The FixedArray contains the following data:
// - tag : type of regexp implementation (not compiled yet, atom, irregexp or
//   experimental)
// - reference to the original source string
// - reference to the original flag string
// If it is an atom regexp
// - a reference to a literal string to search for
// If it is an irregexp regexp:
// - a reference to code for Latin1 inputs, or the smi marker
//   kUninitializedValue.
// - a reference to code for UC16 inputs, or the smi marker
//   kUninitializedValue.
// - a reference to bytecode (a ByteArray) for Latin1 inputs, or the smi
//   marker kUninitializedValue.
// - a reference to bytecode (a ByteArray) for UC16 inputs, or the smi
//   marker kUninitializedValue.
// - max number of registers used by irregexp implementations.
// - number of capture registers (output values) of the regexp.
// - the capture name map (names of named capture groups and their indices).
// - the number of ticks left until tier-up.
// - the backtrack limit (or kNoBacktrackLimit).
// The exact slot of each entry is given by the k*Index constants below.
class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> {
 public:
  // The kind of implementation backing a regexp; this is the value returned
  // by type_tag().
  enum Type {
    NOT_COMPILED,  // Initial value. No data array has been set yet.
    ATOM,          // A simple string match.
    IRREGEXP,      // Compiled with Irregexp (code or bytecode).
    EXPERIMENTAL,  // Compiled to use the experimental linear time engine.
  };

  // Returns a JSRegExp for `source` and `flags`. `backtrack_limit` defaults to
  // kNoBacktrackLimit, i.e. no backtrack limit is applied.
  // NOTE(review): presumably the returned MaybeHandle is empty when the
  // pattern cannot be compiled - confirm against the definition.
  V8_EXPORT_PRIVATE static MaybeHandle<JSRegExp> New(
      Isolate* isolate, Handle<String> source, Flags flags,
      uint32_t backtrack_limit = kNoBacktrackLimit);

  // Initializes the already allocated `regexp` from `source` and `flags`.
  // Same defaulting of `backtrack_limit` as New().
  static MaybeHandle<JSRegExp> Initialize(
      Handle<JSRegExp> regexp, Handle<String> source, Flags flags,
      uint32_t backtrack_limit = kNoBacktrackLimit);
  // Overload taking the flags as a string instead of a Flags value.
  // NOTE(review): presumably `flags_string` goes through FlagsFromString() -
  // confirm against the definition.
  static MaybeHandle<JSRegExp> Initialize(Handle<JSRegExp> regexp,
                                          Handle<String> source,
                                          Handle<String> flags_string);

  DECL_ACCESSORS(last_index, Object)

  // Instance fields accessors.
  inline String source() const;
  inline Flags flags() const;

  // Data array field accessors.

  inline Type type_tag() const;
  inline String atom_pattern() const;
  // This could be a Smi kUninitializedValue or Code.
  V8_EXPORT_PRIVATE Object code(bool is_latin1) const;
  // The flag is named `is_latin1` to match code() and code_index().
  // NOTE(review): assumed to select the Latin1 vs. UC16 code slot exactly like
  // code() does - confirm against the definition.
  V8_EXPORT_PRIVATE void set_code(bool is_latin1, Handle<Code> code);
  // This could be a Smi kUninitializedValue or ByteArray.
  V8_EXPORT_PRIVATE Object bytecode(bool is_latin1) const;
  // Sets the bytecode as well as initializing trampoline slots to the
  // RegExpInterpreterTrampoline.
  void set_bytecode_and_trampoline(Isolate* isolate,
                                   Handle<ByteArray> bytecode);
  inline int max_register_count() const;
  // Number of captures (without the match itself).
  inline int capture_count() const;
  inline Object capture_name_map();
  inline void set_capture_name_map(Handle<FixedArray> capture_name_map);
  // Either the backtracking limit or kNoBacktrackLimit (see
  // kIrregexpBacktrackLimit).
  uint32_t backtrack_limit() const;

  static constexpr Flag AsJSRegExpFlag(RegExpFlag f) {
    return static_cast<Flag>(f);
  static constexpr Flags AsJSRegExpFlags(RegExpFlags f) {
    return Flags{static_cast<int>(f)};
  static constexpr RegExpFlags AsRegExpFlags(Flags f) {
    return RegExpFlags{static_cast<int>(f)};

95 96
  // Maps a flag character to its RegExpFlag via TryRegExpFlagFromChar().
  // An empty result from that helper is passed through unchanged. The result
  // is also empty when the character maps to RegExpFlag::kLinear while
  // FLAG_enable_experimental_regexp_engine is off.
  static base::Optional<RegExpFlag> FlagFromChar(char c) {
    base::Optional<RegExpFlag> f = TryRegExpFlagFromChar(c);
    if (!f.has_value()) return f;
    if (f.value() == RegExpFlag::kLinear &&
        !FLAG_enable_experimental_regexp_engine) {
      return {};
    }
    return f;
  }

  // The JSRegExp flag values must stay in sync with both the public API enum
  // (v8::RegExp) and the internal RegExpFlag enum, because values are
  // converted between them with plain casts.
  // NOTE(review): the per-flag list is assumed to be REGEXP_FLAG_LIST from
  // src/regexp/regexp-flags.h - confirm.
  STATIC_ASSERT(static_cast<int>(kNone) == v8::RegExp::kNone);
#define V(_, Camel, ...)                                             \
  STATIC_ASSERT(static_cast<int>(k##Camel) == v8::RegExp::k##Camel); \
  STATIC_ASSERT(static_cast<int>(k##Camel) ==                        \
                static_cast<int>(RegExpFlag::k##Camel));
  REGEXP_FLAG_LIST(V)
#undef V
  STATIC_ASSERT(kFlagCount == v8::RegExp::kFlagCount);
  STATIC_ASSERT(kFlagCount == kRegExpFlagCount);

  static base::Optional<Flags> FlagsFromString(Isolate* isolate,
                                               Handle<String> flags);

118 119 120
  V8_EXPORT_PRIVATE static Handle<String> StringFromFlags(Isolate* isolate,
                                                          Flags flags);

121 122
  inline String EscapedPattern();

  // Tier-up bookkeeping; see kIrregexpTicksUntilTierUpIndex for how the tick
  // counter is used.
  bool CanTierUp();
  bool MarkedForTierUp();
  void ResetLastTierUpTick();
  void TierUpTick();
  void MarkTierUpForNextExec();

  bool ShouldProduceBytecode();
  inline bool HasCompiledCode() const;
  inline void DiscardCompiledCodeForSerialization();

  // True for IRREGEXP and EXPERIMENTAL regexps, false for every other Type.
  static constexpr bool TypeSupportsCaptures(Type t) {
    return t == IRREGEXP || t == EXPERIMENTAL;
  }

  // Each capture (including the match itself) needs two registers, so
  // `count` captures require (count + 1) * 2 registers.
  static constexpr int RegistersForCaptureCount(int count) {
    return (count + 1) * 2;
  }

  static constexpr int code_index(bool is_latin1) {
    return is_latin1 ? kIrregexpLatin1CodeIndex : kIrregexpUC16CodeIndex;

  static constexpr int bytecode_index(bool is_latin1) {
    return is_latin1 ? kIrregexpLatin1BytecodeIndex
                     : kIrregexpUC16BytecodeIndex;
149 150 151

  // Dispatched behavior.
153 154

Irina Yatsenko's avatar
Irina Yatsenko committed
155 156
  /* This is already an in-object field. */
  // TODO(v8:8944): improve handling of in-object fields
  static constexpr int kLastIndexOffset = kHeaderSize;

159 160 161
  // The initial value of the last_index field on a new JSRegExp instance.
  static constexpr int kInitialLastIndexValue = 0;

  // Indices in the data array.
  // The first three slots are common to every regexp type.
  static constexpr int kTagIndex = 0;
  static constexpr int kSourceIndex = kTagIndex + 1;
  static constexpr int kFlagsIndex = kSourceIndex + 1;
  static constexpr int kFirstTypeSpecificIndex = kFlagsIndex + 1;
  static constexpr int kMinDataArrayLength = kFirstTypeSpecificIndex;

  // The data fields are used in different ways depending on the
  // value of the tag.
  // Atom regexps (literal strings).
  static constexpr int kAtomPatternIndex = kFirstTypeSpecificIndex;
  static constexpr int kAtomDataSize = kAtomPatternIndex + 1;

  // A Code object or a Smi marker value equal to kUninitializedValue.
  static constexpr int kIrregexpLatin1CodeIndex = kFirstTypeSpecificIndex;
  static constexpr int kIrregexpUC16CodeIndex = kIrregexpLatin1CodeIndex + 1;
  // A ByteArray object or a Smi marker value equal to kUninitializedValue.
  static constexpr int kIrregexpLatin1BytecodeIndex =
      kIrregexpUC16CodeIndex + 1;
  static constexpr int kIrregexpUC16BytecodeIndex =
      kIrregexpLatin1BytecodeIndex + 1;
  // Maximal number of registers used by either Latin1 or UC16.
  // Only used to check that there is enough stack space.
  static constexpr int kIrregexpMaxRegisterCountIndex =
      kIrregexpUC16BytecodeIndex + 1;
  // Number of captures in the compiled regexp.
  static constexpr int kIrregexpCaptureCountIndex =
      kIrregexpMaxRegisterCountIndex + 1;
  // Maps names of named capture groups (at indices 2i) to their corresponding
  // (1-based) capture group indices (at indices 2i + 1).
  static constexpr int kIrregexpCaptureNameMapIndex =
      kIrregexpCaptureCountIndex + 1;
  // Tier-up ticks are set to the value of the tier-up ticks flag. The value is
  // decremented on each execution of the bytecode, so that the tier-up
  // happens once the ticks reach zero.
  // This value is ignored if the regexp-tier-up flag isn't turned on.
  static constexpr int kIrregexpTicksUntilTierUpIndex =
      kIrregexpCaptureNameMapIndex + 1;
  // A smi containing either the backtracking limit or kNoBacktrackLimit.
  // TODO(jgruber): If needed, this limit could be packed into other fields
  // above to save space.
  static constexpr int kIrregexpBacktrackLimit =
      kIrregexpTicksUntilTierUpIndex + 1;
  static constexpr int kIrregexpDataSize = kIrregexpBacktrackLimit + 1;

  // TODO(mbid,v8:10765): At the moment the EXPERIMENTAL data array conforms
  // to the format of an IRREGEXP data array, with most fields set to some
  // default/uninitialized value. This is because EXPERIMENTAL and IRREGEXP
  // regexps take the same code path in `RegExpExecInternal`, which reads off
  // various fields from the data array. `RegExpExecInternal` should probably
  // distinguish between EXPERIMENTAL and IRREGEXP, and then we can get rid of
  // all the IRREGEXP only fields.
  static constexpr int kExperimentalDataSize = kIrregexpDataSize;

  // In-object fields: last_index is the only one.
  static constexpr int kLastIndexFieldIndex = 0;
  static constexpr int kInObjectFieldCount = 1;

  // The actual object size including in-object fields.
221 222 223
  static constexpr int Size() {
    return kHeaderSize + kInObjectFieldCount * kTaggedSize;

  // Descriptor array index to important methods in the prototype.
  static constexpr int kExecFunctionDescriptorIndex = 1;
  static constexpr int kSymbolMatchFunctionDescriptorIndex = 14;
  static constexpr int kSymbolMatchAllFunctionDescriptorIndex = 15;
  static constexpr int kSymbolReplaceFunctionDescriptorIndex = 16;
  static constexpr int kSymbolSearchFunctionDescriptorIndex = 17;
  static constexpr int kSymbolSplitFunctionDescriptorIndex = 18;

  // The uninitialized value for a regexp code object.
  static constexpr int kUninitializedValue = -1;

  // If the backtrack limit is set to this marker value, no limit is applied.
  static constexpr uint32_t kNoBacktrackLimit = 0;

  // The heuristic value for the length of the subject string for which we
  // tier-up to the compiler immediately, instead of using the interpreter.
  static constexpr int kTierUpForSubjectLengthValue = 1000;

  // Maximum number of captures allowed.
  static constexpr int kMaxCaptures = 1 << 16;

  inline Object DataAt(int index) const;
  inline void SetDataAt(int index, Object value);

251 252 253 254 255 256 257 258 259 260


// JSRegExpResult is just a JSArray with a specific initial map.
// This initial map adds in-object properties for "index" and "input"
// properties, as assigned by RegExp.prototype.exec, which allows
// faster creation of RegExp exec results.
// This class just holds constants used when creating the result.
// After creation the result must be treated as a JSArray in all regards.
261 262
class JSRegExpResult
    : public TorqueGeneratedJSRegExpResult<JSRegExpResult, JSArray> {
264 265 266 267
  // TODO(joshualitt): We would like to add printers and verifiers to
  // JSRegExpResult, and maybe JSRegExpResultIndices, but both have the same
  // instance type as JSArray.

  // Indices of in-object properties.
269 270 271
  static constexpr int kIndexIndex = 0;
  static constexpr int kInputIndex = 1;
  static constexpr int kGroupsIndex = 2;

  // Private internal only fields.
274 275 276 277
  static constexpr int kNamesIndex = 3;
  static constexpr int kRegExpInputIndex = 4;
  static constexpr int kRegExpLastIndex = 5;
  static constexpr int kInObjectPropertyCount = 6;

  static constexpr int kMapIndexInContext = Context::REGEXP_RESULT_MAP_INDEX;

282 283

284 285 286
class JSRegExpResultWithIndices
    : public TorqueGeneratedJSRegExpResultWithIndices<JSRegExpResultWithIndices,
                                                      JSRegExpResult> {
287 288 289 290
      JSRegExpResult::kInObjectPropertyCount == 6,
      "JSRegExpResultWithIndices must be a subclass of JSRegExpResult");
291 292
  static constexpr int kIndicesIndex = 6;
  static constexpr int kInObjectPropertyCount = 7;

295 296

297 298 299 300 301 302
// JSRegExpResultIndices is just a JSArray with a specific initial map.
// This initial map adds in-object properties for "group"
// properties, as assigned by RegExp.prototype.exec, which allows
// faster creation of RegExp exec results.
// This class just holds constants used when creating the result.
// After creation the result must be treated as a JSArray in all regards.
303 304 305
class JSRegExpResultIndices
    : public TorqueGeneratedJSRegExpResultIndices<JSRegExpResultIndices,
                                                  JSArray> {
306 307 308 309 310 311
  static Handle<JSRegExpResultIndices> BuildIndices(
      Isolate* isolate, Handle<RegExpMatchInfo> match_info,
      Handle<Object> maybe_names);

  // Indices of in-object properties.
312 313
  static constexpr int kGroupsIndex = 0;
  static constexpr int kInObjectPropertyCount = 1;
314 315

  // Descriptor index of groups.
  static constexpr int kGroupsDescriptorIndex = 1;

319 320 321 322 323 324 325 326

}  // namespace internal
}  // namespace v8

#include "src/objects/object-macros-undef.h"

#endif  // V8_OBJECTS_JS_REGEXP_H_