js-regexp.h 12.8 KB
Newer Older
1 2 3 4 5 6 7
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_OBJECTS_JS_REGEXP_H_
#define V8_OBJECTS_JS_REGEXP_H_

8
#include "include/v8-regexp.h"
9
#include "src/objects/contexts.h"
10
#include "src/objects/js-array.h"
11
#include "src/regexp/regexp-flags.h"
12
#include "torque-generated/bit-fields.h"
13 14 15 16 17 18 19

// Has to be the last include (doesn't have include guards):
#include "src/objects/object-macros.h"

namespace v8 {
namespace internal {

20 21
#include "torque-generated/src/objects/js-regexp-tq.inc"

22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37
// Regular expressions
// The regular expression holds a single reference to a FixedArray in
// the kDataOffset field.
// The FixedArray contains the following data:
// - tag : type of regexp implementation (not compiled yet, atom or irregexp)
// - reference to the original source string
// - reference to the original flag string
// If it is an atom regexp
// - a reference to a literal string to search for
// If it is an irregexp regexp:
// - a reference to code for Latin1 inputs (bytecode or compiled), or a smi
// used for tracking the last usage (used for regexp code flushing).
// - a reference to code for UC16 inputs (bytecode or compiled), or a smi
// used for tracking the last usage (used for regexp code flushing).
// - max number of registers used by irregexp implementations.
// - number of capture registers (output values) of the regexp.
38
class JSRegExp : public TorqueGeneratedJSRegExp<JSRegExp, JSObject> {
39
 public:
40 41 42 43 44 45
  // Kind of regexp implementation; this is what the tag slot of the data array
  // records.
  enum Type {
    // Initial value. No data array has been set yet.
    NOT_COMPILED = 0,
    // A simple string match.
    ATOM = 1,
    // Compiled with Irregexp (code or bytecode).
    IRREGEXP = 2,
    // Compiled to use the experimental linear time engine.
    EXPERIMENTAL = 3,
  };
46
  // Expands the Torque-generated flag definitions for JSRegExp.
  // NOTE(review): presumably this is what introduces the Flag/Flags types and
  // the k* flag constants used further down in this class -- confirm against
  // torque-generated/bit-fields.h.
  DEFINE_TORQUE_GENERATED_JS_REG_EXP_FLAGS()
47

48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84
  // Factory for a regexp with the given `source` and `flags`.
  // `backtrack_limit` defaults to kNoBacktrackLimit, the marker value for
  // "no limit is applied".
  // NOTE(review): an empty MaybeHandle presumably reports a failure -- confirm
  // against the out-of-line definition.
  V8_EXPORT_PRIVATE static MaybeHandle<JSRegExp> New(
      Isolate* isolate, Handle<String> source, Flags flags,
      uint32_t backtrack_limit = kNoBacktrackLimit);

  // Operates on an already existing `regexp` object; takes the same `source`,
  // `flags` and `backtrack_limit` parameters as New().
  // NOTE(review): presumably (re)initializes `regexp` in place and returns it
  // on success -- confirm against the out-of-line definition.
  static MaybeHandle<JSRegExp> Initialize(
      Handle<JSRegExp> regexp, Handle<String> source, Flags flags,
      uint32_t backtrack_limit = kNoBacktrackLimit);
  // Overload that takes the flags as a string instead of a Flags value. It has
  // no backtrack_limit parameter.
  static MaybeHandle<JSRegExp> Initialize(Handle<JSRegExp> regexp,
                                          Handle<String> source,
                                          Handle<String> flags_string);

  // Accessors for last_index, an in-object field located at kLastIndexOffset.
  DECL_ACCESSORS(last_index, Object)

  // Instance fields accessors.
  inline String source() const;
  inline Flags flags() const;

  // Data array field accessors.

  // Implementation kind of this regexp (see Type).
  inline Type type_tag() const;
  // NOTE(review): presumably only meaningful for ATOM regexps, whose data
  // array holds the literal search string at kAtomPatternIndex -- confirm.
  inline String atom_pattern() const;
  // Code for the given subject encoding: the Latin1 slot when `is_latin1` is
  // true, the UC16 slot otherwise (see code_index()).
  // This could be a Smi kUninitializedValue or Code.
  V8_EXPORT_PRIVATE Object code(bool is_latin1) const;
  // Installs `code` for the given subject encoding. The selector is named
  // `is_latin1` so that it reads the same as in code(), bytecode() and
  // code_index(), which all pick the slot by Latin1-ness.
  // NOTE(review): confirm the out-of-line definition interprets the flag as
  // "is Latin1" (i.e. forwards it to code_index()).
  V8_EXPORT_PRIVATE void set_code(bool is_latin1, Handle<Code> code);
  // Bytecode for the given subject encoding (see bytecode_index()).
  // This could be a Smi kUninitializedValue or ByteArray.
  V8_EXPORT_PRIVATE Object bytecode(bool is_latin1) const;
  // Sets the bytecode as well as initializing trampoline slots to the
  // RegExpInterpreterTrampoline.
  void set_bytecode_and_trampoline(Isolate* isolate,
                                   Handle<ByteArray> bytecode);
  // NOTE(review): presumably the value stored at
  // kIrregexpMaxRegisterCountIndex, i.e. the maximal number of registers used
  // by either Latin1 or UC16 -- confirm against the inline definition.
  inline int max_register_count() const;
  // Number of captures (without the match itself).
  inline int capture_count() const;
  // NOTE(review): presumably the kIrregexpCaptureNameMapIndex slot, which maps
  // names of named capture groups (at indices 2i) to their 1-based capture
  // group indices (at indices 2i + 1) -- confirm against the inline definition.
  inline Object capture_name_map();
  inline void set_capture_name_map(Handle<FixedArray> capture_name_map);
  // NOTE(review): presumably the kIrregexpBacktrackLimit slot, i.e. either the
  // backtracking limit or kNoBacktrackLimit -- confirm against the definition.
  uint32_t backtrack_limit() const;

85 86 87 88 89 90 91 92 93 94
  // Conversions between the JSRegExp flag types and the RegExpFlag(s) types.
  // Each one is a plain numeric cast.

  // Single RegExpFlag -> single JSRegExp Flag.
  static constexpr Flag AsJSRegExpFlag(RegExpFlag f) {
    const Flag converted = static_cast<Flag>(f);
    return converted;
  }
  // RegExpFlags set -> JSRegExp Flags set, going through the integer bits.
  static constexpr Flags AsJSRegExpFlags(RegExpFlags f) {
    const int bits = static_cast<int>(f);
    return Flags{bits};
  }
  // JSRegExp Flags set -> RegExpFlags set, going through the integer bits.
  static constexpr RegExpFlags AsRegExpFlags(Flags f) {
    const int bits = static_cast<int>(f);
    return RegExpFlags{bits};
  }

95 96
  // Maps a flag character to its RegExpFlag.
  //
  // Returns an empty Optional when TryRegExpFlagFromChar yields no flag for
  // `c`, and also when the flag is RegExpFlag::kLinear while
  // FLAG_enable_experimental_regexp_engine is off. Otherwise returns the flag
  // produced by TryRegExpFlagFromChar.
  static base::Optional<RegExpFlag> FlagFromChar(char c) {
    base::Optional<RegExpFlag> f = TryRegExpFlagFromChar(c);
    if (!f.has_value()) return f;
    // The linear flag is only accepted together with the experimental engine.
    if (f.value() == RegExpFlag::kLinear &&
        !FLAG_enable_experimental_regexp_engine) {
      return {};
    }
    return f;
  }

105
  // Compile-time checks that the JSRegExp flag constants are numerically
  // identical to both the public v8::RegExp flags and the RegExpFlag values.
  // The cast-based conversions (AsJSRegExpFlag and friends) depend on this.
  STATIC_ASSERT(static_cast<int>(kNone) == v8::RegExp::kNone);
  // One pair of checks per entry of REGEXP_FLAG_LIST.
#define V(_, Camel, ...)                                             \
  STATIC_ASSERT(static_cast<int>(k##Camel) == v8::RegExp::k##Camel); \
  STATIC_ASSERT(static_cast<int>(k##Camel) ==                        \
                static_cast<int>(RegExpFlag::k##Camel));
  REGEXP_FLAG_LIST(V)
#undef V
  // The number of flags has to agree across all three definitions as well.
  STATIC_ASSERT(kFlagCount == v8::RegExp::kFlagCount);
  STATIC_ASSERT(kFlagCount == kRegExpFlagCount);
114

115 116
  static base::Optional<Flags> FlagsFromString(Isolate* isolate,
                                               Handle<String> flags);
117

118 119 120
  V8_EXPORT_PRIVATE static Handle<String> StringFromFlags(Isolate* isolate,
                                                          Flags flags);

121 122
  inline String EscapedPattern();

123
  // Tier-up support. The data array keeps a tick counter at
  // kIrregexpTicksUntilTierUpIndex: it starts at the value of the tier-up
  // ticks flag and is decremented on each execution of the bytecode, so that
  // tier-up happens once the ticks reach zero.
  // NOTE(review): the exact contract of each method below is defined out of
  // line -- confirm there before relying on more than the names.
  bool CanTierUp();
  bool MarkedForTierUp();
  void ResetLastTierUpTick();
  void TierUpTick();
  void MarkTierUpForNextExec();

  // Queries and maintenance of the compiled code / bytecode slots.
  bool ShouldProduceBytecode();
  inline bool HasCompiledCode() const;
  inline void DiscardCompiledCodeForSerialization();

133
  // True for the regexp types that support captures: IRREGEXP and
  // EXPERIMENTAL. False for NOT_COMPILED and ATOM.
  static constexpr bool TypeSupportsCaptures(Type t) {
    return t == IRREGEXP || t == EXPERIMENTAL;
  }
136

137
  // Each capture (including the match itself) needs two registers.
138 139 140
  // Number of registers needed for `count` captures: two per capture, plus two
  // more for the match itself.
  static constexpr int RegistersForCaptureCount(int count) {
    // The match itself is counted as one additional capture.
    const int captures_including_match = count + 1;
    return captures_including_match * 2;
  }
141

142 143 144 145 146 147 148
  // Data array index of the code slot for the given subject encoding: the
  // Latin1 slot when `is_latin1` is true, the UC16 slot otherwise.
  static constexpr int code_index(bool is_latin1) {
    if (is_latin1) return kIrregexpLatin1CodeIndex;
    return kIrregexpUC16CodeIndex;
  }

  // Data array index of the bytecode slot for the given subject encoding: the
  // Latin1 slot when `is_latin1` is true, the UC16 slot otherwise.
  static constexpr int bytecode_index(bool is_latin1) {
    return is_latin1 ? kIrregexpLatin1BytecodeIndex
                     : kIrregexpUC16BytecodeIndex;
  }

  // Dispatched behavior.
152
  DECL_PRINTER(JSRegExp)
153 154
  DECL_VERIFIER(JSRegExp)

Irina Yatsenko's avatar
Irina Yatsenko committed
155 156
  /* This is already an in-object field. */
  // TODO(v8:8944): improve handling of in-object fields
157
  static constexpr int kLastIndexOffset = kHeaderSize;
158

159 160 161
  // The initial value of the last_index field on a new JSRegExp instance.
  static constexpr int kInitialLastIndexValue = 0;

162
  // Indices in the data array.
  // The first three slots are common to every regexp type; everything from
  // kFirstTypeSpecificIndex on depends on the tag.
  static constexpr int kTagIndex = 0;
  static constexpr int kSourceIndex = kTagIndex + 1;
  static constexpr int kFlagsIndex = kSourceIndex + 1;
  static constexpr int kFirstTypeSpecificIndex = kFlagsIndex + 1;
  static constexpr int kMinDataArrayLength = kFirstTypeSpecificIndex;

  // The data fields are used in different ways depending on the
  // value of the tag.
  // Atom regexps (literal strings).
  static constexpr int kAtomPatternIndex = kFirstTypeSpecificIndex;
  static constexpr int kAtomDataSize = kAtomPatternIndex + 1;

  // Irregexp regexps. Note that these slots start at the same index as the
  // atom slots above; which interpretation applies is decided by the tag.
  // A Code object or a Smi marker value equal to kUninitializedValue.
  static constexpr int kIrregexpLatin1CodeIndex = kFirstTypeSpecificIndex;
  static constexpr int kIrregexpUC16CodeIndex = kIrregexpLatin1CodeIndex + 1;
  // A ByteArray object or a Smi marker value equal to kUninitializedValue.
  static constexpr int kIrregexpLatin1BytecodeIndex =
      kIrregexpUC16CodeIndex + 1;
  static constexpr int kIrregexpUC16BytecodeIndex =
      kIrregexpLatin1BytecodeIndex + 1;
  // Maximal number of registers used by either Latin1 or UC16.
  // Only used to check that there is enough stack space
  static constexpr int kIrregexpMaxRegisterCountIndex =
      kIrregexpUC16BytecodeIndex + 1;
  // Number of captures in the compiled regexp.
  static constexpr int kIrregexpCaptureCountIndex =
      kIrregexpMaxRegisterCountIndex + 1;
  // Maps names of named capture groups (at indices 2i) to their corresponding
  // (1-based) capture group indices (at indices 2i + 1).
  static constexpr int kIrregexpCaptureNameMapIndex =
      kIrregexpCaptureCountIndex + 1;
  // Tier-up ticks are set to the value of the tier-up ticks flag. The value is
  // decremented on each execution of the bytecode, so that the tier-up
  // happens once the ticks reach zero.
  // This value is ignored if the regexp-tier-up flag isn't turned on.
  static constexpr int kIrregexpTicksUntilTierUpIndex =
      kIrregexpCaptureNameMapIndex + 1;
  // A smi containing either the backtracking limit or kNoBacktrackLimit.
  // TODO(jgruber): If needed, this limit could be packed into other fields
  // above to save space.
  static constexpr int kIrregexpBacktrackLimit =
      kIrregexpTicksUntilTierUpIndex + 1;
  static constexpr int kIrregexpDataSize = kIrregexpBacktrackLimit + 1;

  // TODO(mbid,v8:10765): At the moment the EXPERIMENTAL data array conforms
  // to the format of an IRREGEXP data array, with most fields set to some
  // default/uninitialized value. This is because EXPERIMENTAL and IRREGEXP
  // regexps take the same code path in `RegExpExecInternal`, which reads off
  // various fields from the data array. `RegExpExecInternal` should probably
  // distinguish between EXPERIMENTAL and IRREGEXP, and then we can get rid of
  // all the IRREGEXP only fields.
  static constexpr int kExperimentalDataSize = kIrregexpDataSize;
215

216
  // In-object fields.
217 218
  static constexpr int kLastIndexFieldIndex = 0;
  static constexpr int kInObjectFieldCount = 1;
219

220
  // The actual object size including in-object fields.
221 222 223
  static constexpr int Size() {
    return kHeaderSize + kInObjectFieldCount * kTaggedSize;
  }
224

225
  // Descriptor array index to important methods in the prototype.
  // NOTE(review): these are hard-coded positions; presumably they have to
  // match the order in which the prototype's properties are installed --
  // confirm against the code that sets up the RegExp prototype.
  static constexpr int kExecFunctionDescriptorIndex = 1;
  static constexpr int kSymbolMatchFunctionDescriptorIndex = 14;
  static constexpr int kSymbolMatchAllFunctionDescriptorIndex = 15;
  static constexpr int kSymbolReplaceFunctionDescriptorIndex = 16;
  static constexpr int kSymbolSearchFunctionDescriptorIndex = 17;
  static constexpr int kSymbolSplitFunctionDescriptorIndex = 18;
232

233
  // The uninitialized value for a regexp code object.
  static constexpr int kUninitializedValue = -1;

  // If the backtrack limit is set to this marker value, no limit is applied.
  static constexpr uint32_t kNoBacktrackLimit = 0;

  // The heuristic value for the length of the subject string for which we
  // tier-up to the compiler immediately, instead of using the interpreter.
  static constexpr int kTierUpForSubjectLengthValue = 1000;

  // Maximum number of captures allowed.
  static constexpr int kMaxCaptures = 1 << 16;

 private:
  // Raw read/write of a single data array slot; `index` is one of the k*Index
  // constants of this class.
  // NOTE(review): the definitions are not in this header -- confirm whether
  // they check the regexp type or the index range.
  inline Object DataAt(int index) const;
  inline void SetDataAt(int index, Object value);

250
  // NOTE(review): presumably declares the constructors for this
  // Torque-generated class; the macro is defined outside this file -- confirm.
  TQ_OBJECT_CONSTRUCTORS(JSRegExp)
251 252 253 254 255 256 257 258 259 260
};

// NOTE(review): presumably defines the bitwise operators for combining
// JSRegExp::Flags values; the macro is defined outside this file -- confirm.
DEFINE_OPERATORS_FOR_FLAGS(JSRegExp::Flags)

// JSRegExpResult is just a JSArray with a specific initial map.
// This initial map adds in-object properties for "index" and "input"
// properties, as assigned by RegExp.prototype.exec, which allows
// faster creation of RegExp exec results.
// This class just holds constants used when creating the result.
// After creation the result must be treated as a JSArray in all regards.
261 262
class JSRegExpResult
    : public TorqueGeneratedJSRegExpResult<JSRegExpResult, JSArray> {
 public:
  // TODO(joshualitt): We would like to add printers and verifiers to
  // JSRegExpResult, and maybe JSRegExpResultIndices, but both have the same
  // instance type as JSArray.

  // Indices of in-object properties.
  static constexpr int kIndexIndex = 0;
  static constexpr int kInputIndex = 1;
  static constexpr int kGroupsIndex = 2;

  // Private internal only fields.
  static constexpr int kNamesIndex = 3;
  static constexpr int kRegExpInputIndex = 4;
  static constexpr int kRegExpLastIndex = 5;
  // Total number of in-object properties, i.e. the public and the internal
  // ones listed above.
  static constexpr int kInObjectPropertyCount = 6;

  // Slot of the native context that holds the initial map for these results.
  static constexpr int kMapIndexInContext = Context::REGEXP_RESULT_MAP_INDEX;

  TQ_OBJECT_CONSTRUCTORS(JSRegExpResult)
};

284 285 286
// A JSRegExpResult with one additional in-object property, placed right
// after the properties inherited from JSRegExpResult.
class JSRegExpResultWithIndices
    : public TorqueGeneratedJSRegExpResultWithIndices<JSRegExpResultWithIndices,
                                                      JSRegExpResult> {
 public:
  // kIndicesIndex below is hard-coded to 6, so it is only correct as long as
  // JSRegExpResult has exactly six in-object properties. Fail the build if
  // that ever changes.
  static_assert(
      JSRegExpResult::kInObjectPropertyCount == 6,
      "JSRegExpResultWithIndices must be a subclass of JSRegExpResult");
  static constexpr int kIndicesIndex = 6;
  static constexpr int kInObjectPropertyCount = 7;

  TQ_OBJECT_CONSTRUCTORS(JSRegExpResultWithIndices)
};

297 298 299 300 301 302
// JSRegExpResultIndices is just a JSArray with a specific initial map.
// This initial map adds an in-object property for the "groups"
// property, as assigned by RegExp.prototype.exec, which allows
// faster creation of RegExp exec results.
// This class just holds constants used when creating the result.
// After creation the result must be treated as a JSArray in all regards.
303 304 305
class JSRegExpResultIndices
    : public TorqueGeneratedJSRegExpResultIndices<JSRegExpResultIndices,
                                                  JSArray> {
 public:
  // Builds the indices object from `match_info`.
  // NOTE(review): `maybe_names` presumably carries the capture group names
  // when the regexp has named groups -- confirm which value stands for
  // "no names" in the out-of-line definition.
  static Handle<JSRegExpResultIndices> BuildIndices(
      Isolate* isolate, Handle<RegExpMatchInfo> match_info,
      Handle<Object> maybe_names);

  // Indices of in-object properties.
  static constexpr int kGroupsIndex = 0;
  static constexpr int kInObjectPropertyCount = 1;

  // Descriptor index of groups.
  static constexpr int kGroupsDescriptorIndex = 1;

  TQ_OBJECT_CONSTRUCTORS(JSRegExpResultIndices)
};

}  // namespace internal
}  // namespace v8

#include "src/objects/object-macros-undef.h"

#endif  // V8_OBJECTS_JS_REGEXP_H_