js-regexp.h 7.98 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef V8_OBJECTS_JS_REGEXP_H_
#define V8_OBJECTS_JS_REGEXP_H_

#include "src/objects/js-array.h"

// Has to be the last include (doesn't have include guards):
#include "src/objects/object-macros.h"

namespace v8 {
namespace internal {

// Regular expressions
// The regular expression holds a single reference to a FixedArray in
// the kDataOffset field.
// The FixedArray contains the following data:
// - tag : type of regexp implementation (not compiled yet, atom or irregexp)
// - reference to the original source string
// - reference to the original flag string
// If it is an atom regexp
// - a reference to a literal string to search for
// If it is an irregexp regexp:
// - a reference to code for Latin1 inputs (bytecode or compiled), or a smi
// used for tracking the last usage (used for regexp code flushing).
// - a reference to code for UC16 inputs (bytecode or compiled), or a smi
// used for tracking the last usage (used for regexp code flushing).
// - max number of registers used by irregexp implementations.
// - number of capture registers (output values) of the regexp.
// - a map from the names of named capture groups to their capture group
// indices.
class JSRegExp : public JSObject {
 public:
  // Meaning of Type:
  // NOT_COMPILED: Initial value. No data has been stored in the JSRegExp yet.
  // ATOM: A simple string to match against using an indexOf operation.
  // IRREGEXP: Compiled with Irregexp.
  enum Type { NOT_COMPILED, ATOM, IRREGEXP };
39 40 41 42 43 44 45 46 47 48
  struct FlagShiftBit {
    static const int kGlobal = 0;
    static const int kIgnoreCase = 1;
    static const int kMultiline = 2;
    static const int kSticky = 3;
    static const int kUnicode = 4;
    static const int kDotAll = 5;
    static const int kInvalid = 7;
  };
  enum Flag : uint8_t {
49
    kNone = 0,
50 51 52 53 54 55
    kGlobal = 1 << FlagShiftBit::kGlobal,
    kIgnoreCase = 1 << FlagShiftBit::kIgnoreCase,
    kMultiline = 1 << FlagShiftBit::kMultiline,
    kSticky = 1 << FlagShiftBit::kSticky,
    kUnicode = 1 << FlagShiftBit::kUnicode,
    kDotAll = 1 << FlagShiftBit::kDotAll,
56
    // Update FlagCount when adding new flags.
57
    kInvalid = 1 << FlagShiftBit::kInvalid,  // Not included in FlagCount.
58 59
  };
  typedef base::Flags<Flag> Flags;
60
  static constexpr int FlagCount() { return 6; }
61

62 63 64
  static int FlagShiftBits(Flag flag) {
    switch (flag) {
      case kGlobal:
65
        return FlagShiftBit::kGlobal;
66
      case kIgnoreCase:
67
        return FlagShiftBit::kIgnoreCase;
68
      case kMultiline:
69
        return FlagShiftBit::kMultiline;
70
      case kSticky:
71
        return FlagShiftBit::kSticky;
72
      case kUnicode:
73
        return FlagShiftBit::kUnicode;
74
      case kDotAll:
75
        return FlagShiftBit::kDotAll;
76 77 78 79 80 81
      default:
        STATIC_ASSERT(FlagCount() == 6);
        UNREACHABLE();
    }
  }

82 83 84 85 86
  DECL_ACCESSORS(data, Object)
  DECL_ACCESSORS(flags, Object)
  DECL_ACCESSORS(last_index, Object)
  DECL_ACCESSORS(source, Object)

87 88
  V8_EXPORT_PRIVATE static MaybeHandle<JSRegExp> New(Isolate* isolate,
                                                     Handle<String> source,
89 90 91 92 93 94 95 96 97
                                                     Flags flags);
  static Handle<JSRegExp> Copy(Handle<JSRegExp> regexp);

  static MaybeHandle<JSRegExp> Initialize(Handle<JSRegExp> regexp,
                                          Handle<String> source, Flags flags);
  static MaybeHandle<JSRegExp> Initialize(Handle<JSRegExp> regexp,
                                          Handle<String> source,
                                          Handle<String> flags_string);

98
  inline Type TypeTag() const;
99 100 101
  // Number of captures (without the match itself).
  inline int CaptureCount();
  inline Flags GetFlags();
102
  inline String Pattern();
103 104
  inline Object CaptureNameMap();
  inline Object DataAt(int index) const;
105
  // Set implementation data after the object has been prepared.
106
  inline void SetDataAt(int index, Object value);
107 108 109 110 111 112 113 114 115

  static int code_index(bool is_latin1) {
    if (is_latin1) {
      return kIrregexpLatin1CodeIndex;
    } else {
      return kIrregexpUC16CodeIndex;
    }
  }

116 117 118
  inline bool HasCompiledCode() const;
  inline void DiscardCompiledCodeForSerialization();

119
  DECL_CAST(JSRegExp)
120 121

  // Dispatched behavior.
122
  DECL_PRINTER(JSRegExp)
123 124
  DECL_VERIFIER(JSRegExp)

125 126 127 128 129 130 131 132 133 134 135 136
// Layout description.
#define JS_REGEXP_FIELDS(V)                 \
  V(kDataOffset, kTaggedSize)               \
  V(kSourceOffset, kTaggedSize)             \
  V(kFlagsOffset, kTaggedSize)              \
  /* Total size. */                         \
  V(kSize, 0)                               \
  /* This is already an in-object field. */ \
  V(kLastIndexOffset, 0)

  DEFINE_FIELD_OFFSET_CONSTANTS(JSObject::kHeaderSize, JS_REGEXP_FIELDS)
#undef JS_REGEXP_FIELDS
137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172

  // Indices in the data array.
  static const int kTagIndex = 0;
  static const int kSourceIndex = kTagIndex + 1;
  static const int kFlagsIndex = kSourceIndex + 1;
  static const int kDataIndex = kFlagsIndex + 1;
  // The data fields are used in different ways depending on the
  // value of the tag.
  // Atom regexps (literal strings).
  static const int kAtomPatternIndex = kDataIndex;

  static const int kAtomDataSize = kAtomPatternIndex + 1;

  // Irregexp compiled code or bytecode for Latin1. If compilation
  // fails, this fields hold an exception object that should be
  // thrown if the regexp is used again.
  static const int kIrregexpLatin1CodeIndex = kDataIndex;
  // Irregexp compiled code or bytecode for UC16.  If compilation
  // fails, this fields hold an exception object that should be
  // thrown if the regexp is used again.
  static const int kIrregexpUC16CodeIndex = kDataIndex + 1;
  // Maximal number of registers used by either Latin1 or UC16.
  // Only used to check that there is enough stack space
  static const int kIrregexpMaxRegisterCountIndex = kDataIndex + 2;
  // Number of captures in the compiled regexp.
  static const int kIrregexpCaptureCountIndex = kDataIndex + 3;
  // Maps names of named capture groups (at indices 2i) to their corresponding
  // (1-based) capture group indices (at indices 2i + 1).
  static const int kIrregexpCaptureNameMapIndex = kDataIndex + 4;

  static const int kIrregexpDataSize = kIrregexpCaptureNameMapIndex + 1;

  // In-object fields.
  static const int kLastIndexFieldIndex = 0;
  static const int kInObjectFieldCount = 1;

173
  // Descriptor array index to important methods in the prototype.
174
  static const int kExecFunctionDescriptorIndex = 1;
175 176 177 178 179
  static const int kSymbolMatchFunctionDescriptorIndex = 13;
  static const int kSymbolReplaceFunctionDescriptorIndex = 14;
  static const int kSymbolSearchFunctionDescriptorIndex = 15;
  static const int kSymbolSplitFunctionDescriptorIndex = 16;
  static const int kSymbolMatchAllFunctionDescriptorIndex = 17;
180

181 182
  // The uninitialized value for a regexp code object.
  static const int kUninitializedValue = -1;
183 184

  OBJECT_CONSTRUCTORS(JSRegExp, JSObject)
185 186 187 188 189 190 191 192 193 194 195 196
};

// NOTE(review): presumably generates the bitwise operators for combining
// JSRegExp::Flag values into JSRegExp::Flags -- confirm against the macro
// definition.
DEFINE_OPERATORS_FOR_FLAGS(JSRegExp::Flags)

// JSRegExpResult is just a JSArray with a specific initial map.
// This initial map adds in-object properties for "index" and "input"
// properties, as assigned by RegExp.prototype.exec, which allows
// faster creation of RegExp exec results.
// This class just holds constants used when creating the result.
// After creation the result must be treated as a JSArray in all regards.
class JSRegExpResult : public JSArray {
 public:
197
// Layout description.
198
#define REG_EXP_RESULT_FIELDS(V) \
199 200 201 202
  V(kIndexOffset, kTaggedSize)   \
  V(kInputOffset, kTaggedSize)   \
  V(kGroupsOffset, kTaggedSize)  \
  /* Total size. */              \
203 204 205 206 207
  V(kSize, 0)

  DEFINE_FIELD_OFFSET_CONSTANTS(JSArray::kSize, REG_EXP_RESULT_FIELDS)
#undef REG_EXP_RESULT_FIELDS

208 209 210
  // Indices of in-object properties.
  static const int kIndexIndex = 0;
  static const int kInputIndex = 1;
211 212
  static const int kGroupsIndex = 2;
  static const int kInObjectPropertyCount = 3;
213 214 215 216 217 218 219 220 221 222 223

 private:
  DISALLOW_IMPLICIT_CONSTRUCTORS(JSRegExpResult);
};

}  // namespace internal
}  // namespace v8

#include "src/objects/object-macros-undef.h"

#endif  // V8_OBJECTS_JS_REGEXP_H_