experimental.cc 11.9 KB
Newer Older
1 2 3 4 5 6
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/regexp/experimental/experimental.h"

7
#include "src/common/assert-scope.h"
8
#include "src/objects/js-regexp-inl.h"
9 10
#include "src/regexp/experimental/experimental-compiler.h"
#include "src/regexp/experimental/experimental-interpreter.h"
11 12
#include "src/regexp/regexp-parser.h"
#include "src/utils/ostreams.h"
13 14 15 16

namespace v8 {
namespace internal {

17
bool ExperimentalRegExp::CanBeHandled(RegExpTree* tree, RegExpFlags flags,
18
                                      int capture_count) {
19 20
  DCHECK(FLAG_enable_experimental_regexp_engine ||
         FLAG_enable_experimental_regexp_engine_on_excessive_backtracks);
21
  return ExperimentalRegExpCompiler::CanBeHandled(tree, flags, capture_count);
22 23
}

24
void ExperimentalRegExp::Initialize(Isolate* isolate, Handle<JSRegExp> re,
25 26
                                    Handle<String> source, RegExpFlags flags,
                                    int capture_count) {
27
  DCHECK(FLAG_enable_experimental_regexp_engine);
28
  if (FLAG_trace_experimental_regexp_engine) {
29 30
    StdoutStream{} << "Initializing experimental regexp " << *source
                   << std::endl;
31 32
  }

33 34
  isolate->factory()->SetRegExpExperimentalData(
      re, source, JSRegExp::AsJSRegExpFlags(flags), capture_count);
35 36
}

37 38
bool ExperimentalRegExp::IsCompiled(Handle<JSRegExp> re, Isolate* isolate) {
  DCHECK(FLAG_enable_experimental_regexp_engine);
39
  DCHECK_EQ(re->type_tag(), JSRegExp::EXPERIMENTAL);
40 41 42 43
#ifdef VERIFY_HEAP
  re->JSRegExpVerify(isolate);
#endif

44 45
  static constexpr bool kIsLatin1 = true;
  return re->bytecode(kIsLatin1) != Smi::FromInt(JSRegExp::kUninitializedValue);
46 47
}

48
template <class T>
49
Handle<ByteArray> VectorToByteArray(Isolate* isolate, base::Vector<T> data) {
50
  STATIC_ASSERT(std::is_trivial<T>::value);
51

52 53
  int byte_length = sizeof(T) * data.length();
  Handle<ByteArray> byte_array = isolate->factory()->NewByteArray(byte_length);
54
  DisallowGarbageCollection no_gc;
55 56 57 58 59
  MemCopy(byte_array->GetDataStartAddress(), data.begin(), byte_length);
  return byte_array;
}

namespace {
60

61 62 63 64 65 66 67 68
struct CompilationResult {
  Handle<ByteArray> bytecode;
  Handle<FixedArray> capture_name_map;
};

// Compiles source pattern, but doesn't change the regexp object.
base::Optional<CompilationResult> CompileImpl(Isolate* isolate,
                                              Handle<JSRegExp> regexp) {
69 70
  Zone zone(isolate->allocator(), ZONE_NAME);

71
  Handle<String> source(regexp->source(), isolate);
72

73 74 75
  // Parse and compile the regexp source.
  RegExpCompileData parse_result;
  DCHECK(!isolate->has_pending_exception());
76

77
  bool parse_success = RegExpParser::ParseRegExpFromHeapString(
78
      isolate, &zone, source, JSRegExp::AsRegExpFlags(regexp->flags()),
79
      &parse_result);
80 81 82
  if (!parse_success) {
    // The pattern was already parsed successfully during initialization, so
    // the only way parsing can fail now is because of stack overflow.
83 84 85 86
    DCHECK_EQ(parse_result.error, RegExpError::kStackOverflow);
    USE(RegExp::ThrowRegExpException(isolate, regexp, source,
                                     parse_result.error));
    return base::nullopt;
87
  }
88

89
  ZoneList<RegExpInstruction> bytecode = ExperimentalRegExpCompiler::Compile(
90
      parse_result.tree, JSRegExp::AsRegExpFlags(regexp->flags()), &zone);
91

92 93
  CompilationResult result;
  result.bytecode = VectorToByteArray(isolate, bytecode.ToVector());
94 95
  result.capture_name_map =
      RegExp::CreateCaptureNameMap(isolate, parse_result.named_captures);
96 97 98 99 100 101
  return result;
}

}  // namespace

bool ExperimentalRegExp::Compile(Isolate* isolate, Handle<JSRegExp> re) {
102
  DCHECK(FLAG_enable_experimental_regexp_engine);
103
  DCHECK_EQ(re->type_tag(), JSRegExp::EXPERIMENTAL);
104 105 106 107
#ifdef VERIFY_HEAP
  re->JSRegExpVerify(isolate);
#endif

108
  Handle<String> source(re->source(), isolate);
109 110
  if (FLAG_trace_experimental_regexp_engine) {
    StdoutStream{} << "Compiling experimental regexp " << *source << std::endl;
111 112
  }

113 114 115 116 117 118
  base::Optional<CompilationResult> compilation_result =
      CompileImpl(isolate, re);
  if (!compilation_result.has_value()) {
    DCHECK(isolate->has_pending_exception());
    return false;
  }
119

120 121
  re->set_bytecode_and_trampoline(isolate, compilation_result->bytecode);
  re->set_capture_name_map(compilation_result->capture_name_map);
122 123

  return true;
124 125
}

126
base::Vector<RegExpInstruction> AsInstructionSequence(ByteArray raw_bytes) {
127 128 129 130
  RegExpInstruction* inst_begin =
      reinterpret_cast<RegExpInstruction*>(raw_bytes.GetDataStartAddress());
  int inst_num = raw_bytes.length() / sizeof(RegExpInstruction);
  DCHECK_EQ(sizeof(RegExpInstruction) * inst_num, raw_bytes.length());
131
  return base::Vector<RegExpInstruction>(inst_begin, inst_num);
132
}
133

134 135 136 137 138 139
namespace {

int32_t ExecRawImpl(Isolate* isolate, RegExp::CallOrigin call_origin,
                    ByteArray bytecode, String subject, int capture_count,
                    int32_t* output_registers, int32_t output_register_count,
                    int32_t subject_index) {
140
  DisallowGarbageCollection no_gc;
141 142
  // TODO(cbruni): remove once gcmole is fixed.
  DisableGCMole no_gc_mole;
143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160

  int register_count_per_match =
      JSRegExp::RegistersForCaptureCount(capture_count);

  int32_t result;
  do {
    DCHECK(subject.IsFlat());
    Zone zone(isolate->allocator(), ZONE_NAME);
    result = ExperimentalRegExpInterpreter::FindMatches(
        isolate, call_origin, bytecode, register_count_per_match, subject,
        subject_index, output_registers, output_register_count, &zone);
  } while (result == RegExp::kInternalRegExpRetry &&
           call_origin == RegExp::kFromRuntime);
  return result;
}

}  // namespace

161
// Returns the number of matches.
162 163 164 165
int32_t ExperimentalRegExp::ExecRaw(Isolate* isolate,
                                    RegExp::CallOrigin call_origin,
                                    JSRegExp regexp, String subject,
                                    int32_t* output_registers,
166 167 168
                                    int32_t output_register_count,
                                    int32_t subject_index) {
  DCHECK(FLAG_enable_experimental_regexp_engine);
169
  DisallowGarbageCollection no_gc;
170 171

  if (FLAG_trace_experimental_regexp_engine) {
172 173
    StdoutStream{} << "Executing experimental regexp " << regexp.source()
                   << std::endl;
174 175
  }

176 177
  static constexpr bool kIsLatin1 = true;
  ByteArray bytecode = ByteArray::cast(regexp.bytecode(kIsLatin1));
178

179
  return ExecRawImpl(isolate, call_origin, bytecode, subject,
180
                     regexp.capture_count(), output_registers,
181
                     output_register_count, subject_index);
182 183 184 185 186
}

int32_t ExperimentalRegExp::MatchForCallFromJs(
    Address subject, int32_t start_position, Address input_start,
    Address input_end, int* output_registers, int32_t output_register_count,
187
    RegExp::CallOrigin call_origin, Isolate* isolate, Address regexp) {
188
  DCHECK(FLAG_enable_experimental_regexp_engine);
189 190 191 192
  DCHECK_NOT_NULL(isolate);
  DCHECK_NOT_NULL(output_registers);
  DCHECK(call_origin == RegExp::CallOrigin::kFromJs);

193
  DisallowGarbageCollection no_gc;
194 195 196 197 198 199 200 201
  DisallowJavascriptExecution no_js(isolate);
  DisallowHandleAllocation no_handles;
  DisallowHandleDereference no_deref;

  String subject_string = String::cast(Object(subject));

  JSRegExp regexp_obj = JSRegExp::cast(Object(regexp));

202 203
  return ExecRaw(isolate, RegExp::kFromJs, regexp_obj, subject_string,
                 output_registers, output_register_count, start_position);
204 205 206 207
}

MaybeHandle<Object> ExperimentalRegExp::Exec(
    Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
208 209
    int subject_index, Handle<RegExpMatchInfo> last_match_info,
    RegExp::ExecQuirks exec_quirks) {
210
  DCHECK(FLAG_enable_experimental_regexp_engine);
211
  DCHECK_EQ(regexp->type_tag(), JSRegExp::EXPERIMENTAL);
212 213 214 215
#ifdef VERIFY_HEAP
  regexp->JSRegExpVerify(isolate);
#endif

216 217 218
  if (!IsCompiled(regexp, isolate) && !Compile(isolate, regexp)) {
    DCHECK(isolate->has_pending_exception());
    return MaybeHandle<Object>();
219 220
  }

221 222
  DCHECK(IsCompiled(regexp, isolate));

223 224
  subject = String::Flatten(isolate, subject);

225
  int capture_count = regexp->capture_count();
226 227 228 229 230 231 232 233 234 235
  int output_register_count = JSRegExp::RegistersForCaptureCount(capture_count);

  int32_t* output_registers;
  std::unique_ptr<int32_t[]> output_registers_release;
  if (output_register_count <= Isolate::kJSRegexpStaticOffsetsVectorSize) {
    output_registers = isolate->jsregexp_static_offsets_vector();
  } else {
    output_registers = NewArray<int32_t>(output_register_count);
    output_registers_release.reset(output_registers);
  }
236

237 238 239
  int num_matches =
      ExecRaw(isolate, RegExp::kFromRuntime, *regexp, *subject,
              output_registers, output_register_count, subject_index);
240

241
  if (num_matches > 0) {
242
    DCHECK_EQ(num_matches, 1);
243 244 245 246 247
    if (exec_quirks == RegExp::ExecQuirks::kTreatMatchAtEndAsFailure) {
      if (output_registers[0] >= subject->length()) {
        return isolate->factory()->null_value();
      }
    }
248 249
    return RegExp::SetLastMatchInfo(isolate, last_match_info, subject,
                                    capture_count, output_registers);
250 251 252 253 254 255
  } else if (num_matches == 0) {
    return isolate->factory()->null_value();
  } else {
    DCHECK_LT(num_matches, 0);
    DCHECK(isolate->has_pending_exception());
    return MaybeHandle<Object>();
256 257 258
  }
}

259 260 261
int32_t ExperimentalRegExp::OneshotExecRaw(Isolate* isolate,
                                           Handle<JSRegExp> regexp,
                                           Handle<String> subject,
262 263 264 265 266 267 268
                                           int32_t* output_registers,
                                           int32_t output_register_count,
                                           int32_t subject_index) {
  DCHECK(FLAG_enable_experimental_regexp_engine_on_excessive_backtracks);

  if (FLAG_trace_experimental_regexp_engine) {
    StdoutStream{} << "Experimental execution (oneshot) of regexp "
269
                   << regexp->source() << std::endl;
270 271 272
  }

  base::Optional<CompilationResult> compilation_result =
273
      CompileImpl(isolate, regexp);
274 275
  if (!compilation_result.has_value()) return RegExp::kInternalRegExpException;

276
  DisallowGarbageCollection no_gc;
277
  return ExecRawImpl(isolate, RegExp::kFromRuntime,
278
                     *compilation_result->bytecode, *subject,
279
                     regexp->capture_count(), output_registers,
280 281 282 283 284
                     output_register_count, subject_index);
}

MaybeHandle<Object> ExperimentalRegExp::OneshotExec(
    Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject,
285 286
    int subject_index, Handle<RegExpMatchInfo> last_match_info,
    RegExp::ExecQuirks exec_quirks) {
287
  DCHECK(FLAG_enable_experimental_regexp_engine_on_excessive_backtracks);
288
  DCHECK_NE(regexp->type_tag(), JSRegExp::NOT_COMPILED);
289

290
  int capture_count = regexp->capture_count();
291 292 293 294 295 296 297 298 299 300 301
  int output_register_count = JSRegExp::RegistersForCaptureCount(capture_count);

  int32_t* output_registers;
  std::unique_ptr<int32_t[]> output_registers_release;
  if (output_register_count <= Isolate::kJSRegexpStaticOffsetsVectorSize) {
    output_registers = isolate->jsregexp_static_offsets_vector();
  } else {
    output_registers = NewArray<int32_t>(output_register_count);
    output_registers_release.reset(output_registers);
  }

302
  int num_matches = OneshotExecRaw(isolate, regexp, subject, output_registers,
303 304 305 306
                                   output_register_count, subject_index);

  if (num_matches > 0) {
    DCHECK_EQ(num_matches, 1);
307 308 309 310 311
    if (exec_quirks == RegExp::ExecQuirks::kTreatMatchAtEndAsFailure) {
      if (output_registers[0] >= subject->length()) {
        return isolate->factory()->null_value();
      }
    }
312 313 314 315 316 317 318 319 320 321 322
    return RegExp::SetLastMatchInfo(isolate, last_match_info, subject,
                                    capture_count, output_registers);
  } else if (num_matches == 0) {
    return isolate->factory()->null_value();
  } else {
    DCHECK_LT(num_matches, 0);
    DCHECK(isolate->has_pending_exception());
    return MaybeHandle<Object>();
  }
}

323 324
}  // namespace internal
}  // namespace v8