regexp-interpreter.cc 40 KB
Newer Older
1
// Copyright 2011 the V8 project authors. All rights reserved.
2 3
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4 5 6

// A simple interpreter for the Irregexp byte code.

7
#include "src/regexp/regexp-interpreter.h"
8

9
#include "src/ast/ast.h"
10
#include "src/base/small-vector.h"
11
#include "src/logging/counters.h"
Ana Peško's avatar
Ana Peško committed
12
#include "src/objects/js-regexp-inl.h"
13
#include "src/objects/objects-inl.h"
14
#include "src/regexp/regexp-bytecodes.h"
15
#include "src/regexp/regexp-macro-assembler.h"
16
#include "src/regexp/regexp-stack.h"  // For kMaximumStackSize.
17
#include "src/regexp/regexp.h"
18
#include "src/strings/unicode.h"
19
#include "src/utils/utils.h"
20

21
#ifdef V8_INTL_SUPPORT
22
#include "unicode/uchar.h"
23
#endif  // V8_INTL_SUPPORT
24

25 26 27 28 29 30 31
// Use token threaded dispatch iff the compiler supports computed gotos and the
// build argument v8_enable_regexp_interpreter_threaded_dispatch was set.
#if V8_HAS_COMPUTED_GOTO && \
    defined(V8_ENABLE_REGEXP_INTERPRETER_THREADED_DISPATCH)
#define V8_USE_COMPUTED_GOTO 1
#endif  // V8_HAS_COMPUTED_GOTO

32 33
namespace v8 {
namespace internal {
34

35 36 37
namespace {

bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, int len,
38
                          Vector<const uc16> subject, bool unicode) {
39 40 41 42 43
  Address offset_a =
      reinterpret_cast<Address>(const_cast<uc16*>(&subject.at(from)));
  Address offset_b =
      reinterpret_cast<Address>(const_cast<uc16*>(&subject.at(current)));
  size_t length = len * kUC16Size;
44 45 46 47 48 49 50

  bool result = unicode
                    ? RegExpMacroAssembler::CaseInsensitiveCompareUnicode(
                          offset_a, offset_b, length, isolate)
                    : RegExpMacroAssembler::CaseInsensitiveCompareNonUnicode(
                          offset_a, offset_b, length, isolate);
  return result == 1;
51 52
}

53
bool BackRefMatchesNoCase(Isolate* isolate, int from, int current, int len,
54
                          Vector<const uint8_t> subject, bool unicode) {
55
  // For Latin1 characters the unicode flag makes no difference.
56 57 58 59
  for (int i = 0; i < len; i++) {
    unsigned int old_char = subject[from++];
    unsigned int new_char = subject[current++];
    if (old_char == new_char) continue;
60 61 62
    // Convert both characters to lower case.
    old_char |= 0x20;
    new_char |= 0x20;
63
    if (old_char != new_char) return false;
64 65 66 67 68
    // Not letters in the ASCII range and Latin-1 range.
    if (!(old_char - 'a' <= 'z' - 'a') &&
        !(old_char - 224 <= 254 - 224 && old_char != 247)) {
      return false;
    }
69 70 71 72
  }
  return true;
}

73
#ifdef DEBUG
74 75 76 77
void MaybeTraceInterpreter(const byte* code_base, const byte* pc,
                           int stack_depth, int current_position,
                           uint32_t current_char, int bytecode_length,
                           const char* bytecode_name) {
78
  if (FLAG_trace_regexp_bytecodes) {
79
    const bool printable = std::isprint(current_char);
80
    const char* format =
81
        printable
82 83
            ? "pc = %02x, sp = %d, curpos = %d, curchar = %08x (%c), bc = "
            : "pc = %02x, sp = %d, curpos = %d, curchar = %08x .%c., bc = ";
84
    PrintF(format, pc - code_base, stack_depth, current_position, current_char,
85 86
           printable ? current_char : '.');

87
    RegExpBytecodeDisassembleSingle(code_base, pc);
88 89
  }
}
90
#endif  // DEBUG
91

92
int32_t Load32Aligned(const byte* pc) {
93
  DCHECK_EQ(0, reinterpret_cast<intptr_t>(pc) & 3);
94
  return *reinterpret_cast<const int32_t*>(pc);
95 96
}

97 98
// TODO(jgruber): Rename to Load16AlignedUnsigned.
uint32_t Load16Aligned(const byte* pc) {
99
  DCHECK_EQ(0, reinterpret_cast<intptr_t>(pc) & 1);
100
  return *reinterpret_cast<const uint16_t*>(pc);
101 102
}

103 104 105 106 107
// Reads the signed 16-bit value stored at |pc| and returns it widened to 32
// bits (the sign is preserved). |pc| must be 2-byte aligned; this is verified
// with a DCHECK.
int32_t Load16AlignedSigned(const byte* pc) {
  DCHECK_EQ(0, reinterpret_cast<intptr_t>(pc) & 1);
  const int16_t* const halfword = reinterpret_cast<const int16_t*>(pc);
  return *halfword;
}

108 109 110 111 112 113 114 115 116 117 118 119
// Helpers to access the packed argument. Each takes the 32 bits containing
// the current bytecode, where the 8 LSB contain the bytecode and the
// remaining bits contain a packed 24-bit argument.
// TODO(jgruber): Specify signed-ness in bytecode signature declarations, and
// police restrictions during bytecode generation.

// Returns the packed argument as a signed value. The signed input is shifted
// right by BYTECODE_SHIFT; this relies on the compiler implementing >> on a
// signed operand as an arithmetic (sign-preserving) shift.
int32_t LoadPacked24Signed(int32_t bytecode_and_packed_arg) {
  const int32_t packed_arg = bytecode_and_packed_arg >> BYTECODE_SHIFT;
  return packed_arg;
}
// Returns the packed argument of |bytecode_and_packed_arg| as an unsigned
// value: the 32 bits are converted to unsigned before being shifted right by
// BYTECODE_SHIFT, so zeros are shifted in from the top.
uint32_t LoadPacked24Unsigned(int32_t bytecode_and_packed_arg) {
  const uint32_t raw_bits = static_cast<uint32_t>(bytecode_and_packed_arg);
  return raw_bits >> BYTECODE_SHIFT;
}

120
// A simple abstraction over the backtracking stack used by the interpreter.
121 122 123 124
//
// Despite the name 'backtracking' stack, it's actually used as a generic stack
// that stores both program counters (= offsets into the bytecode) and generic
// integer values.
125 126
class BacktrackStack {
 public:
127
  BacktrackStack() = default;
128

129 130 131 132
  V8_WARN_UNUSED_RESULT bool push(int v) {
    data_.emplace_back(v);
    return (static_cast<int>(data_.size()) <= kMaxSize);
  }
133 134 135 136 137 138 139 140
  int peek() const {
    DCHECK(!data_.empty());
    return data_.back();
  }
  int pop() {
    int v = peek();
    data_.pop_back();
    return v;
141 142
  }

143 144 145 146
  // The 'sp' is the index of the first empty element in the stack.
  int sp() const { return static_cast<int>(data_.size()); }
  void set_sp(int new_sp) {
    DCHECK_LE(new_sp, sp());
147
    data_.resize_no_init(new_sp);
148
  }
149 150

 private:
151 152 153 154
  // Semi-arbitrary. Should be large enough for common cases to remain in the
  // static stack-allocated backing store, but small enough not to waste space.
  static constexpr int kStaticCapacity = 64;

155 156 157 158 159
  using ValueT = int;
  base::SmallVector<ValueT, kStaticCapacity> data_;

  static constexpr int kMaxSize =
      RegExpStack::kMaximumStackSize / sizeof(ValueT);
160 161 162 163

  DISALLOW_COPY_AND_ASSIGN(BacktrackStack);
};

164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205
// The register file used while the interpreter runs. It is split into two
// regions:
//  - output registers at indices [0, output_register_count[, which hold the
//    match result as one {start,end} index pair per capture (the whole match
//    counts as implicit capture 0);
//  - internal registers at indices
//    [output_register_count, total_register_count[.
class InterpreterRegisters {
 public:
  using RegisterT = int;

  // Creates |total_register_count| registers. The first
  // |output_register_count| of them are later copied to |output_registers| by
  // CopyToOutputRegisters(); |output_registers| must be non-null.
  InterpreterRegisters(int total_register_count, RegisterT* output_registers,
                       int output_register_count)
      : registers_(total_register_count),
        output_registers_(output_registers),
        output_register_count_(output_register_count) {
    // TODO(jgruber): Use int32_t consistently for registers. Currently, CSA
    // uses int32_t while runtime uses int.
    STATIC_ASSERT(sizeof(int) == sizeof(int32_t));
    DCHECK_GE(output_register_count, 2);  // At least 2 for the match itself.
    DCHECK_GE(total_register_count, output_register_count);
    DCHECK_LE(total_register_count, RegExpMacroAssembler::kMaxRegisterCount);
    DCHECK_NOT_NULL(output_registers);

    // Setting every byte of the output region to 0xFF makes each register
    // read as -1, which signifies 'no match'.
    const size_t output_region_bytes =
        output_register_count * sizeof(RegisterT);
    std::memset(registers_.data(), -1, output_region_bytes);
  }

  // Unchecked access to the register at |index|.
  const RegisterT& operator[](size_t index) const { return registers_[index]; }
  RegisterT& operator[](size_t index) { return registers_[index]; }

  // Copies the output register region into the caller-provided buffer.
  void CopyToOutputRegisters() {
    const size_t output_region_bytes =
        output_register_count_ * sizeof(RegisterT);
    MemCopy(output_registers_, registers_.data(), output_region_bytes);
  }

 private:
  static constexpr int kStaticCapacity = 64;  // Arbitrary.
  base::SmallVector<RegisterT, kStaticCapacity> registers_;
  RegisterT* const output_registers_;
  const int output_register_count_;
};

206 207
// Throws a stack overflow on |isolate| and returns the EXCEPTION status code.
// May only be called with |call_origin| == kFromRuntime; this is enforced
// with a CHECK.
IrregexpInterpreter::Result ThrowStackOverflow(Isolate* isolate,
                                               RegExp::CallOrigin call_origin) {
  CHECK(RegExp::CallOrigin::kFromRuntime == call_origin);

  // Interpreter execution is aborted once the stack overflow has been thrown,
  // which is why allocation is permitted here even though an outer
  // DisallowHeapAllocationScope is active.
  AllowHeapAllocation allow_allocation;
  isolate->StackOverflow();
  return IrregexpInterpreter::EXCEPTION;
}

216 217 218 219 220 221 222 223 224 225 226
// Reports a stack overflow to the caller. The exception itself is only thrown
// when the call came from the runtime; for any other origin only the
// EXCEPTION status code is returned.
IrregexpInterpreter::Result MaybeThrowStackOverflow(
    Isolate* isolate, RegExp::CallOrigin call_origin) {
  if (call_origin != RegExp::CallOrigin::kFromRuntime) {
    return IrregexpInterpreter::EXCEPTION;
  }
  return ThrowStackOverflow(isolate, call_origin);
}

227 228 229 230 231 232
// Re-derives the raw (unhandlified) code and subject references from their
// handles.
//
// If the data start address of |code_array| no longer equals
// |*code_base_out|, the code array, the code base and the program counter are
// updated; the program counter keeps its offset relative to the code base.
// The subject string and its character vector are always refreshed. The
// subject string must be flat.
template <typename Char>
void UpdateCodeAndSubjectReferences(
    Isolate* isolate, Handle<ByteArray> code_array,
    Handle<String> subject_string, ByteArray* code_array_out,
    const byte** code_base_out, const byte** pc_out, String* subject_string_out,
    Vector<const Char>* subject_string_vector_out) {
  DisallowHeapAllocation no_gc;

  const byte* const current_code_base = code_array->GetDataStartAddress();
  if (current_code_base != *code_base_out) {
    // Remember where pc was relative to the old base before rebasing it.
    const intptr_t pc_offset = *pc_out - *code_base_out;
    DCHECK_GT(pc_offset, 0);
    *code_array_out = *code_array;
    *code_base_out = current_code_base;
    *pc_out = current_code_base + pc_offset;
  }

  DCHECK(subject_string->IsFlat());
  *subject_string_out = *subject_string;
  *subject_string_vector_out = subject_string->GetCharVector<Char>(no_gc);
}
247

248 249 250 251 252 253 254 255
// Checks for a stack overflow and for pending interrupts, handles them
// according to |call_origin|, and refreshes the unhandlified object
// references (code array, subject string, code base, subject vector, pc) if
// interrupts were run.
//
// Returns:
//   SUCCESS   - nothing was pending, or interrupts ran and matching can
//               continue with the (possibly updated) references.
//   EXCEPTION - the stack overflowed, or handling the interrupts produced an
//               exception.
//   RETRY     - called from JS with an interrupt pending, or the subject
//               string switched between one-byte and two-byte representation
//               while the interrupts were handled.
template <typename Char>
IrregexpInterpreter::Result HandleInterrupts(
    Isolate* isolate, RegExp::CallOrigin call_origin, ByteArray* code_array_out,
    String* subject_string_out, const byte** code_base_out,
    Vector<const Char>* subject_string_vector_out, const byte** pc_out) {
  DisallowHeapAllocation no_gc;

  StackLimitCheck check(isolate);
  const bool stack_overflowed = check.JsHasOverflowed();

  if (call_origin == RegExp::CallOrigin::kFromJs) {
    // Direct calls from JavaScript can be interrupted in two ways:
    // 1. A real stack overflow, in which case we let the caller throw the
    //    exception.
    // 2. The stack guard was used to interrupt execution for another purpose,
    //    forcing the call through the runtime system.
    if (stack_overflowed) return IrregexpInterpreter::EXCEPTION;
    if (check.InterruptRequested()) return IrregexpInterpreter::RETRY;
    return IrregexpInterpreter::SUCCESS;
  }

  DCHECK(call_origin == RegExp::CallOrigin::kFromRuntime);
  // Prepare for possible GC: keep the code and the subject reachable through
  // handles while interrupts are being processed.
  HandleScope handles(isolate);
  Handle<ByteArray> code_handle(*code_array_out, isolate);
  Handle<String> subject_handle(*subject_string_out, isolate);

  if (stack_overflowed) return ThrowStackOverflow(isolate, call_origin);
  if (!check.InterruptRequested()) return IrregexpInterpreter::SUCCESS;

  const bool was_one_byte =
      String::IsOneByteRepresentationUnderneath(*subject_string_out);
  Object result;
  {
    AllowHeapAllocation yes_gc;
    result = isolate->stack_guard()->HandleInterrupts();
  }
  if (result.IsException(isolate)) return IrregexpInterpreter::EXCEPTION;

  // If we changed between a LATIN1 and a UC16 string, we need to restart
  // regexp matching with the appropriate template instantiation of RawMatch.
  const bool is_one_byte =
      String::IsOneByteRepresentationUnderneath(*subject_handle);
  if (is_one_byte != was_one_byte) return IrregexpInterpreter::RETRY;

  UpdateCodeAndSubjectReferences(
      isolate, code_handle, subject_handle, code_array_out, code_base_out,
      pc_out, subject_string_out, subject_string_vector_out);
  return IrregexpInterpreter::SUCCESS;
}

309 310 311 312 313 314 315
// Tests whether the bit belonging to |current_char| is set in the bit table
// starting at |table|. The character is first reduced with
// RegExpMacroAssembler::kTableMask; the upper bits of the reduced value select
// the table byte and the lowest bits select the bit within that byte.
bool CheckBitInTable(const uint32_t current_char, const byte* const table) {
  const int table_mask = RegExpMacroAssembler::kTableMask;
  const int table_byte = table[(current_char & table_mask) >> kBitsPerByteLog2];
  const int bit_index = (current_char & (kBitsPerByte - 1));
  const int bit_mask = 1 << bit_index;
  return (table_byte & bit_mask) != 0;
}

316 317 318 319 320 321
// Returns true iff 0 <= index < length. |length| must not be negative.
// Converting both values to unsigned lets a single comparison also reject
// negative indices, since those wrap around to very large values.
bool IndexIsInBounds(int index, int length) {
  DCHECK_GE(length, 0);
  const uintptr_t unsigned_index = static_cast<uintptr_t>(index);
  const uintptr_t unsigned_length = static_cast<uintptr_t>(length);
  return unsigned_index < unsigned_length;
}

322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345
// Interpreter dispatch macros.
//
// All macros below operate on local variables of the function they are
// expanded in (pc, next_pc, insn, next_insn, code_base, current, ...).
// DISPATCH, ADVANCE and SET_PC_FROM_OFFSET expand to more than one statement
// and must therefore only be used inside a braced block.
//
// If computed gotos are supported by the compiler, we can get addresses to
// labels directly in C/C++. Every bytecode handler has its own label and we
// store the addresses in a dispatch table indexed by bytecode. To execute the
// next handler we simply jump (goto) directly to its address.
#if V8_USE_COMPUTED_GOTO
#define BC_LABEL(name) BC_##name:
#define DECODE()                                                   \
  do {                                                             \
    next_insn = Load32Aligned(next_pc);                            \
    next_handler_addr = dispatch_table[next_insn & BYTECODE_MASK]; \
  } while (false)
#define DISPATCH()  \
  pc = next_pc;     \
  insn = next_insn; \
  goto* next_handler_addr
// Without computed goto support, we fall back to a simple switch-based
// dispatch (A large switch statement inside a loop with a case for every
// bytecode).
#else  // V8_USE_COMPUTED_GOTO
#define BC_LABEL(name) case BC_##name:
#define DECODE() next_insn = Load32Aligned(next_pc)
#define DISPATCH()  \
  pc = next_pc;     \
  insn = next_insn; \
  goto switch_dispatch_continuation
#endif  // V8_USE_COMPUTED_GOTO

// ADVANCE/SET_PC_FROM_OFFSET are separated from DISPATCH, because ideally some
// instructions can be executed between ADVANCE/SET_PC_FROM_OFFSET and DISPATCH.
// We want those two macros as far apart as possible, because the goto in
// DISPATCH is dependent on a memory load in ADVANCE/SET_PC_FROM_OFFSET. If we
// don't hit the cache and have to fetch the next handler address from physical
// memory, instructions between ADVANCE/SET_PC_FROM_OFFSET and DISPATCH can
// potentially be executed unconditionally, reducing memory stall.
//
// ADVANCE(name) moves next_pc past the current bytecode |name|.
// SET_PC_FROM_OFFSET(offset) moves next_pc to |offset| bytes from code_base.
// The |offset| argument is parenthesized in the expansion so that arguments
// containing low-precedence operators (e.g. a conditional expression) are
// evaluated as a unit before being added to code_base.
#define ADVANCE(name)                             \
  next_pc = pc + RegExpBytecodeLength(BC_##name); \
  DECODE()
#define SET_PC_FROM_OFFSET(offset) \
  next_pc = code_base + (offset);  \
  DECODE()

// Current position mutations. SET_CURRENT_POSITION assigns |value| to
// 'current' and DCHECKs that the new position passes
// base::IsInRange(current, 0, subject.length()).
// ADVANCE_CURRENT_POSITION moves 'current' by |by| relative to its old value.
#define SET_CURRENT_POSITION(value)                        \
  do {                                                     \
    current = (value);                                     \
    DCHECK(base::IsInRange(current, 0, subject.length())); \
  } while (false)
#define ADVANCE_CURRENT_POSITION(by) SET_CURRENT_POSITION(current + (by))

// BYTECODE(name) opens the handler for bytecode |name|. In debug builds the
// handler additionally starts with a call to MaybeTraceInterpreter.
#ifdef DEBUG
#define BYTECODE(name)                                                \
  BC_LABEL(name)                                                      \
  MaybeTraceInterpreter(code_base, pc, backtrack_stack.sp(), current, \
                        current_char, RegExpBytecodeLength(BC_##name), #name);
#else
#define BYTECODE(name) BC_LABEL(name)
#endif  // DEBUG

380
template <typename Char>
381 382 383 384 385 386
IrregexpInterpreter::Result RawMatch(
    Isolate* isolate, ByteArray code_array, String subject_string,
    Vector<const Char> subject, int* output_registers,
    int output_register_count, int total_register_count, int current,
    uint32_t current_char, RegExp::CallOrigin call_origin,
    const uint32_t backtrack_limit) {
387 388
  DisallowHeapAllocation no_gc;

389
#if V8_USE_COMPUTED_GOTO
390 391 392 393 394 395 396 397 398 399 400 401 402 403 404

// We have to make sure that no OOB access to the dispatch table is possible and
// all values are valid label addresses.
// Otherwise jumps to arbitrary addresses could potentially happen.
// This is ensured as follows:
// Every index to the dispatch table gets masked using BYTECODE_MASK in
// DECODE(). This way we can only get values between 0 (only the least
// significant byte of an integer is used) and kRegExpPaddedBytecodeCount - 1
// (BYTECODE_MASK is defined to be exactly this value).
// All entries from kRegExpBytecodeCount to kRegExpPaddedBytecodeCount have to
// be filled with BREAKs (invalid operation).

// Fill dispatch table from last defined bytecode up to the next power of two
// with BREAK (invalid operation).
// TODO(pthier): Find a way to fill up automatically (at compile time)
405
// 59 real bytecodes -> 5 fillers
406 407 408 409 410
#define BYTECODE_FILLER_ITERATOR(V) \
  V(BREAK) /* 1 */                  \
  V(BREAK) /* 2 */                  \
  V(BREAK) /* 3 */                  \
  V(BREAK) /* 4 */                  \
411
  V(BREAK) /* 5 */
412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431

#define COUNT(...) +1
  static constexpr int kRegExpBytecodeFillerCount =
      BYTECODE_FILLER_ITERATOR(COUNT);
#undef COUNT

  // Make sure kRegExpPaddedBytecodeCount is actually the closest possible power
  // of two.
  DCHECK_EQ(kRegExpPaddedBytecodeCount,
            base::bits::RoundUpToPowerOfTwo32(kRegExpBytecodeCount));

  // Make sure every bytecode we get by using BYTECODE_MASK is well defined.
  STATIC_ASSERT(kRegExpBytecodeCount <= kRegExpPaddedBytecodeCount);
  STATIC_ASSERT(kRegExpBytecodeCount + kRegExpBytecodeFillerCount ==
                kRegExpPaddedBytecodeCount);

#define DECLARE_DISPATCH_TABLE_ENTRY(name, ...) &&BC_##name,
  static const void* const dispatch_table[kRegExpPaddedBytecodeCount] = {
      BYTECODE_ITERATOR(DECLARE_DISPATCH_TABLE_ENTRY)
          BYTECODE_FILLER_ITERATOR(DECLARE_DISPATCH_TABLE_ENTRY)};
432
#undef DECLARE_DISPATCH_TABLE_ENTRY
433 434 435
#undef BYTECODE_FILLER_ITERATOR

#endif  // V8_USE_COMPUTED_GOTO
436

437
  const byte* pc = code_array.GetDataStartAddress();
438 439
  const byte* code_base = pc;

440 441
  InterpreterRegisters registers(total_register_count, output_registers,
                                 output_register_count);
442
  BacktrackStack backtrack_stack;
443

444 445
  uint32_t backtrack_count = 0;

446 447 448 449 450
#ifdef DEBUG
  if (FLAG_trace_regexp_bytecodes) {
    PrintF("\n\nStart bytecode interpreter\n\n");
  }
#endif
451

452
  while (true) {
453 454 455 456 457 458 459 460 461
    const byte* next_pc = pc;
    int32_t insn;
    int32_t next_insn;
#if V8_USE_COMPUTED_GOTO
    const void* next_handler_addr;
    DECODE();
    DISPATCH();
#else
    insn = Load32Aligned(pc);
462
    switch (insn & BYTECODE_MASK) {
463 464 465 466
#endif  // V8_USE_COMPUTED_GOTO
    BYTECODE(BREAK) { UNREACHABLE(); }
    BYTECODE(PUSH_CP) {
      ADVANCE(PUSH_CP);
467 468 469
      if (!backtrack_stack.push(current)) {
        return MaybeThrowStackOverflow(isolate, call_origin);
      }
470 471 472 473
      DISPATCH();
    }
    BYTECODE(PUSH_BT) {
      ADVANCE(PUSH_BT);
474 475 476
      if (!backtrack_stack.push(Load32Aligned(pc + 4))) {
        return MaybeThrowStackOverflow(isolate, call_origin);
      }
477 478 479 480
      DISPATCH();
    }
    BYTECODE(PUSH_REGISTER) {
      ADVANCE(PUSH_REGISTER);
481
      if (!backtrack_stack.push(registers[LoadPacked24Unsigned(insn)])) {
482 483
        return MaybeThrowStackOverflow(isolate, call_origin);
      }
484 485 486 487
      DISPATCH();
    }
    BYTECODE(SET_REGISTER) {
      ADVANCE(SET_REGISTER);
488
      registers[LoadPacked24Unsigned(insn)] = Load32Aligned(pc + 4);
489 490 491 492
      DISPATCH();
    }
    BYTECODE(ADVANCE_REGISTER) {
      ADVANCE(ADVANCE_REGISTER);
493
      registers[LoadPacked24Unsigned(insn)] += Load32Aligned(pc + 4);
494 495 496 497
      DISPATCH();
    }
    BYTECODE(SET_REGISTER_TO_CP) {
      ADVANCE(SET_REGISTER_TO_CP);
498
      registers[LoadPacked24Unsigned(insn)] = current + Load32Aligned(pc + 4);
499 500 501 502
      DISPATCH();
    }
    BYTECODE(SET_CP_TO_REGISTER) {
      ADVANCE(SET_CP_TO_REGISTER);
503
      SET_CURRENT_POSITION(registers[LoadPacked24Unsigned(insn)]);
504 505 506 507
      DISPATCH();
    }
    BYTECODE(SET_REGISTER_TO_SP) {
      ADVANCE(SET_REGISTER_TO_SP);
508
      registers[LoadPacked24Unsigned(insn)] = backtrack_stack.sp();
509 510 511 512
      DISPATCH();
    }
    BYTECODE(SET_SP_TO_REGISTER) {
      ADVANCE(SET_SP_TO_REGISTER);
513
      backtrack_stack.set_sp(registers[LoadPacked24Unsigned(insn)]);
514 515 516 517
      DISPATCH();
    }
    BYTECODE(POP_CP) {
      ADVANCE(POP_CP);
518
      SET_CURRENT_POSITION(backtrack_stack.pop());
519 520 521
      DISPATCH();
    }
    BYTECODE(POP_BT) {
522 523 524 525 526 527
      STATIC_ASSERT(JSRegExp::kNoBacktrackLimit == 0);
      if (++backtrack_count == backtrack_limit) {
        // Exceeded limits are treated as a failed match.
        return IrregexpInterpreter::FAILURE;
      }

528 529 530 531 532
      IrregexpInterpreter::Result return_code =
          HandleInterrupts(isolate, call_origin, &code_array, &subject_string,
                           &code_base, &subject, &pc);
      if (return_code != IrregexpInterpreter::SUCCESS) return return_code;

533
      SET_PC_FROM_OFFSET(backtrack_stack.pop());
534 535 536 537
      DISPATCH();
    }
    BYTECODE(POP_REGISTER) {
      ADVANCE(POP_REGISTER);
538
      registers[LoadPacked24Unsigned(insn)] = backtrack_stack.pop();
539 540
      DISPATCH();
    }
541 542 543 544 545 546 547 548
    BYTECODE(FAIL) {
      isolate->counters()->regexp_backtracks()->AddSample(
          static_cast<int>(backtrack_count));
      return IrregexpInterpreter::FAILURE;
    }
    BYTECODE(SUCCEED) {
      isolate->counters()->regexp_backtracks()->AddSample(
          static_cast<int>(backtrack_count));
549
      registers.CopyToOutputRegisters();
550 551
      return IrregexpInterpreter::SUCCESS;
    }
552 553
    BYTECODE(ADVANCE_CP) {
      ADVANCE(ADVANCE_CP);
554
      ADVANCE_CURRENT_POSITION(LoadPacked24Signed(insn));
555 556 557 558 559 560 561 562
      DISPATCH();
    }
    BYTECODE(GOTO) {
      SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
      DISPATCH();
    }
    BYTECODE(ADVANCE_CP_AND_GOTO) {
      SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
563
      ADVANCE_CURRENT_POSITION(LoadPacked24Signed(insn));
564 565 566 567 568 569 570 571
      DISPATCH();
    }
    BYTECODE(CHECK_GREEDY) {
      if (current == backtrack_stack.peek()) {
        SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
        backtrack_stack.pop();
      } else {
        ADVANCE(CHECK_GREEDY);
572
      }
573 574 575
      DISPATCH();
    }
    BYTECODE(LOAD_CURRENT_CHAR) {
576
      int pos = current + LoadPacked24Signed(insn);
577 578 579 580
      if (pos >= subject.length() || pos < 0) {
        SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
      } else {
        ADVANCE(LOAD_CURRENT_CHAR);
erik.corry@gmail.com's avatar
erik.corry@gmail.com committed
581
        current_char = subject[pos];
582
      }
583 584 585 586
      DISPATCH();
    }
    BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) {
      ADVANCE(LOAD_CURRENT_CHAR_UNCHECKED);
587
      int pos = current + LoadPacked24Signed(insn);
588 589 590 591
      current_char = subject[pos];
      DISPATCH();
    }
    BYTECODE(LOAD_2_CURRENT_CHARS) {
592
      int pos = current + LoadPacked24Signed(insn);
593 594 595 596
      if (pos + 2 > subject.length() || pos < 0) {
        SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
      } else {
        ADVANCE(LOAD_2_CURRENT_CHARS);
597 598 599
        Char next = subject[pos + 1];
        current_char = (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
      }
600 601 602 603
      DISPATCH();
    }
    BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) {
      ADVANCE(LOAD_2_CURRENT_CHARS_UNCHECKED);
604
      int pos = current + LoadPacked24Signed(insn);
605 606 607 608 609 610
      Char next = subject[pos + 1];
      current_char = (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
      DISPATCH();
    }
    BYTECODE(LOAD_4_CURRENT_CHARS) {
      DCHECK_EQ(1, sizeof(Char));
611
      int pos = current + LoadPacked24Signed(insn);
612 613 614 615
      if (pos + 4 > subject.length() || pos < 0) {
        SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
      } else {
        ADVANCE(LOAD_4_CURRENT_CHARS);
616 617 618
        Char next1 = subject[pos + 1];
        Char next2 = subject[pos + 2];
        Char next3 = subject[pos + 3];
619 620
        current_char =
            (subject[pos] | (next1 << 8) | (next2 << 16) | (next3 << 24));
621
      }
622 623 624 625 626
      DISPATCH();
    }
    BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) {
      ADVANCE(LOAD_4_CURRENT_CHARS_UNCHECKED);
      DCHECK_EQ(1, sizeof(Char));
627
      int pos = current + LoadPacked24Signed(insn);
628 629 630 631 632 633 634 635 636 637 638 639 640
      Char next1 = subject[pos + 1];
      Char next2 = subject[pos + 2];
      Char next3 = subject[pos + 3];
      current_char =
          (subject[pos] | (next1 << 8) | (next2 << 16) | (next3 << 24));
      DISPATCH();
    }
    BYTECODE(CHECK_4_CHARS) {
      uint32_t c = Load32Aligned(pc + 4);
      if (c == current_char) {
        SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
      } else {
        ADVANCE(CHECK_4_CHARS);
641
      }
642 643 644
      DISPATCH();
    }
    BYTECODE(CHECK_CHAR) {
645
      uint32_t c = LoadPacked24Unsigned(insn);
646 647 648 649
      if (c == current_char) {
        SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
      } else {
        ADVANCE(CHECK_CHAR);
650
      }
651 652 653 654 655 656 657 658 659 660 661 662
      DISPATCH();
    }
    BYTECODE(CHECK_NOT_4_CHARS) {
      uint32_t c = Load32Aligned(pc + 4);
      if (c != current_char) {
        SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
      } else {
        ADVANCE(CHECK_NOT_4_CHARS);
      }
      DISPATCH();
    }
    BYTECODE(CHECK_NOT_CHAR) {
663
      uint32_t c = LoadPacked24Unsigned(insn);
664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680
      if (c != current_char) {
        SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
      } else {
        ADVANCE(CHECK_NOT_CHAR);
      }
      DISPATCH();
    }
    BYTECODE(AND_CHECK_4_CHARS) {
      uint32_t c = Load32Aligned(pc + 4);
      if (c == (current_char & Load32Aligned(pc + 8))) {
        SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
      } else {
        ADVANCE(AND_CHECK_4_CHARS);
      }
      DISPATCH();
    }
    BYTECODE(AND_CHECK_CHAR) {
681
      uint32_t c = LoadPacked24Unsigned(insn);
682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698
      if (c == (current_char & Load32Aligned(pc + 4))) {
        SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
      } else {
        ADVANCE(AND_CHECK_CHAR);
      }
      DISPATCH();
    }
    BYTECODE(AND_CHECK_NOT_4_CHARS) {
      uint32_t c = Load32Aligned(pc + 4);
      if (c != (current_char & Load32Aligned(pc + 8))) {
        SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
      } else {
        ADVANCE(AND_CHECK_NOT_4_CHARS);
      }
      DISPATCH();
    }
    BYTECODE(AND_CHECK_NOT_CHAR) {
699
      uint32_t c = LoadPacked24Unsigned(insn);
700 701 702 703 704 705 706 707
      if (c != (current_char & Load32Aligned(pc + 4))) {
        SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
      } else {
        ADVANCE(AND_CHECK_NOT_CHAR);
      }
      DISPATCH();
    }
    BYTECODE(MINUS_AND_CHECK_NOT_CHAR) {
708
      uint32_t c = LoadPacked24Unsigned(insn);
709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738
      uint32_t minus = Load16Aligned(pc + 4);
      uint32_t mask = Load16Aligned(pc + 6);
      if (c != ((current_char - minus) & mask)) {
        SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
      } else {
        ADVANCE(MINUS_AND_CHECK_NOT_CHAR);
      }
      DISPATCH();
    }
    BYTECODE(CHECK_CHAR_IN_RANGE) {
      uint32_t from = Load16Aligned(pc + 4);
      uint32_t to = Load16Aligned(pc + 6);
      if (from <= current_char && current_char <= to) {
        SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
      } else {
        ADVANCE(CHECK_CHAR_IN_RANGE);
      }
      DISPATCH();
    }
    BYTECODE(CHECK_CHAR_NOT_IN_RANGE) {
      uint32_t from = Load16Aligned(pc + 4);
      uint32_t to = Load16Aligned(pc + 6);
      if (from > current_char || current_char > to) {
        SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
      } else {
        ADVANCE(CHECK_CHAR_NOT_IN_RANGE);
      }
      DISPATCH();
    }
    BYTECODE(CHECK_BIT_IN_TABLE) {
739
      if (CheckBitInTable(current_char, pc + 8)) {
740 741 742 743 744 745 746
        SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
      } else {
        ADVANCE(CHECK_BIT_IN_TABLE);
      }
      DISPATCH();
    }
    BYTECODE(CHECK_LT) {
747
      uint32_t limit = LoadPacked24Unsigned(insn);
748 749 750 751 752 753 754 755
      if (current_char < limit) {
        SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
      } else {
        ADVANCE(CHECK_LT);
      }
      DISPATCH();
    }
    BYTECODE(CHECK_GT) {
756
      uint32_t limit = LoadPacked24Unsigned(insn);
757 758 759 760 761 762 763 764
      if (current_char > limit) {
        SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
      } else {
        ADVANCE(CHECK_GT);
      }
      DISPATCH();
    }
    BYTECODE(CHECK_REGISTER_LT) {
765
      if (registers[LoadPacked24Unsigned(insn)] < Load32Aligned(pc + 4)) {
766 767 768 769 770 771 772
        SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
      } else {
        ADVANCE(CHECK_REGISTER_LT);
      }
      DISPATCH();
    }
    BYTECODE(CHECK_REGISTER_GE) {
773
      if (registers[LoadPacked24Unsigned(insn)] >= Load32Aligned(pc + 4)) {
774 775 776 777 778 779 780
        SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
      } else {
        ADVANCE(CHECK_REGISTER_GE);
      }
      DISPATCH();
    }
    BYTECODE(CHECK_REGISTER_EQ_POS) {
781
      if (registers[LoadPacked24Unsigned(insn)] == current) {
782 783 784 785 786 787 788
        SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
      } else {
        ADVANCE(CHECK_REGISTER_EQ_POS);
      }
      DISPATCH();
    }
    BYTECODE(CHECK_NOT_REGS_EQUAL) {
789
      if (registers[LoadPacked24Unsigned(insn)] ==
790 791 792 793 794 795 796 797
          registers[Load32Aligned(pc + 4)]) {
        ADVANCE(CHECK_NOT_REGS_EQUAL);
      } else {
        SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
      }
      DISPATCH();
    }
    BYTECODE(CHECK_NOT_BACK_REF) {
798 799
      int from = registers[LoadPacked24Unsigned(insn)];
      int len = registers[LoadPacked24Unsigned(insn) + 1] - from;
800 801 802 803 804
      if (from >= 0 && len > 0) {
        if (current + len > subject.length() ||
            CompareChars(&subject[from], &subject[current], len) != 0) {
          SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
          DISPATCH();
805
        }
806
        ADVANCE_CURRENT_POSITION(len);
807 808 809 810 811
      }
      ADVANCE(CHECK_NOT_BACK_REF);
      DISPATCH();
    }
    BYTECODE(CHECK_NOT_BACK_REF_BACKWARD) {
812 813
      int from = registers[LoadPacked24Unsigned(insn)];
      int len = registers[LoadPacked24Unsigned(insn) + 1] - from;
814 815 816 817 818
      if (from >= 0 && len > 0) {
        if (current - len < 0 ||
            CompareChars(&subject[from], &subject[current - len], len) != 0) {
          SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
          DISPATCH();
819
        }
820
        SET_CURRENT_POSITION(current - len);
821 822 823 824 825
      }
      ADVANCE(CHECK_NOT_BACK_REF_BACKWARD);
      DISPATCH();
    }
    BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE) {
826 827 828 829 830 831 832 833 834 835 836 837
      int from = registers[LoadPacked24Unsigned(insn)];
      int len = registers[LoadPacked24Unsigned(insn) + 1] - from;
      if (from >= 0 && len > 0) {
        if (current + len > subject.length() ||
            !BackRefMatchesNoCase(isolate, from, current, len, subject, true)) {
          SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
          DISPATCH();
        }
        ADVANCE_CURRENT_POSITION(len);
      }
      ADVANCE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE);
      DISPATCH();
838 839
    }
    BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
840 841
      int from = registers[LoadPacked24Unsigned(insn)];
      int len = registers[LoadPacked24Unsigned(insn) + 1] - from;
842 843
      if (from >= 0 && len > 0) {
        if (current + len > subject.length() ||
844 845
            !BackRefMatchesNoCase(isolate, from, current, len, subject,
                                  false)) {
846 847
          SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
          DISPATCH();
848
        }
849
        ADVANCE_CURRENT_POSITION(len);
850
      }
851 852 853 854
      ADVANCE(CHECK_NOT_BACK_REF_NO_CASE);
      DISPATCH();
    }
    BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD) {
855 856 857 858 859 860 861 862 863 864 865 866 867
      int from = registers[LoadPacked24Unsigned(insn)];
      int len = registers[LoadPacked24Unsigned(insn) + 1] - from;
      if (from >= 0 && len > 0) {
        if (current - len < 0 ||
            !BackRefMatchesNoCase(isolate, from, current - len, len, subject,
                                  true)) {
          SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
          DISPATCH();
        }
        SET_CURRENT_POSITION(current - len);
      }
      ADVANCE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD);
      DISPATCH();
868 869
    }
    BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) {
870 871
      int from = registers[LoadPacked24Unsigned(insn)];
      int len = registers[LoadPacked24Unsigned(insn) + 1] - from;
872 873
      if (from >= 0 && len > 0) {
        if (current - len < 0 ||
874 875
            !BackRefMatchesNoCase(isolate, from, current - len, len, subject,
                                  false)) {
876 877
          SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
          DISPATCH();
878
        }
879
        SET_CURRENT_POSITION(current - len);
880 881 882 883 884
      }
      ADVANCE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD);
      DISPATCH();
    }
    BYTECODE(CHECK_AT_START) {
885
      if (current + LoadPacked24Signed(insn) == 0) {
886 887 888 889 890 891 892
        SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
      } else {
        ADVANCE(CHECK_AT_START);
      }
      DISPATCH();
    }
    BYTECODE(CHECK_NOT_AT_START) {
893
      if (current + LoadPacked24Signed(insn) == 0) {
894 895 896 897 898 899 900 901
        ADVANCE(CHECK_NOT_AT_START);
      } else {
        SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
      }
      DISPATCH();
    }
    BYTECODE(SET_CURRENT_POSITION_FROM_END) {
      ADVANCE(SET_CURRENT_POSITION_FROM_END);
902
      int by = LoadPacked24Unsigned(insn);
903
      if (subject.length() - current > by) {
904
        SET_CURRENT_POSITION(subject.length() - by);
905 906 907 908 909
        current_char = subject[current - 1];
      }
      DISPATCH();
    }
    BYTECODE(CHECK_CURRENT_POSITION) {
910
      int pos = current + LoadPacked24Signed(insn);
911 912 913 914
      if (pos > subject.length() || pos < 0) {
        SET_PC_FROM_OFFSET(Load32Aligned(pc + 4));
      } else {
        ADVANCE(CHECK_CURRENT_POSITION);
915
      }
916 917
      DISPATCH();
    }
918
    BYTECODE(SKIP_UNTIL_CHAR) {
919
      int32_t load_offset = LoadPacked24Signed(insn);
920
      int32_t advance = Load16AlignedSigned(pc + 4);
921
      uint32_t c = Load16Aligned(pc + 6);
922
      while (IndexIsInBounds(current + load_offset, subject.length())) {
923 924 925 926 927
        current_char = subject[current + load_offset];
        if (c == current_char) {
          SET_PC_FROM_OFFSET(Load32Aligned(pc + 8));
          DISPATCH();
        }
928
        ADVANCE_CURRENT_POSITION(advance);
929 930 931 932 933
      }
      SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
      DISPATCH();
    }
    BYTECODE(SKIP_UNTIL_CHAR_AND) {
934
      int32_t load_offset = LoadPacked24Signed(insn);
935
      int32_t advance = Load16AlignedSigned(pc + 4);
936 937 938 939 940 941 942 943 944 945
      uint16_t c = Load16Aligned(pc + 6);
      uint32_t mask = Load32Aligned(pc + 8);
      int32_t maximum_offset = Load32Aligned(pc + 12);
      while (static_cast<uintptr_t>(current + maximum_offset) <=
             static_cast<uintptr_t>(subject.length())) {
        current_char = subject[current + load_offset];
        if (c == (current_char & mask)) {
          SET_PC_FROM_OFFSET(Load32Aligned(pc + 16));
          DISPATCH();
        }
946
        ADVANCE_CURRENT_POSITION(advance);
947 948 949 950 951
      }
      SET_PC_FROM_OFFSET(Load32Aligned(pc + 20));
      DISPATCH();
    }
    BYTECODE(SKIP_UNTIL_CHAR_POS_CHECKED) {
952
      int32_t load_offset = LoadPacked24Signed(insn);
953
      int32_t advance = Load16AlignedSigned(pc + 4);
954 955 956 957 958 959 960 961 962
      uint16_t c = Load16Aligned(pc + 6);
      int32_t maximum_offset = Load32Aligned(pc + 8);
      while (static_cast<uintptr_t>(current + maximum_offset) <=
             static_cast<uintptr_t>(subject.length())) {
        current_char = subject[current + load_offset];
        if (c == current_char) {
          SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
          DISPATCH();
        }
963
        ADVANCE_CURRENT_POSITION(advance);
964 965 966 967 968
      }
      SET_PC_FROM_OFFSET(Load32Aligned(pc + 16));
      DISPATCH();
    }
    BYTECODE(SKIP_UNTIL_BIT_IN_TABLE) {
969
      int32_t load_offset = LoadPacked24Signed(insn);
970
      int32_t advance = Load16AlignedSigned(pc + 4);
971
      const byte* table = pc + 8;
972
      while (IndexIsInBounds(current + load_offset, subject.length())) {
973 974 975 976 977
        current_char = subject[current + load_offset];
        if (CheckBitInTable(current_char, table)) {
          SET_PC_FROM_OFFSET(Load32Aligned(pc + 24));
          DISPATCH();
        }
978
        ADVANCE_CURRENT_POSITION(advance);
979 980 981 982 983
      }
      SET_PC_FROM_OFFSET(Load32Aligned(pc + 28));
      DISPATCH();
    }
    BYTECODE(SKIP_UNTIL_GT_OR_NOT_BIT_IN_TABLE) {
984
      int32_t load_offset = LoadPacked24Signed(insn);
985
      int32_t advance = Load16AlignedSigned(pc + 4);
986 987
      uint16_t limit = Load16Aligned(pc + 6);
      const byte* table = pc + 8;
988
      while (IndexIsInBounds(current + load_offset, subject.length())) {
989 990 991 992 993 994 995 996 997
        current_char = subject[current + load_offset];
        if (current_char > limit) {
          SET_PC_FROM_OFFSET(Load32Aligned(pc + 24));
          DISPATCH();
        }
        if (!CheckBitInTable(current_char, table)) {
          SET_PC_FROM_OFFSET(Load32Aligned(pc + 24));
          DISPATCH();
        }
998
        ADVANCE_CURRENT_POSITION(advance);
999 1000 1001 1002 1003
      }
      SET_PC_FROM_OFFSET(Load32Aligned(pc + 28));
      DISPATCH();
    }
    BYTECODE(SKIP_UNTIL_CHAR_OR_CHAR) {
1004
      int32_t load_offset = LoadPacked24Signed(insn);
1005
      int32_t advance = Load32Aligned(pc + 4);
1006 1007
      uint16_t c = Load16Aligned(pc + 8);
      uint16_t c2 = Load16Aligned(pc + 10);
1008
      while (IndexIsInBounds(current + load_offset, subject.length())) {
1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020
        current_char = subject[current + load_offset];
        // The two if-statements below are split up intentionally, as combining
        // them seems to result in register allocation behaving quite
        // differently and slowing down the resulting code.
        if (c == current_char) {
          SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
          DISPATCH();
        }
        if (c2 == current_char) {
          SET_PC_FROM_OFFSET(Load32Aligned(pc + 12));
          DISPATCH();
        }
1021
        ADVANCE_CURRENT_POSITION(advance);
1022 1023 1024 1025
      }
      SET_PC_FROM_OFFSET(Load32Aligned(pc + 16));
      DISPATCH();
    }
1026 1027 1028 1029
#if V8_USE_COMPUTED_GOTO
// Lint gets confused a lot if we just use !V8_USE_COMPUTED_GOTO or ifndef
// V8_USE_COMPUTED_GOTO here.
#else
1030 1031 1032
      default:
        UNREACHABLE();
    }
1033 1034 1035
  // Label we jump to in DISPATCH(). There must be no instructions between the
  // end of the switch, this label and the end of the loop.
  switch_dispatch_continuation : {}
1036
#endif  // V8_USE_COMPUTED_GOTO
1037 1038 1039
  }
}

1040
#undef BYTECODE
1041 1042
#undef ADVANCE_CURRENT_POSITION
#undef SET_CURRENT_POSITION
1043 1044 1045 1046 1047 1048
#undef DISPATCH
#undef DECODE
#undef SET_PC_FROM_OFFSET
#undef ADVANCE
#undef BC_LABEL
#undef V8_USE_COMPUTED_GOTO
1049

1050 1051 1052
}  // namespace

// static
1053
IrregexpInterpreter::Result IrregexpInterpreter::Match(
1054 1055 1056 1057
    Isolate* isolate, JSRegExp regexp, String subject_string,
    int* output_registers, int output_register_count, int start_position,
    RegExp::CallOrigin call_origin) {
  if (FLAG_regexp_tier_up) regexp.TierUpTick();
Ana Peško's avatar
Ana Peško committed
1058 1059

  bool is_one_byte = String::IsOneByteRepresentationUnderneath(subject_string);
1060
  ByteArray code_array = ByteArray::cast(regexp.Bytecode(is_one_byte));
1061
  int total_register_count = regexp.MaxRegisterCount();
Ana Peško's avatar
Ana Peško committed
1062

1063 1064 1065
  return MatchInternal(isolate, code_array, subject_string, output_registers,
                       output_register_count, total_register_count,
                       start_position, call_origin, regexp.BacktrackLimit());
Ana Peško's avatar
Ana Peško committed
1066 1067 1068
}

// Runs |code_array| over the flat |subject_string|, dispatching to the
// RawMatch overload that corresponds to the subject's character width.
//
// |start_position| is the index at which matching begins. The character just
// before it (or '\n' when matching starts at index 0) is handed to RawMatch as
// the "previous character".
// Precondition: |subject_string| is flat (DCHECKed below).
IrregexpInterpreter::Result IrregexpInterpreter::MatchInternal(
    Isolate* isolate, ByteArray code_array, String subject_string,
    int* output_registers, int output_register_count, int total_register_count,
    int start_position, RegExp::CallOrigin call_origin,
    uint32_t backtrack_limit) {
  DCHECK(subject_string.IsFlat());

  // Note: Heap allocation *is* allowed in two situations if calling from
  // Runtime:
  // 1. When creating & throwing a stack overflow exception. The interpreter
  //    aborts afterwards, and thus possible-moved objects are never used.
  // 2. When handling interrupts. We manually relocate unhandlified references
  //    after interrupts have run.
  DisallowHeapAllocation no_gc;

  // Default used when there is no character before the start position.
  uc16 previous_char = '\n';
  String::FlatContent subject_content = subject_string.GetFlatContent(no_gc);
  if (subject_content.IsOneByte()) {
    Vector<const uint8_t> subject_vector = subject_content.ToOneByteVector();
    if (start_position != 0) previous_char = subject_vector[start_position - 1];
    return RawMatch(isolate, code_array, subject_string, subject_vector,
                    output_registers, output_register_count,
                    total_register_count, start_position, previous_char,
                    call_origin, backtrack_limit);
  } else {
    DCHECK(subject_content.IsTwoByte());
    Vector<const uc16> subject_vector = subject_content.ToUC16Vector();
    if (start_position != 0) previous_char = subject_vector[start_position - 1];
    return RawMatch(isolate, code_array, subject_string, subject_vector,
                    output_registers, output_register_count,
                    total_register_count, start_position, previous_char,
                    call_origin, backtrack_limit);
  }
}

1103 1104
#ifndef COMPILING_IRREGEXP_FOR_EXTERNAL_EMBEDDER

1105 1106 1107
// This method is called through an external reference from RegExpExecInternal
// builtin.
//
// |subject| and |regexp| are raw addresses that are cast back to String and
// JSRegExp respectively. The three unnamed Address parameters are not used by
// this function. |call_origin| must be RegExp::CallOrigin::kFromJs.
//
// Returns RETRY without matching when the regexp is marked for tier-up;
// otherwise returns the result of Match.
IrregexpInterpreter::Result IrregexpInterpreter::MatchForCallFromJs(
    Address subject, int32_t start_position, Address, Address,
    int* output_registers, int32_t output_register_count, Address,
    RegExp::CallOrigin call_origin, Isolate* isolate, Address regexp) {
  DCHECK_NOT_NULL(isolate);
  DCHECK_NOT_NULL(output_registers);
  DCHECK(call_origin == RegExp::CallOrigin::kFromJs);

  DisallowHeapAllocation no_gc;
  DisallowJavascriptExecution no_js(isolate);

  String subject_string = String::cast(Object(subject));
  JSRegExp regexp_obj = JSRegExp::cast(Object(regexp));

  if (regexp_obj.MarkedForTierUp()) {
    // Returning RETRY will re-enter through runtime, where actual recompilation
    // for tier-up takes place.
    return IrregexpInterpreter::RETRY;
  }

  return Match(isolate, regexp_obj, subject_string, output_registers,
               output_register_count, start_position, call_origin);
}

1131 1132
#endif  // !COMPILING_IRREGEXP_FOR_EXTERNAL_EMBEDDER

1133
// Handle-taking entry point: dereferences |regexp| and |subject_string| and
// forwards to Match with the call origin fixed to
// RegExp::CallOrigin::kFromRuntime.
IrregexpInterpreter::Result IrregexpInterpreter::MatchForCallFromRuntime(
    Isolate* isolate, Handle<JSRegExp> regexp, Handle<String> subject_string,
    int* output_registers, int output_register_count, int start_position) {
  return Match(isolate, *regexp, *subject_string, output_registers,
               output_register_count, start_position,
               RegExp::CallOrigin::kFromRuntime);
}

1141 1142
}  // namespace internal
}  // namespace v8