// Copyright 2011 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// A simple interpreter for the Irregexp byte code.

#ifdef V8_INTERPRETED_REGEXP

#include "src/regexp/interpreter-irregexp.h"
#include "src/ast/ast.h"
#include "src/objects-inl.h"
#include "src/regexp/bytecodes-irregexp.h"
#include "src/regexp/jsregexp.h"
#include "src/regexp/regexp-macro-assembler.h"
#include "src/unicode.h"
#include "src/utils.h"
#ifdef V8_I18N_SUPPORT
#include "unicode/uchar.h"
#endif  // V8_I18N_SUPPORT

namespace v8 {
namespace internal {
// Alias for the unibrow mapping instantiated with Ecma262Canonicalize.
using Canonicalize = unibrow::Mapping<unibrow::Ecma262Canonicalize>;
// Two-byte overload: compares |len| characters of |subject| starting at
// |from| with the |len| characters starting at |current|, ignoring case.
// The comparison itself is delegated to
// RegExpMacroAssembler::CaseInsensitiveCompareUC16, which receives the two
// start addresses and the length in bytes. When |unicode| is set, nullptr is
// handed over in place of |isolate|. Returns true iff that helper returns 1.
static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current,
                                 int len, Vector<const uc16> subject,
                                 bool unicode) {
  const uc16* captured_start = &subject.at(from);
  const uc16* candidate_start = &subject.at(current);
  const size_t byte_length = len * kUC16Size;
  Isolate* const isolate_or_null = unicode ? nullptr : isolate;
  // The helper takes untyped, non-const addresses.
  return RegExpMacroAssembler::CaseInsensitiveCompareUC16(
             reinterpret_cast<Address>(const_cast<uc16*>(captured_start)),
             reinterpret_cast<Address>(const_cast<uc16*>(candidate_start)),
             byte_length, isolate_or_null) == 1;
}


// One-byte (Latin-1) overload: compares |len| characters of |subject|
// starting at |from| with the |len| characters starting at |current|,
// ignoring case. |isolate| and |unicode| are not consulted; for Latin-1
// characters the unicode flag makes no difference.
static bool BackRefMatchesNoCase(Isolate* isolate, int from, int current,
                                 int len, Vector<const uint8_t> subject,
                                 bool unicode) {
  for (int i = 0; i < len; i++) {
    const unsigned int captured = subject[from + i];
    const unsigned int candidate = subject[current + i];
    if (captured == candidate) continue;

    // Setting bit 0x20 maps an upper-case letter onto its lower-case twin.
    const unsigned int folded = captured | 0x20;
    if (folded != (candidate | 0x20)) return false;

    // The fold is only meaningful for letters: 'a'..'z' in the ASCII range,
    // or 224..254 except 247 in the Latin-1 range. The unsigned subtraction
    // turns each two-sided range check into a single comparison.
    const bool is_ascii_letter = folded - 'a' <= 'z' - 'a';
    const bool is_latin1_letter = folded - 224 <= 254 - 224 && folded != 247;
    if (!is_ascii_letter && !is_latin1_letter) return false;
  }
  return true;
}


#ifdef DEBUG
// Debug-only tracing helper. When FLAG_trace_regexp_bytecodes is set, prints
// one line describing the bytecode at |pc|:
//   - the offset of |pc| from |code_base|, |stack_depth|, |current_position|
//     and |current_char| (as hex, plus the character itself when it is
//     printable ASCII, '.' otherwise),
//   - |bytecode_name|,
//   - the |bytecode_length| raw bytes of the instruction in hex,
//   - the bytes after the first one again as printable ASCII ('.' for
//     anything else).
// Does nothing when the flag is off.
static void TraceInterpreter(const byte* code_base,
                             const byte* pc,
                             int stack_depth,
                             int current_position,
                             uint32_t current_char,
                             int bytecode_length,
                             const char* bytecode_name) {
  if (FLAG_trace_regexp_bytecodes) {
    bool printable = (current_char < 127 && current_char >= 32);
    const char* format =
        printable ?
        "pc = %02x, sp = %d, curpos = %d, curchar = %08x (%c), bc = %s" :
        "pc = %02x, sp = %d, curpos = %d, curchar = %08x .%c., bc = %s";
    // The pointer difference is a ptrdiff_t; "%02x" consumes an unsigned int,
    // so narrow explicitly instead of passing a mismatched vararg. Likewise
    // "%c" consumes an int.
    PrintF(format,
           static_cast<unsigned>(pc - code_base),
           stack_depth,
           current_position,
           current_char,
           printable ? static_cast<int>(current_char) : '.',
           bytecode_name);
    // Keep the whole line on PrintF so every part of it goes to the same
    // output.
    for (int i = 0; i < bytecode_length; i++) {
      PrintF(", %02x", pc[i]);
    }
    PrintF(" ");
    for (int i = 1; i < bytecode_length; i++) {
      unsigned char b = pc[i];
      if (b < 127 && b >= 32) {
        PrintF("%c", b);
      } else {
        PrintF(".");
      }
    }
    PrintF("\n");
  }
}


// Opens the switch case for bytecode |name| inside RawMatch. In debug builds
// the case first traces the instruction; the macro relies on the locals
// code_base, pc, backtrack_sp, backtrack_stack_base, current and current_char
// being in scope at the point of use.
#define BYTECODE(name)                                                      \
  case BC_##name:                                                           \
    TraceInterpreter(code_base,                                             \
                     pc,                                                    \
                     static_cast<int>(backtrack_sp - backtrack_stack_base), \
                     current,                                               \
                     current_char,                                          \
                     BC_##name##_LENGTH,                                    \
                     #name);
#else
// Release variant: just the case label.
#define BYTECODE(name)                                                      \
  case BC_##name:
#endif


// Reads the 32-bit value stored at |pc|. |pc| must be 4-byte aligned; this is
// checked with a DCHECK.
static int32_t Load32Aligned(const byte* pc) {
  DCHECK((reinterpret_cast<intptr_t>(pc) & 3) == 0);
  const int32_t* word = reinterpret_cast<const int32_t*>(pc);
  return *word;
}


// Reads the unsigned 16-bit value stored at |pc| and returns it widened to
// int32_t. |pc| must be 2-byte aligned; this is checked with a DCHECK.
static int32_t Load16Aligned(const byte* pc) {
  DCHECK((reinterpret_cast<intptr_t>(pc) & 1) == 0);
  const uint16_t* halfword = reinterpret_cast<const uint16_t*>(pc);
  return *halfword;
}

// A simple abstraction over the backtracking stack used by the interpreter.
// This backtracking stack does not grow automatically, but it ensures that the
// the memory held by the stack is released or remembered in a cache if the
// matching terminates.
class BacktrackStack {
 public:
134
  BacktrackStack() { data_ = NewArray<int>(kBacktrackStackSize); }
135 136

  ~BacktrackStack() {
137
    DeleteArray(data_);
138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
  }

  int* data() const { return data_; }

  int max_size() const { return kBacktrackStackSize; }

 private:
  static const int kBacktrackStackSize = 10000;

  int* data_;

  DISALLOW_COPY_AND_ASSIGN(BacktrackStack);
};


template <typename Char>
154 155 156 157 158 159
static RegExpImpl::IrregexpResult RawMatch(Isolate* isolate,
                                           const byte* code_base,
                                           Vector<const Char> subject,
                                           int* registers,
                                           int current,
                                           uint32_t current_char) {
160
  const byte* pc = code_base;
161 162 163
  // BacktrackStack ensures that the memory allocated for the backtracking stack
  // is returned to the system or cached if there is no stack being cached at
  // the moment.
164
  BacktrackStack backtrack_stack;
165
  int* backtrack_stack_base = backtrack_stack.data();
166
  int* backtrack_sp = backtrack_stack_base;
167
  int backtrack_stack_space = backtrack_stack.max_size();
168 169 170 171 172 173
#ifdef DEBUG
  if (FLAG_trace_regexp_bytecodes) {
    PrintF("\n\nStart bytecode interpreter\n\n");
  }
#endif
  while (true) {
174 175
    int32_t insn = Load32Aligned(pc);
    switch (insn & BYTECODE_MASK) {
176 177
      BYTECODE(BREAK)
        UNREACHABLE();
178
        return RegExpImpl::RE_FAILURE;
179 180
      BYTECODE(PUSH_CP)
        if (--backtrack_stack_space < 0) {
181
          return RegExpImpl::RE_EXCEPTION;
182
        }
183
        *backtrack_sp++ = current;
184 185 186 187
        pc += BC_PUSH_CP_LENGTH;
        break;
      BYTECODE(PUSH_BT)
        if (--backtrack_stack_space < 0) {
188
          return RegExpImpl::RE_EXCEPTION;
189
        }
190
        *backtrack_sp++ = Load32Aligned(pc + 4);
191 192 193 194
        pc += BC_PUSH_BT_LENGTH;
        break;
      BYTECODE(PUSH_REGISTER)
        if (--backtrack_stack_space < 0) {
195
          return RegExpImpl::RE_EXCEPTION;
196
        }
197
        *backtrack_sp++ = registers[insn >> BYTECODE_SHIFT];
198 199 200
        pc += BC_PUSH_REGISTER_LENGTH;
        break;
      BYTECODE(SET_REGISTER)
201
        registers[insn >> BYTECODE_SHIFT] = Load32Aligned(pc + 4);
202 203 204
        pc += BC_SET_REGISTER_LENGTH;
        break;
      BYTECODE(ADVANCE_REGISTER)
205
        registers[insn >> BYTECODE_SHIFT] += Load32Aligned(pc + 4);
206 207 208
        pc += BC_ADVANCE_REGISTER_LENGTH;
        break;
      BYTECODE(SET_REGISTER_TO_CP)
209
        registers[insn >> BYTECODE_SHIFT] = current + Load32Aligned(pc + 4);
210 211 212
        pc += BC_SET_REGISTER_TO_CP_LENGTH;
        break;
      BYTECODE(SET_CP_TO_REGISTER)
213
        current = registers[insn >> BYTECODE_SHIFT];
214 215 216
        pc += BC_SET_CP_TO_REGISTER_LENGTH;
        break;
      BYTECODE(SET_REGISTER_TO_SP)
217 218
        registers[insn >> BYTECODE_SHIFT] =
            static_cast<int>(backtrack_sp - backtrack_stack_base);
219 220 221
        pc += BC_SET_REGISTER_TO_SP_LENGTH;
        break;
      BYTECODE(SET_SP_TO_REGISTER)
222
        backtrack_sp = backtrack_stack_base + registers[insn >> BYTECODE_SHIFT];
223
        backtrack_stack_space = backtrack_stack.max_size() -
224
            static_cast<int>(backtrack_sp - backtrack_stack_base);
225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240
        pc += BC_SET_SP_TO_REGISTER_LENGTH;
        break;
      BYTECODE(POP_CP)
        backtrack_stack_space++;
        --backtrack_sp;
        current = *backtrack_sp;
        pc += BC_POP_CP_LENGTH;
        break;
      BYTECODE(POP_BT)
        backtrack_stack_space++;
        --backtrack_sp;
        pc = code_base + *backtrack_sp;
        break;
      BYTECODE(POP_REGISTER)
        backtrack_stack_space++;
        --backtrack_sp;
241
        registers[insn >> BYTECODE_SHIFT] = *backtrack_sp;
242 243 244
        pc += BC_POP_REGISTER_LENGTH;
        break;
      BYTECODE(FAIL)
245
        return RegExpImpl::RE_FAILURE;
246
      BYTECODE(SUCCEED)
247
        return RegExpImpl::RE_SUCCESS;
248
      BYTECODE(ADVANCE_CP)
249
        current += insn >> BYTECODE_SHIFT;
250 251 252
        pc += BC_ADVANCE_CP_LENGTH;
        break;
      BYTECODE(GOTO)
253
        pc = code_base + Load32Aligned(pc + 4);
254
        break;
255 256 257 258
      BYTECODE(ADVANCE_CP_AND_GOTO)
        current += insn >> BYTECODE_SHIFT;
        pc = code_base + Load32Aligned(pc + 4);
        break;
erik.corry@gmail.com's avatar
erik.corry@gmail.com committed
259 260 261 262
      BYTECODE(CHECK_GREEDY)
        if (current == backtrack_sp[-1]) {
          backtrack_sp--;
          backtrack_stack_space++;
263
          pc = code_base + Load32Aligned(pc + 4);
erik.corry@gmail.com's avatar
erik.corry@gmail.com committed
264 265 266 267
        } else {
          pc += BC_CHECK_GREEDY_LENGTH;
        }
        break;
268
      BYTECODE(LOAD_CURRENT_CHAR) {
269
        int pos = current + (insn >> BYTECODE_SHIFT);
270
        if (pos >= subject.length() || pos < 0) {
271
          pc = code_base + Load32Aligned(pc + 4);
272 273 274 275 276 277
        } else {
          current_char = subject[pos];
          pc += BC_LOAD_CURRENT_CHAR_LENGTH;
        }
        break;
      }
erik.corry@gmail.com's avatar
erik.corry@gmail.com committed
278
      BYTECODE(LOAD_CURRENT_CHAR_UNCHECKED) {
279
        int pos = current + (insn >> BYTECODE_SHIFT);
erik.corry@gmail.com's avatar
erik.corry@gmail.com committed
280 281 282 283
        current_char = subject[pos];
        pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH;
        break;
      }
284
      BYTECODE(LOAD_2_CURRENT_CHARS) {
285
        int pos = current + (insn >> BYTECODE_SHIFT);
286
        if (pos + 2 > subject.length() || pos < 0) {
287
          pc = code_base + Load32Aligned(pc + 4);
288 289 290 291 292 293 294 295 296
        } else {
          Char next = subject[pos + 1];
          current_char =
              (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
          pc += BC_LOAD_2_CURRENT_CHARS_LENGTH;
        }
        break;
      }
      BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) {
297
        int pos = current + (insn >> BYTECODE_SHIFT);
298 299 300 301 302 303
        Char next = subject[pos + 1];
        current_char = (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
        pc += BC_LOAD_2_CURRENT_CHARS_UNCHECKED_LENGTH;
        break;
      }
      BYTECODE(LOAD_4_CURRENT_CHARS) {
304
        DCHECK(sizeof(Char) == 1);
305
        int pos = current + (insn >> BYTECODE_SHIFT);
306
        if (pos + 4 > subject.length() || pos < 0) {
307
          pc = code_base + Load32Aligned(pc + 4);
308 309 310 311 312 313 314 315 316 317 318 319 320
        } else {
          Char next1 = subject[pos + 1];
          Char next2 = subject[pos + 2];
          Char next3 = subject[pos + 3];
          current_char = (subject[pos] |
                          (next1 << 8) |
                          (next2 << 16) |
                          (next3 << 24));
          pc += BC_LOAD_4_CURRENT_CHARS_LENGTH;
        }
        break;
      }
      BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) {
321
        DCHECK(sizeof(Char) == 1);
322
        int pos = current + (insn >> BYTECODE_SHIFT);
323 324 325 326 327 328 329 330 331 332
        Char next1 = subject[pos + 1];
        Char next2 = subject[pos + 2];
        Char next3 = subject[pos + 3];
        current_char = (subject[pos] |
                        (next1 << 8) |
                        (next2 << 16) |
                        (next3 << 24));
        pc += BC_LOAD_4_CURRENT_CHARS_UNCHECKED_LENGTH;
        break;
      }
333 334 335 336 337 338 339 340 341
      BYTECODE(CHECK_4_CHARS) {
        uint32_t c = Load32Aligned(pc + 4);
        if (c == current_char) {
          pc = code_base + Load32Aligned(pc + 8);
        } else {
          pc += BC_CHECK_4_CHARS_LENGTH;
        }
        break;
      }
342
      BYTECODE(CHECK_CHAR) {
343
        uint32_t c = (insn >> BYTECODE_SHIFT);
344
        if (c == current_char) {
345
          pc = code_base + Load32Aligned(pc + 4);
346 347 348 349 350
        } else {
          pc += BC_CHECK_CHAR_LENGTH;
        }
        break;
      }
351 352 353 354 355 356 357 358 359
      BYTECODE(CHECK_NOT_4_CHARS) {
        uint32_t c = Load32Aligned(pc + 4);
        if (c != current_char) {
          pc = code_base + Load32Aligned(pc + 8);
        } else {
          pc += BC_CHECK_NOT_4_CHARS_LENGTH;
        }
        break;
      }
360
      BYTECODE(CHECK_NOT_CHAR) {
361
        uint32_t c = (insn >> BYTECODE_SHIFT);
362
        if (c != current_char) {
363
          pc = code_base + Load32Aligned(pc + 4);
364 365 366 367 368
        } else {
          pc += BC_CHECK_NOT_CHAR_LENGTH;
        }
        break;
      }
369 370 371 372 373 374 375 376 377
      BYTECODE(AND_CHECK_4_CHARS) {
        uint32_t c = Load32Aligned(pc + 4);
        if (c == (current_char & Load32Aligned(pc + 8))) {
          pc = code_base + Load32Aligned(pc + 12);
        } else {
          pc += BC_AND_CHECK_4_CHARS_LENGTH;
        }
        break;
      }
378
      BYTECODE(AND_CHECK_CHAR) {
379 380 381
        uint32_t c = (insn >> BYTECODE_SHIFT);
        if (c == (current_char & Load32Aligned(pc + 4))) {
          pc = code_base + Load32Aligned(pc + 8);
382
        } else {
383
          pc += BC_AND_CHECK_CHAR_LENGTH;
384 385 386
        }
        break;
      }
387 388 389 390 391 392 393 394 395
      BYTECODE(AND_CHECK_NOT_4_CHARS) {
        uint32_t c = Load32Aligned(pc + 4);
        if (c != (current_char & Load32Aligned(pc + 8))) {
          pc = code_base + Load32Aligned(pc + 12);
        } else {
          pc += BC_AND_CHECK_NOT_4_CHARS_LENGTH;
        }
        break;
      }
396
      BYTECODE(AND_CHECK_NOT_CHAR) {
397 398 399
        uint32_t c = (insn >> BYTECODE_SHIFT);
        if (c != (current_char & Load32Aligned(pc + 4))) {
          pc = code_base + Load32Aligned(pc + 8);
400 401 402 403 404 405
        } else {
          pc += BC_AND_CHECK_NOT_CHAR_LENGTH;
        }
        break;
      }
      BYTECODE(MINUS_AND_CHECK_NOT_CHAR) {
406 407 408
        uint32_t c = (insn >> BYTECODE_SHIFT);
        uint32_t minus = Load16Aligned(pc + 4);
        uint32_t mask = Load16Aligned(pc + 6);
409
        if (c != ((current_char - minus) & mask)) {
410
          pc = code_base + Load32Aligned(pc + 8);
411
        } else {
412
          pc += BC_MINUS_AND_CHECK_NOT_CHAR_LENGTH;
413 414 415
        }
        break;
      }
416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446
      BYTECODE(CHECK_CHAR_IN_RANGE) {
        uint32_t from = Load16Aligned(pc + 4);
        uint32_t to = Load16Aligned(pc + 6);
        if (from <= current_char && current_char <= to) {
          pc = code_base + Load32Aligned(pc + 8);
        } else {
          pc += BC_CHECK_CHAR_IN_RANGE_LENGTH;
        }
        break;
      }
      BYTECODE(CHECK_CHAR_NOT_IN_RANGE) {
        uint32_t from = Load16Aligned(pc + 4);
        uint32_t to = Load16Aligned(pc + 6);
        if (from > current_char || current_char > to) {
          pc = code_base + Load32Aligned(pc + 8);
        } else {
          pc += BC_CHECK_CHAR_NOT_IN_RANGE_LENGTH;
        }
        break;
      }
      BYTECODE(CHECK_BIT_IN_TABLE) {
        int mask = RegExpMacroAssembler::kTableMask;
        byte b = pc[8 + ((current_char & mask) >> kBitsPerByteLog2)];
        int bit = (current_char & (kBitsPerByte - 1));
        if ((b & (1 << bit)) != 0) {
          pc = code_base + Load32Aligned(pc + 4);
        } else {
          pc += BC_CHECK_BIT_IN_TABLE_LENGTH;
        }
        break;
      }
447
      BYTECODE(CHECK_LT) {
448
        uint32_t limit = (insn >> BYTECODE_SHIFT);
449
        if (current_char < limit) {
450
          pc = code_base + Load32Aligned(pc + 4);
451 452 453 454 455 456
        } else {
          pc += BC_CHECK_LT_LENGTH;
        }
        break;
      }
      BYTECODE(CHECK_GT) {
457
        uint32_t limit = (insn >> BYTECODE_SHIFT);
458
        if (current_char > limit) {
459
          pc = code_base + Load32Aligned(pc + 4);
460 461 462 463 464 465
        } else {
          pc += BC_CHECK_GT_LENGTH;
        }
        break;
      }
      BYTECODE(CHECK_REGISTER_LT)
466 467
        if (registers[insn >> BYTECODE_SHIFT] < Load32Aligned(pc + 4)) {
          pc = code_base + Load32Aligned(pc + 8);
468 469 470 471 472
        } else {
          pc += BC_CHECK_REGISTER_LT_LENGTH;
        }
        break;
      BYTECODE(CHECK_REGISTER_GE)
473 474
        if (registers[insn >> BYTECODE_SHIFT] >= Load32Aligned(pc + 4)) {
          pc = code_base + Load32Aligned(pc + 8);
475 476 477 478
        } else {
          pc += BC_CHECK_REGISTER_GE_LENGTH;
        }
        break;
479
      BYTECODE(CHECK_REGISTER_EQ_POS)
480 481
        if (registers[insn >> BYTECODE_SHIFT] == current) {
          pc = code_base + Load32Aligned(pc + 4);
482 483 484 485
        } else {
          pc += BC_CHECK_REGISTER_EQ_POS_LENGTH;
        }
        break;
486
      BYTECODE(CHECK_NOT_REGS_EQUAL)
erik.corry@gmail.com's avatar
erik.corry@gmail.com committed
487 488
        if (registers[insn >> BYTECODE_SHIFT] ==
            registers[Load32Aligned(pc + 4)]) {
489 490
          pc += BC_CHECK_NOT_REGS_EQUAL_LENGTH;
        } else {
491
          pc = code_base + Load32Aligned(pc + 8);
492 493
        }
        break;
494
      BYTECODE(CHECK_NOT_BACK_REF) {
495 496
        int from = registers[insn >> BYTECODE_SHIFT];
        int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
497 498 499 500 501
        if (from >= 0 && len > 0) {
          if (current + len > subject.length() ||
              CompareChars(&subject[from], &subject[current], len) != 0) {
            pc = code_base + Load32Aligned(pc + 4);
            break;
502 503 504 505 506 507
          }
          current += len;
        }
        pc += BC_CHECK_NOT_BACK_REF_LENGTH;
        break;
      }
508 509 510 511 512 513 514 515 516 517 518 519 520 521
      BYTECODE(CHECK_NOT_BACK_REF_BACKWARD) {
        int from = registers[insn >> BYTECODE_SHIFT];
        int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
        if (from >= 0 && len > 0) {
          if (current - len < 0 ||
              CompareChars(&subject[from], &subject[current - len], len) != 0) {
            pc = code_base + Load32Aligned(pc + 4);
            break;
          }
          current -= len;
        }
        pc += BC_CHECK_NOT_BACK_REF_BACKWARD_LENGTH;
        break;
      }
522
      BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE)
523
      BYTECODE(CHECK_NOT_BACK_REF_NO_CASE) {
524 525
        bool unicode =
            (insn & BYTECODE_MASK) == BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE;
526 527
        int from = registers[insn >> BYTECODE_SHIFT];
        int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
528 529
        if (from >= 0 && len > 0) {
          if (current + len > subject.length() ||
530 531
              !BackRefMatchesNoCase(isolate, from, current, len, subject,
                                    unicode)) {
532 533 534 535
            pc = code_base + Load32Aligned(pc + 4);
            break;
          }
          current += len;
536
        }
537 538 539
        pc += BC_CHECK_NOT_BACK_REF_NO_CASE_LENGTH;
        break;
      }
540
      BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD)
541
      BYTECODE(CHECK_NOT_BACK_REF_NO_CASE_BACKWARD) {
542 543
        bool unicode = (insn & BYTECODE_MASK) ==
                       BC_CHECK_NOT_BACK_REF_NO_CASE_UNICODE_BACKWARD;
544 545 546 547
        int from = registers[insn >> BYTECODE_SHIFT];
        int len = registers[(insn >> BYTECODE_SHIFT) + 1] - from;
        if (from >= 0 && len > 0) {
          if (current - len < 0 ||
548 549
              !BackRefMatchesNoCase(isolate, from, current - len, len, subject,
                                    unicode)) {
550
            pc = code_base + Load32Aligned(pc + 4);
551
            break;
552
          }
553
          current -= len;
554
        }
555
        pc += BC_CHECK_NOT_BACK_REF_NO_CASE_BACKWARD_LENGTH;
556 557
        break;
      }
558 559
      BYTECODE(CHECK_AT_START)
        if (current == 0) {
560
          pc = code_base + Load32Aligned(pc + 4);
561 562 563 564
        } else {
          pc += BC_CHECK_AT_START_LENGTH;
        }
        break;
565
      BYTECODE(CHECK_NOT_AT_START)
566
        if (current + (insn >> BYTECODE_SHIFT) == 0) {
567 568
          pc += BC_CHECK_NOT_AT_START_LENGTH;
        } else {
569
          pc = code_base + Load32Aligned(pc + 4);
570 571
        }
        break;
572 573 574 575 576 577 578 579 580
      BYTECODE(SET_CURRENT_POSITION_FROM_END) {
        int by = static_cast<uint32_t>(insn) >> BYTECODE_SHIFT;
        if (subject.length() - current > by) {
          current = subject.length() - by;
          current_char = subject[current - 1];
        }
        pc += BC_SET_CURRENT_POSITION_FROM_END_LENGTH;
        break;
      }
581 582 583 584 585 586 587 588
      default:
        UNREACHABLE();
        break;
    }
  }
}


// Entry point of the interpreter: runs the bytecode in |code_array| against
// the flat string |subject|, starting at |start_position| and using
// |registers| as the register file. The subject's flat content is viewed as
// either a one-byte or a two-byte vector and handed to the matching RawMatch
// instantiation. The character just before |start_position| is passed as the
// initial current character; at position 0 a newline is used instead.
// Returns whatever RawMatch returns.
RegExpImpl::IrregexpResult IrregexpInterpreter::Match(
    Isolate* isolate,
    Handle<ByteArray> code_array,
    Handle<String> subject,
    int* registers,
    int start_position) {
  DCHECK(subject->IsFlat());

  DisallowHeapAllocation no_gc;
  const byte* code_base = code_array->GetDataStartAddress();
  String::FlatContent subject_content = subject->GetFlatContent();
  const bool starts_at_beginning = (start_position == 0);

  if (!subject_content.IsOneByte()) {
    DCHECK(subject_content.IsTwoByte());
    Vector<const uc16> two_byte_chars = subject_content.ToUC16Vector();
    uc16 previous_char =
        starts_at_beginning ? '\n' : two_byte_chars[start_position - 1];
    return RawMatch(isolate, code_base, two_byte_chars, registers,
                    start_position, previous_char);
  }

  Vector<const uint8_t> one_byte_chars = subject_content.ToOneByteVector();
  uc16 previous_char =
      starts_at_beginning ? '\n' : one_byte_chars[start_position - 1];
  return RawMatch(isolate, code_base, one_byte_chars, registers,
                  start_position, previous_char);
}

}  // namespace internal
}  // namespace v8

#endif  // V8_INTERPRETED_REGEXP