regexp-macro-assembler-x64.cc 47.2 KB
Newer Older
1
// Copyright 2012 the V8 project authors. All rights reserved.
2 3
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4

5
#include "src/v8.h"
6

7
#if V8_TARGET_ARCH_X64
8

9 10 11 12
#include "src/cpu-profiler.h"
#include "src/log.h"
#include "src/macro-assembler.h"
#include "src/regexp-macro-assembler.h"
13 14 15
#include "src/regexp-stack.h"
#include "src/serialize.h"
#include "src/unicode.h"
16
#include "src/x64/regexp-macro-assembler-x64.h"
17 18 19 20

namespace v8 {
namespace internal {

21
#ifndef V8_INTERPRETED_REGEXP
lrn@chromium.org's avatar
lrn@chromium.org committed
22

23 24
/*
 * This assembler uses the following register assignment convention
 * - rdx : Currently loaded character(s) as Latin1 or UC16.  Must be loaded
 *         using LoadCurrentCharacter before using any of the dispatch methods.
 *         Temporarily stores the index of capture start after a matching pass
 *         for a global regexp.
 * - rdi : Current position in input, as negative offset from end of string.
 *         Please notice that this is the byte offset, not the character
 *         offset!  Is always a 32-bit signed (negative) offset, but must be
 *         maintained sign-extended to 64 bits, since it is used as index.
 * - rsi : End of input (points to byte after last character in input),
 *         so that rsi+rdi points to the current character.
 * - rbp : Frame pointer.  Used to access arguments, local variables and
 *         RegExp registers.
 * - rsp : Points to tip of C stack.
 * - rcx : Points to tip of backtrack stack.  The backtrack stack contains
 *         only 32-bit values.  Most are offsets from some base (e.g., character
 *         positions from end of string or code location from Code* pointer).
 * - r8  : Code object pointer.  Used to convert between absolute and
 *         code-object-relative addresses.
 *
 * The registers rax, rbx, r9 and r11 are free to use for computations.
 * If changed to use r12+, they should be saved as callee-save registers.
 * The macro assembler special registers r12 and r13 (kSmiConstantRegister,
 * kRootRegister) aren't special during execution of RegExp code (they don't
 * hold the values assumed when creating JS code), so no Smi or Root related
 * macro operations can be used.
 *
 * Each call to a C++ method should retain these registers.
 *
 * The stack will have the following content, in some order, indexable from the
 * frame pointer (see, e.g., kStackHighEnd):
 *    - Isolate* isolate     (address of the current isolate)
 *    - direct_call          (if 1, direct call from JavaScript code, if 0 call
 *                            through the runtime system)
 *    - stack_area_base      (high end of the memory area to use as
 *                            backtracking stack)
 *    - capture array size   (may fit multiple sets of matches)
 *    - int* capture_array   (int[num_saved_registers_], for output).
 *    - end of input         (address of end of string)
 *    - start of input       (address of first character in string)
 *    - start index          (character index of start)
 *    - String* input_string (input string)
 *    - return address
 *    - backup of callee save registers (rbx, possibly rsi and rdi).
 *    - success counter      (only useful for global regexp to count matches)
 *    - Offset of location before start of input (effectively character
 *      position -1).  Used to initialize capture registers to a non-position.
 *    - At start of string (if 1, we are starting at the start of the
 *      string, otherwise 0)
 *    - register 0  rbp[-n]   (Only positions must be stored in the first
 *    - register 1  rbp[-n-8]  num_saved_registers_ registers)
 *    - ...
 *
 * The first num_saved_registers_ registers are initialized to point to
 * "character -1" in the string (i.e., char_size() bytes before the first
 * character of the string).  The remaining registers start out uninitialized.
 *
 * The first seven values must be provided by the calling code by
 * calling the code's entry address cast to a function pointer with the
 * following signature:
 * int (*match)(String* input_string,
 *              int start_index,
 *              Address start,
 *              Address end,
 *              int* capture_output_array,
 *              bool at_start,
 *              byte* stack_area_base,
 *              bool direct_call)
 */

94
#define __ ACCESS_MASM((&masm_))
95 96 97

// Constructs the assembler and emits the first two pieces of generated code:
// a jump to the (not yet written) entry code, followed by the bound start
// label where the regexp body will be emitted.
//   mode              - stored in mode_; compared against LATIN1/UC16 by the
//                       emitters in this file.
//   registers_to_save - initial value of both num_registers_ and
//                       num_saved_registers_; must be even (DCHECKed).
//   zone              - provides the isolate for masm_ and is passed to the
//                       base class and to code_relative_fixup_positions_.
RegExpMacroAssemblerX64::RegExpMacroAssemblerX64(
    Mode mode,
    int registers_to_save,
    Zone* zone)
    : NativeRegExpMacroAssembler(zone),
      masm_(zone->isolate(), NULL, kRegExpCodeSize),
      no_root_array_scope_(&masm_),
      code_relative_fixup_positions_(4, zone),
      mode_(mode),
      num_registers_(registers_to_save),
      num_saved_registers_(registers_to_save),
      entry_label_(),
      start_label_(),
      success_label_(),
      backtrack_label_(),
      exit_label_() {
  DCHECK_EQ(0, registers_to_save % 2);
  __ jmp(&entry_label_);   // We'll write the entry code when we know more.
  __ bind(&start_label_);  // And then continue from here.
}


// Marks every label owned by this assembler as unused, so that discarding the
// assembler without ever calling GetCode is safe.
RegExpMacroAssemblerX64::~RegExpMacroAssemblerX64() {
  Label* const owned_labels[] = {
      &entry_label_,
      &start_label_,
      &success_label_,
      &backtrack_label_,
      &exit_label_,
      &check_preempt_label_,
      &stack_overflow_label_,
  };
  const int label_count =
      static_cast<int>(sizeof(owned_labels) / sizeof(owned_labels[0]));
  for (int i = 0; i < label_count; ++i) {
    owned_labels[i]->Unuse();
  }
}


// Returns the slack constant defined by RegExpStack (kStackLimitSlack).
int RegExpMacroAssemblerX64::stack_limit_slack() {
  const int slack = RegExpStack::kStackLimitSlack;
  return slack;
}


void RegExpMacroAssemblerX64::AdvanceCurrentPosition(int by) {
  if (by != 0) {
    __ addq(rdi, Immediate(by * char_size()));
  }
}


void RegExpMacroAssemblerX64::AdvanceRegister(int reg, int by) {
143 144
  DCHECK(reg >= 0);
  DCHECK(reg < num_registers_);
145
  if (by != 0) {
146
    __ addp(register_location(reg), Immediate(by));
147 148 149 150 151 152 153 154
  }
}


void RegExpMacroAssemblerX64::Backtrack() {
  CheckPreemption();
  // Pop Code* offset from backtrack stack, add Code* and jump to location.
  Pop(rbx);
155
  __ addp(rbx, code_object_pointer());
156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
  __ jmp(rbx);
}


// Binds |label| at the current position of the underlying macro assembler.
void RegExpMacroAssemblerX64::Bind(Label* label) {
  __ bind(label);
}


// Emits a comparison of the current character against |c| and branches to
// |on_equal| (via BranchOrBacktrack) when they are equal.
void RegExpMacroAssemblerX64::CheckCharacter(uint32_t c, Label* on_equal) {
  const Register current = current_character();
  __ cmpl(current, Immediate(c));
  BranchOrBacktrack(equal, on_equal);
}


// Emits a comparison of the current character against |limit| and branches to
// |on_greater| (via BranchOrBacktrack) on the `greater` condition.
void RegExpMacroAssemblerX64::CheckCharacterGT(uc16 limit, Label* on_greater) {
  const Register current = current_character();
  __ cmpl(current, Immediate(limit));
  BranchOrBacktrack(greater, on_greater);
}


// Emits code that branches to |on_at_start| when the current position is the
// start of the input.  That requires both that the match began at start index
// zero and that rsi+rdi equals the saved input start; otherwise execution
// falls through.
void RegExpMacroAssemblerX64::CheckAtStart(Label* on_at_start) {
  Label not_at_start;
  // Did we start the match at the start of the string at all?
  __ cmpl(Operand(rbp, kStartIndex), Immediate(0));
  BranchOrBacktrack(not_equal, &not_at_start);
  // If we did, are we still at the start of the input?
  __ leap(rax, Operand(rsi, rdi, times_1, 0));
  __ cmpp(rax, Operand(rbp, kInputStart));
  BranchOrBacktrack(equal, on_at_start);
  __ bind(&not_at_start);
}


// Emits code that branches to |on_not_at_start| unless the current position
// is the start of the input, i.e. when the start index is non-zero or rsi+rdi
// differs from the saved input start.
void RegExpMacroAssemblerX64::CheckNotAtStart(Label* on_not_at_start) {
  // Did we start the match at the start of the string at all?
  __ cmpl(Operand(rbp, kStartIndex), Immediate(0));
  BranchOrBacktrack(not_equal, on_not_at_start);
  // If we did, are we still at the start of the input?
  __ leap(rax, Operand(rsi, rdi, times_1, 0));
  __ cmpp(rax, Operand(rbp, kInputStart));
  BranchOrBacktrack(not_equal, on_not_at_start);
}


// Emits a comparison of the current character against |limit| and branches to
// |on_less| (via BranchOrBacktrack) on the `less` condition.
void RegExpMacroAssemblerX64::CheckCharacterLT(uc16 limit, Label* on_less) {
  const Register current = current_character();
  __ cmpl(current, Immediate(limit));
  BranchOrBacktrack(less, on_less);
}


// Emits code that compares the current position (low 32 bits of rdi) with the
// value on top of the backtrack stack.  When they are equal, the top entry is
// dropped and control goes unconditionally to |on_equal|; otherwise execution
// falls through with the backtrack stack untouched.
void RegExpMacroAssemblerX64::CheckGreedyLoop(Label* on_equal) {
  Label fallthrough;
  __ cmpl(rdi, Operand(backtrack_stackpointer(), 0));
  __ j(not_equal, &fallthrough);
  Drop();
  BranchOrBacktrack(no_condition, on_equal);
  __ bind(&fallthrough);
}


void RegExpMacroAssemblerX64::CheckNotBackReferenceIgnoreCase(
    int start_reg,
    Label* on_no_match) {
  Label fallthrough;
221 222
  ReadPositionFromRegister(rdx, start_reg);  // Offset of start of capture
  ReadPositionFromRegister(rbx, start_reg + 1);  // Offset of end of capture
223
  __ subp(rbx, rdx);  // Length of capture.
224 225 226 227 228 229 230 231 232 233 234 235 236 237 238

  // -----------------------
  // rdx  = Start offset of capture.
  // rbx = Length of capture

  // If length is negative, this code will fail (it's a symptom of a partial or
  // illegal capture where start of capture after end of capture).
  // This must not happen (no back-reference can reference a capture that wasn't
  // closed before in the reg-exp, and we must not generate code that can cause
  // this condition).

  // If length is zero, either the capture is empty or it is nonparticipating.
  // In either case succeed immediately.
  __ j(equal, &fallthrough);

239 240 241 242 243 244 245 246
  // -----------------------
  // rdx - Start of capture
  // rbx - length of capture
  // Check that there are sufficient characters left in the input.
  __ movl(rax, rdi);
  __ addl(rax, rbx);
  BranchOrBacktrack(greater, on_no_match);

247
  if (mode_ == LATIN1) {
248 249 250 251 252
    Label loop_increment;
    if (on_no_match == NULL) {
      on_no_match = &backtrack_label_;
    }

253 254
    __ leap(r9, Operand(rsi, rdx, times_1, 0));
    __ leap(r11, Operand(rsi, rdi, times_1, 0));
255
    __ addp(rbx, r9);  // End of capture
256 257 258 259 260 261 262 263 264 265 266 267
    // ---------------------
    // r11 - current input character address
    // r9 - current capture character address
    // rbx - end of capture

    Label loop;
    __ bind(&loop);
    __ movzxbl(rdx, Operand(r9, 0));
    __ movzxbl(rax, Operand(r11, 0));
    // al - input character
    // dl - capture character
    __ cmpb(rax, rdx);
268
    __ j(equal, &loop_increment);
269 270 271 272

    // Mismatch, try case-insensitive match (converting letters to lower-case).
    // I.e., if or-ing with 0x20 makes values equal and in range 'a'-'z', it's
    // a match.
273 274
    __ orp(rax, Immediate(0x20));  // Convert match character to lower-case.
    __ orp(rdx, Immediate(0x20));  // Convert capture character to lower-case.
275 276 277 278
    __ cmpb(rax, rdx);
    __ j(not_equal, on_no_match);  // Definitely not equal.
    __ subb(rax, Immediate('a'));
    __ cmpb(rax, Immediate('z' - 'a'));
279 280 281 282 283 284 285
    __ j(below_equal, &loop_increment);  // In range 'a'-'z'.
    // Latin-1: Check for values in range [224,254] but not 247.
    __ subb(rax, Immediate(224 - 'a'));
    __ cmpb(rax, Immediate(254 - 224));
    __ j(above, on_no_match);  // Weren't Latin-1 letters.
    __ cmpb(rax, Immediate(247 - 224));  // Check for 247.
    __ j(equal, on_no_match);
286 287
    __ bind(&loop_increment);
    // Increment pointers into match and capture strings.
288 289
    __ addp(r11, Immediate(1));
    __ addp(r9, Immediate(1));
290
    // Compare to end of capture, and loop if not done.
291
    __ cmpp(r9, rbx);
292 293 294
    __ j(below, &loop);

    // Compute new value of character position after the matched part.
295
    __ movp(rdi, r11);
296 297
    __ subq(rdi, rsi);
  } else {
298
    DCHECK(mode_ == UC16);
299
    // Save important/volatile registers before calling C function.
300
#ifndef _WIN64
301
    // Caller save on Linux and callee save in Windows.
302 303
    __ pushq(rsi);
    __ pushq(rdi);
304
#endif
305
    __ pushq(backtrack_stackpointer());
306

307
    static const int num_arguments = 4;
308
    __ PrepareCallCFunction(num_arguments);
309 310 311 312 313

    // Put arguments into parameter registers. Parameters are
    //   Address byte_offset1 - Address captured substring's start.
    //   Address byte_offset2 - Address of current character position.
    //   size_t byte_length - length of capture in bytes(!)
314
    //   Isolate* isolate
315
#ifdef _WIN64
316
    // Compute and set byte_offset1 (start of capture).
317
    __ leap(rcx, Operand(rsi, rdx, times_1, 0));
318
    // Set byte_offset2.
319
    __ leap(rdx, Operand(rsi, rdi, times_1, 0));
320
    // Set byte_length.
321
    __ movp(r8, rbx);
322
    // Isolate.
323
    __ LoadAddress(r9, ExternalReference::isolate_address(isolate()));
324 325
#else  // AMD64 calling convention
    // Compute byte_offset2 (current position = rsi+rdi).
326
    __ leap(rax, Operand(rsi, rdi, times_1, 0));
327
    // Compute and set byte_offset1 (start of capture).
328
    __ leap(rdi, Operand(rsi, rdx, times_1, 0));
329
    // Set byte_offset2.
330
    __ movp(rsi, rax);
331
    // Set byte_length.
332
    __ movp(rdx, rbx);
333
    // Isolate.
334
    __ LoadAddress(rcx, ExternalReference::isolate_address(isolate()));
335
#endif
336 337 338 339 340

    { // NOLINT: Can't find a way to open this scope without confusing the
      // linter.
      AllowExternalCallThatCantCauseGC scope(&masm_);
      ExternalReference compare =
341
          ExternalReference::re_case_insensitive_compare_uc16(isolate());
342 343
      __ CallCFunction(compare, num_arguments);
    }
344 345

    // Restore original values before reacting on result value.
346
    __ Move(code_object_pointer(), masm_.CodeObject());
347
    __ popq(backtrack_stackpointer());
348
#ifndef _WIN64
349 350
    __ popq(rdi);
    __ popq(rsi);
351 352 353
#endif

    // Check if function returned non-zero for success or zero for failure.
354
    __ testp(rax, rax);
355 356 357 358 359 360 361 362 363 364 365 366 367 368 369
    BranchOrBacktrack(zero, on_no_match);
    // On success, increment position by length of capture.
    // Requires that rbx is callee save (true for both Win64 and AMD64 ABIs).
    __ addq(rdi, rbx);
  }
  __ bind(&fallthrough);
}


void RegExpMacroAssemblerX64::CheckNotBackReference(
    int start_reg,
    Label* on_no_match) {
  Label fallthrough;

  // Find length of back-referenced capture.
370 371
  ReadPositionFromRegister(rdx, start_reg);  // Offset of start of capture
  ReadPositionFromRegister(rax, start_reg + 1);  // Offset of end of capture
372
  __ subp(rax, rdx);  // Length to check.
373 374 375 376

  // Fail on partial or illegal capture (start of capture after end of capture).
  // This must not happen (no back-reference can reference a capture that wasn't
  // closed before in the reg-exp).
377
  __ Check(greater_equal, kInvalidCaptureReferenced);
378 379 380 381 382 383 384 385 386 387 388 389 390 391

  // Succeed on empty capture (including non-participating capture)
  __ j(equal, &fallthrough);

  // -----------------------
  // rdx - Start of capture
  // rax - length of capture

  // Check that there are sufficient characters left in the input.
  __ movl(rbx, rdi);
  __ addl(rbx, rax);
  BranchOrBacktrack(greater, on_no_match);

  // Compute pointers to match string and capture string
392
  __ leap(rbx, Operand(rsi, rdi, times_1, 0));  // Start of match.
393
  __ addp(rdx, rsi);  // Start of capture.
394
  __ leap(r9, Operand(rdx, rax, times_1, 0));  // End of capture
395 396 397 398 399 400 401 402

  // -----------------------
  // rbx - current capture character address.
  // rbx - current input character address .
  // r9 - end of input to match (capture length after rbx).

  Label loop;
  __ bind(&loop);
403
  if (mode_ == LATIN1) {
404 405 406
    __ movzxbl(rax, Operand(rdx, 0));
    __ cmpb(rax, Operand(rbx, 0));
  } else {
407
    DCHECK(mode_ == UC16);
408 409 410 411 412
    __ movzxwl(rax, Operand(rdx, 0));
    __ cmpw(rax, Operand(rbx, 0));
  }
  BranchOrBacktrack(not_equal, on_no_match);
  // Increment pointers into capture and match string.
413 414
  __ addp(rbx, Immediate(char_size()));
  __ addp(rdx, Immediate(char_size()));
415
  // Check if we have reached end of match area.
416
  __ cmpp(rdx, r9);
417 418 419 420
  __ j(below, &loop);

  // Success.
  // Set current character position to position after match.
421
  __ movp(rdi, rbx);
422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437
  __ subq(rdi, rsi);

  __ bind(&fallthrough);
}


// Emits a comparison of the current character against |c| and branches to
// |on_not_equal| (via BranchOrBacktrack) when they differ.
void RegExpMacroAssemblerX64::CheckNotCharacter(uint32_t c,
                                                Label* on_not_equal) {
  const Register current = current_character();
  __ cmpl(current, Immediate(c));
  BranchOrBacktrack(not_equal, on_not_equal);
}


// Emits code that branches to |on_equal| when
// (current character & mask) == c.  When c is zero a single testl against the
// mask is used; otherwise the masked value is computed in rax and compared.
void RegExpMacroAssemblerX64::CheckCharacterAfterAnd(uint32_t c,
                                                     uint32_t mask,
                                                     Label* on_equal) {
  if (c == 0) {
    __ testl(current_character(), Immediate(mask));
  } else {
    __ movl(rax, Immediate(mask));
    __ andp(rax, current_character());
    __ cmpl(rax, Immediate(c));
  }
  BranchOrBacktrack(equal, on_equal);
}


// Emits code that branches to |on_not_equal| when
// (current character & mask) != c.  When c is zero a single testl against the
// mask is used; otherwise the masked value is computed in rax and compared.
void RegExpMacroAssemblerX64::CheckNotCharacterAfterAnd(uint32_t c,
                                                        uint32_t mask,
                                                        Label* on_not_equal) {
  if (c == 0) {
    __ testl(current_character(), Immediate(mask));
  } else {
    __ movl(rax, Immediate(mask));
    __ andp(rax, current_character());
    __ cmpl(rax, Immediate(c));
  }
  BranchOrBacktrack(not_equal, on_not_equal);
}


// Emits code that branches to |on_not_equal| when
// ((current character - minus) & mask) != c.  The intermediate value is
// computed in rax.  |minus| must be below String::kMaxUtf16CodeUnit
// (DCHECKed).
void RegExpMacroAssemblerX64::CheckNotCharacterAfterMinusAnd(
    uc16 c,
    uc16 minus,
    uc16 mask,
    Label* on_not_equal) {
  DCHECK(minus < String::kMaxUtf16CodeUnit);
  __ leap(rax, Operand(current_character(), -minus));
  __ andp(rax, Immediate(mask));
  __ cmpl(rax, Immediate(c));
  BranchOrBacktrack(not_equal, on_not_equal);
}


476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500
// Emits code that branches to |on_in_range| when the current character lies
// in [from, to].  The test is a single unsigned comparison of
// (current - from) against (to - from), with the difference held in rax.
void RegExpMacroAssemblerX64::CheckCharacterInRange(
    uc16 from,
    uc16 to,
    Label* on_in_range) {
  const Register current = current_character();
  __ leal(rax, Operand(current, -from));
  __ cmpl(rax, Immediate(to - from));
  BranchOrBacktrack(below_equal, on_in_range);
}


// Emits code that branches to |on_not_in_range| when the current character
// lies outside [from, to].  The test is a single unsigned comparison of
// (current - from) against (to - from), with the difference held in rax.
void RegExpMacroAssemblerX64::CheckCharacterNotInRange(
    uc16 from,
    uc16 to,
    Label* on_not_in_range) {
  const Register current = current_character();
  __ leal(rax, Operand(current, -from));
  __ cmpl(rax, Immediate(to - from));
  BranchOrBacktrack(above, on_not_in_range);
}


// Emits code that looks up the current character in the byte table |table|
// and branches to |on_bit_set| when the entry is non-zero.  Unless the mode
// is LATIN1 and kTableMask equals String::kMaxOneByteCharCode, the character
// is first masked with kTableMask (in rbx) to form the table index.
// Uses rax for the table and possibly rbx for the index.
void RegExpMacroAssemblerX64::CheckBitInTable(
    Handle<ByteArray> table,
    Label* on_bit_set) {
  __ Move(rax, table);
  Register index = current_character();
  if (mode_ != LATIN1 || kTableMask != String::kMaxOneByteCharCode) {
    __ movp(rbx, current_character());
    __ andp(rbx, Immediate(kTableMask));
    index = rbx;
  }
  __ cmpb(FieldOperand(rax, index, times_1, ByteArray::kHeaderSize),
          Immediate(0));
  BranchOrBacktrack(not_equal, on_bit_set);
}


512 513 514
// Emits specialised matching code for a built-in character class, if this
// assembler has one.
//   type        - class selector: 's', 'S', 'd', 'D', '.', 'n', 'w', 'W' or
//                 '*'.
//   on_no_match - target passed to BranchOrBacktrack when the current
//                 character is not in the class.
// Returns true when the class has been handled here (for '*' nothing needs to
// be emitted), and false when there is no custom implementation: 'S', 's' in
// non-LATIN1 mode, and any unrecognised type.
bool RegExpMacroAssemblerX64::CheckSpecialCharacterClass(uc16 type,
                                                         Label* on_no_match) {
  // Range checks (c in min..max) are generally implemented by an unsigned
  // (c - min) <= (max - min) check, using the sequence:
  //   leap(rax, Operand(current_character(), -min)) or sub(rax, Immediate(min))
  //   cmp(rax, Immediate(max - min))
  switch (type) {
  case 's':
    // Match space-characters
    if (mode_ == LATIN1) {
      // One byte space characters are '\t'..'\r', ' ' and \u00a0.
      Label success;
      __ cmpl(current_character(), Immediate(' '));
      __ j(equal, &success, Label::kNear);
      // Check range 0x09..0x0d
      __ leap(rax, Operand(current_character(), -'\t'));
      __ cmpl(rax, Immediate('\r' - '\t'));
      __ j(below_equal, &success, Label::kNear);
      // \u00a0 (NBSP).
      __ cmpl(rax, Immediate(0x00a0 - '\t'));
      BranchOrBacktrack(not_equal, on_no_match);
      __ bind(&success);
      return true;
    }
    return false;
  case 'S':
    // The emitted code for generic character classes is good enough.
    return false;
  case 'd':
    // Match ASCII digits ('0'..'9')
    __ leap(rax, Operand(current_character(), -'0'));
    __ cmpl(rax, Immediate('9' - '0'));
    BranchOrBacktrack(above, on_no_match);
    return true;
  case 'D':
    // Match non ASCII-digits
    __ leap(rax, Operand(current_character(), -'0'));
    __ cmpl(rax, Immediate('9' - '0'));
    BranchOrBacktrack(below_equal, on_no_match);
    return true;
  case '.': {
    // Match non-newlines (not 0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
    __ movl(rax, current_character());
    __ xorp(rax, Immediate(0x01));
    // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
    __ subl(rax, Immediate(0x0b));
    __ cmpl(rax, Immediate(0x0c - 0x0b));
    BranchOrBacktrack(below_equal, on_no_match);
    if (mode_ == UC16) {
      // Compare original value to 0x2028 and 0x2029, using the already
      // computed (current_char ^ 0x01 - 0x0b). I.e., check for
      // 0x201d (0x2028 - 0x0b) or 0x201e.
      __ subl(rax, Immediate(0x2028 - 0x0b));
      __ cmpl(rax, Immediate(0x2029 - 0x2028));
      BranchOrBacktrack(below_equal, on_no_match);
    }
    return true;
  }
  case 'n': {
    // Match newlines (0x0a('\n'), 0x0d('\r'), 0x2028 and 0x2029)
    __ movl(rax, current_character());
    __ xorp(rax, Immediate(0x01));
    // See if current character is '\n'^1 or '\r'^1, i.e., 0x0b or 0x0c
    __ subl(rax, Immediate(0x0b));
    __ cmpl(rax, Immediate(0x0c - 0x0b));
    if (mode_ == LATIN1) {
      BranchOrBacktrack(above, on_no_match);
    } else {
      Label done;
      BranchOrBacktrack(below_equal, &done);
      // Compare original value to 0x2028 and 0x2029, using the already
      // computed (current_char ^ 0x01 - 0x0b). I.e., check for
      // 0x201d (0x2028 - 0x0b) or 0x201e.
      __ subl(rax, Immediate(0x2028 - 0x0b));
      __ cmpl(rax, Immediate(0x2029 - 0x2028));
      BranchOrBacktrack(above, on_no_match);
      __ bind(&done);
    }
    return true;
  }
  case 'w': {
    if (mode_ != LATIN1) {
      // Table is 256 entries, so all Latin1 characters can be tested.
      __ cmpl(current_character(), Immediate('z'));
      BranchOrBacktrack(above, on_no_match);
    }
    __ Move(rbx, ExternalReference::re_word_character_map());
    DCHECK_EQ(0, word_character_map[0]);  // Character '\0' is not a word char.
    __ testb(Operand(rbx, current_character(), times_1, 0),
             current_character());
    BranchOrBacktrack(zero, on_no_match);
    return true;
  }
  case 'W': {
    Label done;
    if (mode_ != LATIN1) {
      // Table is 256 entries, so all Latin1 characters can be tested.
      __ cmpl(current_character(), Immediate('z'));
      __ j(above, &done);
    }
    __ Move(rbx, ExternalReference::re_word_character_map());
    DCHECK_EQ(0, word_character_map[0]);  // Character '\0' is not a word char.
    __ testb(Operand(rbx, current_character(), times_1, 0),
             current_character());
    BranchOrBacktrack(not_zero, on_no_match);
    if (mode_ != LATIN1) {
      __ bind(&done);
    }
    return true;
  }

  case '*':
    // Match any character.
    return true;
  // No custom implementation (yet): s(UC16), S(UC16).
  default:
    return false;
  }
}


void RegExpMacroAssemblerX64::Fail() {
634 635 636 637
  STATIC_ASSERT(FAILURE == 0);  // Return value for failure is zero.
  if (!global()) {
    __ Set(rax, FAILURE);
  }
638 639 640 641
  __ jmp(&exit_label_);
}


642
Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
643
  Label return_rax;
644 645 646 647
  // Finalize code - write the entry point code now we know how many
  // registers we need.
  // Entry code:
  __ bind(&entry_label_);
648 649 650 651 652 653

  // Tell the system that we have a stack frame.  Because the type is MANUAL, no
  // code is generated.
  FrameScope scope(&masm_, StackFrame::MANUAL);

  // Actually emit code to start a new stack frame.
654
  __ pushq(rbp);
655
  __ movp(rbp, rsp);
656 657
  // Save parameters and callee-save registers. Order here should correspond
  //  to order of kBackup_ebx etc.
658
#ifdef _WIN64
659 660 661
  // MSVC passes arguments in rcx, rdx, r8, r9, with backing stack slots.
  // Store register parameters in pre-allocated stack slots,
  __ movq(Operand(rbp, kInputString), rcx);
662
  __ movq(Operand(rbp, kStartIndex), rdx);  // Passed as int32 in edx.
663 664 665
  __ movq(Operand(rbp, kInputStart), r8);
  __ movq(Operand(rbp, kInputEnd), r9);
  // Callee-save on Win64.
666 667 668
  __ pushq(rsi);
  __ pushq(rdi);
  __ pushq(rbx);
669 670 671
#else
  // GCC passes arguments in rdi, rsi, rdx, rcx, r8, r9 (and then on stack).
  // Push register parameters on stack for reference.
672 673 674 675 676 677
  DCHECK_EQ(kInputString, -1 * kRegisterSize);
  DCHECK_EQ(kStartIndex, -2 * kRegisterSize);
  DCHECK_EQ(kInputStart, -3 * kRegisterSize);
  DCHECK_EQ(kInputEnd, -4 * kRegisterSize);
  DCHECK_EQ(kRegisterOutput, -5 * kRegisterSize);
  DCHECK_EQ(kNumOutputRegisters, -6 * kRegisterSize);
678 679 680 681 682 683 684 685
  __ pushq(rdi);
  __ pushq(rsi);
  __ pushq(rdx);
  __ pushq(rcx);
  __ pushq(r8);
  __ pushq(r9);

  __ pushq(rbx);  // Callee-save
686
#endif
687

688 689
  __ Push(Immediate(0));  // Number of successful matches in a global regexp.
  __ Push(Immediate(0));  // Make room for "input start - 1" constant.
690 691 692 693 694

  // Check if we have space on the stack for registers.
  Label stack_limit_hit;
  Label stack_ok;

695
  ExternalReference stack_limit =
696
      ExternalReference::address_of_stack_limit(isolate());
697
  __ movp(rcx, rsp);
698
  __ Move(kScratchRegister, stack_limit);
699
  __ subp(rcx, Operand(kScratchRegister, 0));
700
  // Handle it if the stack pointer is already below the stack limit.
701
  __ j(below_equal, &stack_limit_hit);
702 703
  // Check if there is room for the variable number of registers above
  // the stack limit.
704
  __ cmpp(rcx, Immediate(num_registers_ * kPointerSize));
705
  __ j(above_equal, &stack_ok);
706 707
  // Exit with OutOfMemory exception. There is not enough space on the stack
  // for our working registers.
708
  __ Set(rax, EXCEPTION);
709
  __ jmp(&return_rax);
710 711

  __ bind(&stack_limit_hit);
712
  __ Move(code_object_pointer(), masm_.CodeObject());
713
  CallCheckStackGuardState();  // Preserves no registers beside rbp and rsp.
714
  __ testp(rax, rax);
715
  // If returned value is non-zero, we exit with the returned value as result.
716
  __ j(not_zero, &return_rax);
717 718 719 720

  __ bind(&stack_ok);

  // Allocate space on stack for registers.
721
  __ subp(rsp, Immediate(num_registers_ * kPointerSize));
722
  // Load string length.
723
  __ movp(rsi, Operand(rbp, kInputEnd));
724
  // Load input position.
725
  __ movp(rdi, Operand(rbp, kInputStart));
726
  // Set up rdi to be negative offset from string end.
727
  __ subq(rdi, rsi);
728
  // Set rax to address of char before start of the string
729
  // (effectively string position -1).
730
  __ movp(rbx, Operand(rbp, kStartIndex));
731
  __ negq(rbx);
732
  if (mode_ == UC16) {
733
    __ leap(rax, Operand(rdi, rbx, times_2, -char_size()));
734
  } else {
735
    __ leap(rax, Operand(rdi, rbx, times_1, -char_size()));
736
  }
737 738
  // Store this value in a local variable, for use when clearing
  // position registers.
739
  __ movp(Operand(rbp, kInputStartMinusOne), rax);
740

741
#if V8_OS_WIN
742 743 744 745 746 747 748
  // Ensure that we have written to each stack page, in order. Skipping a page
  // on Windows can cause segmentation faults. Assuming page size is 4k.
  const int kPageSize = 4096;
  const int kRegistersPerPage = kPageSize / kPointerSize;
  for (int i = num_saved_registers_ + kRegistersPerPage - 1;
      i < num_registers_;
      i += kRegistersPerPage) {
749
    __ movp(register_location(i), rax);  // One write every page.
750
  }
751
#endif  // V8_OS_WIN
752

753 754 755 756 757
  // Initialize code object pointer.
  __ Move(code_object_pointer(), masm_.CodeObject());

  Label load_char_start_regexp, start_regexp;
  // Load newline if index is at start, previous character otherwise.
758
  __ cmpl(Operand(rbp, kStartIndex), Immediate(0));
759 760 761 762 763 764 765 766 767 768 769
  __ j(not_equal, &load_char_start_regexp, Label::kNear);
  __ Set(current_character(), '\n');
  __ jmp(&start_regexp, Label::kNear);

  // Global regexp restarts matching here.
  __ bind(&load_char_start_regexp);
  // Load previous char as initial value of current character register.
  LoadCurrentCharacterUnchecked(-1, 1);
  __ bind(&start_regexp);

  // Initialize on-stack registers.
770 771 772 773
  if (num_saved_registers_ > 0) {
    // Fill saved registers with initial value = start offset - 1
    // Fill in stack push order, to avoid accessing across an unwritten
    // page (a problem on Windows).
774 775 776 777
    if (num_saved_registers_ > 8) {
      __ Set(rcx, kRegisterZero);
      Label init_loop;
      __ bind(&init_loop);
778
      __ movp(Operand(rbp, rcx, times_1, 0), rax);
779 780 781 782 783 784
      __ subq(rcx, Immediate(kPointerSize));
      __ cmpq(rcx,
              Immediate(kRegisterZero - num_saved_registers_ * kPointerSize));
      __ j(greater, &init_loop);
    } else {  // Unroll the loop.
      for (int i = 0; i < num_saved_registers_; i++) {
785
        __ movp(register_location(i), rax);
786 787
      }
    }
788 789 790
  }

  // Initialize backtrack stack pointer.
791
  __ movp(backtrack_stackpointer(), Operand(rbp, kStackHighEnd));
792

793
  __ jmp(&start_label_);
794 795 796 797 798 799 800

  // Exit code:
  if (success_label_.is_linked()) {
    // Save captures when successful.
    __ bind(&success_label_);
    if (num_saved_registers_ > 0) {
      // copy captures to output
801 802 803
      __ movp(rdx, Operand(rbp, kStartIndex));
      __ movp(rbx, Operand(rbp, kRegisterOutput));
      __ movp(rcx, Operand(rbp, kInputEnd));
804
      __ subp(rcx, Operand(rbp, kInputStart));
805
      if (mode_ == UC16) {
806
        __ leap(rcx, Operand(rcx, rdx, times_2, 0));
807
      } else {
808
        __ addp(rcx, rdx);
809
      }
810
      for (int i = 0; i < num_saved_registers_; i++) {
811
        __ movp(rax, register_location(i));
812
        if (i == 0 && global_with_zero_length_check()) {
813
          // Keep capture start in rdx for the zero-length check later.
814
          __ movp(rdx, rax);
815
        }
816
        __ addp(rax, rcx);  // Convert to index from start, not end.
817
        if (mode_ == UC16) {
818
          __ sarp(rax, Immediate(1));  // Convert byte index to character index.
819 820 821 822
        }
        __ movl(Operand(rbx, i * kIntSize), rax);
      }
    }
823 824 825 826

    if (global()) {
      // Restart matching if the regular expression is flagged as global.
      // Increment success counter.
827
      __ incp(Operand(rbp, kSuccessfulCaptures));
828 829
      // Capture results have been stored, so the number of remaining global
      // output registers is reduced by the number of stored captures.
830
      __ movsxlq(rcx, Operand(rbp, kNumOutputRegisters));
831
      __ subp(rcx, Immediate(num_saved_registers_));
832
      // Check whether we have enough room for another set of capture results.
833
      __ cmpp(rcx, Immediate(num_saved_registers_));
834 835
      __ j(less, &exit_label_);

836
      __ movp(Operand(rbp, kNumOutputRegisters), rcx);
837
      // Advance the location for output.
838
      __ addp(Operand(rbp, kRegisterOutput),
839 840 841
              Immediate(num_saved_registers_ * kIntSize));

      // Prepare rax to initialize registers with its value in the next run.
842
      __ movp(rax, Operand(rbp, kInputStartMinusOne));
843

844 845 846
      if (global_with_zero_length_check()) {
        // Special case for zero-length matches.
        // rdx: capture start index
847
        __ cmpp(rdi, rdx);
848 849 850
        // Not a zero-length match, restart.
        __ j(not_equal, &load_char_start_regexp);
        // rdi (offset from the end) is zero if we already reached the end.
851
        __ testp(rdi, rdi);
852 853 854 855 856 857 858
        __ j(zero, &exit_label_, Label::kNear);
        // Advance current position after a zero-length match.
        if (mode_ == UC16) {
          __ addq(rdi, Immediate(2));
        } else {
          __ incq(rdi);
        }
859
      }
860

861 862
      __ jmp(&load_char_start_regexp);
    } else {
863
      __ movp(rax, Immediate(SUCCESS));
864
    }
865 866 867
  }

  __ bind(&exit_label_);
868 869
  if (global()) {
    // Return the number of successful captures.
870
    __ movp(rax, Operand(rbp, kSuccessfulCaptures));
871
  }
872

873
  __ bind(&return_rax);
874
#ifdef _WIN64
875
  // Restore callee save registers.
876
  __ leap(rsp, Operand(rbp, kLastCalleeSaveRegister));
877 878 879
  __ popq(rbx);
  __ popq(rdi);
  __ popq(rsi);
880 881 882
  // Stack now at rbp.
#else
  // Restore callee save register.
883
  __ movp(rbx, Operand(rbp, kBackup_rbx));
884
  // Skip rsp to rbp.
885
  __ movp(rsp, rbp);
886 887
#endif
  // Exit function frame, restore previous one.
888
  __ popq(rbp);
889 890 891 892 893 894 895 896 897 898 899 900 901 902
  __ ret(0);

  // Backtrack code (branch target for conditional backtracks).
  if (backtrack_label_.is_linked()) {
    __ bind(&backtrack_label_);
    Backtrack();
  }

  Label exit_with_exception;

  // Preempt-code
  if (check_preempt_label_.is_linked()) {
    SafeCallTarget(&check_preempt_label_);

903 904
    __ pushq(backtrack_stackpointer());
    __ pushq(rdi);
905 906

    CallCheckStackGuardState();
907
    __ testp(rax, rax);
908 909
    // If returning non-zero, we should end execution with the given
    // result as return value.
910
    __ j(not_zero, &return_rax);
911 912

    // Restore registers.
913
    __ Move(code_object_pointer(), masm_.CodeObject());
914 915
    __ popq(rdi);
    __ popq(backtrack_stackpointer());
916
    // String might have moved: Reload rsi from frame.
917
    __ movp(rsi, Operand(rbp, kInputEnd));
918 919 920 921 922 923 924 925 926 927
    SafeReturn();
  }

  // Backtrack stack overflow code.
  if (stack_overflow_label_.is_linked()) {
    SafeCallTarget(&stack_overflow_label_);
    // Reached if the backtrack-stack limit has been hit.

    Label grow_failed;
    // Save registers before calling C function
928
#ifndef _WIN64
929
    // Callee-save in Microsoft 64-bit ABI, but not in AMD64 ABI.
930 931
    __ pushq(rsi);
    __ pushq(rdi);
932 933 934
#endif

    // Call GrowStack(backtrack_stackpointer())
935
    static const int num_arguments = 3;
936
    __ PrepareCallCFunction(num_arguments);
937
#ifdef _WIN64
938
    // Microsoft passes parameters in rcx, rdx, r8.
939
    // First argument, backtrack stackpointer, is already in rcx.
940
    __ leap(rdx, Operand(rbp, kStackHighEnd));  // Second argument
941
    __ LoadAddress(r8, ExternalReference::isolate_address(isolate()));
942
#else
943
    // AMD64 ABI passes parameters in rdi, rsi, rdx.
944
    __ movp(rdi, backtrack_stackpointer());   // First argument.
945
    __ leap(rsi, Operand(rbp, kStackHighEnd));  // Second argument.
946
    __ LoadAddress(rdx, ExternalReference::isolate_address(isolate()));
947
#endif
948
    ExternalReference grow_stack =
949
        ExternalReference::re_grow_stack(isolate());
950
    __ CallCFunction(grow_stack, num_arguments);
951 952
    // If return NULL, we have failed to grow the stack, and
    // must exit with a stack-overflow exception.
953
    __ testp(rax, rax);
954 955
    __ j(equal, &exit_with_exception);
    // Otherwise use return value as new stack pointer.
956
    __ movp(backtrack_stackpointer(), rax);
957
    // Restore saved registers and continue.
958
    __ Move(code_object_pointer(), masm_.CodeObject());
959
#ifndef _WIN64
960 961
    __ popq(rdi);
    __ popq(rsi);
962 963 964 965 966 967 968 969
#endif
    SafeReturn();
  }

  if (exit_with_exception.is_linked()) {
    // If any of the code above needed to exit with an exception.
    __ bind(&exit_with_exception);
    // Exit with Result EXCEPTION(-1) to signal thrown exception.
970
    __ Set(rax, EXCEPTION);
971
    __ jmp(&return_rax);
972 973 974 975 976
  }

  FixupCodeRelativePositions();

  CodeDesc code_desc;
977
  masm_.GetCode(&code_desc);
dcarney@chromium.org's avatar
dcarney@chromium.org committed
978
  Isolate* isolate = this->isolate();
979 980
  Handle<Code> code = isolate->factory()->NewCode(
      code_desc, Code::ComputeFlags(Code::REGEXP),
981
      masm_.CodeObject());
982
  PROFILE(isolate, RegExpCodeCreateEvent(*code, *source));
983
  return Handle<HeapObject>::cast(code);
984 985 986 987 988 989 990 991 992 993 994
}


// Emits an unconditional transfer of control to |to|.  The work is delegated
// to BranchOrBacktrack with no_condition, which backtracks instead when |to|
// is NULL.
void RegExpMacroAssemblerX64::GoTo(Label* to) {
  BranchOrBacktrack(no_condition, to);
}


void RegExpMacroAssemblerX64::IfRegisterGE(int reg,
                                           int comparand,
                                           Label* if_ge) {
995
  __ cmpp(register_location(reg), Immediate(comparand));
996 997 998 999 1000 1001 1002
  BranchOrBacktrack(greater_equal, if_ge);
}


void RegExpMacroAssemblerX64::IfRegisterLT(int reg,
                                           int comparand,
                                           Label* if_lt) {
1003
  __ cmpp(register_location(reg), Immediate(comparand));
1004 1005 1006 1007 1008 1009
  BranchOrBacktrack(less, if_lt);
}


void RegExpMacroAssemblerX64::IfRegisterEqPos(int reg,
                                              Label* if_eq) {
1010
  __ cmpp(rdi, register_location(reg));
1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024
  BranchOrBacktrack(equal, if_eq);
}


// Reports kX64Implementation as the implementation kind of this assembler.
RegExpMacroAssembler::IrregexpImplementation
    RegExpMacroAssemblerX64::Implementation() {
  return kX64Implementation;
}


void RegExpMacroAssemblerX64::LoadCurrentCharacter(int cp_offset,
                                                   Label* on_end_of_input,
                                                   bool check_bounds,
                                                   int characters) {
1025 1026
  DCHECK(cp_offset >= -1);      // ^ and \b can look behind one character.
  DCHECK(cp_offset < (1<<30));  // Be sane! (And ensure negation works)
lrn@chromium.org's avatar
lrn@chromium.org committed
1027 1028 1029
  if (check_bounds) {
    CheckPosition(cp_offset + characters - 1, on_end_of_input);
  }
1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040
  LoadCurrentCharacterUnchecked(cp_offset, characters);
}


// Pops the top backtrack-stack entry into rdi, the current input position.
void RegExpMacroAssemblerX64::PopCurrentPosition() {
  Pop(rdi);
}


void RegExpMacroAssemblerX64::PopRegister(int register_index) {
  Pop(rax);
1041
  __ movp(register_location(register_index), rax);
1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057
}


// Pushes |label| onto the backtrack stack as a backtrack target and then
// emits a check of the backtrack-stack limit.
void RegExpMacroAssemblerX64::PushBacktrack(Label* label) {
  Push(label);
  CheckStackLimit();
}


// Pushes rdi, the current input position, onto the backtrack stack.
void RegExpMacroAssemblerX64::PushCurrentPosition() {
  Push(rdi);
}


void RegExpMacroAssemblerX64::PushRegister(int register_index,
                                           StackCheckFlag check_stack_limit) {
1058
  __ movp(rax, register_location(register_index));
1059 1060 1061 1062 1063
  Push(rax);
  if (check_stack_limit) CheckStackLimit();
}


1064 1065 1066
// The position-reading helpers that follow distinguish exactly two pointer
// widths (64-bit and 32-bit); reject any other configuration at compile time.
STATIC_ASSERT(kPointerSize == kInt64Size || kPointerSize == kInt32Size);


1067
void RegExpMacroAssemblerX64::ReadCurrentPositionFromRegister(int reg) {
1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083
  if (kPointerSize == kInt64Size) {
    __ movq(rdi, register_location(reg));
  } else {
    // Need sign extension for x32 as rdi might be used as an index register.
    __ movsxlq(rdi, register_location(reg));
  }
}


// Loads the position saved in RegExp register |reg| into |dst|.
void RegExpMacroAssemblerX64::ReadPositionFromRegister(Register dst, int reg) {
  Operand saved_position = register_location(reg);
  if (kPointerSize != kInt64Size) {
    // Need sign extension for x32 as dst might be used as an index register.
    __ movsxlq(dst, saved_position);
    return;
  }
  __ movq(dst, saved_position);
}


void RegExpMacroAssemblerX64::ReadStackPointerFromRegister(int reg) {
1088
  __ movp(backtrack_stackpointer(), register_location(reg));
1089
  __ addp(backtrack_stackpointer(), Operand(rbp, kStackHighEnd));
1090 1091 1092
}


1093
void RegExpMacroAssemblerX64::SetCurrentPositionFromEnd(int by) {
1094
  Label after_position;
1095
  __ cmpp(rdi, Immediate(-by * char_size()));
1096
  __ j(greater_equal, &after_position, Label::kNear);
1097 1098 1099 1100 1101 1102 1103 1104 1105
  __ movq(rdi, Immediate(-by * char_size()));
  // On RegExp code entry (where this operation is used), the character before
  // the current position is expected to be already loaded.
  // We have advanced the position, so it's safe to read backwards.
  LoadCurrentCharacterUnchecked(-1, 1);
  __ bind(&after_position);
}


1106
void RegExpMacroAssemblerX64::SetRegister(int register_index, int to) {
1107
  DCHECK(register_index >= num_saved_registers_);  // Reserved for positions!
1108
  __ movp(register_location(register_index), Immediate(to));
1109 1110 1111
}


1112
bool RegExpMacroAssemblerX64::Succeed() {
1113
  __ jmp(&success_label_);
1114
  return global();
1115 1116 1117 1118 1119 1120
}


void RegExpMacroAssemblerX64::WriteCurrentPositionToRegister(int reg,
                                                             int cp_offset) {
  if (cp_offset == 0) {
1121
    __ movp(register_location(reg), rdi);
1122
  } else {
1123
    __ leap(rax, Operand(rdi, cp_offset * char_size()));
1124
    __ movp(register_location(reg), rax);
1125 1126 1127 1128 1129
  }
}


void RegExpMacroAssemblerX64::ClearRegisters(int reg_from, int reg_to) {
1130
  DCHECK(reg_from <= reg_to);
1131
  __ movp(rax, Operand(rbp, kInputStartMinusOne));
1132
  for (int reg = reg_from; reg <= reg_to; reg++) {
1133
    __ movp(register_location(reg), rax);
1134 1135 1136 1137 1138
  }
}


void RegExpMacroAssemblerX64::WriteStackPointerToRegister(int reg) {
1139
  __ movp(rax, backtrack_stackpointer());
1140
  __ subp(rax, Operand(rbp, kStackHighEnd));
1141
  __ movp(register_location(reg), rax);
1142 1143 1144 1145 1146 1147 1148 1149
}


// Private methods:

void RegExpMacroAssemblerX64::CallCheckStackGuardState() {
  // This function call preserves no register values. Caller should
  // store anything volatile in a C call or overwritten by this function.
1150
  static const int num_arguments = 3;
1151
  __ PrepareCallCFunction(num_arguments);
1152
#ifdef _WIN64
1153
  // Second argument: Code* of self. (Do this before overwriting r8).
1154
  __ movp(rdx, code_object_pointer());
1155
  // Third argument: RegExp code frame pointer.
1156
  __ movp(r8, rbp);
1157 1158
  // First argument: Next address on the stack (will be address of
  // return address).
1159
  __ leap(rcx, Operand(rsp, -kPointerSize));
1160 1161
#else
  // Third argument: RegExp code frame pointer.
1162
  __ movp(rdx, rbp);
1163
  // Second argument: Code* of self.
1164
  __ movp(rsi, code_object_pointer());
1165 1166
  // First argument: Next address on the stack (will be address of
  // return address).
1167
  __ leap(rdi, Operand(rsp, -kRegisterSize));
1168
#endif
lrn@chromium.org's avatar
lrn@chromium.org committed
1169
  ExternalReference stack_check =
1170
      ExternalReference::re_check_stack_guard_state(isolate());
1171
  __ CallCFunction(stack_check, num_arguments);
1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184
}


// Returns a reference of type T to the RegExp stack-frame slot located
// |frame_offset| bytes from |re_frame|, so the slot can be read or assigned.
template <typename T>
static T& frame_entry(Address re_frame, int frame_offset) {
  Address slot_address = re_frame + frame_offset;
  return reinterpret_cast<T&>(Memory::int32_at(slot_address));
}


// Called from generated RegExp code when the stack guard check triggers.
//
// |return_address| points at the stack slot holding the return address into
// the generated code, |re_code| is the running code object and |re_frame| is
// the RegExp frame whose slots are read and updated via frame_entry.
//
// Returns EXCEPTION on a real stack overflow or when handling interrupts
// produced an exception, RETRY when matching must be restarted (direct call
// from JavaScript, or the subject changed between one-byte and two-byte
// representation), and 0 when execution may continue.  In the latter case the
// return address and the input string/start/end frame slots have been brought
// up to date with any object movement.
int RegExpMacroAssemblerX64::CheckStackGuardState(Address* return_address,
                                                  Code* re_code,
                                                  Address re_frame) {
  Isolate* isolate = frame_entry<Isolate*>(re_frame, kIsolate);
  StackLimitCheck stack_check(isolate);
  if (stack_check.JsHasOverflowed()) {
    isolate->StackOverflow();
    return EXCEPTION;
  }

  // Not a real stack overflow: the stack guard was used to interrupt
  // execution for another purpose.

  // A direct call from JavaScript cannot currently handle a GC, so retry the
  // RegExp and force the call through the runtime system.
  const bool is_direct_call = frame_entry<int>(re_frame, kDirectCall) == 1;
  if (is_direct_call) {
    return RETRY;
  }

  // Prepare for possible GC.
  HandleScope scope(isolate);
  Handle<Code> code_handle(re_code);
  Handle<String> subject(frame_entry<String*>(re_frame, kInputString));

  // Representation of the current string, remembered for comparison below.
  const bool was_one_byte = subject->IsOneByteRepresentationUnderneath();

  DCHECK(re_code->instruction_start() <= *return_address);
  DCHECK(*return_address <=
      re_code->instruction_start() + re_code->instruction_size());

  Object* interrupt_result = isolate->stack_guard()->HandleInterrupts();

  if (*code_handle != re_code) {
    // The code object differs from the one we entered with, so the return
    // address is no longer valid.  Overwrite it on the stack.
    intptr_t delta = code_handle->address() - re_code->address();
    *return_address += delta;
  }

  if (interrupt_result->IsException()) {
    return EXCEPTION;
  }

  // Extract the underlying string and the slice offset.
  Handle<String> underlying = subject;
  int slice_offset = 0;
  if (StringShape(*underlying).IsCons()) {
    underlying = Handle<String>(ConsString::cast(*underlying)->first());
  } else if (StringShape(*underlying).IsSliced()) {
    SlicedString* slice = SlicedString::cast(*underlying);
    underlying = Handle<String>(slice->parent());
    slice_offset = slice->offset();
  }

  // String might have changed.
  if (underlying->IsOneByteRepresentation() != was_one_byte) {
    // If we changed between a Latin1 and a UC16 string, the specialized
    // code cannot be used, and we need to restart regexp matching from
    // scratch (including, potentially, compiling a new version of the code).
    return RETRY;
  }

  // Otherwise, the content of the string might have moved. It must still
  // be a sequential or external string with the same content.
  // Update the start and end pointers in the stack frame to the current
  // location (whether it has actually moved or not).
  DCHECK(StringShape(*underlying).IsSequential() ||
      StringShape(*underlying).IsExternal());

  // The original start address of the characters to match.
  const byte* const old_start =
      frame_entry<const byte*>(re_frame, kInputStart);

  // Find the current start address of the same character at the current string
  // position.
  const int start_index = frame_entry<int>(re_frame, kStartIndex);
  const byte* const new_start =
      StringCharacterPosition(*underlying, start_index + slice_offset);

  if (old_start != new_start) {
    // The characters moved: update the object pointer and the start and end
    // addresses in the RegExp stack frame to match the new location.
    const byte* const old_end = frame_entry<const byte*>(re_frame, kInputEnd);
    const int byte_length = static_cast<int>(old_end - old_start);
    frame_entry<const String*>(re_frame, kInputString) = *subject;
    frame_entry<const byte*>(re_frame, kInputStart) = new_start;
    frame_entry<const byte*>(re_frame, kInputEnd) = new_start + byte_length;
    return 0;
  }

  if (frame_entry<const String*>(re_frame, kInputString) != *subject) {
    // Subject string might have been a ConsString that underwent
    // short-circuiting during GC. That will not change the start address but
    // will change the pointer inside the subject handle.
    frame_entry<const String*>(re_frame, kInputString) = *subject;
  }

  return 0;
}


// Returns the rbp-relative operand addressing RegExp register
// |register_index|.  Registers are laid out at descending addresses starting
// at kRegisterZero, kPointerSize bytes apart.  As a side effect,
// num_registers_ is raised so that it always exceeds the highest index
// requested so far.
Operand RegExpMacroAssemblerX64::register_location(int register_index) {
  DCHECK(register_index < (1<<30));
  if (register_index >= num_registers_) {
    num_registers_ = register_index + 1;
  }
  const int frame_offset = kRegisterZero - register_index * kPointerSize;
  return Operand(rbp, frame_offset);
}


void RegExpMacroAssemblerX64::CheckPosition(int cp_offset,
                                            Label* on_outside_input) {
  __ cmpl(rdi, Immediate(-cp_offset * char_size()));
  BranchOrBacktrack(greater_equal, on_outside_input);
}


// Emits a transfer of control governed by |condition|.
//  - A negative |condition| means "no condition": jump to |to|, or emit the
//    backtrack sequence inline when |to| is NULL.
//  - Otherwise emit a conditional jump to |to|, or to the shared backtrack
//    label when |to| is NULL.
void RegExpMacroAssemblerX64::BranchOrBacktrack(Condition condition,
                                                Label* to) {
  const bool unconditional = condition < 0;
  if (unconditional && to == NULL) {
    Backtrack();
    return;
  }
  if (unconditional) {
    __ jmp(to);
    return;
  }
  Label* target = (to == NULL) ? &backtrack_label_ : to;
  __ j(condition, target);
}


// Emits a call to |to|.  The targets used in this file are bound with
// SafeCallTarget and left through SafeReturn, which rewrite the pushed return
// address relative to the code object pointer.
void RegExpMacroAssemblerX64::SafeCall(Label* to) {
  __ call(to);
}


// Binds |label| as the entry point of a routine reached via SafeCall and
// subtracts the code object pointer from the return address on top of the
// stack, making it relative to the code object.
void RegExpMacroAssemblerX64::SafeCallTarget(Label* label) {
  __ bind(label);
  Operand return_address_slot(rsp, 0);
  __ subp(return_address_slot, code_object_pointer());
}


void RegExpMacroAssemblerX64::SafeReturn() {
1327
  __ addp(Operand(rsp, 0), code_object_pointer());
1328 1329 1330 1331 1332
  __ ret(0);
}


// Pushes the low 32 bits of |source| onto the backtrack stack: the backtrack
// stack pointer is lowered by kIntSize and the value is stored at the new top.
// |source| must not be the backtrack stack pointer itself.
void RegExpMacroAssemblerX64::Push(Register source) {
  DCHECK(!source.is(backtrack_stackpointer()));
  Register stack_ptr = backtrack_stackpointer();
  // Notice: This updates flags, unlike normal Push.
  __ subp(stack_ptr, Immediate(kIntSize));
  __ movl(Operand(stack_ptr, 0), source);
}


// Pushes the 32-bit immediate |value| onto the backtrack stack: the backtrack
// stack pointer is lowered by kIntSize and the value is stored at the new top.
void RegExpMacroAssemblerX64::Push(Immediate value) {
  Register stack_ptr = backtrack_stackpointer();
  // Notice: This updates flags, unlike normal Push.
  __ subp(stack_ptr, Immediate(kIntSize));
  __ movl(Operand(stack_ptr, 0), value);
}


void RegExpMacroAssemblerX64::FixupCodeRelativePositions() {
  for (int i = 0, n = code_relative_fixup_positions_.length(); i < n; i++) {
    int position = code_relative_fixup_positions_[i];
    // The position succeeds a relative label offset from position.
    // Patch the relative offset to be relative to the Code object pointer
    // instead.
    int patch_position = position - kIntSize;
1354 1355
    int offset = masm_.long_at(patch_position);
    masm_.long_at_put(patch_position,
1356 1357 1358 1359 1360 1361 1362 1363 1364 1365
                       offset
                       + position
                       + Code::kHeaderSize
                       - kHeapObjectTag);
  }
  code_relative_fixup_positions_.Clear();
}


// Pushes a reference to |backtrack_target| onto the backtrack stack and
// records the position so that FixupCodeRelativePositions can later patch the
// stored value to be relative to the Code object pointer.
void RegExpMacroAssemblerX64::Push(Label* backtrack_target) {
  Register stack_ptr = backtrack_stackpointer();
  __ subp(stack_ptr, Immediate(kIntSize));
  __ movl(Operand(stack_ptr, 0), backtrack_target);
  MarkPositionForCodeRelativeFixup();
}


// Pops the top 32-bit backtrack-stack entry into |target|, sign-extending it
// to 64 bits, and raises the backtrack stack pointer by kIntSize.  |target|
// must not be the backtrack stack pointer itself.
void RegExpMacroAssemblerX64::Pop(Register target) {
  DCHECK(!target.is(backtrack_stackpointer()));
  Register stack_ptr = backtrack_stackpointer();
  __ movsxlq(target, Operand(stack_ptr, 0));
  // Notice: This updates flags, unlike normal Pop.
  __ addp(stack_ptr, Immediate(kIntSize));
}


void RegExpMacroAssemblerX64::Drop() {
1381
  __ addp(backtrack_stackpointer(), Immediate(kIntSize));
1382 1383 1384 1385 1386 1387
}


void RegExpMacroAssemblerX64::CheckPreemption() {
  // Check for preemption.
  Label no_preempt;
1388
  ExternalReference stack_limit =
1389
      ExternalReference::address_of_stack_limit(isolate());
1390
  __ load_rax(stack_limit);
1391
  __ cmpp(rsp, rax);
1392
  __ j(above, &no_preempt);
1393 1394 1395 1396 1397 1398 1399 1400

  SafeCall(&check_preempt_label_);

  __ bind(&no_preempt);
}


void RegExpMacroAssemblerX64::CheckStackLimit() {
1401 1402
  Label no_stack_overflow;
  ExternalReference stack_limit =
1403
      ExternalReference::address_of_regexp_stack_limit(isolate());
1404
  __ load_rax(stack_limit);
1405
  __ cmpp(backtrack_stackpointer(), rax);
1406
  __ j(above, &no_stack_overflow);
1407

1408
  SafeCall(&stack_overflow_label_);
1409

1410
  __ bind(&no_stack_overflow);
1411 1412 1413 1414 1415
}


void RegExpMacroAssemblerX64::LoadCurrentCharacterUnchecked(int cp_offset,
                                                            int characters) {
1416
  if (mode_ == LATIN1) {
1417 1418 1419 1420 1421
    if (characters == 4) {
      __ movl(current_character(), Operand(rsi, rdi, times_1, cp_offset));
    } else if (characters == 2) {
      __ movzxwl(current_character(), Operand(rsi, rdi, times_1, cp_offset));
    } else {
1422
      DCHECK(characters == 1);
1423 1424 1425
      __ movzxbl(current_character(), Operand(rsi, rdi, times_1, cp_offset));
    }
  } else {
1426
    DCHECK(mode_ == UC16);
1427 1428 1429 1430
    if (characters == 2) {
      __ movl(current_character(),
              Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16)));
    } else {
1431
      DCHECK(characters == 1);
1432 1433 1434 1435 1436 1437 1438
      __ movzxwl(current_character(),
                 Operand(rsi, rdi, times_1, cp_offset * sizeof(uc16)));
    }
  }
}

#undef __
lrn@chromium.org's avatar
lrn@chromium.org committed
1439

1440
#endif  // V8_INTERPRETED_REGEXP
lrn@chromium.org's avatar
lrn@chromium.org committed
1441

1442
}}  // namespace v8::internal
1443 1444

#endif  // V8_TARGET_ARCH_X64