// Copyright 2013 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#if V8_TARGET_ARCH_ARM64

#include "src/base/bits.h"
#include "src/base/division-by-constant.h"
#include "src/codegen/assembler.h"
#include "src/codegen/callable.h"
#include "src/codegen/code-factory.h"
#include "src/codegen/external-reference-table.h"
#include "src/codegen/interface-descriptors-inl.h"
#include "src/codegen/macro-assembler-inl.h"
#include "src/codegen/register-configuration.h"
#include "src/codegen/reloc-info.h"
#include "src/debug/debug.h"
#include "src/deoptimizer/deoptimizer.h"
#include "src/execution/frame-constants.h"
#include "src/execution/frames-inl.h"
#include "src/heap/memory-chunk.h"
#include "src/init/bootstrapper.h"
#include "src/logging/counters.h"
#include "src/runtime/runtime.h"
#include "src/snapshot/snapshot.h"

#if V8_ENABLE_WEBASSEMBLY
#include "src/wasm/wasm-code-manager.h"
#endif  // V8_ENABLE_WEBASSEMBLY

// Satisfy cpplint check, but don't include platform-specific header. It is
// included recursively via macro-assembler.h.
#if 0
#include "src/base/platform/wrappers.h"
#include "src/codegen/arm64/macro-assembler-arm64.h"
#endif

namespace v8 {
namespace internal {

CPURegList TurboAssembler::DefaultTmpList() { return CPURegList(ip0, ip1); }

CPURegList TurboAssembler::DefaultFPTmpList() {
  return CPURegList(fp_scratch1, fp_scratch2);
}

namespace {

// For WebAssembly we care about the full floating point register. If we are not
// running Wasm, we can get away with saving half of those registers.
#if V8_ENABLE_WEBASSEMBLY
constexpr int kStackSavedSavedFPSizeInBits = kQRegSizeInBits;
#else
constexpr int kStackSavedSavedFPSizeInBits = kDRegSizeInBits;
#endif  // V8_ENABLE_WEBASSEMBLY

}  // namespace

int TurboAssembler::RequiredStackSizeForCallerSaved(SaveFPRegsMode fp_mode,
                                                    Register exclusion) const {
  auto list = kCallerSaved;
  list.Remove(exclusion);
  list.Align();

  int bytes = list.TotalSizeInBytes();

  if (fp_mode == SaveFPRegsMode::kSave) {
    auto fp_list = CPURegList::GetCallerSavedV(kStackSavedSavedFPSizeInBits);
    DCHECK_EQ(fp_list.Count() % 2, 0);
    bytes += fp_list.TotalSizeInBytes();
  }
  return bytes;
}

int TurboAssembler::PushCallerSaved(SaveFPRegsMode fp_mode,
                                    Register exclusion) {
  ASM_CODE_COMMENT(this);
  auto list = kCallerSaved;
  list.Remove(exclusion);
  list.Align();

  PushCPURegList<kDontStoreLR>(list);

  int bytes = list.TotalSizeInBytes();

  if (fp_mode == SaveFPRegsMode::kSave) {
    auto fp_list = CPURegList::GetCallerSavedV(kStackSavedSavedFPSizeInBits);
    DCHECK_EQ(fp_list.Count() % 2, 0);
    PushCPURegList(fp_list);
    bytes += fp_list.TotalSizeInBytes();
  }
  return bytes;
}

int TurboAssembler::PopCallerSaved(SaveFPRegsMode fp_mode, Register exclusion) {
  ASM_CODE_COMMENT(this);
  int bytes = 0;
  if (fp_mode == SaveFPRegsMode::kSave) {
    auto fp_list = CPURegList::GetCallerSavedV(kStackSavedSavedFPSizeInBits);
    DCHECK_EQ(fp_list.Count() % 2, 0);
    PopCPURegList(fp_list);
    bytes += fp_list.TotalSizeInBytes();
  }

  auto list = kCallerSaved;
  list.Remove(exclusion);
  list.Align();

  PopCPURegList<kDontLoadLR>(list);
  bytes += list.TotalSizeInBytes();

  return bytes;
}

void TurboAssembler::LogicalMacro(const Register& rd, const Register& rn,
                                  const Operand& operand, LogicalOp op) {
  ASM_CODE_COMMENT(this);
  UseScratchRegisterScope temps(this);

  if (operand.NeedsRelocation(this)) {
    Register temp = temps.AcquireX();
    Ldr(temp, operand.immediate());
    Logical(rd, rn, temp, op);

  } else if (operand.IsImmediate()) {
    int64_t immediate = operand.ImmediateValue();
    unsigned reg_size = rd.SizeInBits();

    // If the operation is NOT, invert the operation and immediate.
    if ((op & NOT) == NOT) {
      op = static_cast<LogicalOp>(op & ~NOT);
      immediate = ~immediate;
    }

    // Ignore the top 32 bits of an immediate if we're moving to a W register.
    if (rd.Is32Bits()) {
      // Check that the top 32 bits are consistent.
      DCHECK(((immediate >> kWRegSizeInBits) == 0) ||
             ((immediate >> kWRegSizeInBits) == -1));
      immediate &= kWRegMask;
    }

    DCHECK(rd.Is64Bits() || is_uint32(immediate));

    // Special cases for all set or all clear immediates.
    if (immediate == 0) {
      switch (op) {
        case AND:
          Mov(rd, 0);
          return;
        case ORR:  // Fall through.
        case EOR:
          Mov(rd, rn);
          return;
        case ANDS:  // Fall through.
        case BICS:
          break;
        default:
          UNREACHABLE();
      }
    } else if ((rd.Is64Bits() && (immediate == -1L)) ||
               (rd.Is32Bits() && (immediate == 0xFFFFFFFFL))) {
      switch (op) {
        case AND:
          Mov(rd, rn);
          return;
        case ORR:
          Mov(rd, immediate);
          return;
        case EOR:
          Mvn(rd, rn);
          return;
        case ANDS:  // Fall through.
        case BICS:
          break;
        default:
          UNREACHABLE();
      }
    }

    unsigned n, imm_s, imm_r;
    if (IsImmLogical(immediate, reg_size, &n, &imm_s, &imm_r)) {
      // Immediate can be encoded in the instruction.
      LogicalImmediate(rd, rn, n, imm_s, imm_r, op);
    } else {
      // Immediate can't be encoded: synthesize using move immediate.
      Register temp = temps.AcquireSameSizeAs(rn);

      // If the left-hand input is the stack pointer, we can't pre-shift the
      // immediate, as the encoding won't allow the subsequent post shift.
      PreShiftImmMode mode = rn == sp ? kNoShift : kAnyShift;
      Operand imm_operand = MoveImmediateForShiftedOp(temp, immediate, mode);

      if (rd.IsSP()) {
        // If rd is the stack pointer we cannot use it as the destination
        // register so we use the temp register as an intermediate again.
        Logical(temp, rn, imm_operand, op);
        Mov(sp, temp);
      } else {
        Logical(rd, rn, imm_operand, op);
      }
    }

  } else if (operand.IsExtendedRegister()) {
    DCHECK(operand.reg().SizeInBits() <= rd.SizeInBits());
    // Add/sub extended supports shift <= 4. We want to support exactly the
    // same modes here.
    DCHECK_LE(operand.shift_amount(), 4);
    DCHECK(operand.reg().Is64Bits() ||
           ((operand.extend() != UXTX) && (operand.extend() != SXTX)));
    Register temp = temps.AcquireSameSizeAs(rn);
    EmitExtendShift(temp, operand.reg(), operand.extend(),
                    operand.shift_amount());
    Logical(rd, rn, temp, op);

  } else {
    // The operand can be encoded in the instruction.
    DCHECK(operand.IsShiftedRegister());
    Logical(rd, rn, operand, op);
  }
}

void TurboAssembler::Mov(const Register& rd, uint64_t imm) {
  DCHECK(allow_macro_instructions());
  DCHECK(is_uint32(imm) || is_int32(imm) || rd.Is64Bits());
  DCHECK(!rd.IsZero());

  // TODO(all) extend to support more immediates.
  //
  // Immediates on Aarch64 can be produced using an initial value, and zero to
  // three move keep operations.
  //
  // Initial values can be generated with:
  //  1. 64-bit move zero (movz).
  //  2. 32-bit move inverted (movn).
  //  3. 64-bit move inverted.
  //  4. 32-bit orr immediate.
  //  5. 64-bit orr immediate.
  // Move-keep may then be used to modify each of the 16-bit half-words.
  //
  // The code below supports all five initial value generators, and
  // applying move-keep operations to move-zero and move-inverted initial
  // values.
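  //
  // For example (illustrative only), 0x0000123456780000 would be built as
  //   movz rd, #0x5678, lsl #16
  //   movk rd, #0x1234, lsl #32
  // while 0xFFFFFFFF1234FFFF needs only a single move-inverted instruction:
  //   movn rd, #0xEDCB, lsl #16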

  // Try to move the immediate in one instruction, and if that fails, switch to
  // using multiple instructions.
  if (!TryOneInstrMoveImmediate(rd, imm)) {
    unsigned reg_size = rd.SizeInBits();

    // Generic immediate case. Imm will be represented by
    //   [imm3, imm2, imm1, imm0], where each imm is 16 bits.
    // A move-zero or move-inverted is generated for the first non-zero or
    // non-0xFFFF immX, and a move-keep for subsequent non-zero immX.

    uint64_t ignored_halfword = 0;
    bool invert_move = false;
    // If the number of 0xFFFF halfwords is greater than the number of 0x0000
    // halfwords, it's more efficient to use move-inverted.
    if (CountSetHalfWords(imm, reg_size) > CountSetHalfWords(~imm, reg_size)) {
      ignored_halfword = 0xFFFFL;
      invert_move = true;
    }

    // Mov instructions can't move immediate values into the stack pointer, so
    // set up a temporary register, if needed.
    UseScratchRegisterScope temps(this);
    Register temp = rd.IsSP() ? temps.AcquireSameSizeAs(rd) : rd;

    // Iterate through the halfwords. Use movn/movz for the first non-ignored
    // halfword, and movk for subsequent halfwords.
    DCHECK_EQ(reg_size % 16, 0);
    bool first_mov_done = false;
    for (int i = 0; i < (rd.SizeInBits() / 16); i++) {
      uint64_t imm16 = (imm >> (16 * i)) & 0xFFFFL;
      if (imm16 != ignored_halfword) {
        if (!first_mov_done) {
          if (invert_move) {
            movn(temp, (~imm16) & 0xFFFFL, 16 * i);
          } else {
            movz(temp, imm16, 16 * i);
          }
          first_mov_done = true;
        } else {
          // Construct a wider constant.
          movk(temp, imm16, 16 * i);
        }
      }
    }
    DCHECK(first_mov_done);

    // Move the temporary if the original destination register was the stack
    // pointer.
    if (rd.IsSP()) {
      mov(rd, temp);
    }
  }
}

void TurboAssembler::Mov(const Register& rd, const Operand& operand,
                         DiscardMoveMode discard_mode) {
  DCHECK(allow_macro_instructions());
  DCHECK(!rd.IsZero());

  // Provide a swap register for instructions that need to write into the
  // system stack pointer (and can't do this inherently).
  UseScratchRegisterScope temps(this);
  Register dst = (rd.IsSP()) ? temps.AcquireSameSizeAs(rd) : rd;

  if (operand.NeedsRelocation(this)) {
    // TODO(jgruber,v8:8887): Also consider a root-relative load when generating
    // non-isolate-independent code. In many cases it might be cheaper than
    // embedding the relocatable value.
    if (root_array_available_ && options().isolate_independent_code) {
      if (operand.ImmediateRMode() == RelocInfo::EXTERNAL_REFERENCE) {
        Address addr = static_cast<Address>(operand.ImmediateValue());
        ExternalReference reference = bit_cast<ExternalReference>(addr);
        IndirectLoadExternalReference(rd, reference);
        return;
      } else if (RelocInfo::IsEmbeddedObjectMode(operand.ImmediateRMode())) {
        Handle<HeapObject> x(
            reinterpret_cast<Address*>(operand.ImmediateValue()));
        // TODO(v8:9706): Fix-it! This load will always uncompress the value
        // even when we are loading a compressed embedded object.
        IndirectLoadConstant(rd.X(), x);
        return;
      }
    }
    Ldr(dst, operand);
  } else if (operand.IsImmediate()) {
    // Call the macro assembler for generic immediates.
    Mov(dst, operand.ImmediateValue());
  } else if (operand.IsShiftedRegister() && (operand.shift_amount() != 0)) {
    // Emit a shift instruction if moving a shifted register. This operation
    // could also be achieved using an orr instruction (like orn used by Mvn),
    // but using a shift instruction makes the disassembly clearer.
    EmitShift(dst, operand.reg(), operand.shift(), operand.shift_amount());
  } else if (operand.IsExtendedRegister()) {
    // Emit an extend instruction if moving an extended register. This handles
    // extend with post-shift operations, too.
    EmitExtendShift(dst, operand.reg(), operand.extend(),
                    operand.shift_amount());
  } else {
    // Otherwise, emit a register move only if the registers are distinct, or
    // if they are not X registers.
    //
    // Note that mov(w0, w0) is not a no-op because it clears the top word of
    // x0. A flag is provided (kDiscardForSameWReg) if a move between the same W
    // registers is not required to clear the top word of the X register. In
    // this case, the instruction is discarded.
    //
    // If sp is an operand, add #0 is emitted, otherwise, orr #0.
    if (rd != operand.reg() ||
        (rd.Is32Bits() && (discard_mode == kDontDiscardForSameWReg))) {
      Assembler::mov(rd, operand.reg());
    }
    // This case can handle writes into the system stack pointer directly.
    dst = rd;
  }

  // Copy the result to the system stack pointer.
  if (dst != rd) {
    DCHECK(rd.IsSP());
    Assembler::mov(rd, dst);
  }
}

void TurboAssembler::Mov(const Register& rd, Smi smi) {
  return Mov(rd, Operand(smi));
}

void TurboAssembler::Movi16bitHelper(const VRegister& vd, uint64_t imm) {
  DCHECK(is_uint16(imm));
  int byte1 = (imm & 0xFF);
  int byte2 = ((imm >> 8) & 0xFF);
  if (byte1 == byte2) {
    movi(vd.Is64Bits() ? vd.V8B() : vd.V16B(), byte1);
  } else if (byte1 == 0) {
    movi(vd, byte2, LSL, 8);
  } else if (byte2 == 0) {
    movi(vd, byte1);
  } else if (byte1 == 0xFF) {
    mvni(vd, ~byte2 & 0xFF, LSL, 8);
  } else if (byte2 == 0xFF) {
    mvni(vd, ~byte1 & 0xFF);
  } else {
    UseScratchRegisterScope temps(this);
    Register temp = temps.AcquireW();
    movz(temp, imm);
    dup(vd, temp);
  }
}

void TurboAssembler::Movi32bitHelper(const VRegister& vd, uint64_t imm) {
  DCHECK(is_uint32(imm));

  uint8_t bytes[sizeof(imm)];
  memcpy(bytes, &imm, sizeof(imm));

  // All bytes are either 0x00 or 0xFF.
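  // (e.g. an input of 0x00FF00FF is handled here as a single
  // "movi vd.2d, #0x00FF00FF00FF00FF", or the 1D equivalent.)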
  {
    bool all0orff = true;
    for (int i = 0; i < 4; ++i) {
      if ((bytes[i] != 0) && (bytes[i] != 0xFF)) {
        all0orff = false;
        break;
      }
    }

    if (all0orff == true) {
      movi(vd.Is64Bits() ? vd.V1D() : vd.V2D(), ((imm << 32) | imm));
      return;
    }
  }

  // Of the 4 bytes, only one byte is non-zero.
  for (int i = 0; i < 4; i++) {
    if ((imm & (0xFF << (i * 8))) == imm) {
      movi(vd, bytes[i], LSL, i * 8);
      return;
    }
  }

  // Of the 4 bytes, only one byte is not 0xFF.
  for (int i = 0; i < 4; i++) {
    uint32_t mask = ~(0xFF << (i * 8));
    if ((imm & mask) == mask) {
      mvni(vd, ~bytes[i] & 0xFF, LSL, i * 8);
      return;
    }
  }

  // Immediate is of the form 0x00MMFFFF.
  if ((imm & 0xFF00FFFF) == 0x0000FFFF) {
    movi(vd, bytes[2], MSL, 16);
    return;
  }

  // Immediate is of the form 0x0000MMFF.
  if ((imm & 0xFFFF00FF) == 0x000000FF) {
    movi(vd, bytes[1], MSL, 8);
    return;
  }

  // Immediate is of the form 0xFFMM0000.
  if ((imm & 0xFF00FFFF) == 0xFF000000) {
    mvni(vd, ~bytes[2] & 0xFF, MSL, 16);
    return;
  }
  // Immediate is of the form 0xFFFFMM00.
  if ((imm & 0xFFFF00FF) == 0xFFFF0000) {
    mvni(vd, ~bytes[1] & 0xFF, MSL, 8);
    return;
  }

  // Top and bottom 16-bits are equal.
  if (((imm >> 16) & 0xFFFF) == (imm & 0xFFFF)) {
    Movi16bitHelper(vd.Is64Bits() ? vd.V4H() : vd.V8H(), imm & 0xFFFF);
    return;
  }

  // Default case.
  {
    UseScratchRegisterScope temps(this);
    Register temp = temps.AcquireW();
    Mov(temp, imm);
    dup(vd, temp);
  }
}

void TurboAssembler::Movi64bitHelper(const VRegister& vd, uint64_t imm) {
  // All bytes are either 0x00 or 0xFF.
  {
    bool all0orff = true;
    for (int i = 0; i < 8; ++i) {
      int byteval = (imm >> (i * 8)) & 0xFF;
      if (byteval != 0 && byteval != 0xFF) {
        all0orff = false;
        break;
      }
    }
    if (all0orff == true) {
      movi(vd, imm);
      return;
    }
  }

  // Top and bottom 32-bits are equal.
  if (((imm >> 32) & 0xFFFFFFFF) == (imm & 0xFFFFFFFF)) {
    Movi32bitHelper(vd.Is64Bits() ? vd.V2S() : vd.V4S(), imm & 0xFFFFFFFF);
    return;
  }

  // Default case.
  {
    UseScratchRegisterScope temps(this);
    Register temp = temps.AcquireX();
    Mov(temp, imm);
    if (vd.Is1D()) {
      mov(vd.D(), 0, temp);
    } else {
      dup(vd.V2D(), temp);
    }
  }
}

void TurboAssembler::Movi(const VRegister& vd, uint64_t imm, Shift shift,
                          int shift_amount) {
  DCHECK(allow_macro_instructions());
  if (shift_amount != 0 || shift != LSL) {
    movi(vd, imm, shift, shift_amount);
  } else if (vd.Is8B() || vd.Is16B()) {
    // 8-bit immediate.
    DCHECK(is_uint8(imm));
    movi(vd, imm);
  } else if (vd.Is4H() || vd.Is8H()) {
    // 16-bit immediate.
    Movi16bitHelper(vd, imm);
  } else if (vd.Is2S() || vd.Is4S()) {
    // 32-bit immediate.
    Movi32bitHelper(vd, imm);
  } else {
    // 64-bit immediate.
    Movi64bitHelper(vd, imm);
  }
}

void TurboAssembler::Movi(const VRegister& vd, uint64_t hi, uint64_t lo) {
  // TODO(v8:11033): Move 128-bit values in a more efficient way.
  DCHECK(vd.Is128Bits());
  Movi(vd.V2D(), lo);
  if (lo != hi) {
    UseScratchRegisterScope temps(this);
    Register temp = temps.AcquireX();
    Mov(temp, hi);
    Ins(vd.V2D(), 1, temp);
  }
}

void TurboAssembler::Mvn(const Register& rd, const Operand& operand) {
  DCHECK(allow_macro_instructions());

  if (operand.NeedsRelocation(this)) {
    Ldr(rd, operand.immediate());
    mvn(rd, rd);

  } else if (operand.IsImmediate()) {
    // Call the macro assembler for generic immediates.
    Mov(rd, ~operand.ImmediateValue());

  } else if (operand.IsExtendedRegister()) {
    // Emit two instructions for the extend case. This differs from Mov, as
    // the extend and invert can't be achieved in one instruction.
    EmitExtendShift(rd, operand.reg(), operand.extend(),
                    operand.shift_amount());
    mvn(rd, rd);

  } else {
    mvn(rd, operand);
  }
}

unsigned TurboAssembler::CountSetHalfWords(uint64_t imm, unsigned reg_size) {
  DCHECK_EQ(reg_size % 16, 0);

#define HALFWORD(idx) (((imm >> ((idx)*16)) & 0xFFFF) ? 1u : 0u)
  switch (reg_size / 16) {
    case 1:
      return HALFWORD(0);
    case 2:
      return HALFWORD(0) + HALFWORD(1);
    case 4:
      return HALFWORD(0) + HALFWORD(1) + HALFWORD(2) + HALFWORD(3);
  }
#undef HALFWORD
  UNREACHABLE();
}

// The movz instruction can generate immediates containing an arbitrary 16-bit
// half-word, with remaining bits clear, eg. 0x00001234, 0x0000123400000000.
bool TurboAssembler::IsImmMovz(uint64_t imm, unsigned reg_size) {
  DCHECK((reg_size == kXRegSizeInBits) || (reg_size == kWRegSizeInBits));
  return CountSetHalfWords(imm, reg_size) <= 1;
}

// The movn instruction can generate immediates containing an arbitrary 16-bit
// half-word, with remaining bits set, eg. 0xFFFF1234, 0xFFFF1234FFFFFFFF.
bool TurboAssembler::IsImmMovn(uint64_t imm, unsigned reg_size) {
  return IsImmMovz(~imm, reg_size);
}

void TurboAssembler::ConditionalCompareMacro(const Register& rn,
                                             const Operand& operand,
                                             StatusFlags nzcv, Condition cond,
                                             ConditionalCompareOp op) {
  DCHECK((cond != al) && (cond != nv));
  if (operand.NeedsRelocation(this)) {
    UseScratchRegisterScope temps(this);
    Register temp = temps.AcquireX();
    Ldr(temp, operand.immediate());
    ConditionalCompareMacro(rn, temp, nzcv, cond, op);

  } else if ((operand.IsShiftedRegister() && (operand.shift_amount() == 0)) ||
             (operand.IsImmediate() &&
              IsImmConditionalCompare(operand.ImmediateValue()))) {
    // The immediate can be encoded in the instruction, or the operand is an
    // unshifted register: call the assembler.
    ConditionalCompare(rn, operand, nzcv, cond, op);

  } else {
    // The operand isn't directly supported by the instruction: perform the
    // operation on a temporary register.
    UseScratchRegisterScope temps(this);
    Register temp = temps.AcquireSameSizeAs(rn);
    Mov(temp, operand);
    ConditionalCompare(rn, temp, nzcv, cond, op);
  }
}

void TurboAssembler::Csel(const Register& rd, const Register& rn,
                          const Operand& operand, Condition cond) {
  DCHECK(allow_macro_instructions());
  DCHECK(!rd.IsZero());
  DCHECK((cond != al) && (cond != nv));
  if (operand.IsImmediate()) {
    // Immediate argument. Handle special cases of 0, 1 and -1 using zero
    // register.
    int64_t imm = operand.ImmediateValue();
    Register zr = AppropriateZeroRegFor(rn);
    if (imm == 0) {
      csel(rd, rn, zr, cond);
    } else if (imm == 1) {
      csinc(rd, rn, zr, cond);
    } else if (imm == -1) {
      csinv(rd, rn, zr, cond);
    } else {
      UseScratchRegisterScope temps(this);
      Register temp = temps.AcquireSameSizeAs(rn);
      Mov(temp, imm);
      csel(rd, rn, temp, cond);
    }
  } else if (operand.IsShiftedRegister() && (operand.shift_amount() == 0)) {
    // Unshifted register argument.
    csel(rd, rn, operand.reg(), cond);
  } else {
    // All other arguments.
    UseScratchRegisterScope temps(this);
    Register temp = temps.AcquireSameSizeAs(rn);
    Mov(temp, operand);
    csel(rd, rn, temp, cond);
  }
}

bool TurboAssembler::TryOneInstrMoveImmediate(const Register& dst,
                                              int64_t imm) {
  unsigned n, imm_s, imm_r;
  int reg_size = dst.SizeInBits();
  if (IsImmMovz(imm, reg_size) && !dst.IsSP()) {
    // Immediate can be represented in a move zero instruction. Movz can't write
    // to the stack pointer.
    movz(dst, imm);
    return true;
  } else if (IsImmMovn(imm, reg_size) && !dst.IsSP()) {
    // Immediate can be represented in a move not instruction. Movn can't write
    // to the stack pointer.
    movn(dst, dst.Is64Bits() ? ~imm : (~imm & kWRegMask));
    return true;
  } else if (IsImmLogical(imm, reg_size, &n, &imm_s, &imm_r)) {
    // Immediate can be represented in a logical orr instruction.
    LogicalImmediate(dst, AppropriateZeroRegFor(dst), n, imm_s, imm_r, ORR);
    return true;
  }
  return false;
}

Operand TurboAssembler::MoveImmediateForShiftedOp(const Register& dst,
                                                  int64_t imm,
                                                  PreShiftImmMode mode) {
  int reg_size = dst.SizeInBits();
  // Encode the immediate in a single move instruction, if possible.
  if (TryOneInstrMoveImmediate(dst, imm)) {
    // The move was successful; nothing to do here.
  } else {
    // Pre-shift the immediate to the least-significant bits of the register.
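    //
    // For example (illustrative): for Add(x0, x1, 0xABCD00000) the immediate
    // is neither add/sub- nor logical-encodable, but (0xABCD00000 >> 20) is a
    // single movz, so we emit
    //   movz temp, #0xABCD
    // and return Operand(temp, LSL, 20) for the caller to fold into the add.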
    int shift_low;
    if (reg_size == 64) {
      shift_low = base::bits::CountTrailingZeros(imm);
    } else {
      DCHECK_EQ(reg_size, 32);
      shift_low = base::bits::CountTrailingZeros(static_cast<uint32_t>(imm));
    }

    if (mode == kLimitShiftForSP) {
      // When applied to the stack pointer, the subsequent arithmetic operation
      // can use the extend form to shift left by a maximum of four bits. Right
      // shifts are not allowed, so we filter them out later before the new
      // immediate is tested.
      shift_low = std::min(shift_low, 4);
    }
    int64_t imm_low = imm >> shift_low;

    // Pre-shift the immediate to the most-significant bits of the register. We
    // insert set bits in the least-significant bits, as this creates a
    // different immediate that may be encodable using movn or orr-immediate.
    // If this new immediate is encodable, the set bits will be eliminated by
    // the post shift on the following instruction.
    int shift_high = CountLeadingZeros(imm, reg_size);
    int64_t imm_high = (imm << shift_high) | ((INT64_C(1) << shift_high) - 1);

    if ((mode != kNoShift) && TryOneInstrMoveImmediate(dst, imm_low)) {
      // The new immediate has been moved into the destination's low bits:
      // return a new leftward-shifting operand.
      return Operand(dst, LSL, shift_low);
    } else if ((mode == kAnyShift) && TryOneInstrMoveImmediate(dst, imm_high)) {
      // The new immediate has been moved into the destination's high bits:
      // return a new rightward-shifting operand.
      return Operand(dst, LSR, shift_high);
    } else {
      // Use the generic move operation to set up the immediate.
      Mov(dst, imm);
    }
  }
  return Operand(dst);
}

void TurboAssembler::AddSubMacro(const Register& rd, const Register& rn,
                                 const Operand& operand, FlagsUpdate S,
                                 AddSubOp op) {
  if (operand.IsZero() && rd == rn && rd.Is64Bits() && rn.Is64Bits() &&
      !operand.NeedsRelocation(this) && (S == LeaveFlags)) {
    // The instruction would be a nop. Avoid generating useless code.
    return;
  }

  if (operand.NeedsRelocation(this)) {
    UseScratchRegisterScope temps(this);
    Register temp = temps.AcquireX();
    Ldr(temp, operand.immediate());
    AddSubMacro(rd, rn, temp, S, op);
  } else if ((operand.IsImmediate() &&
              !IsImmAddSub(operand.ImmediateValue())) ||
             (rn.IsZero() && !operand.IsShiftedRegister()) ||
             (operand.IsShiftedRegister() && (operand.shift() == ROR))) {
    UseScratchRegisterScope temps(this);
    Register temp = temps.AcquireSameSizeAs(rn);
    if (operand.IsImmediate()) {
      PreShiftImmMode mode = kAnyShift;

      // If the destination or source register is the stack pointer, we can
      // only pre-shift the immediate right by values supported in the add/sub
      // extend encoding.
      if (rd == sp) {
        // If the destination is SP and flags will be set, we can't pre-shift
        // the immediate at all.
        mode = (S == SetFlags) ? kNoShift : kLimitShiftForSP;
      } else if (rn == sp) {
        mode = kLimitShiftForSP;
      }

      Operand imm_operand =
          MoveImmediateForShiftedOp(temp, operand.ImmediateValue(), mode);
      AddSub(rd, rn, imm_operand, S, op);
    } else {
      Mov(temp, operand);
      AddSub(rd, rn, temp, S, op);
    }
  } else {
    AddSub(rd, rn, operand, S, op);
  }
}

void TurboAssembler::AddSubWithCarryMacro(const Register& rd,
                                          const Register& rn,
                                          const Operand& operand, FlagsUpdate S,
                                          AddSubWithCarryOp op) {
  DCHECK(rd.SizeInBits() == rn.SizeInBits());
  UseScratchRegisterScope temps(this);

  if (operand.NeedsRelocation(this)) {
    Register temp = temps.AcquireX();
    Ldr(temp, operand.immediate());
    AddSubWithCarryMacro(rd, rn, temp, S, op);

  } else if (operand.IsImmediate() ||
             (operand.IsShiftedRegister() && (operand.shift() == ROR))) {
    // Add/sub with carry (immediate or ROR shifted register.)
    Register temp = temps.AcquireSameSizeAs(rn);
    Mov(temp, operand);
    AddSubWithCarry(rd, rn, temp, S, op);

  } else if (operand.IsShiftedRegister() && (operand.shift_amount() != 0)) {
    // Add/sub with carry (shifted register).
    DCHECK(operand.reg().SizeInBits() == rd.SizeInBits());
    DCHECK(operand.shift() != ROR);
    DCHECK(is_uintn(operand.shift_amount(), rd.SizeInBits() == kXRegSizeInBits
                                                ? kXRegSizeInBitsLog2
                                                : kWRegSizeInBitsLog2));
    Register temp = temps.AcquireSameSizeAs(rn);
    EmitShift(temp, operand.reg(), operand.shift(), operand.shift_amount());
    AddSubWithCarry(rd, rn, temp, S, op);

  } else if (operand.IsExtendedRegister()) {
    // Add/sub with carry (extended register).
    DCHECK(operand.reg().SizeInBits() <= rd.SizeInBits());
    // Add/sub extended supports a shift <= 4. We want to support exactly the
    // same modes.
    DCHECK_LE(operand.shift_amount(), 4);
    DCHECK(operand.reg().Is64Bits() ||
           ((operand.extend() != UXTX) && (operand.extend() != SXTX)));
    Register temp = temps.AcquireSameSizeAs(rn);
    EmitExtendShift(temp, operand.reg(), operand.extend(),
                    operand.shift_amount());
    AddSubWithCarry(rd, rn, temp, S, op);

  } else {
    // The addressing mode is directly supported by the instruction.
    AddSubWithCarry(rd, rn, operand, S, op);
  }
}

void TurboAssembler::LoadStoreMacro(const CPURegister& rt,
                                    const MemOperand& addr, LoadStoreOp op) {
  int64_t offset = addr.offset();
  unsigned size = CalcLSDataSize(op);

  // Check if an immediate offset fits in the immediate field of the
  // appropriate instruction. If not, emit two instructions to perform
  // the operation.
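  // (For instance, Ldr(x0, MemOperand(x1, 1 << 20)) takes the first branch
  // below: the offset fits neither form, so it is materialized in a scratch
  // register and a register-offset load is emitted instead.)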
  if (addr.IsImmediateOffset() && !IsImmLSScaled(offset, size) &&
      !IsImmLSUnscaled(offset)) {
    // Immediate offset that can't be encoded using unsigned or unscaled
    // addressing modes.
    UseScratchRegisterScope temps(this);
    Register temp = temps.AcquireSameSizeAs(addr.base());
    Mov(temp, addr.offset());
    LoadStore(rt, MemOperand(addr.base(), temp), op);
  } else if (addr.IsPostIndex() && !IsImmLSUnscaled(offset)) {
    // Post-index beyond unscaled addressing range.
    LoadStore(rt, MemOperand(addr.base()), op);
    add(addr.base(), addr.base(), offset);
  } else if (addr.IsPreIndex() && !IsImmLSUnscaled(offset)) {
    // Pre-index beyond unscaled addressing range.
    add(addr.base(), addr.base(), offset);
    LoadStore(rt, MemOperand(addr.base()), op);
  } else {
    // Encodable in one load/store instruction.
    LoadStore(rt, addr, op);
  }
}

void TurboAssembler::LoadStorePairMacro(const CPURegister& rt,
                                        const CPURegister& rt2,
                                        const MemOperand& addr,
                                        LoadStorePairOp op) {
  // TODO(all): Should we support register offset for load-store-pair?
  DCHECK(!addr.IsRegisterOffset());

  int64_t offset = addr.offset();
  unsigned size = CalcLSPairDataSize(op);

  // Check if the offset fits in the immediate field of the appropriate
  // instruction. If not, emit two instructions to perform the operation.
  if (IsImmLSPair(offset, size)) {
    // Encodable in one load/store pair instruction.
    LoadStorePair(rt, rt2, addr, op);
  } else {
    Register base = addr.base();
    if (addr.IsImmediateOffset()) {
      UseScratchRegisterScope temps(this);
      Register temp = temps.AcquireSameSizeAs(base);
      Add(temp, base, offset);
      LoadStorePair(rt, rt2, MemOperand(temp), op);
    } else if (addr.IsPostIndex()) {
      LoadStorePair(rt, rt2, MemOperand(base), op);
      Add(base, base, offset);
    } else {
      DCHECK(addr.IsPreIndex());
      Add(base, base, offset);
      LoadStorePair(rt, rt2, MemOperand(base), op);
    }
  }
}

bool TurboAssembler::NeedExtraInstructionsOrRegisterBranch(
    Label* label, ImmBranchType b_type) {
  bool need_longer_range = false;
  // There are two situations in which we care about the offset being out of
  // range:
  //  - The label is bound but too far away.
  //  - The label is not bound but linked, and the previous branch
  //    instruction in the chain is too far away.
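  // When this returns true, callers such as B(Label*, Condition) and the
  // Tbz/Tbnz/Cbz/Cbnz wrappers below emit an inverted short-range branch over
  // an unconditional B(label), which has a far larger (+/-128 MB) range.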
  if (label->is_bound() || label->is_linked()) {
    need_longer_range =
        !Instruction::IsValidImmPCOffset(b_type, label->pos() - pc_offset());
  }
  if (!need_longer_range && !label->is_bound()) {
    int max_reachable_pc = pc_offset() + Instruction::ImmBranchRange(b_type);
    unresolved_branches_.insert(std::pair<int, FarBranchInfo>(
        max_reachable_pc, FarBranchInfo(pc_offset(), label)));
    // Also maintain the next pool check.
    next_veneer_pool_check_ = std::min(
        next_veneer_pool_check_, max_reachable_pc - kVeneerDistanceCheckMargin);
  }
  return need_longer_range;
}

void TurboAssembler::Adr(const Register& rd, Label* label, AdrHint hint) {
  DCHECK(allow_macro_instructions());
  DCHECK(!rd.IsZero());

  if (hint == kAdrNear) {
    adr(rd, label);
    return;
  }

  DCHECK_EQ(hint, kAdrFar);
  if (label->is_bound()) {
    int label_offset = label->pos() - pc_offset();
    if (Instruction::IsValidPCRelOffset(label_offset)) {
      adr(rd, label);
    } else {
      DCHECK_LE(label_offset, 0);
      int min_adr_offset = -(1 << (Instruction::ImmPCRelRangeBitwidth - 1));
      adr(rd, min_adr_offset);
      Add(rd, rd, label_offset - min_adr_offset);
    }
  } else {
    UseScratchRegisterScope temps(this);
    Register scratch = temps.AcquireX();

    InstructionAccurateScope scope(this,
                                   PatchingAssembler::kAdrFarPatchableNInstrs);
    adr(rd, label);
    for (int i = 0; i < PatchingAssembler::kAdrFarPatchableNNops; ++i) {
      nop(ADR_FAR_NOP);
    }
    movz(scratch, 0);
  }
}

void TurboAssembler::B(Label* label, BranchType type, Register reg, int bit) {
  DCHECK((reg == NoReg || type >= kBranchTypeFirstUsingReg) &&
         (bit == -1 || type >= kBranchTypeFirstUsingBit));
  if (kBranchTypeFirstCondition <= type && type <= kBranchTypeLastCondition) {
    B(static_cast<Condition>(type), label);
  } else {
    switch (type) {
      case always:
        B(label);
        break;
      case never:
        break;
      case reg_zero:
        Cbz(reg, label);
        break;
      case reg_not_zero:
        Cbnz(reg, label);
        break;
      case reg_bit_clear:
        Tbz(reg, bit, label);
        break;
      case reg_bit_set:
        Tbnz(reg, bit, label);
        break;
      default:
        UNREACHABLE();
    }
  }
}

void TurboAssembler::B(Label* label, Condition cond) {
  DCHECK(allow_macro_instructions());
  DCHECK((cond != al) && (cond != nv));

  Label done;
  bool need_extra_instructions =
      NeedExtraInstructionsOrRegisterBranch(label, CondBranchType);

  if (need_extra_instructions) {
    b(&done, NegateCondition(cond));
    B(label);
  } else {
    b(label, cond);
  }
  bind(&done);
}

void TurboAssembler::Tbnz(const Register& rt, unsigned bit_pos, Label* label) {
  DCHECK(allow_macro_instructions());

  Label done;
  bool need_extra_instructions =
      NeedExtraInstructionsOrRegisterBranch(label, TestBranchType);

  if (need_extra_instructions) {
    tbz(rt, bit_pos, &done);
    B(label);
  } else {
    tbnz(rt, bit_pos, label);
  }
  bind(&done);
}

void TurboAssembler::Tbz(const Register& rt, unsigned bit_pos, Label* label) {
  DCHECK(allow_macro_instructions());

  Label done;
  bool need_extra_instructions =
      NeedExtraInstructionsOrRegisterBranch(label, TestBranchType);

  if (need_extra_instructions) {
    tbnz(rt, bit_pos, &done);
    B(label);
  } else {
    tbz(rt, bit_pos, label);
  }
  bind(&done);
}

void TurboAssembler::Cbnz(const Register& rt, Label* label) {
  DCHECK(allow_macro_instructions());

  Label done;
  bool need_extra_instructions =
      NeedExtraInstructionsOrRegisterBranch(label, CompareBranchType);

  if (need_extra_instructions) {
    cbz(rt, &done);
    B(label);
  } else {
    cbnz(rt, label);
  }
  bind(&done);
}

void TurboAssembler::Cbz(const Register& rt, Label* label) {
  DCHECK(allow_macro_instructions());

  Label done;
  bool need_extra_instructions =
      NeedExtraInstructionsOrRegisterBranch(label, CompareBranchType);

  if (need_extra_instructions) {
    cbnz(rt, &done);
    B(label);
  } else {
    cbz(rt, label);
  }
  bind(&done);
}

// Pseudo-instructions.

void TurboAssembler::Abs(const Register& rd, const Register& rm,
                         Label* is_not_representable, Label* is_representable) {
  DCHECK(allow_macro_instructions());
  DCHECK(AreSameSizeAndType(rd, rm));

  Cmp(rm, 1);
  Cneg(rd, rm, lt);

  // If the comparison sets the v flag, the input was the smallest value
  // representable by rm, and the mathematical result of abs(rm) is not
  // representable using two's complement.
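  // (For example, Abs(x0, x0) with x0 == INT64_MIN leaves x0 unchanged and,
  // if provided, takes the is_not_representable branch, because the comparison
  // INT64_MIN - 1 overflows and sets the v flag.)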
  if ((is_not_representable != nullptr) && (is_representable != nullptr)) {
    B(is_not_representable, vs);
    B(is_representable);
  } else if (is_not_representable != nullptr) {
    B(is_not_representable, vs);
  } else if (is_representable != nullptr) {
    B(is_representable, vc);
  }
}

// Abstracted stack operations.

void TurboAssembler::Push(const CPURegister& src0, const CPURegister& src1,
                          const CPURegister& src2, const CPURegister& src3,
                          const CPURegister& src4, const CPURegister& src5,
                          const CPURegister& src6, const CPURegister& src7) {
  DCHECK(AreSameSizeAndType(src0, src1, src2, src3, src4, src5, src6, src7));

  int count = 5 + src5.is_valid() + src6.is_valid() + src7.is_valid();
  int size = src0.SizeInBytes();
  DCHECK_EQ(0, (size * count) % 16);

  PushHelper(4, size, src0, src1, src2, src3);
  PushHelper(count - 4, size, src4, src5, src6, src7);
}

void TurboAssembler::Pop(const CPURegister& dst0, const CPURegister& dst1,
                         const CPURegister& dst2, const CPURegister& dst3,
                         const CPURegister& dst4, const CPURegister& dst5,
                         const CPURegister& dst6, const CPURegister& dst7) {
  // It is not valid to pop into the same register more than once in one
  // instruction, not even into the zero register.
  DCHECK(!AreAliased(dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7));
  DCHECK(AreSameSizeAndType(dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7));
  DCHECK(dst0.is_valid());

  int count = 5 + dst5.is_valid() + dst6.is_valid() + dst7.is_valid();
  int size = dst0.SizeInBytes();
  DCHECK_EQ(0, (size * count) % 16);

  PopHelper(4, size, dst0, dst1, dst2, dst3);
  PopHelper(count - 4, size, dst4, dst5, dst6, dst7);
}

void MacroAssembler::PushMultipleTimes(CPURegister src, Register count) {
  UseScratchRegisterScope temps(this);
  Register temp = temps.AcquireSameSizeAs(count);

  Label loop, leftover2, leftover1, done;

  Subs(temp, count, 4);
  B(mi, &leftover2);

  // Push groups of four first.
  Bind(&loop);
  Subs(temp, temp, 4);
  PushHelper(4, src.SizeInBytes(), src, src, src, src);
  B(pl, &loop);

  // Push groups of two.
  Bind(&leftover2);
  Tbz(count, 1, &leftover1);
  PushHelper(2, src.SizeInBytes(), src, src, NoReg, NoReg);

  // Push the last one (if required).
  Bind(&leftover1);
  Tbz(count, 0, &done);
  PushHelper(1, src.SizeInBytes(), src, NoReg, NoReg, NoReg);

  Bind(&done);
}

void TurboAssembler::PushHelper(int count, int size, const CPURegister& src0,
                                const CPURegister& src1,
                                const CPURegister& src2,
                                const CPURegister& src3) {
  // Ensure that we don't unintentionally modify scratch or debug registers.
  InstructionAccurateScope scope(this);

  DCHECK(AreSameSizeAndType(src0, src1, src2, src3));
  DCHECK(size == src0.SizeInBytes());

  // When pushing multiple registers, the store order is chosen such that
  // Push(a, b) is equivalent to Push(a) followed by Push(b).
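  // For example (illustrative), pushing two X registers <a, b> emits a single
  //   stp b, a, [sp, #-16]!
  // leaving <a> at the higher address, exactly as Push(a); Push(b) would.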
  switch (count) {
    case 1:
      DCHECK(src1.IsNone() && src2.IsNone() && src3.IsNone());
      str(src0, MemOperand(sp, -1 * size, PreIndex));
      break;
    case 2:
      DCHECK(src2.IsNone() && src3.IsNone());
      stp(src1, src0, MemOperand(sp, -2 * size, PreIndex));
      break;
    case 3:
      DCHECK(src3.IsNone());
      stp(src2, src1, MemOperand(sp, -3 * size, PreIndex));
      str(src0, MemOperand(sp, 2 * size));
      break;
    case 4:
      // Skip over 4 * size, then fill in the gap. This allows four W registers
      // to be pushed using sp, whilst maintaining 16-byte alignment for sp
      // at all times.
      stp(src3, src2, MemOperand(sp, -4 * size, PreIndex));
      stp(src1, src0, MemOperand(sp, 2 * size));
      break;
    default:
      UNREACHABLE();
  }
}

void TurboAssembler::PopHelper(int count, int size, const CPURegister& dst0,
                               const CPURegister& dst1, const CPURegister& dst2,
                               const CPURegister& dst3) {
  // Ensure that we don't unintentionally modify scratch or debug registers.
  InstructionAccurateScope scope(this);

  DCHECK(AreSameSizeAndType(dst0, dst1, dst2, dst3));
  DCHECK(size == dst0.SizeInBytes());

  // When popping multiple registers, the load order is chosen such that
  // Pop(a, b) is equivalent to Pop(a) followed by Pop(b).
  switch (count) {
    case 1:
      DCHECK(dst1.IsNone() && dst2.IsNone() && dst3.IsNone());
      ldr(dst0, MemOperand(sp, 1 * size, PostIndex));
      break;
    case 2:
      DCHECK(dst2.IsNone() && dst3.IsNone());
      ldp(dst0, dst1, MemOperand(sp, 2 * size, PostIndex));
      break;
    case 3:
      DCHECK(dst3.IsNone());
      ldr(dst2, MemOperand(sp, 2 * size));
      ldp(dst0, dst1, MemOperand(sp, 3 * size, PostIndex));
      break;
    case 4:
      // Load the higher addresses first, then load the lower addresses and
      // skip the whole block in the second instruction. This allows four W
      // registers to be popped using sp, whilst maintaining 16-byte alignment
      // for sp at all times.
      ldp(dst2, dst3, MemOperand(sp, 2 * size));
      ldp(dst0, dst1, MemOperand(sp, 4 * size, PostIndex));
      break;
    default:
      UNREACHABLE();
  }
}

void TurboAssembler::PokePair(const CPURegister& src1, const CPURegister& src2,
                              int offset) {
  DCHECK(AreSameSizeAndType(src1, src2));
  DCHECK((offset >= 0) && ((offset % src1.SizeInBytes()) == 0));
  Stp(src1, src2, MemOperand(sp, offset));
}

void MacroAssembler::PeekPair(const CPURegister& dst1, const CPURegister& dst2,
                              int offset) {
  DCHECK(AreSameSizeAndType(dst1, dst2));
  DCHECK((offset >= 0) && ((offset % dst1.SizeInBytes()) == 0));
  Ldp(dst1, dst2, MemOperand(sp, offset));
}

void MacroAssembler::PushCalleeSavedRegisters() {
  ASM_CODE_COMMENT(this);
  // Ensure that the macro-assembler doesn't use any scratch registers.
  InstructionAccurateScope scope(this);

  MemOperand tos(sp, -2 * static_cast<int>(kXRegSize), PreIndex);

  stp(d14, d15, tos);
  stp(d12, d13, tos);
  stp(d10, d11, tos);
  stp(d8, d9, tos);

  stp(x27, x28, tos);
  stp(x25, x26, tos);
  stp(x23, x24, tos);
  stp(x21, x22, tos);
  stp(x19, x20, tos);

  STATIC_ASSERT(
      EntryFrameConstants::kCalleeSavedRegisterBytesPushedBeforeFpLrPair ==
      18 * kSystemPointerSize);

#ifdef V8_ENABLE_CONTROL_FLOW_INTEGRITY
  // Use the stack pointer's value immediately before pushing the LR as the
  // context for signing it. This is what the StackFrameIterator expects.
  pacibsp();
#endif

  stp(x29, x30, tos);  // fp, lr

  STATIC_ASSERT(
      EntryFrameConstants::kCalleeSavedRegisterBytesPushedAfterFpLrPair == 0);
}

void MacroAssembler::PopCalleeSavedRegisters() {
  ASM_CODE_COMMENT(this);
  // Ensure that the macro-assembler doesn't use any scratch registers.
  InstructionAccurateScope scope(this);

  MemOperand tos(sp, 2 * kXRegSize, PostIndex);

  ldp(x29, x30, tos);  // fp, lr

#ifdef V8_ENABLE_CONTROL_FLOW_INTEGRITY
  // The context (stack pointer value) for authenticating the LR here must
  // match the one used for signing it (see `PushCalleeSavedRegisters`).
  autibsp();
#endif

  ldp(x19, x20, tos);
  ldp(x21, x22, tos);
  ldp(x23, x24, tos);
  ldp(x25, x26, tos);
  ldp(x27, x28, tos);

  ldp(d8, d9, tos);
  ldp(d10, d11, tos);
  ldp(d12, d13, tos);
  ldp(d14, d15, tos);
}

void TurboAssembler::AssertSpAligned() {
  if (!FLAG_debug_code) return;
  ASM_CODE_COMMENT(this);
  HardAbortScope hard_abort(this);  // Avoid calls to Abort.
  // Arm64 requires the stack pointer to be 16-byte aligned prior to address
  // calculation.
  UseScratchRegisterScope scope(this);
  Register temp = scope.AcquireX();
  Mov(temp, sp);
  Tst(temp, 15);
  Check(eq, AbortReason::kUnexpectedStackPointer);
}

void TurboAssembler::CopySlots(int dst, Register src, Register slot_count) {
  DCHECK(!src.IsZero());
  UseScratchRegisterScope scope(this);
  Register dst_reg = scope.AcquireX();
  SlotAddress(dst_reg, dst);
  SlotAddress(src, src);
  CopyDoubleWords(dst_reg, src, slot_count);
}

void TurboAssembler::CopySlots(Register dst, Register src,
                               Register slot_count) {
  DCHECK(!dst.IsZero() && !src.IsZero());
  SlotAddress(dst, dst);
  SlotAddress(src, src);
  CopyDoubleWords(dst, src, slot_count);
}

void TurboAssembler::CopyDoubleWords(Register dst, Register src, Register count,
                                     CopyDoubleWordsMode mode) {
  ASM_CODE_COMMENT(this);
  DCHECK(!AreAliased(dst, src, count));

  if (FLAG_debug_code) {
    Register pointer1 = dst;
    Register pointer2 = src;
    if (mode == kSrcLessThanDst) {
      pointer1 = src;
      pointer2 = dst;
    }
    // Copy requires pointer1 < pointer2 || (pointer1 - pointer2) >= count.
    Label pointer1_below_pointer2;
    Subs(pointer1, pointer1, pointer2);
    B(lt, &pointer1_below_pointer2);
    Cmp(pointer1, count);
    Check(ge, AbortReason::kOffsetOutOfRange);
    Bind(&pointer1_below_pointer2);
    Add(pointer1, pointer1, pointer2);
  }
  static_assert(kSystemPointerSize == kDRegSize,
                "pointers must be the same size as doubles");

  if (mode == kDstLessThanSrcAndReverse) {
    Add(src, src, Operand(count, LSL, kSystemPointerSizeLog2));
    Sub(src, src, kSystemPointerSize);
  }

  int src_direction = (mode == kDstLessThanSrc) ? 1 : -1;
  int dst_direction = (mode == kSrcLessThanDst) ? -1 : 1;

  UseScratchRegisterScope scope(this);
  VRegister temp0 = scope.AcquireD();
  VRegister temp1 = scope.AcquireD();

  Label pairs, loop, done;

  Tbz(count, 0, &pairs);
  Ldr(temp0, MemOperand(src, src_direction * kSystemPointerSize, PostIndex));
  Sub(count, count, 1);
  Str(temp0, MemOperand(dst, dst_direction * kSystemPointerSize, PostIndex));

  Bind(&pairs);
  if (mode == kSrcLessThanDst) {
    // Adjust pointers for post-index ldp/stp with negative offset:
    Sub(dst, dst, kSystemPointerSize);
    Sub(src, src, kSystemPointerSize);
  } else if (mode == kDstLessThanSrcAndReverse) {
    Sub(src, src, kSystemPointerSize);
  }
  Bind(&loop);
  Cbz(count, &done);
  Ldp(temp0, temp1,
      MemOperand(src, 2 * src_direction * kSystemPointerSize, PostIndex));
  Sub(count, count, 2);
  if (mode == kDstLessThanSrcAndReverse) {
    Stp(temp1, temp0,
        MemOperand(dst, 2 * dst_direction * kSystemPointerSize, PostIndex));
  } else {
    Stp(temp0, temp1,
        MemOperand(dst, 2 * dst_direction * kSystemPointerSize, PostIndex));
  }
  B(&loop);

  // TODO(all): large copies may benefit from using temporary Q registers
  // to copy four double words per iteration.

  Bind(&done);
}

void TurboAssembler::SlotAddress(Register dst, int slot_offset) {
  Add(dst, sp, slot_offset << kSystemPointerSizeLog2);
}

void TurboAssembler::SlotAddress(Register dst, Register slot_offset) {
  Add(dst, sp, Operand(slot_offset, LSL, kSystemPointerSizeLog2));
}

void TurboAssembler::AssertFPCRState(Register fpcr) {
  if (!FLAG_debug_code) return;
  ASM_CODE_COMMENT(this);
  Label unexpected_mode, done;
  UseScratchRegisterScope temps(this);
  if (fpcr.IsNone()) {
    fpcr = temps.AcquireX();
    Mrs(fpcr, FPCR);
  }

  // Settings left to their default values:
  //   - Assert that flush-to-zero is not set.
  Tbnz(fpcr, FZ_offset, &unexpected_mode);
  //   - Assert that the rounding mode is nearest-with-ties-to-even.
  STATIC_ASSERT(FPTieEven == 0);
  Tst(fpcr, RMode_mask);
  B(eq, &done);

  Bind(&unexpected_mode);
  Abort(AbortReason::kUnexpectedFPCRMode);

  Bind(&done);
}

void TurboAssembler::CanonicalizeNaN(const VRegister& dst,
                                     const VRegister& src) {
  AssertFPCRState();

  // Subtracting 0.0 preserves all inputs except for signalling NaNs, which
  // become quiet NaNs. We use fsub rather than fadd because fsub preserves -0.0
  // inputs: -0.0 + 0.0 = 0.0, but -0.0 - 0.0 = -0.0.
  Fsub(dst, src, fp_zero);
}

void TurboAssembler::LoadRoot(Register destination, RootIndex index) {
  ASM_CODE_COMMENT(this);
  // TODO(jbramley): Most root values are constants, and can be synthesized
  // without a load. Refer to the ARM back end for details.
  Ldr(destination,
      MemOperand(kRootRegister, RootRegisterOffsetForRootIndex(index)));
}

void TurboAssembler::PushRoot(RootIndex index) {
  ASM_CODE_COMMENT(this);
  UseScratchRegisterScope temps(this);
  Register tmp = temps.AcquireX();
  LoadRoot(tmp, index);
  Push(tmp);
}

void TurboAssembler::Move(Register dst, Smi src) { Mov(dst, src); }
void TurboAssembler::Move(Register dst, MemOperand src) { Ldr(dst, src); }
void TurboAssembler::Move(Register dst, Register src) {
  if (dst == src) return;
  Mov(dst, src);
}

void TurboAssembler::MovePair(Register dst0, Register src0, Register dst1,
                              Register src1) {
  DCHECK_NE(dst0, dst1);
  if (dst0 != src1) {
    Mov(dst0, src0);
    Mov(dst1, src1);
  } else if (dst1 != src0) {
    // Swap the order of the moves to resolve the overlap.
    Mov(dst1, src1);
    Mov(dst0, src0);
  } else {
    // Worst-case scenario, this is a swap.
    Swap(dst0, src0);
  }
}

void TurboAssembler::Swap(Register lhs, Register rhs) {
  DCHECK(lhs.IsSameSizeAndType(rhs));
  DCHECK_NE(lhs, rhs);
  UseScratchRegisterScope temps(this);
  Register temp = temps.AcquireX();
  Mov(temp, rhs);
  Mov(rhs, lhs);
  Mov(lhs, temp);
}

void TurboAssembler::Swap(VRegister lhs, VRegister rhs) {
  DCHECK(lhs.IsSameSizeAndType(rhs));
  DCHECK_NE(lhs, rhs);
  UseScratchRegisterScope temps(this);
  VRegister temp = VRegister::no_reg();
  if (lhs.IsS()) {
    temp = temps.AcquireS();
  } else if (lhs.IsD()) {
    temp = temps.AcquireD();
  } else {
    DCHECK(lhs.IsQ());
    temp = temps.AcquireQ();
  }
  Mov(temp, rhs);
  Mov(rhs, lhs);
  Mov(lhs, temp);
}

void TurboAssembler::AssertSmi(Register object, AbortReason reason) {
  if (!FLAG_debug_code) return;
  ASM_CODE_COMMENT(this);
  STATIC_ASSERT(kSmiTag == 0);
  Tst(object, kSmiTagMask);
  Check(eq, reason);
}

void MacroAssembler::AssertNotSmi(Register object, AbortReason reason) {
  if (!FLAG_debug_code) return;
  ASM_CODE_COMMENT(this);
  STATIC_ASSERT(kSmiTag == 0);
  Tst(object, kSmiTagMask);
  Check(ne, reason);
}

void MacroAssembler::AssertCodeT(Register object) {
  if (!FLAG_debug_code) return;
  ASM_CODE_COMMENT(this);
  AssertNotSmi(object, AbortReason::kOperandIsNotACodeT);

  UseScratchRegisterScope temps(this);
  Register temp = temps.AcquireX();

  CompareObjectType(object, temp, temp, CODET_TYPE);
  Check(eq, AbortReason::kOperandIsNotACodeT);
}

1526
void MacroAssembler::AssertConstructor(Register object) {
1527 1528 1529
  if (!FLAG_debug_code) return;
  ASM_CODE_COMMENT(this);
  AssertNotSmi(object, AbortReason::kOperandIsASmiAndNotAConstructor);

  UseScratchRegisterScope temps(this);
  Register temp = temps.AcquireX();

  LoadMap(temp, object);
  Ldrb(temp, FieldMemOperand(temp, Map::kBitFieldOffset));
  Tst(temp, Operand(Map::Bits1::IsConstructorBit::kMask));

  Check(ne, AbortReason::kOperandIsNotAConstructor);
}

void MacroAssembler::AssertFunction(Register object) {
  if (!FLAG_debug_code) return;
  ASM_CODE_COMMENT(this);
  AssertNotSmi(object, AbortReason::kOperandIsASmiAndNotAFunction);

  UseScratchRegisterScope temps(this);
  Register temp = temps.AcquireX();
  LoadMap(temp, object);
  CompareInstanceTypeRange(temp, temp, FIRST_JS_FUNCTION_TYPE,
                           LAST_JS_FUNCTION_TYPE);
  Check(ls, AbortReason::kOperandIsNotAFunction);
}

void MacroAssembler::AssertCallableFunction(Register object) {
  if (!FLAG_debug_code) return;
  ASM_CODE_COMMENT(this);
  AssertNotSmi(object, AbortReason::kOperandIsASmiAndNotAFunction);

  UseScratchRegisterScope temps(this);
  Register temp = temps.AcquireX();
  LoadMap(temp, object);
  CompareInstanceTypeRange(temp, temp, FIRST_CALLABLE_JS_FUNCTION_TYPE,
                           LAST_CALLABLE_JS_FUNCTION_TYPE);
  Check(ls, AbortReason::kOperandIsNotACallableFunction);
}

void MacroAssembler::AssertBoundFunction(Register object) {
  if (!FLAG_debug_code) return;
  ASM_CODE_COMMENT(this);
  AssertNotSmi(object, AbortReason::kOperandIsASmiAndNotABoundFunction);

  UseScratchRegisterScope temps(this);
  Register temp = temps.AcquireX();

  CompareObjectType(object, temp, temp, JS_BOUND_FUNCTION_TYPE);
  Check(eq, AbortReason::kOperandIsNotABoundFunction);
}

void MacroAssembler::AssertGeneratorObject(Register object) {
  if (!FLAG_debug_code) return;
  ASM_CODE_COMMENT(this);
  AssertNotSmi(object, AbortReason::kOperandIsASmiAndNotAGeneratorObject);

  // Load map
  UseScratchRegisterScope temps(this);
  Register temp = temps.AcquireX();
  LoadMap(temp, object);

  Label do_check;
  // Load instance type and check if JSGeneratorObject
  CompareInstanceType(temp, temp, JS_GENERATOR_OBJECT_TYPE);
  B(eq, &do_check);

  // Check if JSAsyncFunctionObject
  Cmp(temp, JS_ASYNC_FUNCTION_OBJECT_TYPE);
  B(eq, &do_check);

  // Check if JSAsyncGeneratorObject
  Cmp(temp, JS_ASYNC_GENERATOR_OBJECT_TYPE);

  bind(&do_check);
  // Restore generator object to register and perform assertion
  Check(eq, AbortReason::kOperandIsNotAGeneratorObject);
}

void MacroAssembler::AssertUndefinedOrAllocationSite(Register object) {
  if (!FLAG_debug_code) return;
  ASM_CODE_COMMENT(this);
  UseScratchRegisterScope temps(this);
  Register scratch = temps.AcquireX();
  Label done_checking;
  AssertNotSmi(object);
  JumpIfRoot(object, RootIndex::kUndefinedValue, &done_checking);
  LoadMap(scratch, object);
  CompareInstanceType(scratch, scratch, ALLOCATION_SITE_TYPE);
  Assert(eq, AbortReason::kExpectedUndefinedOrCell);
  Bind(&done_checking);
}

void TurboAssembler::AssertPositiveOrZero(Register value) {
  if (!FLAG_debug_code) return;
  ASM_CODE_COMMENT(this);
  Label done;
  int sign_bit = value.Is64Bits() ? kXSignBit : kWSignBit;
  Tbz(value, sign_bit, &done);
  Abort(AbortReason::kUnexpectedNegativeValue);
  Bind(&done);
}

void MacroAssembler::CallRuntime(const Runtime::Function* f, int num_arguments,
                                 SaveFPRegsMode save_doubles) {
  ASM_CODE_COMMENT(this);
  // All arguments must be on the stack before this function is called.
  // x0 holds the return value after the call.

  // Check that the number of arguments matches what the function expects.
  // If f->nargs is -1, the function can accept a variable number of arguments.
  CHECK(f->nargs < 0 || f->nargs == num_arguments);

  // Place the necessary arguments.
  Mov(x0, num_arguments);
  Mov(x1, ExternalReference::Create(f));

  Handle<Code> code =
      CodeFactory::CEntry(isolate(), f->result_size, save_doubles);
  Call(code, RelocInfo::CODE_TARGET);
}

void MacroAssembler::JumpToExternalReference(const ExternalReference& builtin,
                                             bool builtin_exit_frame) {
  ASM_CODE_COMMENT(this);
  Mov(x1, builtin);
  Handle<Code> code = CodeFactory::CEntry(isolate(), 1, SaveFPRegsMode::kIgnore,
                                          ArgvMode::kStack, builtin_exit_frame);
  Jump(code, RelocInfo::CODE_TARGET);
}

void MacroAssembler::JumpToInstructionStream(Address entry) {
  Ldr(kOffHeapTrampolineRegister, Operand(entry, RelocInfo::OFF_HEAP_TARGET));
  Br(kOffHeapTrampolineRegister);
}

void MacroAssembler::TailCallRuntime(Runtime::FunctionId fid) {
  ASM_CODE_COMMENT(this);
  const Runtime::Function* function = Runtime::FunctionForId(fid);
  DCHECK_EQ(1, function->result_size);
  if (function->nargs >= 0) {
    // TODO(1236192): Most runtime routines don't need the number of
    // arguments passed in because it is constant. At some point we
    // should remove this need and make the runtime routine entry code
    // smarter.
    Mov(x0, function->nargs);
  }
  JumpToExternalReference(ExternalReference::Create(fid));
}

int TurboAssembler::ActivationFrameAlignment() {
#if V8_HOST_ARCH_ARM64
  // Running on the real platform. Use the alignment as mandated by the local
  // environment.
  // Note: This will break if we ever start generating snapshots on one ARM
  // platform for another ARM platform with a different alignment.
  return base::OS::ActivationFrameAlignment();
#else   // V8_HOST_ARCH_ARM64
  // If we are using the simulator then we should always align to the expected
  // alignment. As the simulator is used to generate snapshots we do not know
  // if the target platform will need alignment, so this is controlled from a
  // flag.
  return FLAG_sim_stack_alignment;
#endif  // V8_HOST_ARCH_ARM64
}

void TurboAssembler::CallCFunction(ExternalReference function,
                                   int num_of_reg_args) {
  CallCFunction(function, num_of_reg_args, 0);
}

void TurboAssembler::CallCFunction(ExternalReference function,
                                   int num_of_reg_args,
                                   int num_of_double_args) {
  ASM_CODE_COMMENT(this);
  UseScratchRegisterScope temps(this);
  Register temp = temps.AcquireX();
  Mov(temp, function);
  CallCFunction(temp, num_of_reg_args, num_of_double_args);
}

static const int kRegisterPassedArguments = 8;
static const int kFPRegisterPassedArguments = 8;

void TurboAssembler::CallCFunction(Register function, int num_of_reg_args,
                                   int num_of_double_args) {
  ASM_CODE_COMMENT(this);
  DCHECK_LE(num_of_reg_args + num_of_double_args, kMaxCParameters);
  DCHECK(has_frame());

  // Save the frame pointer and PC so that the stack layout remains iterable,
  // even without an ExitFrame which normally exists between JS and C frames.
  Register pc_scratch = x4;
  Register addr_scratch = x5;
  Push(pc_scratch, addr_scratch);

  Label get_pc;
  Bind(&get_pc);
  Adr(pc_scratch, &get_pc);

  // See x64 code for reasoning about how to address the isolate data fields.
  if (root_array_available()) {
    Str(pc_scratch,
        MemOperand(kRootRegister, IsolateData::fast_c_call_caller_pc_offset()));
    Str(fp,
        MemOperand(kRootRegister, IsolateData::fast_c_call_caller_fp_offset()));
  } else {
    DCHECK_NOT_NULL(isolate());
    Mov(addr_scratch,
        ExternalReference::fast_c_call_caller_pc_address(isolate()));
    Str(pc_scratch, MemOperand(addr_scratch));
    Mov(addr_scratch,
        ExternalReference::fast_c_call_caller_fp_address(isolate()));
    Str(fp, MemOperand(addr_scratch));
  }

  Pop(addr_scratch, pc_scratch);

  // Call directly. The function called cannot cause a GC, or allow preemption,
  // so the return address in the link register stays correct.
  Call(function);

  // We don't unset the PC; the FP is the source of truth.
  if (root_array_available()) {
    Str(xzr,
        MemOperand(kRootRegister, IsolateData::fast_c_call_caller_fp_offset()));
  } else {
    DCHECK_NOT_NULL(isolate());
    Push(addr_scratch, xzr);
    Mov(addr_scratch,
        ExternalReference::fast_c_call_caller_fp_address(isolate()));
    Str(xzr, MemOperand(addr_scratch));
    Pop(xzr, addr_scratch);
  }

  if (num_of_reg_args > kRegisterPassedArguments) {
    // Drop the arguments that were passed on the stack.
    int claim_slots = RoundUp(num_of_reg_args - kRegisterPassedArguments, 2);
    Drop(claim_slots);
  }

  if (num_of_double_args > kFPRegisterPassedArguments) {
    // Drop the double arguments that were passed on the stack.
    int claim_slots =
        RoundUp(num_of_double_args - kFPRegisterPassedArguments, 2);
    Drop(claim_slots);
  }
}

void TurboAssembler::LoadFromConstantsTable(Register destination,
                                            int constant_index) {
  ASM_CODE_COMMENT(this);
  DCHECK(RootsTable::IsImmortalImmovable(RootIndex::kBuiltinsConstantsTable));
  LoadRoot(destination, RootIndex::kBuiltinsConstantsTable);
  LoadTaggedPointerField(
      destination, FieldMemOperand(destination, FixedArray::OffsetOfElementAt(
                                                    constant_index)));
}

void TurboAssembler::LoadRootRelative(Register destination, int32_t offset) {
  Ldr(destination, MemOperand(kRootRegister, offset));
}

void TurboAssembler::LoadRootRegisterOffset(Register destination,
                                            intptr_t offset) {
  if (offset == 0) {
    Mov(destination, kRootRegister);
  } else {
    Add(destination, kRootRegister, offset);
  }
}

void TurboAssembler::Jump(Register target, Condition cond) {
  if (cond == nv) return;
  Label done;
  if (cond != al) B(NegateCondition(cond), &done);
  Br(target);
  Bind(&done);
}

void TurboAssembler::JumpHelper(int64_t offset, RelocInfo::Mode rmode,
                                Condition cond) {
  if (cond == nv) return;
  Label done;
  if (cond != al) B(NegateCondition(cond), &done);
  if (CanUseNearCallOrJump(rmode)) {
    DCHECK(IsNearCallOffset(offset));
    near_jump(static_cast<int>(offset), rmode);
  } else {
    UseScratchRegisterScope temps(this);
    Register temp = temps.AcquireX();
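    // The 'offset' is expressed in instructions (CalculateTargetOffset divides
    // by kInstrSize), so scale it back to bytes when forming the absolute
    // target address for the far jump.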
    uint64_t imm = reinterpret_cast<uint64_t>(pc_) + offset * kInstrSize;
    Mov(temp, Immediate(imm, rmode));
    Br(temp);
  }
  Bind(&done);
}

// The calculated offset is either:
// * the 'target' input unmodified if this is a Wasm call, or
// * the offset of the target from the code range start, if this is a call to
//   an un-embedded builtin, or
// * the offset of the target from the current PC, in instructions, for any
//   other type of call.
int64_t TurboAssembler::CalculateTargetOffset(Address target,
                                              RelocInfo::Mode rmode, byte* pc) {
  int64_t offset = static_cast<int64_t>(target);
  if (rmode == RelocInfo::WASM_CALL || rmode == RelocInfo::WASM_STUB_CALL) {
    // The target of WebAssembly calls is still an index instead of an actual
    // address at this point, and needs to be encoded as-is.
    return offset;
  }
  if (RelocInfo::IsRuntimeEntry(rmode)) {
    // The runtime entry targets are used for generating short builtin calls
    // from JIT-compiled code (they are not used during snapshot creation).
    // The value is encoded as an offset from the code range (see
    // Assembler::runtime_entry_at()).
    // Note that builtin-to-builtin calls use a different mode, OFF_HEAP_TARGET,
    // and therefore are encoded differently.
    DCHECK_NE(options().code_range_start, 0);
    offset -= static_cast<int64_t>(options().code_range_start);
  } else {
    offset -= reinterpret_cast<int64_t>(pc);
  }
  DCHECK_EQ(offset % kInstrSize, 0);
  offset = offset / static_cast<int>(kInstrSize);
  return offset;
}

void TurboAssembler::Jump(Address target, RelocInfo::Mode rmode,
                          Condition cond) {
  int64_t offset = CalculateTargetOffset(target, rmode, pc_);
  JumpHelper(offset, rmode, cond);
}

void TurboAssembler::Jump(Handle<Code> code, RelocInfo::Mode rmode,
                          Condition cond) {
  DCHECK(RelocInfo::IsCodeTarget(rmode));
  DCHECK_IMPLIES(options().isolate_independent_code,
                 Builtins::IsIsolateIndependentBuiltin(*code));

  if (options().inline_offheap_trampolines) {
    Builtin builtin = Builtin::kNoBuiltinId;
    if (isolate()->builtins()->IsBuiltinHandle(code, &builtin)) {
      // Inline the trampoline.
      CHECK_EQ(cond, Condition::al);  // Implement if necessary.
      TailCallBuiltin(builtin);
      return;
    }
  }

  if (CanUseNearCallOrJump(rmode)) {
    EmbeddedObjectIndex index = AddEmbeddedObject(code);
    DCHECK(is_int32(index));
    JumpHelper(static_cast<int64_t>(index), rmode, cond);
  } else {
    Jump(code.address(), rmode, cond);
  }
}

void TurboAssembler::Jump(const ExternalReference& reference) {
  UseScratchRegisterScope temps(this);
  Register scratch = temps.AcquireX();
  Mov(scratch, reference);
  Jump(scratch);
}

void TurboAssembler::Call(Register target) {
  BlockPoolsScope scope(this);
  Blr(target);
}

void TurboAssembler::Call(Address target, RelocInfo::Mode rmode) {
  BlockPoolsScope scope(this);
  if (CanUseNearCallOrJump(rmode)) {
    int64_t offset = CalculateTargetOffset(target, rmode, pc_);
    DCHECK(IsNearCallOffset(offset));
    near_call(static_cast<int>(offset), rmode);
  } else {
    IndirectCall(target, rmode);
  }
}

void TurboAssembler::Call(Handle<Code> code, RelocInfo::Mode rmode) {
  DCHECK_IMPLIES(options().isolate_independent_code,
                 Builtins::IsIsolateIndependentBuiltin(*code));
  BlockPoolsScope scope(this);

  if (options().inline_offheap_trampolines) {
    Builtin builtin = Builtin::kNoBuiltinId;
    if (isolate()->builtins()->IsBuiltinHandle(code, &builtin)) {
      // Inline the trampoline.
      CallBuiltin(builtin);
      return;
    }
  }

  DCHECK(code->IsExecutable());
  if (CanUseNearCallOrJump(rmode)) {
    EmbeddedObjectIndex index = AddEmbeddedObject(code);
    DCHECK(is_int32(index));
    near_call(static_cast<int32_t>(index), rmode);
  } else {
    IndirectCall(code.address(), rmode);
  }
}

void TurboAssembler::Call(ExternalReference target) {
  UseScratchRegisterScope temps(this);
  Register temp = temps.AcquireX();
  Mov(temp, target);
  Call(temp);
}

void TurboAssembler::LoadEntryFromBuiltinIndex(Register builtin_index) {
  ASM_CODE_COMMENT(this);
  // The builtin_index register contains the builtin index as a Smi.
  // Untagging is folded into the indexing operand below.
  if (SmiValuesAre32Bits()) {
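    // With 32-bit Smis the index is stored shifted left by kSmiShift; a single
    // arithmetic shift right by (kSmiShift - kSystemPointerSizeLog2) both
    // untags it and scales it to a byte offset (index * kSystemPointerSize)
    // into the builtin entry table.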
    Asr(builtin_index, builtin_index, kSmiShift - kSystemPointerSizeLog2);
    Add(builtin_index, builtin_index,
        IsolateData::builtin_entry_table_offset());
    Ldr(builtin_index, MemOperand(kRootRegister, builtin_index));
  } else {
    DCHECK(SmiValuesAre31Bits());
    if (COMPRESS_POINTERS_BOOL) {
      Add(builtin_index, kRootRegister,
          Operand(builtin_index.W(), SXTW, kSystemPointerSizeLog2 - kSmiShift));
    } else {
      Add(builtin_index, kRootRegister,
          Operand(builtin_index, LSL, kSystemPointerSizeLog2 - kSmiShift));
    }
    Ldr(builtin_index,
        MemOperand(builtin_index, IsolateData::builtin_entry_table_offset()));
  }
}

void TurboAssembler::LoadEntryFromBuiltin(Builtin builtin,
                                          Register destination) {
  Ldr(destination, EntryFromBuiltinAsOperand(builtin));
}

MemOperand TurboAssembler::EntryFromBuiltinAsOperand(Builtin builtin) {
  ASM_CODE_COMMENT(this);
  DCHECK(root_array_available());
  return MemOperand(kRootRegister,
                    IsolateData::BuiltinEntrySlotOffset(builtin));
}

void TurboAssembler::CallBuiltinByIndex(Register builtin_index) {
  ASM_CODE_COMMENT(this);
  LoadEntryFromBuiltinIndex(builtin_index);
  Call(builtin_index);
}

void TurboAssembler::CallBuiltin(Builtin builtin) {
  ASM_CODE_COMMENT(this);
  DCHECK(Builtins::IsBuiltinId(builtin));
  RecordCommentForOffHeapTrampoline(builtin);
  CHECK_NE(builtin, Builtin::kNoBuiltinId);
  if (options().short_builtin_calls) {
    Call(BuiltinEntry(builtin), RelocInfo::RUNTIME_ENTRY);

  } else {
    UseScratchRegisterScope temps(this);
    Register scratch = temps.AcquireX();
    Ldr(scratch, Operand(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET));
    Call(scratch);
  }
}

void TurboAssembler::TailCallBuiltin(Builtin builtin) {
  ASM_CODE_COMMENT(this);
  DCHECK(Builtins::IsBuiltinId(builtin));
  RecordCommentForOffHeapTrampoline(builtin);
  CHECK_NE(builtin, Builtin::kNoBuiltinId);
  if (options().short_builtin_calls) {
    Jump(BuiltinEntry(builtin), RelocInfo::RUNTIME_ENTRY);

  } else {
    // The control flow integrity (CFI) feature allows us to "sign" code entry
    // points as a target for calls, jumps or both. Arm64 has special
    // instructions for this purpose, so-called "landing pads" (see
    // TurboAssembler::CallTarget(), TurboAssembler::JumpTarget() and
    // TurboAssembler::JumpOrCallTarget()). Currently, we generate "Call"
    // landing pads for CPP builtins. In order to allow tail calling to those
    // builtins we have to use a workaround.
    // x17 is used to allow using "Call" (i.e. `bti c`) rather than "Jump"
    // (i.e. `bti j`) landing pads for the tail-called code.
    Register temp = x17;

    Ldr(temp, Operand(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET));
    Jump(temp);
  }
}

void TurboAssembler::LoadCodeObjectEntry(Register destination,
                                         Register code_object) {
  ASM_CODE_COMMENT(this);
  // Code objects are called differently depending on whether we are generating
  // builtin code (which will later be embedded into the binary) or compiling
  // user JS code at runtime.
  // * Builtin code runs in --jitless mode and thus must not call into on-heap
  //   Code targets. Instead, we dispatch through the builtins entry table.
  // * Codegen at runtime does not have this restriction and we can use the
  //   shorter, branchless instruction sequence. The assumption here is that
  //   targets are usually generated code and not builtin Code objects.

  if (options().isolate_independent_code) {
    DCHECK(root_array_available());
    Label if_code_is_off_heap, out;

    UseScratchRegisterScope temps(this);
    Register scratch = temps.AcquireX();

    DCHECK(!AreAliased(destination, scratch));
    DCHECK(!AreAliased(code_object, scratch));

    // Check whether the Code object is an off-heap trampoline. If so, call its
    // (off-heap) entry point directly without going through the (on-heap)
    // trampoline.  Otherwise, just call the Code object as always.

    Ldr(scratch.W(), FieldMemOperand(code_object, Code::kFlagsOffset));
    TestAndBranchIfAnySet(scratch.W(), Code::IsOffHeapTrampoline::kMask,
                          &if_code_is_off_heap);

    // Not an off-heap trampoline object, the entry point is at
    // Code::raw_instruction_start().
    Add(destination, code_object, Code::kHeaderSize - kHeapObjectTag);
    B(&out);

    // An off-heap trampoline, the entry point is loaded from the builtin entry
    // table.
    bind(&if_code_is_off_heap);
    Ldrsw(scratch, FieldMemOperand(code_object, Code::kBuiltinIndexOffset));
    Add(destination, kRootRegister,
        Operand(scratch, LSL, kSystemPointerSizeLog2));
    Ldr(destination,
        MemOperand(destination, IsolateData::builtin_entry_table_offset()));

    bind(&out);
  } else {
    Add(destination, code_object, Code::kHeaderSize - kHeapObjectTag);
  }
}

void TurboAssembler::CallCodeObject(Register code_object) {
  ASM_CODE_COMMENT(this);
  LoadCodeObjectEntry(code_object, code_object);
  Call(code_object);
}

void TurboAssembler::JumpCodeObject(Register code_object, JumpMode jump_mode) {
  ASM_CODE_COMMENT(this);
  DCHECK_EQ(JumpMode::kJump, jump_mode);
  LoadCodeObjectEntry(code_object, code_object);

  UseScratchRegisterScope temps(this);
  if (code_object != x17) {
    temps.Exclude(x17);
    Mov(x17, code_object);
  }
  Jump(x17);
}

void TurboAssembler::LoadCodeDataContainerEntry(
    Register destination, Register code_data_container_object) {
  ASM_CODE_COMMENT(this);
  CHECK(V8_EXTERNAL_CODE_SPACE_BOOL);

  LoadExternalPointerField(
      destination,
      FieldMemOperand(code_data_container_object,
                      CodeDataContainer::kCodeEntryPointOffset),
      kCodeEntryPointTag);
}

void TurboAssembler::LoadCodeDataContainerCodeNonBuiltin(
    Register destination, Register code_data_container_object) {
  ASM_CODE_COMMENT(this);
  CHECK(V8_EXTERNAL_CODE_SPACE_BOOL);
  // Given the fields layout we can read the Code reference as a full word.
  STATIC_ASSERT(!V8_EXTERNAL_CODE_SPACE_BOOL ||
                (CodeDataContainer::kCodeCageBaseUpper32BitsOffset ==
                 CodeDataContainer::kCodeOffset + kTaggedSize));
  Ldr(destination, FieldMemOperand(code_data_container_object,
                                   CodeDataContainer::kCodeOffset));
}

void TurboAssembler::CallCodeDataContainerObject(
    Register code_data_container_object) {
  ASM_CODE_COMMENT(this);
  LoadCodeDataContainerEntry(code_data_container_object,
                             code_data_container_object);
  Call(code_data_container_object);
}

void TurboAssembler::JumpCodeDataContainerObject(
    Register code_data_container_object, JumpMode jump_mode) {
  ASM_CODE_COMMENT(this);
  DCHECK_EQ(JumpMode::kJump, jump_mode);
  LoadCodeDataContainerEntry(code_data_container_object,
                             code_data_container_object);
  UseScratchRegisterScope temps(this);
  if (code_data_container_object != x17) {
    temps.Exclude(x17);
    Mov(x17, code_data_container_object);
  }
  Jump(x17);
}

void TurboAssembler::LoadCodeTEntry(Register destination, Register code) {
  ASM_CODE_COMMENT(this);
  if (V8_EXTERNAL_CODE_SPACE_BOOL) {
    LoadCodeDataContainerEntry(destination, code);
  } else {
    Add(destination, code, Operand(Code::kHeaderSize - kHeapObjectTag));
  }
}

void TurboAssembler::CallCodeTObject(Register code) {
  if (V8_EXTERNAL_CODE_SPACE_BOOL) {
    CallCodeDataContainerObject(code);
  } else {
    CallCodeObject(code);
  }
}

void TurboAssembler::JumpCodeTObject(Register code, JumpMode jump_mode) {
  if (V8_EXTERNAL_CODE_SPACE_BOOL) {
    JumpCodeDataContainerObject(code, jump_mode);
  } else {
    JumpCodeObject(code, jump_mode);
  }
}

void TurboAssembler::StoreReturnAddressAndCall(Register target) {
  ASM_CODE_COMMENT(this);
  // This generates the final instruction sequence for calls to C functions
  // once an exit frame has been constructed.
  //
  // Note that this assumes the caller code (i.e. the Code object currently
  // being generated) is immovable or that the callee function cannot trigger
  // GC, since the callee function will return to it.

  UseScratchRegisterScope temps(this);
  temps.Exclude(x16, x17);
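  // x16 and x17 are used directly below (x17 holds the return address, x16 is
  // used for PAC signing and the debug check), so keep them out of the scratch
  // register pool.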

  Label return_location;
  Adr(x17, &return_location);
#ifdef V8_ENABLE_CONTROL_FLOW_INTEGRITY
  Add(x16, sp, kSystemPointerSize);
  Pacib1716();
#endif
  Poke(x17, 0);

  if (FLAG_debug_code) {
    ASM_CODE_COMMENT_STRING(this, "Verify fp[kSPOffset]-8");
    // Verify that the slot below fp[kSPOffset]-8 points to the signed return
    // location.
    Ldr(x16, MemOperand(fp, ExitFrameConstants::kSPOffset));
    Ldr(x16, MemOperand(x16, -static_cast<int64_t>(kXRegSize)));
    Cmp(x16, x17);
    Check(eq, AbortReason::kReturnAddressNotFoundInFrame);
  }

  Blr(target);
  Bind(&return_location);
}

void TurboAssembler::IndirectCall(Address target, RelocInfo::Mode rmode) {
  ASM_CODE_COMMENT(this);
  UseScratchRegisterScope temps(this);
  Register temp = temps.AcquireX();
  Mov(temp, Immediate(target, rmode));
  Blr(temp);
}

bool TurboAssembler::IsNearCallOffset(int64_t offset) {
  return is_int26(offset);
}

void TurboAssembler::CallForDeoptimization(
    Builtin target, int deopt_id, Label* exit, DeoptimizeKind kind, Label* ret,
    Label* jump_deoptimization_entry_label) {
  ASM_CODE_COMMENT(this);
  BlockPoolsScope scope(this);
  bl(jump_deoptimization_entry_label);
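  // Deopt exits are expected to have a fixed size; the deoptimizer relies on
  // that size to map a return address back to a deoptimization exit.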
  DCHECK_EQ(SizeOfCodeGeneratedSince(exit),
            (kind == DeoptimizeKind::kLazy)
                ? Deoptimizer::kLazyDeoptExitSize
                : Deoptimizer::kNonLazyDeoptExitSize);

  if (kind == DeoptimizeKind::kEagerWithResume) {
    b(ret);
    DCHECK_EQ(SizeOfCodeGeneratedSince(exit),
              Deoptimizer::kEagerWithResumeBeforeArgsSize);
  }
}

void MacroAssembler::LoadStackLimit(Register destination, StackLimitKind kind) {
  ASM_CODE_COMMENT(this);
  DCHECK(root_array_available());
  Isolate* isolate = this->isolate();
  ExternalReference limit =
      kind == StackLimitKind::kRealStackLimit
          ? ExternalReference::address_of_real_jslimit(isolate)
          : ExternalReference::address_of_jslimit(isolate);
  DCHECK(TurboAssembler::IsAddressableThroughRootRegister(isolate, limit));

  intptr_t offset =
      TurboAssembler::RootRegisterOffsetForExternalReference(isolate, limit);
  Ldr(destination, MemOperand(kRootRegister, offset));
}

void MacroAssembler::StackOverflowCheck(Register num_args,
                                        Label* stack_overflow) {
  ASM_CODE_COMMENT(this);
  UseScratchRegisterScope temps(this);
  Register scratch = temps.AcquireX();

  // Check the stack for overflow.
  // We are not trying to catch interruptions (e.g. debug break and
  // preemption) here, so the "real stack limit" is checked.

  LoadStackLimit(scratch, StackLimitKind::kRealStackLimit);
  // Make scratch the space we have left. The stack might already be overflowed
  // here which will cause scratch to become negative.
  Sub(scratch, sp, scratch);
  // Check if the arguments will overflow the stack.
  Cmp(scratch, Operand(num_args, LSL, kSystemPointerSizeLog2));
  B(le, stack_overflow);
}

void MacroAssembler::InvokePrologue(Register formal_parameter_count,
                                    Register actual_argument_count, Label* done,
                                    InvokeType type) {
  ASM_CODE_COMMENT(this);
  //  x0: actual arguments count.
  //  x1: function (passed through to callee).
  //  x2: expected arguments count.
  //  x3: new target
  Label regular_invoke;
  DCHECK_EQ(actual_argument_count, x0);
  DCHECK_EQ(formal_parameter_count, x2);

  // If the formal parameter count is equal to the adaptor sentinel, no need
  // to push undefined values as arguments.
  if (kDontAdaptArgumentsSentinel != 0) {
    Cmp(formal_parameter_count, Operand(kDontAdaptArgumentsSentinel));
    B(eq, &regular_invoke);
  }

  // If overapplication or if the actual argument count is equal to the
  // formal parameter count, no need to push extra undefined values.
  Register extra_argument_count = x2;
  Subs(extra_argument_count, formal_parameter_count, actual_argument_count);
  B(le, &regular_invoke);

  // The stack pointer in arm64 needs to be 16-byte aligned. We might need to
  // (1) add an extra padding or (2) remove (re-use) the extra padding already
  // in the stack. Let {slots_to_copy} be the number of slots (arguments) to
  // move up in the stack and let {slots_to_claim} be the number of extra stack
  // slots to claim.
  Label even_extra_count, skip_move;
  Register slots_to_copy = x4;
  Register slots_to_claim = x5;

  if (kJSArgcIncludesReceiver) {
    Mov(slots_to_copy, actual_argument_count);
  } else {
    Add(slots_to_copy, actual_argument_count, 1);  // Copy with receiver.
  }
  Mov(slots_to_claim, extra_argument_count);
  Tbz(extra_argument_count, 0, &even_extra_count);

  // Calculate {slots_to_claim} when {extra_argument_count} is odd.
  // If {actual_argument_count} is even, we need one extra padding slot
  // {slots_to_claim = extra_argument_count + 1}.
  // If {actual_argument_count} is odd, we know that the
  // original arguments will have a padding slot that we can reuse
  // {slots_to_claim = extra_argument_count - 1}.
  {
    Register scratch = x11;
    Add(slots_to_claim, extra_argument_count, 1);
    And(scratch, actual_argument_count, 1);
    if (!kJSArgcIncludesReceiver) {
      Eor(scratch, scratch, 1);
    }
    Sub(slots_to_claim, slots_to_claim, Operand(scratch, LSL, 1));
  }

  Bind(&even_extra_count);
  Cbz(slots_to_claim, &skip_move);

  Label stack_overflow;
  StackOverflowCheck(slots_to_claim, &stack_overflow);
  Claim(slots_to_claim);

  // Move the arguments already in the stack including the receiver.
  {
    Register src = x6;
    Register dst = x7;
    SlotAddress(src, slots_to_claim);
    SlotAddress(dst, 0);
    CopyDoubleWords(dst, src, slots_to_copy);
  }

  Bind(&skip_move);
  Register actual_argument_with_receiver = actual_argument_count;
  Register pointer_next_value = x5;
  if (!kJSArgcIncludesReceiver) {
    actual_argument_with_receiver = x4;
    Add(actual_argument_with_receiver, actual_argument_count,
        1);  // {slots_to_copy} was scratched.
  }

  // Copy extra arguments as undefined values.
  {
    Label loop;
    Register undefined_value = x6;
    Register count = x7;
    LoadRoot(undefined_value, RootIndex::kUndefinedValue);
    SlotAddress(pointer_next_value, actual_argument_with_receiver);
    Mov(count, extra_argument_count);
    Bind(&loop);
    Str(undefined_value,
        MemOperand(pointer_next_value, kSystemPointerSize, PostIndex));
    Subs(count, count, 1);
    Cbnz(count, &loop);
  }

  // Set padding if needed.
  {
    Label skip;
    Register total_args_slots = x4;
    Add(total_args_slots, actual_argument_with_receiver, extra_argument_count);
    Tbz(total_args_slots, 0, &skip);
    Str(padreg, MemOperand(pointer_next_value));
    Bind(&skip);
  }
  B(&regular_invoke);

  bind(&stack_overflow);
  {
    FrameScope frame(
        this, has_frame() ? StackFrame::NO_FRAME_TYPE : StackFrame::INTERNAL);
    CallRuntime(Runtime::kThrowStackOverflow);
    Unreachable();
  }

  Bind(&regular_invoke);
}

void MacroAssembler::CallDebugOnFunctionCall(Register fun, Register new_target,
                                             Register expected_parameter_count,
                                             Register actual_parameter_count) {
  ASM_CODE_COMMENT(this);
  // Load receiver to pass it later to DebugOnFunctionCall hook.
  Peek(x4, ReceiverOperand(actual_parameter_count));
  FrameScope frame(
      this, has_frame() ? StackFrame::NO_FRAME_TYPE : StackFrame::INTERNAL);

  if (!new_target.is_valid()) new_target = padreg;

  // Save values on stack.
  SmiTag(expected_parameter_count);
  SmiTag(actual_parameter_count);
  Push(expected_parameter_count, actual_parameter_count, new_target, fun);
  Push(fun, x4);
  CallRuntime(Runtime::kDebugOnFunctionCall);

  // Restore values from stack.
  Pop(fun, new_target, actual_parameter_count, expected_parameter_count);
  SmiUntag(actual_parameter_count);
  SmiUntag(expected_parameter_count);
}

void MacroAssembler::InvokeFunctionCode(Register function, Register new_target,
                                        Register expected_parameter_count,
                                        Register actual_parameter_count,
                                        InvokeType type) {
  ASM_CODE_COMMENT(this);
  // You can't call a function without a valid frame.
  DCHECK_IMPLIES(type == InvokeType::kCall, has_frame());
  DCHECK_EQ(function, x1);
  DCHECK_IMPLIES(new_target.is_valid(), new_target == x3);

  // On function call, call into the debugger if necessary.
  Label debug_hook, continue_after_hook;
  {
    Mov(x4, ExternalReference::debug_hook_on_function_call_address(isolate()));
    Ldrsb(x4, MemOperand(x4));
    Cbnz(x4, &debug_hook);
  }
  bind(&continue_after_hook);

  // Clear the new.target register if not given.
  if (!new_target.is_valid()) {
    LoadRoot(x3, RootIndex::kUndefinedValue);
  }

  Label done;
  InvokePrologue(expected_parameter_count, actual_parameter_count, &done, type);

  // If actual != expected, InvokePrologue will have handled the call through
  // the argument adaptor mechanism.
  // The called function expects the call kind in x5.
  // We call indirectly through the code field in the function to
  // allow recompilation to take effect without changing any of the
  // call sites.
  Register code = kJavaScriptCallCodeStartRegister;
  LoadTaggedPointerField(code,
                         FieldMemOperand(function, JSFunction::kCodeOffset));
  switch (type) {
    case InvokeType::kCall:
      CallCodeTObject(code);
      break;
    case InvokeType::kJump:
      JumpCodeTObject(code);
      break;
  }
  B(&done);

  // Deferred debug hook.
  bind(&debug_hook);
  CallDebugOnFunctionCall(function, new_target, expected_parameter_count,
                          actual_parameter_count);
  B(&continue_after_hook);

  // Continue here if InvokePrologue does handle the invocation due to
  // mismatched parameter counts.
  Bind(&done);
}

Operand MacroAssembler::ReceiverOperand(Register arg_count) {
  return Operand(0);
}

void MacroAssembler::InvokeFunctionWithNewTarget(
    Register function, Register new_target, Register actual_parameter_count,
    InvokeType type) {
  ASM_CODE_COMMENT(this);
  // You can't call a function without a valid frame.
  DCHECK(type == InvokeType::kJump || has_frame());

  // Contract with called JS functions requires that function is passed in x1.
  // (See FullCodeGenerator::Generate().)
  DCHECK_EQ(function, x1);

  Register expected_parameter_count = x2;

  LoadTaggedPointerField(cp,
                         FieldMemOperand(function, JSFunction::kContextOffset));
  // The number of arguments is stored as an int32_t, and -1 is a marker
  // (kDontAdaptArgumentsSentinel), so we need sign extension to correctly
  // handle it.
  LoadTaggedPointerField(
      expected_parameter_count,
      FieldMemOperand(function, JSFunction::kSharedFunctionInfoOffset));
  Ldrh(expected_parameter_count,
       FieldMemOperand(expected_parameter_count,
                       SharedFunctionInfo::kFormalParameterCountOffset));

  InvokeFunctionCode(function, new_target, expected_parameter_count,
                     actual_parameter_count, type);
}

void MacroAssembler::InvokeFunction(Register function,
                                    Register expected_parameter_count,
                                    Register actual_parameter_count,
                                    InvokeType type) {
  ASM_CODE_COMMENT(this);
  // You can't call a function without a valid frame.
  DCHECK(type == InvokeType::kJump || has_frame());

  // Contract with called JS functions requires that function is passed in x1.
  // (See FullCodeGenerator::Generate().)
  DCHECK_EQ(function, x1);

  // Set up the context.
  LoadTaggedPointerField(cp,
                         FieldMemOperand(function, JSFunction::kContextOffset));

  InvokeFunctionCode(function, no_reg, expected_parameter_count,
                     actual_parameter_count, type);
}

void TurboAssembler::TryConvertDoubleToInt64(Register result,
                                             DoubleRegister double_input,
                                             Label* done) {
  ASM_CODE_COMMENT(this);
  // Try to convert with an FPU convert instruction. It's trivial to compute
  // the modulo operation on an integer register so we convert to a 64-bit
  // integer.
  //
  // Fcvtzs will saturate to INT64_MIN (0x800...00) or INT64_MAX (0x7FF...FF)
  // when the double is out of range. NaNs and infinities will be converted to 0
  // (as ECMA-262 requires).
  Fcvtzs(result.X(), double_input);

  // The values INT64_MIN (0x800...00) or INT64_MAX (0x7FF...FF) are not
  // representable using a double, so if the result is one of those then we know
  // that saturation occurred, and we need to manually handle the conversion.
  //
  // It is easy to detect INT64_MIN and INT64_MAX because adding or subtracting
  // 1 will cause signed overflow.
  Cmp(result.X(), 1);
  Ccmp(result.X(), -1, VFlag, vc);
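  // Cmp computed result - 1 and Ccmp computed result + 1; the V flag is now
  // set iff the result is INT64_MIN or INT64_MAX, i.e. iff the conversion may
  // have saturated.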

  B(vc, done);
}

void TurboAssembler::TruncateDoubleToI(Isolate* isolate, Zone* zone,
                                       Register result,
                                       DoubleRegister double_input,
                                       StubCallMode stub_mode,
                                       LinkRegisterStatus lr_status) {
  ASM_CODE_COMMENT(this);
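  // ARMv8.3's FJCVTZS implements the ECMAScript ToInt32 truncation (including
  // the modulo-2^32 wrapping and NaN/infinity -> 0) in a single instruction.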
  if (CpuFeatures::IsSupported(JSCVT)) {
    Fjcvtzs(result.W(), double_input);
    return;
  }

  Label done;

  // Try to convert the double to an int64. If successful, the bottom 32 bits
  // contain our truncated int32 result.
  TryConvertDoubleToInt64(result, double_input, &done);

  // If we fell through then inline version didn't succeed - call stub instead.
  if (lr_status == kLRHasNotBeenSaved) {
    Push<TurboAssembler::kSignLR>(lr, double_input);
  } else {
    Push<TurboAssembler::kDontStoreLR>(xzr, double_input);
  }

  // DoubleToI preserves any registers it needs to clobber.
#if V8_ENABLE_WEBASSEMBLY
  if (stub_mode == StubCallMode::kCallWasmRuntimeStub) {
    Call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
#else
  // For balance.
  if (false) {
#endif  // V8_ENABLE_WEBASSEMBLY
  } else if (options().inline_offheap_trampolines) {
    CallBuiltin(Builtin::kDoubleToI);
  } else {
    Call(BUILTIN_CODE(isolate, DoubleToI), RelocInfo::CODE_TARGET);
  }
  Ldr(result, MemOperand(sp, 0));

  DCHECK_EQ(xzr.SizeInBytes(), double_input.SizeInBytes());

  if (lr_status == kLRHasNotBeenSaved) {
    // Pop into xzr here to drop the double input on the stack:
    Pop<TurboAssembler::kAuthLR>(xzr, lr);
  } else {
    Drop(2);
  }

  Bind(&done);
  // Keep our invariant that the upper 32 bits are zero.
  Uxtw(result.W(), result.W());
}

void TurboAssembler::Prologue() {
  ASM_CODE_COMMENT(this);
  Push<TurboAssembler::kSignLR>(lr, fp);
  mov(fp, sp);
  STATIC_ASSERT(kExtraSlotClaimedByPrologue == 1);
  Push(cp, kJSFunctionRegister, kJavaScriptCallArgCountRegister, padreg);
}

void TurboAssembler::EnterFrame(StackFrame::Type type) {
  UseScratchRegisterScope temps(this);

  if (type == StackFrame::INTERNAL
#if V8_ENABLE_WEBASSEMBLY
      || type == StackFrame::WASM_DEBUG_BREAK
#endif  // V8_ENABLE_WEBASSEMBLY
  ) {
    Register type_reg = temps.AcquireX();
    Mov(type_reg, StackFrame::TypeToMarker(type));
    Push<TurboAssembler::kSignLR>(lr, fp, type_reg, padreg);
    const int kFrameSize =
        TypedFrameConstants::kFixedFrameSizeFromFp + kSystemPointerSize;
    Add(fp, sp, kFrameSize);
    // sp[3] : lr
    // sp[2] : fp
    // sp[1] : type
    // sp[0] : for alignment
#if V8_ENABLE_WEBASSEMBLY
  } else if (type == StackFrame::WASM ||
             type == StackFrame::WASM_COMPILE_LAZY ||
             type == StackFrame::WASM_EXIT) {
    Register type_reg = temps.AcquireX();
    Mov(type_reg, StackFrame::TypeToMarker(type));
    Push<TurboAssembler::kSignLR>(lr, fp);
    Mov(fp, sp);
    Push(type_reg, kWasmInstanceRegister);
    // sp[3] : lr
    // sp[2] : fp
    // sp[1] : type
    // sp[0] : wasm instance
#endif  // V8_ENABLE_WEBASSEMBLY
  } else if (type == StackFrame::CONSTRUCT) {
    Register type_reg = temps.AcquireX();
    Mov(type_reg, StackFrame::TypeToMarker(type));

    // Users of this frame type push a context pointer after the type field,
    // so do it here to keep the stack pointer aligned.
    Push<TurboAssembler::kSignLR>(lr, fp, type_reg, cp);

    // The context pointer isn't part of the fixed frame, so add an extra slot
    // to account for it.
    Add(fp, sp,
        TypedFrameConstants::kFixedFrameSizeFromFp + kSystemPointerSize);
    // sp[3] : lr
    // sp[2] : fp
    // sp[1] : type
    // sp[0] : cp
  } else {
    DCHECK(StackFrame::IsJavaScript(type));
    // Just push a minimal "machine frame", saving the frame pointer and return
    // address, without any markers.
    Push<TurboAssembler::kSignLR>(lr, fp);
    Mov(fp, sp);
    // sp[1] : lr
    // sp[0] : fp
  }
}

void TurboAssembler::LeaveFrame(StackFrame::Type type) {
  ASM_CODE_COMMENT(this);
  // Drop the execution stack down to the frame pointer and restore
  // the caller frame pointer and return address.
  Mov(sp, fp);
  Pop<TurboAssembler::kAuthLR>(fp, lr);
}

void MacroAssembler::ExitFramePreserveFPRegs() {
  ASM_CODE_COMMENT(this);
  DCHECK_EQ(kCallerSavedV.Count() % 2, 0);
  PushCPURegList(kCallerSavedV);
}

void MacroAssembler::ExitFrameRestoreFPRegs() {
  // Read the registers from the stack without popping them. The stack pointer
  // will be reset as part of the unwinding process.
  ASM_CODE_COMMENT(this);
  CPURegList saved_fp_regs = kCallerSavedV;
  DCHECK_EQ(saved_fp_regs.Count() % 2, 0);

  int offset = ExitFrameConstants::kLastExitFrameField;
  while (!saved_fp_regs.IsEmpty()) {
    const CPURegister& dst0 = saved_fp_regs.PopHighestIndex();
    const CPURegister& dst1 = saved_fp_regs.PopHighestIndex();
    offset -= 2 * kDRegSize;
    Ldp(dst1, dst0, MemOperand(fp, offset));
  }
}

void MacroAssembler::EnterExitFrame(bool save_doubles, const Register& scratch,
                                    int extra_space,
                                    StackFrame::Type frame_type) {
  ASM_CODE_COMMENT(this);
  DCHECK(frame_type == StackFrame::EXIT ||
         frame_type == StackFrame::BUILTIN_EXIT);

  // Set up the new stack frame.
  Push<TurboAssembler::kSignLR>(lr, fp);
  Mov(fp, sp);
  Mov(scratch, StackFrame::TypeToMarker(frame_type));
  Push(scratch, xzr);
  //          fp[8]: CallerPC (lr)
  //    fp -> fp[0]: CallerFP (old fp)
  //          fp[-8]: STUB marker
  //    sp -> fp[-16]: Space reserved for SPOffset.
  STATIC_ASSERT((2 * kSystemPointerSize) ==
                ExitFrameConstants::kCallerSPOffset);
  STATIC_ASSERT((1 * kSystemPointerSize) ==
                ExitFrameConstants::kCallerPCOffset);
  STATIC_ASSERT((0 * kSystemPointerSize) ==
                ExitFrameConstants::kCallerFPOffset);
  STATIC_ASSERT((-2 * kSystemPointerSize) == ExitFrameConstants::kSPOffset);

  // Save the frame pointer and context pointer in the top frame.
  Mov(scratch,
      ExternalReference::Create(IsolateAddressId::kCEntryFPAddress, isolate()));
  Str(fp, MemOperand(scratch));
  Mov(scratch,
      ExternalReference::Create(IsolateAddressId::kContextAddress, isolate()));
  Str(cp, MemOperand(scratch));

  STATIC_ASSERT((-2 * kSystemPointerSize) ==
                ExitFrameConstants::kLastExitFrameField);
  if (save_doubles) {
    ExitFramePreserveFPRegs();
  }

  // Round the number of slots we need to claim to a multiple of two.
  int slots_to_claim = RoundUp(extra_space + 1, 2);

  // Reserve space for the return address and for user requested memory.
  // We do this before aligning to make sure that we end up correctly
  // aligned with the minimum of wasted space.
  Claim(slots_to_claim, kXRegSize);
  //         fp[8]: CallerPC (lr)
  //   fp -> fp[0]: CallerFP (old fp)
  //         fp[-8]: STUB marker
  //         fp[-16]: Space reserved for SPOffset.
  //         fp[-16 - fp_size]: Saved doubles (if save_doubles is true).
  //         sp[8]: Extra space reserved for caller (if extra_space != 0).
  //   sp -> sp[0]: Space reserved for the return address.

  // ExitFrame::GetStateForFramePointer expects to find the return address at
  // the memory address immediately below the pointer stored in SPOffset.
  // It is not safe to derive much else from SPOffset, because the size of the
  // padding can vary.
  Add(scratch, sp, kXRegSize);
  Str(scratch, MemOperand(fp, ExitFrameConstants::kSPOffset));
}

// Leave the current exit frame.
void MacroAssembler::LeaveExitFrame(bool restore_doubles,
                                    const Register& scratch,
                                    const Register& scratch2) {
  ASM_CODE_COMMENT(this);
  if (restore_doubles) {
    ExitFrameRestoreFPRegs();
  }

  // Restore the context pointer from the top frame.
  Mov(scratch,
      ExternalReference::Create(IsolateAddressId::kContextAddress, isolate()));
  Ldr(cp, MemOperand(scratch));

  if (FLAG_debug_code) {
    // Also emit debug code to clear the cp in the top frame.
    Mov(scratch2, Operand(Context::kInvalidContext));
    Mov(scratch, ExternalReference::Create(IsolateAddressId::kContextAddress,
                                           isolate()));
    Str(scratch2, MemOperand(scratch));
  }
  // Clear the frame pointer from the top frame.
  Mov(scratch,
      ExternalReference::Create(IsolateAddressId::kCEntryFPAddress, isolate()));
  Str(xzr, MemOperand(scratch));

  // Pop the exit frame.
  //         fp[8]: CallerPC (lr)
  //   fp -> fp[0]: CallerFP (old fp)
  //         fp[...]: The rest of the frame.
  Mov(sp, fp);
  Pop<TurboAssembler::kAuthLR>(fp, lr);
}

void MacroAssembler::LoadGlobalProxy(Register dst) {
  ASM_CODE_COMMENT(this);
  LoadNativeContextSlot(dst, Context::GLOBAL_PROXY_INDEX);
}

void MacroAssembler::LoadWeakValue(Register out, Register in,
                                   Label* target_if_cleared) {
  ASM_CODE_COMMENT(this);
  CompareAndBranch(in.W(), Operand(kClearedWeakHeapObjectLower32), eq,
                   target_if_cleared);

  and_(out, in, Operand(~kWeakHeapObjectMask));
}

void MacroAssembler::EmitIncrementCounter(StatsCounter* counter, int value,
                                          Register scratch1,
                                          Register scratch2) {
  ASM_CODE_COMMENT(this);
  DCHECK_NE(value, 0);
  if (FLAG_native_code_counters && counter->Enabled()) {
    // This operation has to be exactly 32-bit wide in case the external
    // reference table redirects the counter to a uint32_t dummy_stats_counter_
    // field.
    Mov(scratch2, ExternalReference::Create(counter));
    Ldr(scratch1.W(), MemOperand(scratch2));
    Add(scratch1.W(), scratch1.W(), value);
    Str(scratch1.W(), MemOperand(scratch2));
  }
}

void MacroAssembler::JumpIfObjectType(Register object, Register map,
                                      Register type_reg, InstanceType type,
                                      Label* if_cond_pass, Condition cond) {
  ASM_CODE_COMMENT(this);
  CompareObjectType(object, map, type_reg, type);
  B(cond, if_cond_pass);
}

// Sets condition flags based on comparison, and returns type in type_reg.
void MacroAssembler::CompareObjectType(Register object, Register map,
                                       Register type_reg, InstanceType type) {
  ASM_CODE_COMMENT(this);
  LoadMap(map, object);
  CompareInstanceType(map, type_reg, type);
}

void TurboAssembler::LoadMap(Register dst, Register object) {
  ASM_CODE_COMMENT(this);
  LoadTaggedPointerField(dst, FieldMemOperand(object, HeapObject::kMapOffset));
}

// Sets condition flags based on comparison, and returns type in type_reg.
void MacroAssembler::CompareInstanceType(Register map, Register type_reg,
                                         InstanceType type) {
  ASM_CODE_COMMENT(this);
  Ldrh(type_reg, FieldMemOperand(map, Map::kInstanceTypeOffset));
  Cmp(type_reg, type);
}

// Sets condition flags based on comparison, and returns type in type_reg.
void MacroAssembler::CompareInstanceTypeRange(Register map, Register type_reg,
                                              InstanceType lower_limit,
                                              InstanceType higher_limit) {
  ASM_CODE_COMMENT(this);
  DCHECK_LT(lower_limit, higher_limit);
  UseScratchRegisterScope temps(this);
  Register scratch = temps.AcquireX();
  Ldrh(type_reg, FieldMemOperand(map, Map::kInstanceTypeOffset));
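  // Bias the type by lower_limit so that a single unsigned comparison (the
  // callers check 'ls') covers the whole [lower_limit, higher_limit] range.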
  Sub(scratch, type_reg, Operand(lower_limit));
  Cmp(scratch, Operand(higher_limit - lower_limit));
}

void MacroAssembler::LoadElementsKindFromMap(Register result, Register map) {
  ASM_CODE_COMMENT(this);
  // Load the map's "bit field 2".
  Ldrb(result, FieldMemOperand(map, Map::kBitField2Offset));
  // Retrieve elements_kind from bit field 2.
  DecodeField<Map::Bits2::ElementsKindBits>(result);
}

void MacroAssembler::CompareRoot(const Register& obj, RootIndex index) {
  ASM_CODE_COMMENT(this);
  UseScratchRegisterScope temps(this);
  Register temp = temps.AcquireX();
  DCHECK(!AreAliased(obj, temp));
  LoadRoot(temp, index);
  CmpTagged(obj, temp);
}

void MacroAssembler::JumpIfRoot(const Register& obj, RootIndex index,
                                Label* if_equal) {
  CompareRoot(obj, index);
  B(eq, if_equal);
}

void MacroAssembler::JumpIfNotRoot(const Register& obj, RootIndex index,
                                   Label* if_not_equal) {
  CompareRoot(obj, index);
  B(ne, if_not_equal);
}

void MacroAssembler::JumpIfIsInRange(const Register& value,
                                     unsigned lower_limit,
                                     unsigned higher_limit,
                                     Label* on_in_range) {
  ASM_CODE_COMMENT(this);
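  // Bias the value by lower_limit so that one unsigned comparison ('ls')
  // checks membership in [lower_limit, higher_limit].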
  if (lower_limit != 0) {
    UseScratchRegisterScope temps(this);
    Register scratch = temps.AcquireW();
    Sub(scratch, value, Operand(lower_limit));
    CompareAndBranch(scratch, Operand(higher_limit - lower_limit), ls,
                     on_in_range);
  } else {
    CompareAndBranch(value, Operand(higher_limit - lower_limit), ls,
                     on_in_range);
  }
}

void TurboAssembler::LoadTaggedPointerField(const Register& destination,
                                            const MemOperand& field_operand) {
  if (COMPRESS_POINTERS_BOOL) {
    DecompressTaggedPointer(destination, field_operand);
  } else {
    Ldr(destination, field_operand);
  }
}

void TurboAssembler::LoadAnyTaggedField(const Register& destination,
                                        const MemOperand& field_operand) {
  if (COMPRESS_POINTERS_BOOL) {
    DecompressAnyTagged(destination, field_operand);
  } else {
    Ldr(destination, field_operand);
  }
}

void TurboAssembler::LoadTaggedSignedField(const Register& destination,
                                           const MemOperand& field_operand) {
  if (COMPRESS_POINTERS_BOOL) {
    DecompressTaggedSigned(destination, field_operand);
  } else {
    Ldr(destination, field_operand);
  }
}

void TurboAssembler::SmiUntagField(Register dst, const MemOperand& src) {
  SmiUntag(dst, src);
}

void TurboAssembler::StoreTaggedField(const Register& value,
                                      const MemOperand& dst_field_operand) {
  if (COMPRESS_POINTERS_BOOL) {
    Str(value.W(), dst_field_operand);
  } else {
    Str(value, dst_field_operand);
  }
}

void TurboAssembler::AtomicStoreTaggedField(const Register& value,
                                            const Register& dst_base,
                                            const Register& dst_index,
                                            const Register& temp) {
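  // Compute the absolute slot address, then store with release semantics
  // (Stlr). With pointer compression only the lower 32 bits are written.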
  Add(temp, dst_base, dst_index);
  if (COMPRESS_POINTERS_BOOL) {
    Stlr(value.W(), temp);
  } else {
    Stlr(value, temp);
  }
}

void TurboAssembler::DecompressTaggedSigned(const Register& destination,
                                            const MemOperand& field_operand) {
  ASM_CODE_COMMENT(this);
  Ldr(destination.W(), field_operand);
  if (FLAG_debug_code) {
    // Corrupt the top 32 bits. Made up of 16 fixed bits and 16 pc offset bits.
    Add(destination, destination,
        ((kDebugZapValue << 16) | (pc_offset() & 0xffff)) << 32);
  }
}

void TurboAssembler::DecompressTaggedPointer(const Register& destination,
                                             const MemOperand& field_operand) {
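  // Load the 32-bit compressed value and add the pointer-compression cage
  // base to rebuild the full tagged pointer.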
  ASM_CODE_COMMENT(this);
  Ldr(destination.W(), field_operand);
  Add(destination, kPtrComprCageBaseRegister, destination);
}

void TurboAssembler::DecompressTaggedPointer(const Register& destination,
                                             const Register& source) {
  ASM_CODE_COMMENT(this);
  Add(destination, kPtrComprCageBaseRegister, Operand(source, UXTW));
}

void TurboAssembler::DecompressAnyTagged(const Register& destination,
                                         const MemOperand& field_operand) {
  ASM_CODE_COMMENT(this);
  Ldr(destination.W(), field_operand);
  Add(destination, kPtrComprCageBaseRegister, destination);
}

void TurboAssembler::AtomicDecompressTaggedSigned(const Register& destination,
                                                  const Register& base,
                                                  const Register& index,
                                                  const Register& temp) {
  ASM_CODE_COMMENT(this);
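  // Load-acquire (Ldar) the 32-bit compressed value from base + index.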
  Add(temp, base, index);
  Ldar(destination.W(), temp);
  if (FLAG_debug_code) {
    // Corrupt the top 32 bits. Made up of 16 fixed bits and 16 pc offset bits.
    Add(destination, destination,
        ((kDebugZapValue << 16) | (pc_offset() & 0xffff)) << 32);
  }
}

void TurboAssembler::AtomicDecompressTaggedPointer(const Register& destination,
                                                   const Register& base,
                                                   const Register& index,
                                                   const Register& temp) {
  ASM_CODE_COMMENT(this);
  Add(temp, base, index);
  Ldar(destination.W(), temp);
  Add(destination, kPtrComprCageBaseRegister, destination);
}

void TurboAssembler::AtomicDecompressAnyTagged(const Register& destination,
                                               const Register& base,
                                               const Register& index,
                                               const Register& temp) {
  ASM_CODE_COMMENT(this);
  Add(temp, base, index);
  Ldar(destination.W(), temp);
  Add(destination, kPtrComprCageBaseRegister, destination);
}

void TurboAssembler::CheckPageFlag(const Register& object, int mask,
                                   Condition cc, Label* condition_met) {
  ASM_CODE_COMMENT(this);
  UseScratchRegisterScope temps(this);
  Register scratch = temps.AcquireX();
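  // Clear the low bits of the object's address to get its BasicMemoryChunk
  // header, then test the page flags stored there.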
  And(scratch, object, ~kPageAlignmentMask);
  Ldr(scratch, MemOperand(scratch, BasicMemoryChunk::kFlagsOffset));
  if (cc == eq) {
    TestAndBranchIfAnySet(scratch, mask, condition_met);
  } else {
    DCHECK_EQ(cc, ne);
    TestAndBranchIfAllClear(scratch, mask, condition_met);
  }
}

void MacroAssembler::RecordWriteField(Register object, int offset,
                                      Register value,
                                      LinkRegisterStatus lr_status,
                                      SaveFPRegsMode save_fp,
                                      RememberedSetAction remembered_set_action,
                                      SmiCheck smi_check) {
  ASM_CODE_COMMENT(this);
  DCHECK(!AreAliased(object, value));
  // First, check if a write barrier is even needed. The tests below
  // catch stores of Smis.
  Label done;

  // Skip the barrier if writing a smi.
  if (smi_check == SmiCheck::kInline) {
    JumpIfSmi(value, &done);
  }

  // Although the object register is tagged, the offset is relative to the start
  // of the object, so offset must be a multiple of kTaggedSize.
  DCHECK(IsAligned(offset, kTaggedSize));

  if (FLAG_debug_code) {
    ASM_CODE_COMMENT_STRING(this, "Verify slot_address");
    Label ok;
    UseScratchRegisterScope temps(this);
    Register scratch = temps.AcquireX();
    DCHECK(!AreAliased(object, value, scratch));
    Add(scratch, object, offset - kHeapObjectTag);
    Tst(scratch, kTaggedSize - 1);
    B(eq, &ok);
    Abort(AbortReason::kUnalignedCellInWriteBarrier);
    Bind(&ok);
  }

  RecordWrite(object, Operand(offset - kHeapObjectTag), value, lr_status,
              save_fp, remembered_set_action, SmiCheck::kOmit);

  Bind(&done);
}

void TurboAssembler::LoadExternalPointerField(Register destination,
                                              MemOperand field_operand,
                                              ExternalPointerTag tag,
                                              Register isolate_root) {
  DCHECK(!AreAliased(destination, isolate_root));
  ASM_CODE_COMMENT(this);
#ifdef V8_HEAP_SANDBOX
  UseScratchRegisterScope temps(this);
  Register external_table = temps.AcquireX();
  if (isolate_root == no_reg) {
    DCHECK(root_array_available_);
    isolate_root = kRootRegister;
  }
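  // Under the heap sandbox, the field holds an index into the isolate's
  // external pointer table; load the table entry and strip the tag bits.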
  Ldr(external_table,
      MemOperand(isolate_root,
                 IsolateData::external_pointer_table_offset() +
                     Internals::kExternalPointerTableBufferOffset));
  Ldr(destination, field_operand);
  Ldr(destination,
      MemOperand(external_table, destination, LSL, kSystemPointerSizeLog2));
  if (tag != 0) {
    And(destination, destination, Immediate(~tag));
  }
#else
  Ldr(destination, field_operand);
#endif  // V8_HEAP_SANDBOX
}

void TurboAssembler::MaybeSaveRegisters(RegList registers) {
  if (registers == 0) return;
  ASM_CODE_COMMENT(this);
  CPURegList regs(CPURegister::kRegister, kXRegSizeInBits, registers);
  // If we were saving LR, we might need to sign it.
  DCHECK(!regs.IncludesAliasOf(lr));
  regs.Align();
  PushCPURegList(regs);
}

void TurboAssembler::MaybeRestoreRegisters(RegList registers) {
  if (registers == 0) return;
  ASM_CODE_COMMENT(this);
  CPURegList regs(CPURegister::kRegister, kXRegSizeInBits, registers);
  // If we were saving LR, we might need to sign it.
  DCHECK(!regs.IncludesAliasOf(lr));
  regs.Align();
  PopCPURegList(regs);
}

void TurboAssembler::CallEphemeronKeyBarrier(Register object, Operand offset,
                                             SaveFPRegsMode fp_mode) {
  ASM_CODE_COMMENT(this);
  RegList registers = WriteBarrierDescriptor::ComputeSavedRegisters(object);
  MaybeSaveRegisters(registers);

  MoveObjectAndSlot(WriteBarrierDescriptor::ObjectRegister(),
                    WriteBarrierDescriptor::SlotAddressRegister(), object,
                    offset);

  Call(isolate()->builtins()->code_handle(
           Builtins::GetEphemeronKeyBarrierStub(fp_mode)),
       RelocInfo::CODE_TARGET);
  MaybeRestoreRegisters(registers);
}

void TurboAssembler::CallRecordWriteStubSaveRegisters(
    Register object, Operand offset, RememberedSetAction remembered_set_action,
    SaveFPRegsMode fp_mode, StubCallMode mode) {
  ASM_CODE_COMMENT(this);
  RegList registers = WriteBarrierDescriptor::ComputeSavedRegisters(object);
  MaybeSaveRegisters(registers);

  Register object_parameter = WriteBarrierDescriptor::ObjectRegister();
  Register slot_address_parameter =
      WriteBarrierDescriptor::SlotAddressRegister();
  MoveObjectAndSlot(object_parameter, slot_address_parameter, object, offset);

  CallRecordWriteStub(object_parameter, slot_address_parameter,
                      remembered_set_action, fp_mode, mode);

  MaybeRestoreRegisters(registers);
}

void TurboAssembler::CallRecordWriteStub(
    Register object, Register slot_address,
    RememberedSetAction remembered_set_action, SaveFPRegsMode fp_mode,
    StubCallMode mode) {
  ASM_CODE_COMMENT(this);
  DCHECK_EQ(WriteBarrierDescriptor::ObjectRegister(), object);
  DCHECK_EQ(WriteBarrierDescriptor::SlotAddressRegister(), slot_address);
#if V8_ENABLE_WEBASSEMBLY
  if (mode == StubCallMode::kCallWasmRuntimeStub) {
    auto wasm_target =
        wasm::WasmCode::GetRecordWriteStub(remembered_set_action, fp_mode);
    Call(wasm_target, RelocInfo::WASM_STUB_CALL);
#else
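  // Without WebAssembly support there is no Wasm stub to call; this dead
  // branch only keeps the `else` below well-formed.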
  if (false) {
#endif
  } else {
    Builtin builtin =
        Builtins::GetRecordWriteStub(remembered_set_action, fp_mode);
    if (options().inline_offheap_trampolines) {
      CallBuiltin(builtin);
    } else {
      Handle<Code> code_target = isolate()->builtins()->code_handle(builtin);
      Call(code_target, RelocInfo::CODE_TARGET);
    }
  }
}

void TurboAssembler::MoveObjectAndSlot(Register dst_object, Register dst_slot,
                                       Register object, Operand offset) {
  ASM_CODE_COMMENT(this);
  DCHECK_NE(dst_object, dst_slot);
  // If `offset` is a register, it cannot overlap with `object`.
  DCHECK_IMPLIES(!offset.IsImmediate(), offset.reg() != object);

  // If the slot register does not overlap with the object register, we can
  // overwrite it.
  if (dst_slot != object) {
    Add(dst_slot, object, offset);
    Mov(dst_object, object);
    return;
  }

  DCHECK_EQ(dst_slot, object);

  // If the destination object register does not overlap with the offset
  // register, we can overwrite it.
  if (offset.IsImmediate() || (offset.reg() != dst_object)) {
    Mov(dst_object, dst_slot);
    Add(dst_slot, dst_slot, offset);
    return;
  }

  DCHECK_EQ(dst_object, offset.reg());

  // We only have `dst_slot` and `dst_object` left as distinct registers, so we
  // have to swap them. We write this as an add+sub sequence to avoid using a
  // scratch register.
  Add(dst_slot, dst_slot, dst_object);
  Sub(dst_object, dst_slot, dst_object);
}

// If lr_status is kLRHasBeenSaved, lr will be clobbered.
//
// The register 'object' contains a heap object pointer. The heap object tag is
// shifted away.
void MacroAssembler::RecordWrite(Register object, Operand offset,
                                 Register value, LinkRegisterStatus lr_status,
                                 SaveFPRegsMode fp_mode,
                                 RememberedSetAction remembered_set_action,
                                 SmiCheck smi_check) {
  ASM_CODE_COMMENT(this);
  ASM_LOCATION_IN_ASSEMBLER("MacroAssembler::RecordWrite");
  DCHECK(!AreAliased(object, value));

  if (FLAG_debug_code) {
    ASM_CODE_COMMENT_STRING(this, "Verify slot_address");
    UseScratchRegisterScope temps(this);
    Register temp = temps.AcquireX();
    DCHECK(!AreAliased(object, value, temp));
    Add(temp, object, offset);
    LoadTaggedPointerField(temp, MemOperand(temp));
    Cmp(temp, value);
    Check(eq, AbortReason::kWrongAddressOrValuePassedToRecordWrite);
  }

  if ((remembered_set_action == RememberedSetAction::kOmit &&
       !FLAG_incremental_marking) ||
      FLAG_disable_write_barriers) {
    return;
  }

  // First, check if a write barrier is even needed. The tests below
  // catch stores of smis and stores into the young generation.
  Label done;

  if (smi_check == SmiCheck::kInline) {
    DCHECK_EQ(0, kSmiTag);
    JumpIfSmi(value, &done);
  }
  CheckPageFlag(value, MemoryChunk::kPointersToHereAreInterestingMask, ne,
                &done);

  CheckPageFlag(object, MemoryChunk::kPointersFromHereAreInterestingMask, ne,
                &done);

  // Record the actual write.
  if (lr_status == kLRHasNotBeenSaved) {
    Push<TurboAssembler::kSignLR>(padreg, lr);
  }
  Register slot_address = WriteBarrierDescriptor::SlotAddressRegister();
  DCHECK(!AreAliased(object, slot_address, value));
  // TODO(cbruni): Turn offset into int.
  DCHECK(offset.IsImmediate());
  Add(slot_address, object, offset);
  CallRecordWriteStub(object, slot_address, remembered_set_action, fp_mode);
  if (lr_status == kLRHasNotBeenSaved) {
    Pop<TurboAssembler::kAuthLR>(lr, padreg);
  }
  if (FLAG_debug_code) Mov(slot_address, Operand(kZapValue));

  Bind(&done);
}

void TurboAssembler::Assert(Condition cond, AbortReason reason) {
  if (FLAG_debug_code) {
    Check(cond, reason);
  }
}

void TurboAssembler::AssertUnreachable(AbortReason reason) {
  if (FLAG_debug_code) Abort(reason);
}

void TurboAssembler::Check(Condition cond, AbortReason reason) {
  Label ok;
  B(cond, &ok);
  Abort(reason);
  // Will not return here.
  Bind(&ok);
}

void TurboAssembler::Trap() { Brk(0); }
void TurboAssembler::DebugBreak() { Debug("DebugBreak", 0, BREAK); }

void TurboAssembler::Abort(AbortReason reason) {
  ASM_CODE_COMMENT(this);
  if (FLAG_code_comments) {
    RecordComment("Abort message: ");
    RecordComment(GetAbortReason(reason));
  }

  // Avoid emitting call to builtin if requested.
  if (trap_on_abort()) {
    Brk(0);
    return;
  }

  // We need some scratch registers for the MacroAssembler, so make sure we have
  // some. This is safe here because Abort never returns.
  RegList old_tmp_list = TmpList()->list();
  TmpList()->Combine(MacroAssembler::DefaultTmpList());

  if (should_abort_hard()) {
    // We don't care if we constructed a frame. Just pretend we did.
    FrameScope assume_frame(this, StackFrame::NO_FRAME_TYPE);
    Mov(w0, static_cast<int>(reason));
    Call(ExternalReference::abort_with_reason());
    return;
  }

  // Avoid infinite recursion; Push contains some assertions that use Abort.
  HardAbortScope hard_aborts(this);

  Mov(x1, Smi::FromInt(static_cast<int>(reason)));

  if (!has_frame_) {
    // We don't actually want to generate a pile of code for this, so just
    // claim there is a stack frame, without generating one.
    FrameScope scope(this, StackFrame::NO_FRAME_TYPE);
    Call(BUILTIN_CODE(isolate(), Abort), RelocInfo::CODE_TARGET);
  } else {
    Call(BUILTIN_CODE(isolate(), Abort), RelocInfo::CODE_TARGET);
  }

  TmpList()->set_list(old_tmp_list);
}

void MacroAssembler::LoadNativeContextSlot(Register dst, int index) {
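  // The current context's map stores the native context in its
  // constructor-or-back-pointer slot; go through it, then load the slot.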
  LoadMap(dst, cp);
  LoadTaggedPointerField(
      dst, FieldMemOperand(
               dst, Map::kConstructorOrBackPointerOrNativeContextOffset));
  LoadTaggedPointerField(dst, MemOperand(dst, Context::SlotOffset(index)));
}

// This is the main Printf implementation. All other Printf variants call
// PrintfNoPreserve after setting up one or more PreserveRegisterScopes.
void TurboAssembler::PrintfNoPreserve(const char* format,
                                      const CPURegister& arg0,
                                      const CPURegister& arg1,
                                      const CPURegister& arg2,
                                      const CPURegister& arg3) {
  ASM_CODE_COMMENT(this);
  // We cannot handle a caller-saved stack pointer. It doesn't make much sense
  // in most cases anyway, so this restriction shouldn't be too serious.
  DCHECK(!kCallerSaved.IncludesAliasOf(sp));

  // The provided arguments, and their proper procedure-call standard registers.
  CPURegister args[kPrintfMaxArgCount] = {arg0, arg1, arg2, arg3};
  CPURegister pcs[kPrintfMaxArgCount] = {NoReg, NoReg, NoReg, NoReg};

  int arg_count = kPrintfMaxArgCount;

  // The PCS varargs registers for printf. Note that x0 is used for the printf
  // format string.
  static const CPURegList kPCSVarargs =
      CPURegList(CPURegister::kRegister, kXRegSizeInBits, 1, arg_count);
  static const CPURegList kPCSVarargsFP =
      CPURegList(CPURegister::kVRegister, kDRegSizeInBits, 0, arg_count - 1);

  // We can use caller-saved registers as scratch values, except for the
  // arguments and the PCS registers where they might need to go.
  CPURegList tmp_list = kCallerSaved;
  tmp_list.Remove(x0);  // Used to pass the format string.
  tmp_list.Remove(kPCSVarargs);
  tmp_list.Remove(arg0, arg1, arg2, arg3);

  CPURegList fp_tmp_list = kCallerSavedV;
  fp_tmp_list.Remove(kPCSVarargsFP);
  fp_tmp_list.Remove(arg0, arg1, arg2, arg3);

  // Override the TurboAssembler's scratch register list. The lists will be
  // reset automatically at the end of the UseScratchRegisterScope.
  UseScratchRegisterScope temps(this);
  TmpList()->set_list(tmp_list.list());
  FPTmpList()->set_list(fp_tmp_list.list());

  // Copies of the printf vararg registers that we can pop from.
  CPURegList pcs_varargs = kPCSVarargs;
#ifndef V8_OS_WIN
  CPURegList pcs_varargs_fp = kPCSVarargsFP;
#endif

  // Place the arguments. There are lots of clever tricks and optimizations we
  // could use here, but Printf is a debug tool so instead we just try to keep
  // it simple: Move each input that isn't already in the right place to a
  // scratch register, then move everything back.
  for (unsigned i = 0; i < kPrintfMaxArgCount; i++) {
    // Work out the proper PCS register for this argument.
    if (args[i].IsRegister()) {
      pcs[i] = pcs_varargs.PopLowestIndex().X();
      // We might only need a W register here. We need to know the size of the
      // argument so we can properly encode it for the simulator call.
      if (args[i].Is32Bits()) pcs[i] = pcs[i].W();
    } else if (args[i].IsVRegister()) {
      // In C, floats are always cast to doubles for varargs calls.
#ifdef V8_OS_WIN
      // In case of variadic functions SIMD and Floating-point registers
      // aren't used. The general x0-x7 should be used instead.
      // https://docs.microsoft.com/en-us/cpp/build/arm64-windows-abi-conventions
      pcs[i] = pcs_varargs.PopLowestIndex().X();
#else
      pcs[i] = pcs_varargs_fp.PopLowestIndex().D();
#endif
    } else {
      DCHECK(args[i].IsNone());
      arg_count = i;
      break;
    }

    // If the argument is already in the right place, leave it where it is.
    if (args[i].Aliases(pcs[i])) continue;

    // Otherwise, if the argument is in a PCS argument register, allocate an
    // appropriate scratch register and then move it out of the way.
    if (kPCSVarargs.IncludesAliasOf(args[i]) ||
        kPCSVarargsFP.IncludesAliasOf(args[i])) {
      if (args[i].IsRegister()) {
        Register old_arg = args[i].Reg();
        Register new_arg = temps.AcquireSameSizeAs(old_arg);
        Mov(new_arg, old_arg);
        args[i] = new_arg;
      } else {
        VRegister old_arg = args[i].VReg();
        VRegister new_arg = temps.AcquireSameSizeAs(old_arg);
        Fmov(new_arg, old_arg);
        args[i] = new_arg;
      }
    }
  }

  // Do a second pass to move values into their final positions and perform any
  // conversions that may be required.
  for (int i = 0; i < arg_count; i++) {
#ifdef V8_OS_WIN
    if (args[i].IsVRegister()) {
      if (pcs[i].SizeInBytes() != args[i].SizeInBytes()) {
        // If the argument is half- or single-precision, convert it to
        // double precision before moving it into one of the X scratch
        // registers.
        VRegister temp0 = temps.AcquireD();
        Fcvt(temp0.VReg(), args[i].VReg());
        Fmov(pcs[i].Reg(), temp0);
      } else {
        Fmov(pcs[i].Reg(), args[i].VReg());
      }
    } else {
      Mov(pcs[i].Reg(), args[i].Reg(), kDiscardForSameWReg);
    }
#else
    DCHECK(pcs[i].type() == args[i].type());
    if (pcs[i].IsRegister()) {
      Mov(pcs[i].Reg(), args[i].Reg(), kDiscardForSameWReg);
    } else {
      DCHECK(pcs[i].IsVRegister());
      if (pcs[i].SizeInBytes() == args[i].SizeInBytes()) {
        Fmov(pcs[i].VReg(), args[i].VReg());
      } else {
        Fcvt(pcs[i].VReg(), args[i].VReg());
      }
    }
#endif
  }

  // Load the format string into x0, as per the procedure-call standard.
  //
  // To make the code as portable as possible, the format string is encoded
  // directly in the instruction stream. It might be cleaner to encode it in a
  // literal pool, but since Printf is usually used for debugging, it is
  // beneficial for it to be minimally dependent on other features.
  Label format_address;
  Adr(x0, &format_address);

  // Emit the format string directly in the instruction stream.
  {
    BlockPoolsScope scope(this);
    Label after_data;
    B(&after_data);
    Bind(&format_address);
    EmitStringData(format);
    Unreachable();
    Bind(&after_data);
  }

  CallPrintf(arg_count, pcs);
}

void TurboAssembler::CallPrintf(int arg_count, const CPURegister* args) {
  ASM_CODE_COMMENT(this);
  // A call to printf needs special handling for the simulator, since the system
  // printf function will use a different instruction set and the procedure-call
  // standard will not be compatible.
  if (options().enable_simulator_code) {
    InstructionAccurateScope scope(this, kPrintfLength / kInstrSize);
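    // Emit a pseudo printf call: the hlt marker is followed by data words
    // describing the arguments, which the simulator decodes when it traps.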
3511
    hlt(kImmExceptionIsPrintf);
3512
    dc32(arg_count);  // kPrintfArgCountOffset
3513 3514 3515 3516 3517 3518 3519 3520

    // Determine the argument pattern.
    uint32_t arg_pattern_list = 0;
    for (int i = 0; i < arg_count; i++) {
      uint32_t arg_pattern;
      if (args[i].IsRegister()) {
        arg_pattern = args[i].Is32Bits() ? kPrintfArgW : kPrintfArgX;
      } else {
        DCHECK(args[i].Is64Bits());
        arg_pattern = kPrintfArgD;
      }
      DCHECK(arg_pattern < (1 << kPrintfArgPatternBits));
      arg_pattern_list |= (arg_pattern << (kPrintfArgPatternBits * i));
    }
    dc32(arg_pattern_list);  // kPrintfArgPatternListOffset
    return;
  }

  Call(ExternalReference::printf_function());
}

void TurboAssembler::Printf(const char* format, CPURegister arg0,
                            CPURegister arg1, CPURegister arg2,
                            CPURegister arg3) {
  ASM_CODE_COMMENT(this);
  // Printf is expected to preserve all registers, so make sure that none are
  // available as scratch registers until we've preserved them.
  RegList old_tmp_list = TmpList()->list();
  RegList old_fp_tmp_list = FPTmpList()->list();
  TmpList()->set_list(0);
  FPTmpList()->set_list(0);

  CPURegList saved_registers = kCallerSaved;
  saved_registers.Align();

  // Preserve all caller-saved registers as well as NZCV.
  // PushCPURegList asserts that the size of each list is a multiple of 16
  // bytes.
  PushCPURegList<kDontStoreLR>(saved_registers);
  PushCPURegList(kCallerSavedV);

  // We can use caller-saved registers as scratch values (except for argN).
  CPURegList tmp_list = saved_registers;
  CPURegList fp_tmp_list = kCallerSavedV;
  tmp_list.Remove(arg0, arg1, arg2, arg3);
  fp_tmp_list.Remove(arg0, arg1, arg2, arg3);
  TmpList()->set_list(tmp_list.list());
  FPTmpList()->set_list(fp_tmp_list.list());

  {
    UseScratchRegisterScope temps(this);
    // If any of the arguments are the current stack pointer, allocate a new
    // register for them, and adjust the value to compensate for pushing the
    // caller-saved registers.
    bool arg0_sp = arg0.is_valid() && sp.Aliases(arg0);
    bool arg1_sp = arg1.is_valid() && sp.Aliases(arg1);
    bool arg2_sp = arg2.is_valid() && sp.Aliases(arg2);
    bool arg3_sp = arg3.is_valid() && sp.Aliases(arg3);
    if (arg0_sp || arg1_sp || arg2_sp || arg3_sp) {
      // Allocate a register to hold the original stack pointer value, to pass
      // to PrintfNoPreserve as an argument.
      Register arg_sp = temps.AcquireX();
      Add(arg_sp, sp,
          saved_registers.TotalSizeInBytes() +
              kCallerSavedV.TotalSizeInBytes());
      if (arg0_sp) arg0 = Register::Create(arg_sp.code(), arg0.SizeInBits());
      if (arg1_sp) arg1 = Register::Create(arg_sp.code(), arg1.SizeInBits());
      if (arg2_sp) arg2 = Register::Create(arg_sp.code(), arg2.SizeInBits());
      if (arg3_sp) arg3 = Register::Create(arg_sp.code(), arg3.SizeInBits());
    }

    // Preserve NZCV.
    {
      UseScratchRegisterScope temps(this);
      Register tmp = temps.AcquireX();
      Mrs(tmp, NZCV);
      Push(tmp, xzr);
    }

    PrintfNoPreserve(format, arg0, arg1, arg2, arg3);

    // Restore NZCV.
    {
      UseScratchRegisterScope temps(this);
      Register tmp = temps.AcquireX();
      Pop(xzr, tmp);
      Msr(NZCV, tmp);
    }
  }

  PopCPURegList(kCallerSavedV);
  PopCPURegList<kDontLoadLR>(saved_registers);

  TmpList()->set_list(old_tmp_list);
  FPTmpList()->set_list(old_fp_tmp_list);
}

UseScratchRegisterScope::~UseScratchRegisterScope() {
  available_->set_list(old_available_);
  availablefp_->set_list(old_availablefp_);
}

Register UseScratchRegisterScope::AcquireSameSizeAs(const Register& reg) {
  int code = AcquireNextAvailable(available_).code();
  return Register::Create(code, reg.SizeInBits());
}

VRegister UseScratchRegisterScope::AcquireSameSizeAs(const VRegister& reg) {
  int code = AcquireNextAvailable(availablefp_).code();
  return VRegister::Create(code, reg.SizeInBits());
}

CPURegister UseScratchRegisterScope::AcquireNextAvailable(
    CPURegList* available) {
  CHECK(!available->IsEmpty());
  CPURegister result = available->PopLowestIndex();
  DCHECK(!AreAliased(result, xzr, sp));
  return result;
}

void TurboAssembler::ComputeCodeStartAddress(const Register& rd) {
  // The current PC is pc_offset() bytes past the start of the code object, so
  // a pc-relative adr with displacement -pc_offset() yields the code start.
  adr(rd, -pc_offset());
}

void TurboAssembler::RestoreFPAndLR() {
  static_assert(StandardFrameConstants::kCallerFPOffset + kSystemPointerSize ==
                    StandardFrameConstants::kCallerPCOffset,
                "Offsets must be consecutive for ldp!");
#ifdef V8_ENABLE_CONTROL_FLOW_INTEGRITY
  // Make sure we can use x16 and x17.
  UseScratchRegisterScope temps(this);
  temps.Exclude(x16, x17);
  // We can load the return address directly into x17.
  Add(x16, fp, StandardFrameConstants::kCallerSPOffset);
  Ldp(fp, x17, MemOperand(fp, StandardFrameConstants::kCallerFPOffset));
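  // x16 reconstructs the SP value that served as the PAC modifier when the
  // return address was signed; authenticate the value loaded into x17.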
  Autib1716();
  Mov(lr, x17);
#else
  Ldp(fp, lr, MemOperand(fp, StandardFrameConstants::kCallerFPOffset));
#endif
}

#if V8_ENABLE_WEBASSEMBLY
void TurboAssembler::StoreReturnAddressInWasmExitFrame(Label* return_location) {
  UseScratchRegisterScope temps(this);
  temps.Exclude(x16, x17);
  Adr(x17, return_location);
#ifdef V8_ENABLE_CONTROL_FLOW_INTEGRITY
  Add(x16, fp, WasmExitFrameConstants::kCallingPCOffset + kSystemPointerSize);
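  // Sign the return address in x17 with Pacib1716, using x16 as the modifier.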
  Pacib1716();
#endif
  Str(x17, MemOperand(fp, WasmExitFrameConstants::kCallingPCOffset));
}
#endif  // V8_ENABLE_WEBASSEMBLY

void TurboAssembler::PopcntHelper(Register dst, Register src) {
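  // ARM64 has no scalar popcount instruction: move the value to a NEON
  // register, count set bits per byte with Cnt, then sum the bytes with Addv
  // and move the result back.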
  UseScratchRegisterScope temps(this);
  VRegister scratch = temps.AcquireV(kFormat8B);
  VRegister tmp = src.Is32Bits() ? scratch.S() : scratch.D();
  Fmov(tmp, src);
  Cnt(scratch, scratch);
  Addv(scratch.B(), scratch);
  Fmov(dst, tmp);
}

void TurboAssembler::I64x2BitMask(Register dst, VRegister src) {
  ASM_CODE_COMMENT(this);
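  // Shift each 64-bit lane right by 63 to isolate its sign bit, then combine
  // lane 0 (bit 0) and lane 1 (bit 1) into the scalar result.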
  UseScratchRegisterScope scope(this);
  VRegister tmp1 = scope.AcquireV(kFormat2D);
  Register tmp2 = scope.AcquireX();
  Ushr(tmp1.V2D(), src.V2D(), 63);
  Mov(dst.X(), tmp1.D(), 0);
  Mov(tmp2.X(), tmp1.D(), 1);
  Add(dst.W(), dst.W(), Operand(tmp2.W(), LSL, 1));
}

void TurboAssembler::I64x2AllTrue(Register dst, VRegister src) {
  ASM_CODE_COMMENT(this);
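  // Cmeq sets a lane to all ones iff the input lane is zero. After Addp, the
  // result is 0 only when both input lanes were non-zero; any all-ones
  // residue is a NaN bit pattern, so Fcmp of the value with itself is
  // unordered and `eq` is set only in the all-true case.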
  UseScratchRegisterScope scope(this);
  VRegister tmp = scope.AcquireV(kFormat2D);
  Cmeq(tmp.V2D(), src.V2D(), 0);
  Addp(tmp.D(), tmp);
  Fcmp(tmp.D(), tmp.D());
  Cset(dst, eq);
}

}  // namespace internal
}  // namespace v8

#endif  // V8_TARGET_ARCH_ARM64