assembler-arm64.cc 159 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
// Copyright 2013 the V8 project authors. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
//       notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials provided
//       with the distribution.
//     * Neither the name of Google Inc. nor the names of its
//       contributors may be used to endorse or promote products derived
//       from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

29
#if V8_TARGET_ARCH_ARM64
30

31
#include "src/codegen/arm64/assembler-arm64.h"
32

33
#include "src/base/bits.h"
34
#include "src/base/cpu.h"
35
#include "src/codegen/arm64/assembler-arm64-inl.h"
36
#include "src/codegen/register-configuration.h"
37
#include "src/codegen/safepoint-table.h"
38
#include "src/codegen/string-constants.h"
39
#include "src/execution/frame-constants.h"
40 41 42 43

namespace v8 {
namespace internal {

44 45 46
namespace {

#ifdef USE_SIMULATOR
47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62
static unsigned SimulatorFeaturesFromCommandLine() {
  if (strcmp(FLAG_sim_arm64_optional_features, "none") == 0) {
    return 0;
  }
  if (strcmp(FLAG_sim_arm64_optional_features, "all") == 0) {
    return (1u << NUMBER_OF_CPU_FEATURES) - 1;
  }
  fprintf(
      stderr,
      "Error: unrecognised value for --sim-arm64-optional-features ('%s').\n",
      FLAG_sim_arm64_optional_features);
  fprintf(stderr,
          "Supported values are:  none\n"
          "                       all\n");
  FATAL("sim-arm64-optional-features");
}
63
#endif  // USE_SIMULATOR
64 65 66 67 68 69 70 71 72

static constexpr unsigned CpuFeaturesFromCompiler() {
  unsigned features = 0;
#if defined(__ARM_FEATURE_JCVT)
  features |= 1u << JSCVT;
#endif
  return features;
}

73 74
}  // namespace

75
// -----------------------------------------------------------------------------
76
// CpuFeatures implementation.
77

78
void CpuFeatures::ProbeImpl(bool cross_compile) {
79
  // Only use statically determined features for cross compile (snapshot).
80 81 82 83
  if (cross_compile) {
    supported_ |= CpuFeaturesFromCompiler();
    return;
  }
84

85 86 87
  // We used to probe for coherent cache support, but on older CPUs it
  // causes crashes (crbug.com/524337), and newer CPUs don't even have
  // the feature any more.
88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103

#ifdef USE_SIMULATOR
  supported_ |= SimulatorFeaturesFromCommandLine();
#else
  // Probe for additional features at runtime.
  base::CPU cpu;
  unsigned runtime = 0;
  if (cpu.has_jscvt()) {
    runtime |= 1u << JSCVT;
  }

  // Use the best of the features found by CPU detection and those inferred from
  // the build system.
  supported_ |= CpuFeaturesFromCompiler();
  supported_ |= runtime;
#endif  // USE_SIMULATOR
104 105
}

106
void CpuFeatures::PrintTarget() {}
107
void CpuFeatures::PrintFeatures() {}
108

109 110 111 112 113 114 115
// -----------------------------------------------------------------------------
// CPURegList utilities.

CPURegister CPURegList::PopLowestIndex() {
  if (IsEmpty()) {
    return NoCPUReg;
  }
116
  int index = base::bits::CountTrailingZeros(list_);
117
  DCHECK((1LL << index) & list_);
118 119 120 121 122 123 124 125 126 127
  Remove(index);
  return CPURegister::Create(index, size_, type_);
}

CPURegister CPURegList::PopHighestIndex() {
  if (IsEmpty()) {
    return NoCPUReg;
  }
  int index = CountLeadingZeros(list_, kRegListSizeInBits);
  index = kRegListSizeInBits - 1 - index;
128
  DCHECK((1LL << index) & list_);
129 130 131 132
  Remove(index);
  return CPURegister::Create(index, size_, type_);
}

133 134 135 136 137 138 139 140 141 142 143 144
void CPURegList::Align() {
  // Use padreg, if necessary, to maintain stack alignment.
  if (Count() % 2 != 0) {
    if (IncludesAliasOf(padreg)) {
      Remove(padreg);
    } else {
      Combine(padreg);
    }
  }

  DCHECK_EQ(Count() % 2, 0);
}
145

146
CPURegList CPURegList::GetCalleeSaved(int size) {
147
  return CPURegList(CPURegister::kRegister, size, 19, 28);
148 149
}

150 151
CPURegList CPURegList::GetCalleeSavedV(int size) {
  return CPURegList(CPURegister::kVRegister, size, 8, 15);
152 153
}

154
CPURegList CPURegList::GetCallerSaved(int size) {
155
  // x18 is the platform register and is reserved for the use of platform ABIs.
156
  // Registers x0-x17 are caller-saved.
157
  CPURegList list = CPURegList(CPURegister::kRegister, size, 0, 17);
158 159 160
  return list;
}

161
CPURegList CPURegList::GetCallerSavedV(int size) {
162
  // Registers d0-d7 and d16-d31 are caller-saved.
163 164
  CPURegList list = CPURegList(CPURegister::kVRegister, size, 0, 7);
  list.Combine(CPURegList(CPURegister::kVRegister, size, 16, 31));
165 166 167 168 169 170
  return list;
}

// -----------------------------------------------------------------------------
// Implementation of RelocInfo

171 172 173 174
const int RelocInfo::kApplyMask =
    RelocInfo::ModeMask(RelocInfo::CODE_TARGET) |
    RelocInfo::ModeMask(RelocInfo::RUNTIME_ENTRY) |
    RelocInfo::ModeMask(RelocInfo::INTERNAL_REFERENCE);
175 176 177

bool RelocInfo::IsCodedSpecially() {
  // The deserializer needs to know whether a pointer is specially coded. Being
178 179 180 181 182 183 184 185
  // specially coded on ARM64 means that it is an immediate branch.
  Instruction* instr = reinterpret_cast<Instruction*>(pc_);
  if (instr->IsLdrLiteralX()) {
    return false;
  } else {
    DCHECK(instr->IsBranchAndLink() || instr->IsUnconditionalBranch());
    return true;
  }
186 187
}

188 189
bool RelocInfo::IsInConstantPool() {
  Instruction* instr = reinterpret_cast<Instruction*>(pc_);
190 191 192
  DCHECK_IMPLIES(instr->IsLdrLiteralW(), COMPRESS_POINTERS_BOOL);
  return instr->IsLdrLiteralX() ||
         (COMPRESS_POINTERS_BOOL && instr->IsLdrLiteralW());
193 194
}

195 196
uint32_t RelocInfo::wasm_call_tag() const {
  DCHECK(rmode_ == WASM_CALL || rmode_ == WASM_STUB_CALL);
197 198 199
  Instruction* instr = reinterpret_cast<Instruction*>(pc_);
  if (instr->IsLdrLiteralX()) {
    return static_cast<uint32_t>(
200
        Memory<Address>(Assembler::target_pointer_address_at(pc_)));
201 202
  } else {
    DCHECK(instr->IsBranchAndLink() || instr->IsUnconditionalBranch());
203
    return static_cast<uint32_t>(instr->ImmPCOffset() / kInstrSize);
204 205 206
  }
}

207 208 209 210 211 212 213 214 215 216 217 218
bool AreAliased(const CPURegister& reg1, const CPURegister& reg2,
                const CPURegister& reg3, const CPURegister& reg4,
                const CPURegister& reg5, const CPURegister& reg6,
                const CPURegister& reg7, const CPURegister& reg8) {
  int number_of_valid_regs = 0;
  int number_of_valid_fpregs = 0;

  RegList unique_regs = 0;
  RegList unique_fpregs = 0;

  const CPURegister regs[] = {reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8};

219
  for (unsigned i = 0; i < arraysize(regs); i++) {
220 221
    if (regs[i].IsRegister()) {
      number_of_valid_regs++;
222
      unique_regs |= regs[i].bit();
223
    } else if (regs[i].IsVRegister()) {
224
      number_of_valid_fpregs++;
225
      unique_fpregs |= regs[i].bit();
226
    } else {
227
      DCHECK(!regs[i].is_valid());
228 229 230 231
    }
  }

  int number_of_unique_regs =
232
      CountSetBits(unique_regs, sizeof(unique_regs) * kBitsPerByte);
233
  int number_of_unique_fpregs =
234
      CountSetBits(unique_fpregs, sizeof(unique_fpregs) * kBitsPerByte);
235

236 237
  DCHECK(number_of_valid_regs >= number_of_unique_regs);
  DCHECK(number_of_valid_fpregs >= number_of_unique_fpregs);
238 239 240 241 242 243 244 245 246

  return (number_of_valid_regs != number_of_unique_regs) ||
         (number_of_valid_fpregs != number_of_unique_fpregs);
}

bool AreSameSizeAndType(const CPURegister& reg1, const CPURegister& reg2,
                        const CPURegister& reg3, const CPURegister& reg4,
                        const CPURegister& reg5, const CPURegister& reg6,
                        const CPURegister& reg7, const CPURegister& reg8) {
247
  DCHECK(reg1.is_valid());
248
  bool match = true;
249 250 251 252 253 254 255
  match &= !reg2.is_valid() || reg2.IsSameSizeAndType(reg1);
  match &= !reg3.is_valid() || reg3.IsSameSizeAndType(reg1);
  match &= !reg4.is_valid() || reg4.IsSameSizeAndType(reg1);
  match &= !reg5.is_valid() || reg5.IsSameSizeAndType(reg1);
  match &= !reg6.is_valid() || reg6.IsSameSizeAndType(reg1);
  match &= !reg7.is_valid() || reg7.IsSameSizeAndType(reg1);
  match &= !reg8.is_valid() || reg8.IsSameSizeAndType(reg1);
256 257 258
  return match;
}

259 260
bool AreSameFormat(const VRegister& reg1, const VRegister& reg2,
                   const VRegister& reg3, const VRegister& reg4) {
261 262 263 264
  DCHECK(reg1.is_valid());
  return (!reg2.is_valid() || reg2.IsSameFormat(reg1)) &&
         (!reg3.is_valid() || reg3.IsSameFormat(reg1)) &&
         (!reg4.is_valid() || reg4.IsSameFormat(reg1));
265 266 267 268
}

bool AreConsecutive(const VRegister& reg1, const VRegister& reg2,
                    const VRegister& reg3, const VRegister& reg4) {
269 270 271
  DCHECK(reg1.is_valid());
  if (!reg2.is_valid()) {
    DCHECK(!reg3.is_valid() && !reg4.is_valid());
272 273 274 275 276
    return true;
  } else if (reg2.code() != ((reg1.code() + 1) % kNumberOfVRegisters)) {
    return false;
  }

277 278
  if (!reg3.is_valid()) {
    DCHECK(!reg4.is_valid());
279 280 281 282 283
    return true;
  } else if (reg3.code() != ((reg2.code() + 1) % kNumberOfVRegisters)) {
    return false;
  }

284
  if (!reg4.is_valid()) {
285 286 287 288 289 290 291
    return true;
  } else if (reg4.code() != ((reg3.code() + 1) % kNumberOfVRegisters)) {
    return false;
  }

  return true;
}
292

293
bool Operand::NeedsRelocation(const Assembler* assembler) const {
294 295
  RelocInfo::Mode rmode = immediate_.rmode();

296 297
  if (RelocInfo::IsOnlyForSerializer(rmode)) {
    return assembler->options().record_reloc_info_for_serialization;
298 299
  }

300
  return !RelocInfo::IsNone(rmode);
301 302
}

303
// Assembler
304 305 306
Assembler::Assembler(const AssemblerOptions& options,
                     std::unique_ptr<AssemblerBuffer> buffer)
    : AssemblerBase(options, std::move(buffer)),
307 308
      unresolved_branches_(),
      constpool_(this) {
309
  veneer_pool_blocked_nesting_ = 0;
310
  Reset();
311 312 313 314 315 316

#if defined(V8_OS_WIN)
  if (options.collect_win64_unwind_info) {
    xdata_encoder_ = std::make_unique<win64_unwindinfo::XdataEncoder>(*this);
  }
#endif
317 318 319
}

Assembler::~Assembler() {
320
  DCHECK(constpool_.IsEmpty());
321
  DCHECK_EQ(veneer_pool_blocked_nesting_, 0);
322 323
}

324
void Assembler::AbortedCodeGeneration() { constpool_.Clear(); }
325 326 327

void Assembler::Reset() {
#ifdef DEBUG
328
  DCHECK((pc_ >= buffer_start_) && (pc_ < buffer_start_ + buffer_->size()));
329
  DCHECK_EQ(veneer_pool_blocked_nesting_, 0);
330
  DCHECK(unresolved_branches_.empty());
331
  memset(buffer_start_, 0, pc_ - buffer_start_);
332
#endif
333 334
  pc_ = buffer_start_;
  reloc_info_writer.Reposition(buffer_start_ + buffer_->size(), pc_);
335
  constpool_.Clear();
336
  next_veneer_pool_check_ = kMaxInt;
337 338
}

339 340 341 342 343 344 345 346
#if defined(V8_OS_WIN)
win64_unwindinfo::BuiltinUnwindInfo Assembler::GetUnwindInfo() const {
  DCHECK(options().collect_win64_unwind_info);
  DCHECK_NOT_NULL(xdata_encoder_);
  return xdata_encoder_->unwinding_info();
}
#endif

347
void Assembler::AllocateAndInstallRequestedHeapObjects(Isolate* isolate) {
348
  DCHECK_IMPLIES(isolate == nullptr, heap_object_requests_.empty());
349
  for (auto& request : heap_object_requests_) {
350
    Address pc = reinterpret_cast<Address>(buffer_start_) + request.offset();
351
    switch (request.kind()) {
352
      case HeapObjectRequest::kHeapNumber: {
353 354 355
        Handle<HeapObject> object =
            isolate->factory()->NewHeapNumber<AllocationType::kOld>(
                request.heap_number());
356 357
        EmbeddedObjectIndex index = AddEmbeddedObject(object);
        set_embedded_object_index_referenced_from(pc, index);
358
        break;
359
      }
360 361 362
      case HeapObjectRequest::kStringConstant: {
        const StringConstantBase* str = request.string();
        CHECK_NOT_NULL(str);
363 364 365
        EmbeddedObjectIndex index =
            AddEmbeddedObject(str->AllocateStringConstant(isolate));
        set_embedded_object_index_referenced_from(pc, index);
366 367
        break;
      }
368 369
    }
  }
370
}
371

372 373 374
void Assembler::GetCode(Isolate* isolate, CodeDesc* desc,
                        SafepointTableBuilder* safepoint_table_builder,
                        int handler_table_offset) {
375
  // Emit constant pool if necessary.
376
  ForceConstantPoolEmissionWithoutJump();
377
  DCHECK(constpool_.IsEmpty());
378

379 380
  int code_comments_size = WriteCodeComments();

381
  AllocateAndInstallRequestedHeapObjects(isolate);
382

383
  // Set up code descriptor.
384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402
  // TODO(jgruber): Reconsider how these offsets and sizes are maintained up to
  // this point to make CodeDesc initialization less fiddly.

  static constexpr int kConstantPoolSize = 0;
  const int instruction_size = pc_offset();
  const int code_comments_offset = instruction_size - code_comments_size;
  const int constant_pool_offset = code_comments_offset - kConstantPoolSize;
  const int handler_table_offset2 = (handler_table_offset == kNoHandlerTable)
                                        ? constant_pool_offset
                                        : handler_table_offset;
  const int safepoint_table_offset =
      (safepoint_table_builder == kNoSafepointTable)
          ? handler_table_offset2
          : safepoint_table_builder->GetCodeOffset();
  const int reloc_info_offset =
      static_cast<int>(reloc_info_writer.pos() - buffer_->start());
  CodeDesc::Initialize(desc, this, safepoint_table_offset,
                       handler_table_offset2, constant_pool_offset,
                       code_comments_offset, reloc_info_offset);
403 404 405
}

void Assembler::Align(int m) {
406
  DCHECK(m >= 4 && base::bits::IsPowerOfTwo(m));
407 408 409 410 411
  while ((pc_offset() & (m - 1)) != 0) {
    nop();
  }
}

412 413 414 415
void Assembler::CodeTargetAlign() {
  // Preferred alignment of jump targets on some ARM chips.
  Align(8);
}
416

417
void Assembler::CheckLabelLinkChain(Label const* label) {
418 419
#ifdef DEBUG
  if (label->is_linked()) {
420
    static const int kMaxLinksToCheck = 64;  // Avoid O(n2) behaviour.
421
    int links_checked = 0;
422
    int64_t linkoffset = label->pos();
423 424
    bool end_of_chain = false;
    while (!end_of_chain) {
425
      if (++links_checked > kMaxLinksToCheck) break;
426
      Instruction* link = InstructionAt(linkoffset);
427 428
      int64_t linkpcoffset = link->ImmPCOffset();
      int64_t prevlinkoffset = linkoffset + linkpcoffset;
429

430
      end_of_chain = (linkoffset == prevlinkoffset);
431 432 433 434 435 436
      linkoffset = linkoffset + linkpcoffset;
    }
  }
#endif
}

437 438 439
void Assembler::RemoveBranchFromLabelLinkChain(Instruction* branch,
                                               Label* label,
                                               Instruction* label_veneer) {
440
  DCHECK(label->is_linked());
441 442 443 444 445 446 447 448 449 450 451 452 453 454 455

  CheckLabelLinkChain(label);

  Instruction* link = InstructionAt(label->pos());
  Instruction* prev_link = link;
  Instruction* next_link;
  bool end_of_chain = false;

  while (link != branch && !end_of_chain) {
    next_link = link->ImmPCOffsetTarget();
    end_of_chain = (link == next_link);
    prev_link = link;
    link = next_link;
  }

456
  DCHECK(branch == link);
457 458 459 460 461 462 463 464 465
  next_link = branch->ImmPCOffsetTarget();

  if (branch == prev_link) {
    // The branch is the first instruction in the chain.
    if (branch == next_link) {
      // It is also the last instruction in the chain, so it is the only branch
      // currently referring to this label.
      label->Unuse();
    } else {
466
      label->link_to(
467
          static_cast<int>(reinterpret_cast<byte*>(next_link) - buffer_start_));
468 469 470 471
    }

  } else if (branch == next_link) {
    // The branch is the last (but not also the first) instruction in the chain.
472
    prev_link->SetImmPCOffsetTarget(options(), prev_link);
473 474 475 476

  } else {
    // The branch is in the middle of the chain.
    if (prev_link->IsTargetInImmPCOffsetRange(next_link)) {
477
      prev_link->SetImmPCOffsetTarget(options(), next_link);
478
    } else if (label_veneer != nullptr) {
479
      // Use the veneer for all previous links in the chain.
480
      prev_link->SetImmPCOffsetTarget(options(), prev_link);
481 482 483 484 485 486

      end_of_chain = false;
      link = next_link;
      while (!end_of_chain) {
        next_link = link->ImmPCOffsetTarget();
        end_of_chain = (link == next_link);
487
        link->SetImmPCOffsetTarget(options(), label_veneer);
488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516
        link = next_link;
      }
    } else {
      // The assert below will fire.
      // Some other work could be attempted to fix up the chain, but it would be
      // rather complicated. If we crash here, we may want to consider using an
      // other mechanism than a chain of branches.
      //
      // Note that this situation currently should not happen, as we always call
      // this function with a veneer to the target label.
      // However this could happen with a MacroAssembler in the following state:
      //    [previous code]
      //    B(label);
      //    [20KB code]
      //    Tbz(label);   // First tbz. Pointing to unconditional branch.
      //    [20KB code]
      //    Tbz(label);   // Second tbz. Pointing to the first tbz.
      //    [more code]
      // and this function is called to remove the first tbz from the label link
      // chain. Since tbz has a range of +-32KB, the second tbz cannot point to
      // the unconditional branch.
      CHECK(prev_link->IsTargetInImmPCOffsetRange(next_link));
      UNREACHABLE();
    }
  }

  CheckLabelLinkChain(label);
}

517 518 519 520 521
void Assembler::bind(Label* label) {
  // Bind label to the address at pc_. All instructions (most likely branches)
  // that are linked to this label will be updated to point to the newly-bound
  // label.

522 523
  DCHECK(!label->is_near_linked());
  DCHECK(!label->is_bound());
524

525 526
  DeleteUnresolvedBranchInfoForLabel(label);

527 528 529 530 531 532 533 534 535 536 537 538 539 540 541
  // If the label is linked, the link chain looks something like this:
  //
  // |--I----I-------I-------L
  // |---------------------->| pc_offset
  // |-------------->|         linkoffset = label->pos()
  //         |<------|         link->ImmPCOffset()
  // |------>|                 prevlinkoffset = linkoffset + link->ImmPCOffset()
  //
  // On each iteration, the last link is updated and then removed from the
  // chain until only one remains. At that point, the label is bound.
  //
  // If the label is not linked, no preparation is required before binding.
  while (label->is_linked()) {
    int linkoffset = label->pos();
    Instruction* link = InstructionAt(linkoffset);
542
    int prevlinkoffset = linkoffset + static_cast<int>(link->ImmPCOffset());
543 544 545

    CheckLabelLinkChain(label);

546
    DCHECK_GE(linkoffset, 0);
547 548
    DCHECK(linkoffset < pc_offset());
    DCHECK((linkoffset > prevlinkoffset) ||
549
           (linkoffset - prevlinkoffset == kStartOfLabelLinkChain));
550
    DCHECK_GE(prevlinkoffset, 0);
551 552

    // Update the link to point to the label.
553 554 555 556
    if (link->IsUnresolvedInternalReference()) {
      // Internal references do not get patched to an instruction but directly
      // to an address.
      internal_reference_positions_.push_back(linkoffset);
557
      PatchingAssembler patcher(options(), reinterpret_cast<byte*>(link), 2);
558 559
      patcher.dc64(reinterpret_cast<uintptr_t>(pc_));
    } else {
560
      link->SetImmPCOffsetTarget(options(),
561
                                 reinterpret_cast<Instruction*>(pc_));
562
    }
563 564 565 566 567 568 569 570 571 572 573 574

    // Link the label to the previous link in the chain.
    if (linkoffset - prevlinkoffset == kStartOfLabelLinkChain) {
      // We hit kStartOfLabelLinkChain, so the chain is fully processed.
      label->Unuse();
    } else {
      // Update the label for the next iteration.
      label->link_to(prevlinkoffset);
    }
  }
  label->bind_to(pc_offset());

575 576
  DCHECK(label->is_bound());
  DCHECK(!label->is_linked());
577 578 579
}

int Assembler::LinkAndGetByteOffsetTo(Label* label) {
580
  DCHECK_EQ(sizeof(*pc_), 1);
581 582 583 584 585 586 587 588 589 590 591 592 593 594
  CheckLabelLinkChain(label);

  int offset;
  if (label->is_bound()) {
    // The label is bound, so it does not need to be updated. Referring
    // instructions must link directly to the label as they will not be
    // updated.
    //
    // In this case, label->pos() returns the offset of the label from the
    // start of the buffer.
    //
    // Note that offset can be zero for self-referential instructions. (This
    // could be useful for ADR, for example.)
    offset = label->pos() - pc_offset();
595
    DCHECK_LE(offset, 0);
596 597 598 599 600 601 602 603
  } else {
    if (label->is_linked()) {
      // The label is linked, so the referring instruction should be added onto
      // the end of the label's link chain.
      //
      // In this case, label->pos() returns the offset of the last linked
      // instruction from the start of the buffer.
      offset = label->pos() - pc_offset();
604
      DCHECK_NE(offset, kStartOfLabelLinkChain);
605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620
      // Note that the offset here needs to be PC-relative only so that the
      // first instruction in a buffer can link to an unbound label. Otherwise,
      // the offset would be 0 for this case, and 0 is reserved for
      // kStartOfLabelLinkChain.
    } else {
      // The label is unused, so it now becomes linked and the referring
      // instruction is at the start of the new link chain.
      offset = kStartOfLabelLinkChain;
    }
    // The instruction at pc is now the last link in the label's chain.
    label->link_to(pc_offset());
  }

  return offset;
}

621
void Assembler::DeleteUnresolvedBranchInfoForLabelTraverse(Label* label) {
622
  DCHECK(label->is_linked());
623 624 625 626 627 628 629
  CheckLabelLinkChain(label);

  int link_offset = label->pos();
  int link_pcoffset;
  bool end_of_chain = false;

  while (!end_of_chain) {
630
    Instruction* link = InstructionAt(link_offset);
631
    link_pcoffset = static_cast<int>(link->ImmPCOffset());
632 633 634

    // ADR instructions are not handled by veneers.
    if (link->IsImmBranch()) {
635 636 637
      int max_reachable_pc =
          static_cast<int>(InstructionOffset(link) +
                           Instruction::ImmBranchRange(link->BranchType()));
638
      using unresolved_info_it = std::multimap<int, FarBranchInfo>::iterator;
639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654
      std::pair<unresolved_info_it, unresolved_info_it> range;
      range = unresolved_branches_.equal_range(max_reachable_pc);
      unresolved_info_it it;
      for (it = range.first; it != range.second; ++it) {
        if (it->second.pc_offset_ == link_offset) {
          unresolved_branches_.erase(it);
          break;
        }
      }
    }

    end_of_chain = (link_pcoffset == 0);
    link_offset = link_offset + link_pcoffset;
  }
}

655
void Assembler::DeleteUnresolvedBranchInfoForLabel(Label* label) {
656
  if (unresolved_branches_.empty()) {
657
    DCHECK_EQ(next_veneer_pool_check_, kMaxInt);
658 659 660
    return;
  }

661
  if (label->is_linked()) {
662 663 664
    // Branches to this label will be resolved when the label is bound, normally
    // just after all the associated info has been deleted.
    DeleteUnresolvedBranchInfoForLabelTraverse(label);
665
  }
666 667 668 669
  if (unresolved_branches_.empty()) {
    next_veneer_pool_check_ = kMaxInt;
  } else {
    next_veneer_pool_check_ =
670
        unresolved_branches_first_limit() - kVeneerDistanceCheckMargin;
671
  }
672 673
}

674 675 676 677
bool Assembler::IsConstantPoolAt(Instruction* instr) {
  // The constant pool marker is made of two instructions. These instructions
  // will never be emitted by the JIT, so checking for the first one is enough:
  // 0: ldr xzr, #<size of pool>
678
  bool result = instr->IsLdrLiteralX() && (instr->Rt() == kZeroRegCode);
679 680 681

  // It is still worth asserting the marker is complete.
  // 4: blr xzr
682
  DCHECK(!result || (instr->following()->IsBranchAndLinkToRegister() &&
683
                     instr->following()->Rn() == kZeroRegCode));
684 685 686 687 688

  return result;
}

int Assembler::ConstantPoolSizeAt(Instruction* instr) {
689 690 691 692 693 694
#ifdef USE_SIMULATOR
  // Assembler::debug() embeds constants directly into the instruction stream.
  // Although this is not a genuine constant pool, treat it like one to avoid
  // disassembling the constants.
  if ((instr->Mask(ExceptionMask) == HLT) &&
      (instr->ImmException() == kImmExceptionIsDebug)) {
695 696
    const char* message = reinterpret_cast<const char*>(
        instr->InstructionAtOffset(kDebugMessageOffset));
697
    int size = static_cast<int>(kDebugMessageOffset + strlen(message) + 1);
698
    return RoundUp(size, kInstrSize) / kInstrSize;
699 700 701 702
  }
  // Same for printf support, see MacroAssembler::CallPrintf().
  if ((instr->Mask(ExceptionMask) == HLT) &&
      (instr->ImmException() == kImmExceptionIsPrintf)) {
703
    return kPrintfLength / kInstrSize;
704 705
  }
#endif
706 707 708 709 710 711 712
  if (IsConstantPoolAt(instr)) {
    return instr->ImmLLiteral();
  } else {
    return -1;
  }
}

713 714 715 716 717 718
void Assembler::EmitPoolGuard() {
  // We must generate only one instruction as this is used in scopes that
  // control the size of the code generated.
  Emit(BLR | Rn(xzr));
}

719
void Assembler::StartBlockVeneerPool() { ++veneer_pool_blocked_nesting_; }
720 721 722 723

void Assembler::EndBlockVeneerPool() {
  if (--veneer_pool_blocked_nesting_ == 0) {
    // Check the veneer pool hasn't been blocked for too long.
724
    DCHECK(unresolved_branches_.empty() ||
725 726 727 728
           (pc_offset() < unresolved_branches_first_limit()));
  }
}

729
void Assembler::br(const Register& xn) {
730
  DCHECK(xn.Is64Bits());
731 732 733 734
  Emit(BR | Rn(xn));
}

void Assembler::blr(const Register& xn) {
735
  DCHECK(xn.Is64Bits());
736 737
  // The pattern 'blr xzr' is used as a guard to detect when execution falls
  // through the constant pool. It should not be emitted.
738
  DCHECK_NE(xn, xzr);
739 740 741 742
  Emit(BLR | Rn(xn));
}

void Assembler::ret(const Register& xn) {
743
  DCHECK(xn.Is64Bits());
744 745 746
  Emit(RET | Rn(xn));
}

747
void Assembler::b(int imm26) { Emit(B | ImmUncondBranch(imm26)); }
748

749
void Assembler::b(Label* label) { b(LinkAndGetInstructionOffsetTo(label)); }
750 751 752 753 754 755 756 757 758

void Assembler::b(int imm19, Condition cond) {
  Emit(B_cond | ImmCondBranch(imm19) | cond);
}

void Assembler::b(Label* label, Condition cond) {
  b(LinkAndGetInstructionOffsetTo(label), cond);
}

759
void Assembler::bl(int imm26) { Emit(BL | ImmUncondBranch(imm26)); }
760

761
void Assembler::bl(Label* label) { bl(LinkAndGetInstructionOffsetTo(label)); }
762

763
void Assembler::cbz(const Register& rt, int imm19) {
764 765 766
  Emit(SF(rt) | CBZ | ImmCmpBranch(imm19) | Rt(rt));
}

767
void Assembler::cbz(const Register& rt, Label* label) {
768 769 770
  cbz(rt, LinkAndGetInstructionOffsetTo(label));
}

771
void Assembler::cbnz(const Register& rt, int imm19) {
772 773 774
  Emit(SF(rt) | CBNZ | ImmCmpBranch(imm19) | Rt(rt));
}

775
void Assembler::cbnz(const Register& rt, Label* label) {
776 777 778
  cbnz(rt, LinkAndGetInstructionOffsetTo(label));
}

779
void Assembler::tbz(const Register& rt, unsigned bit_pos, int imm14) {
780
  DCHECK(rt.Is64Bits() || (rt.Is32Bits() && (bit_pos < kWRegSizeInBits)));
781 782 783
  Emit(TBZ | ImmTestBranchBit(bit_pos) | ImmTestBranch(imm14) | Rt(rt));
}

784
void Assembler::tbz(const Register& rt, unsigned bit_pos, Label* label) {
785 786 787
  tbz(rt, bit_pos, LinkAndGetInstructionOffsetTo(label));
}

788
void Assembler::tbnz(const Register& rt, unsigned bit_pos, int imm14) {
789
  DCHECK(rt.Is64Bits() || (rt.Is32Bits() && (bit_pos < kWRegSizeInBits)));
790 791 792
  Emit(TBNZ | ImmTestBranchBit(bit_pos) | ImmTestBranch(imm14) | Rt(rt));
}

793
void Assembler::tbnz(const Register& rt, unsigned bit_pos, Label* label) {
794 795 796 797
  tbnz(rt, bit_pos, LinkAndGetInstructionOffsetTo(label));
}

void Assembler::adr(const Register& rd, int imm21) {
798
  DCHECK(rd.Is64Bits());
799 800 801 802 803 804 805
  Emit(ADR | ImmPCRelAddress(imm21) | Rd(rd));
}

void Assembler::adr(const Register& rd, Label* label) {
  adr(rd, LinkAndGetByteOffsetTo(label));
}

806 807 808 809 810
void Assembler::nop(NopMarkerTypes n) {
  DCHECK((FIRST_NOP_MARKER <= n) && (n <= LAST_NOP_MARKER));
  mov(Register::XRegFromCode(n), Register::XRegFromCode(n));
}

811
void Assembler::add(const Register& rd, const Register& rn,
812 813 814 815
                    const Operand& operand) {
  AddSub(rd, rn, operand, LeaveFlags, ADD);
}

816
void Assembler::adds(const Register& rd, const Register& rn,
817 818 819 820
                     const Operand& operand) {
  AddSub(rd, rn, operand, SetFlags, ADD);
}

821
void Assembler::cmn(const Register& rn, const Operand& operand) {
822 823 824 825
  Register zr = AppropriateZeroRegFor(rn);
  adds(zr, rn, operand);
}

826
void Assembler::sub(const Register& rd, const Register& rn,
827 828 829 830
                    const Operand& operand) {
  AddSub(rd, rn, operand, LeaveFlags, SUB);
}

831
void Assembler::subs(const Register& rd, const Register& rn,
832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850
                     const Operand& operand) {
  AddSub(rd, rn, operand, SetFlags, SUB);
}

void Assembler::cmp(const Register& rn, const Operand& operand) {
  Register zr = AppropriateZeroRegFor(rn);
  subs(zr, rn, operand);
}

void Assembler::neg(const Register& rd, const Operand& operand) {
  Register zr = AppropriateZeroRegFor(rd);
  sub(rd, zr, operand);
}

void Assembler::negs(const Register& rd, const Operand& operand) {
  Register zr = AppropriateZeroRegFor(rd);
  subs(rd, zr, operand);
}

851
void Assembler::adc(const Register& rd, const Register& rn,
852 853 854 855
                    const Operand& operand) {
  AddSubWithCarry(rd, rn, operand, LeaveFlags, ADC);
}

856
void Assembler::adcs(const Register& rd, const Register& rn,
857 858 859 860
                     const Operand& operand) {
  AddSubWithCarry(rd, rn, operand, SetFlags, ADC);
}

861
void Assembler::sbc(const Register& rd, const Register& rn,
862 863 864 865
                    const Operand& operand) {
  AddSubWithCarry(rd, rn, operand, LeaveFlags, SBC);
}

866
void Assembler::sbcs(const Register& rd, const Register& rn,
867 868 869 870 871 872 873 874 875 876 877 878 879 880 881
                     const Operand& operand) {
  AddSubWithCarry(rd, rn, operand, SetFlags, SBC);
}

void Assembler::ngc(const Register& rd, const Operand& operand) {
  Register zr = AppropriateZeroRegFor(rd);
  sbc(rd, zr, operand);
}

void Assembler::ngcs(const Register& rd, const Operand& operand) {
  Register zr = AppropriateZeroRegFor(rd);
  sbcs(rd, zr, operand);
}

// Logical instructions.
882
void Assembler::and_(const Register& rd, const Register& rn,
883 884 885 886
                     const Operand& operand) {
  Logical(rd, rn, operand, AND);
}

887
void Assembler::ands(const Register& rd, const Register& rn,
888 889 890 891
                     const Operand& operand) {
  Logical(rd, rn, operand, ANDS);
}

892
void Assembler::tst(const Register& rn, const Operand& operand) {
893 894 895
  ands(AppropriateZeroRegFor(rn), rn, operand);
}

896
void Assembler::bic(const Register& rd, const Register& rn,
897 898 899 900
                    const Operand& operand) {
  Logical(rd, rn, operand, BIC);
}

901
void Assembler::bics(const Register& rd, const Register& rn,
902 903 904 905
                     const Operand& operand) {
  Logical(rd, rn, operand, BICS);
}

906
void Assembler::orr(const Register& rd, const Register& rn,
907 908 909 910
                    const Operand& operand) {
  Logical(rd, rn, operand, ORR);
}

911
void Assembler::orn(const Register& rd, const Register& rn,
912 913 914 915
                    const Operand& operand) {
  Logical(rd, rn, operand, ORN);
}

916
void Assembler::eor(const Register& rd, const Register& rn,
917 918 919 920
                    const Operand& operand) {
  Logical(rd, rn, operand, EOR);
}

921
void Assembler::eon(const Register& rd, const Register& rn,
922 923 924 925
                    const Operand& operand) {
  Logical(rd, rn, operand, EON);
}

926
void Assembler::lslv(const Register& rd, const Register& rn,
927
                     const Register& rm) {
928 929
  DCHECK(rd.SizeInBits() == rn.SizeInBits());
  DCHECK(rd.SizeInBits() == rm.SizeInBits());
930 931 932
  Emit(SF(rd) | LSLV | Rm(rm) | Rn(rn) | Rd(rd));
}

933
void Assembler::lsrv(const Register& rd, const Register& rn,
934
                     const Register& rm) {
935 936
  DCHECK(rd.SizeInBits() == rn.SizeInBits());
  DCHECK(rd.SizeInBits() == rm.SizeInBits());
937 938 939
  Emit(SF(rd) | LSRV | Rm(rm) | Rn(rn) | Rd(rd));
}

940
void Assembler::asrv(const Register& rd, const Register& rn,
941
                     const Register& rm) {
942 943
  DCHECK(rd.SizeInBits() == rn.SizeInBits());
  DCHECK(rd.SizeInBits() == rm.SizeInBits());
944 945 946
  Emit(SF(rd) | ASRV | Rm(rm) | Rn(rn) | Rd(rd));
}

947
void Assembler::rorv(const Register& rd, const Register& rn,
948
                     const Register& rm) {
949 950
  DCHECK(rd.SizeInBits() == rn.SizeInBits());
  DCHECK(rd.SizeInBits() == rm.SizeInBits());
951 952 953 954
  Emit(SF(rd) | RORV | Rm(rm) | Rn(rn) | Rd(rd));
}

// Bitfield operations.
955 956
void Assembler::bfm(const Register& rd, const Register& rn, int immr,
                    int imms) {
957
  DCHECK(rd.SizeInBits() == rn.SizeInBits());
958
  Instr N = SF(rd) >> (kSFOffset - kBitfieldNOffset);
959 960
  Emit(SF(rd) | BFM | N | ImmR(immr, rd.SizeInBits()) |
       ImmS(imms, rn.SizeInBits()) | Rn(rn) | Rd(rd));
961 962
}

963 964
void Assembler::sbfm(const Register& rd, const Register& rn, int immr,
                     int imms) {
965
  DCHECK(rd.Is64Bits() || rn.Is32Bits());
966
  Instr N = SF(rd) >> (kSFOffset - kBitfieldNOffset);
967 968
  Emit(SF(rd) | SBFM | N | ImmR(immr, rd.SizeInBits()) |
       ImmS(imms, rn.SizeInBits()) | Rn(rn) | Rd(rd));
969 970
}

971 972
void Assembler::ubfm(const Register& rd, const Register& rn, int immr,
                     int imms) {
973
  DCHECK(rd.SizeInBits() == rn.SizeInBits());
974
  Instr N = SF(rd) >> (kSFOffset - kBitfieldNOffset);
975 976
  Emit(SF(rd) | UBFM | N | ImmR(immr, rd.SizeInBits()) |
       ImmS(imms, rn.SizeInBits()) | Rn(rn) | Rd(rd));
977 978
}

979 980
void Assembler::extr(const Register& rd, const Register& rn, const Register& rm,
                     int lsb) {
981 982
  DCHECK(rd.SizeInBits() == rn.SizeInBits());
  DCHECK(rd.SizeInBits() == rm.SizeInBits());
983
  Instr N = SF(rd) >> (kSFOffset - kBitfieldNOffset);
984 985
  Emit(SF(rd) | EXTR | N | Rm(rm) | ImmS(lsb, rn.SizeInBits()) | Rn(rn) |
       Rd(rd));
986 987
}

988
void Assembler::csel(const Register& rd, const Register& rn, const Register& rm,
989 990 991 992
                     Condition cond) {
  ConditionalSelect(rd, rn, rm, cond, CSEL);
}

993 994
void Assembler::csinc(const Register& rd, const Register& rn,
                      const Register& rm, Condition cond) {
995 996 997
  ConditionalSelect(rd, rn, rm, cond, CSINC);
}

998 999
void Assembler::csinv(const Register& rd, const Register& rn,
                      const Register& rm, Condition cond) {
1000 1001 1002
  ConditionalSelect(rd, rn, rm, cond, CSINV);
}

1003 1004
void Assembler::csneg(const Register& rd, const Register& rn,
                      const Register& rm, Condition cond) {
1005 1006 1007
  ConditionalSelect(rd, rn, rm, cond, CSNEG);
}

1008
void Assembler::cset(const Register& rd, Condition cond) {
1009
  DCHECK((cond != al) && (cond != nv));
1010
  Register zr = AppropriateZeroRegFor(rd);
1011
  csinc(rd, zr, zr, NegateCondition(cond));
1012 1013
}

1014
void Assembler::csetm(const Register& rd, Condition cond) {
1015
  DCHECK((cond != al) && (cond != nv));
1016
  Register zr = AppropriateZeroRegFor(rd);
1017
  csinv(rd, zr, zr, NegateCondition(cond));
1018 1019
}

1020
void Assembler::cinc(const Register& rd, const Register& rn, Condition cond) {
1021
  DCHECK((cond != al) && (cond != nv));
1022
  csinc(rd, rn, rn, NegateCondition(cond));
1023 1024
}

1025
void Assembler::cinv(const Register& rd, const Register& rn, Condition cond) {
1026
  DCHECK((cond != al) && (cond != nv));
1027
  csinv(rd, rn, rn, NegateCondition(cond));
1028 1029
}

1030
void Assembler::cneg(const Register& rd, const Register& rn, Condition cond) {
1031
  DCHECK((cond != al) && (cond != nv));
1032
  csneg(rd, rn, rn, NegateCondition(cond));
1033 1034
}

1035 1036
void Assembler::ConditionalSelect(const Register& rd, const Register& rn,
                                  const Register& rm, Condition cond,
1037
                                  ConditionalSelectOp op) {
1038 1039
  DCHECK(rd.SizeInBits() == rn.SizeInBits());
  DCHECK(rd.SizeInBits() == rm.SizeInBits());
1040 1041 1042
  Emit(SF(rd) | op | Rm(rm) | Cond(cond) | Rn(rn) | Rd(rd));
}

1043 1044
void Assembler::ccmn(const Register& rn, const Operand& operand,
                     StatusFlags nzcv, Condition cond) {
1045 1046 1047
  ConditionalCompare(rn, operand, nzcv, cond, CCMN);
}

1048 1049
void Assembler::ccmp(const Register& rn, const Operand& operand,
                     StatusFlags nzcv, Condition cond) {
1050 1051 1052
  ConditionalCompare(rn, operand, nzcv, cond, CCMP);
}

1053 1054
void Assembler::DataProcessing3Source(const Register& rd, const Register& rn,
                                      const Register& rm, const Register& ra,
1055 1056 1057 1058
                                      DataProcessing3SourceOp op) {
  Emit(SF(rd) | op | Rm(rm) | Ra(ra) | Rn(rn) | Rd(rd));
}

1059
void Assembler::mul(const Register& rd, const Register& rn,
1060
                    const Register& rm) {
1061
  DCHECK(AreSameSizeAndType(rd, rn, rm));
1062 1063 1064 1065
  Register zr = AppropriateZeroRegFor(rn);
  DataProcessing3Source(rd, rn, rm, zr, MADD);
}

1066
void Assembler::madd(const Register& rd, const Register& rn, const Register& rm,
1067
                     const Register& ra) {
1068
  DCHECK(AreSameSizeAndType(rd, rn, rm, ra));
1069 1070 1071
  DataProcessing3Source(rd, rn, rm, ra, MADD);
}

1072
void Assembler::mneg(const Register& rd, const Register& rn,
1073
                     const Register& rm) {
1074
  DCHECK(AreSameSizeAndType(rd, rn, rm));
1075 1076 1077 1078
  Register zr = AppropriateZeroRegFor(rn);
  DataProcessing3Source(rd, rn, rm, zr, MSUB);
}

1079
void Assembler::msub(const Register& rd, const Register& rn, const Register& rm,
1080
                     const Register& ra) {
1081
  DCHECK(AreSameSizeAndType(rd, rn, rm, ra));
1082 1083 1084
  DataProcessing3Source(rd, rn, rm, ra, MSUB);
}

1085 1086
void Assembler::smaddl(const Register& rd, const Register& rn,
                       const Register& rm, const Register& ra) {
1087 1088
  DCHECK(rd.Is64Bits() && ra.Is64Bits());
  DCHECK(rn.Is32Bits() && rm.Is32Bits());
1089 1090 1091
  DataProcessing3Source(rd, rn, rm, ra, SMADDL_x);
}

1092 1093
void Assembler::smsubl(const Register& rd, const Register& rn,
                       const Register& rm, const Register& ra) {
1094 1095
  DCHECK(rd.Is64Bits() && ra.Is64Bits());
  DCHECK(rn.Is32Bits() && rm.Is32Bits());
1096 1097 1098
  DataProcessing3Source(rd, rn, rm, ra, SMSUBL_x);
}

1099 1100
void Assembler::umaddl(const Register& rd, const Register& rn,
                       const Register& rm, const Register& ra) {
1101 1102
  DCHECK(rd.Is64Bits() && ra.Is64Bits());
  DCHECK(rn.Is32Bits() && rm.Is32Bits());
1103 1104 1105
  DataProcessing3Source(rd, rn, rm, ra, UMADDL_x);
}

1106 1107
void Assembler::umsubl(const Register& rd, const Register& rn,
                       const Register& rm, const Register& ra) {
1108 1109
  DCHECK(rd.Is64Bits() && ra.Is64Bits());
  DCHECK(rn.Is32Bits() && rm.Is32Bits());
1110 1111 1112
  DataProcessing3Source(rd, rn, rm, ra, UMSUBL_x);
}

1113
void Assembler::smull(const Register& rd, const Register& rn,
1114
                      const Register& rm) {
1115 1116
  DCHECK(rd.Is64Bits());
  DCHECK(rn.Is32Bits() && rm.Is32Bits());
1117 1118 1119
  DataProcessing3Source(rd, rn, rm, xzr, SMADDL_x);
}

1120
void Assembler::smulh(const Register& rd, const Register& rn,
1121
                      const Register& rm) {
1122
  DCHECK(AreSameSizeAndType(rd, rn, rm));
1123 1124 1125
  DataProcessing3Source(rd, rn, rm, xzr, SMULH_x);
}

1126
void Assembler::sdiv(const Register& rd, const Register& rn,
1127
                     const Register& rm) {
1128 1129
  DCHECK(rd.SizeInBits() == rn.SizeInBits());
  DCHECK(rd.SizeInBits() == rm.SizeInBits());
1130 1131 1132
  Emit(SF(rd) | SDIV | Rm(rm) | Rn(rn) | Rd(rd));
}

1133
void Assembler::udiv(const Register& rd, const Register& rn,
1134
                     const Register& rm) {
1135 1136
  DCHECK(rd.SizeInBits() == rn.SizeInBits());
  DCHECK(rd.SizeInBits() == rm.SizeInBits());
1137 1138 1139
  Emit(SF(rd) | UDIV | Rm(rm) | Rn(rn) | Rd(rd));
}

1140
void Assembler::rbit(const Register& rd, const Register& rn) {
1141 1142 1143
  DataProcessing1Source(rd, rn, RBIT);
}

1144
void Assembler::rev16(const Register& rd, const Register& rn) {
1145 1146 1147
  DataProcessing1Source(rd, rn, REV16);
}

1148
void Assembler::rev32(const Register& rd, const Register& rn) {
1149
  DCHECK(rd.Is64Bits());
1150 1151 1152
  DataProcessing1Source(rd, rn, REV);
}

1153
void Assembler::rev(const Register& rd, const Register& rn) {
1154 1155 1156
  DataProcessing1Source(rd, rn, rd.Is64Bits() ? REV_x : REV_w);
}

1157
void Assembler::clz(const Register& rd, const Register& rn) {
1158 1159 1160
  DataProcessing1Source(rd, rn, CLZ);
}

1161
void Assembler::cls(const Register& rd, const Register& rn) {
1162 1163 1164
  DataProcessing1Source(rd, rn, CLS);
}

1165 1166 1167 1168
void Assembler::pacib1716() { Emit(PACIB1716); }
void Assembler::autib1716() { Emit(AUTIB1716); }
void Assembler::pacibsp() { Emit(PACIBSP); }
void Assembler::autibsp() { Emit(AUTIBSP); }
1169

1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185
void Assembler::bti(BranchTargetIdentifier id) {
  SystemHint op;
  switch (id) {
    case BranchTargetIdentifier::kBti:
      op = BTI;
      break;
    case BranchTargetIdentifier::kBtiCall:
      op = BTI_c;
      break;
    case BranchTargetIdentifier::kBtiJump:
      op = BTI_j;
      break;
    case BranchTargetIdentifier::kBtiJumpCall:
      op = BTI_jc;
      break;
    case BranchTargetIdentifier::kNone:
1186
    case BranchTargetIdentifier::kPacibsp:
1187
      // We always want to generate a BTI instruction here, so disallow
1188
      // skipping its generation or generating a PACIBSP instead.
1189 1190 1191 1192 1193
      UNREACHABLE();
  }
  hint(op);
}

1194
void Assembler::ldp(const CPURegister& rt, const CPURegister& rt2,
1195 1196 1197 1198
                    const MemOperand& src) {
  LoadStorePair(rt, rt2, src, LoadPairOpFor(rt, rt2));
}

1199
void Assembler::stp(const CPURegister& rt, const CPURegister& rt2,
1200 1201
                    const MemOperand& dst) {
  LoadStorePair(rt, rt2, dst, StorePairOpFor(rt, rt2));
1202 1203 1204 1205 1206 1207

#if defined(V8_OS_WIN)
  if (xdata_encoder_ && rt == x29 && rt2 == lr && dst.base().IsSP()) {
    xdata_encoder_->onSaveFpLr();
  }
#endif
1208 1209
}

1210
void Assembler::ldpsw(const Register& rt, const Register& rt2,
1211
                      const MemOperand& src) {
1212
  DCHECK(rt.Is64Bits());
1213 1214 1215
  LoadStorePair(rt, rt2, src, LDPSW_x);
}

1216 1217
void Assembler::LoadStorePair(const CPURegister& rt, const CPURegister& rt2,
                              const MemOperand& addr, LoadStorePairOp op) {
1218
  // 'rt' and 'rt2' can only be aliased for stores.
1219
  DCHECK(((op & LoadStorePairLBit) == 0) || rt != rt2);
1220
  DCHECK(AreSameSizeAndType(rt, rt2));
1221 1222
  DCHECK(IsImmLSPair(addr.offset(), CalcLSPairDataSize(op)));
  int offset = static_cast<int>(addr.offset());
1223 1224

  Instr memop = op | Rt(rt) | Rt2(rt2) | RnSP(addr.base()) |
1225
                ImmLSPair(offset, CalcLSPairDataSize(op));
1226 1227 1228 1229 1230 1231

  Instr addrmodeop;
  if (addr.IsImmediateOffset()) {
    addrmodeop = LoadStorePairOffsetFixed;
  } else {
    // Pre-index and post-index modes.
1232 1233
    DCHECK_NE(rt, addr.base());
    DCHECK_NE(rt2, addr.base());
1234
    DCHECK_NE(addr.offset(), 0);
1235 1236 1237
    if (addr.IsPreIndex()) {
      addrmodeop = LoadStorePairPreIndexFixed;
    } else {
1238
      DCHECK(addr.IsPostIndex());
1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278
      addrmodeop = LoadStorePairPostIndexFixed;
    }
  }
  Emit(addrmodeop | memop);
}

// Memory instructions.
void Assembler::ldrb(const Register& rt, const MemOperand& src) {
  LoadStore(rt, src, LDRB_w);
}

void Assembler::strb(const Register& rt, const MemOperand& dst) {
  LoadStore(rt, dst, STRB_w);
}

void Assembler::ldrsb(const Register& rt, const MemOperand& src) {
  LoadStore(rt, src, rt.Is64Bits() ? LDRSB_x : LDRSB_w);
}

void Assembler::ldrh(const Register& rt, const MemOperand& src) {
  LoadStore(rt, src, LDRH_w);
}

void Assembler::strh(const Register& rt, const MemOperand& dst) {
  LoadStore(rt, dst, STRH_w);
}

void Assembler::ldrsh(const Register& rt, const MemOperand& src) {
  LoadStore(rt, src, rt.Is64Bits() ? LDRSH_x : LDRSH_w);
}

void Assembler::ldr(const CPURegister& rt, const MemOperand& src) {
  LoadStore(rt, src, LoadOpFor(rt));
}

void Assembler::str(const CPURegister& rt, const MemOperand& src) {
  LoadStore(rt, src, StoreOpFor(rt));
}

void Assembler::ldrsw(const Register& rt, const MemOperand& src) {
1279
  DCHECK(rt.Is64Bits());
1280 1281 1282
  LoadStore(rt, src, LDRSW_x);
}

1283 1284 1285
void Assembler::ldr_pcrel(const CPURegister& rt, int imm19) {
  // The pattern 'ldr xzr, #offset' is used to indicate the beginning of a
  // constant pool. It should not be emitted.
1286
  DCHECK(!rt.IsZero());
1287
  Emit(LoadLiteralOpFor(rt) | ImmLLiteral(imm19) | Rt(rt));
1288 1289
}

1290
Operand Operand::EmbeddedNumber(double number) {
1291
  int32_t smi;
1292
  if (DoubleToSmiInteger(number, &smi)) {
1293 1294
    return Operand(Immediate(Smi::FromInt(smi)));
  }
1295
  Operand result(0, RelocInfo::FULL_EMBEDDED_OBJECT);
1296 1297 1298 1299 1300
  result.heap_object_request_.emplace(number);
  DCHECK(result.IsHeapObjectRequest());
  return result;
}

1301
Operand Operand::EmbeddedStringConstant(const StringConstantBase* str) {
1302
  Operand result(0, RelocInfo::FULL_EMBEDDED_OBJECT);
1303 1304 1305 1306 1307
  result.heap_object_request_.emplace(str);
  DCHECK(result.IsHeapObjectRequest());
  return result;
}

1308
void Assembler::ldr(const CPURegister& rt, const Operand& operand) {
1309
  if (operand.IsHeapObjectRequest()) {
1310
    BlockPoolsScope no_pool_before_ldr_of_heap_object_request(this);
1311 1312
    RequestHeapObject(operand.heap_object_request());
    ldr(rt, operand.immediate_for_heap_object_request());
1313 1314 1315 1316
  } else {
    ldr(rt, operand.immediate());
  }
}
1317

1318
void Assembler::ldr(const CPURegister& rt, const Immediate& imm) {
1319
  BlockPoolsScope no_pool_before_ldr_pcrel_instr(this);
1320 1321 1322 1323
  RecordRelocInfo(imm.rmode(), imm.value());
  // The load will be patched when the constpool is emitted, patching code
  // expect a load literal with offset 0.
  ldr_pcrel(rt, 0);
1324 1325
}

1326 1327 1328
void Assembler::ldar(const Register& rt, const Register& rn) {
  DCHECK(rn.Is64Bits());
  LoadStoreAcquireReleaseOp op = rt.Is32Bits() ? LDAR_w : LDAR_x;
1329
  Emit(op | Rs(x31) | Rt2(x31) | RnSP(rn) | Rt(rt));
1330 1331 1332 1333 1334
}

void Assembler::ldaxr(const Register& rt, const Register& rn) {
  DCHECK(rn.Is64Bits());
  LoadStoreAcquireReleaseOp op = rt.Is32Bits() ? LDAXR_w : LDAXR_x;
1335
  Emit(op | Rs(x31) | Rt2(x31) | RnSP(rn) | Rt(rt));
1336 1337 1338 1339 1340
}

void Assembler::stlr(const Register& rt, const Register& rn) {
  DCHECK(rn.Is64Bits());
  LoadStoreAcquireReleaseOp op = rt.Is32Bits() ? STLR_w : STLR_x;
1341
  Emit(op | Rs(x31) | Rt2(x31) | RnSP(rn) | Rt(rt));
1342 1343 1344 1345 1346
}

void Assembler::stlxr(const Register& rs, const Register& rt,
                      const Register& rn) {
  DCHECK(rn.Is64Bits());
1347
  DCHECK(rs != rt && rs != rn);
1348
  LoadStoreAcquireReleaseOp op = rt.Is32Bits() ? STLXR_w : STLXR_x;
1349
  Emit(op | Rs(rs) | Rt2(x31) | RnSP(rn) | Rt(rt));
1350 1351 1352 1353 1354
}

void Assembler::ldarb(const Register& rt, const Register& rn) {
  DCHECK(rt.Is32Bits());
  DCHECK(rn.Is64Bits());
1355
  Emit(LDAR_b | Rs(x31) | Rt2(x31) | RnSP(rn) | Rt(rt));
1356 1357 1358 1359 1360
}

void Assembler::ldaxrb(const Register& rt, const Register& rn) {
  DCHECK(rt.Is32Bits());
  DCHECK(rn.Is64Bits());
1361
  Emit(LDAXR_b | Rs(x31) | Rt2(x31) | RnSP(rn) | Rt(rt));
1362 1363 1364 1365 1366
}

void Assembler::stlrb(const Register& rt, const Register& rn) {
  DCHECK(rt.Is32Bits());
  DCHECK(rn.Is64Bits());
1367
  Emit(STLR_b | Rs(x31) | Rt2(x31) | RnSP(rn) | Rt(rt));
1368 1369 1370 1371 1372 1373 1374
}

void Assembler::stlxrb(const Register& rs, const Register& rt,
                       const Register& rn) {
  DCHECK(rs.Is32Bits());
  DCHECK(rt.Is32Bits());
  DCHECK(rn.Is64Bits());
1375
  DCHECK(rs != rt && rs != rn);
1376
  Emit(STLXR_b | Rs(rs) | Rt2(x31) | RnSP(rn) | Rt(rt));
1377 1378 1379 1380 1381
}

void Assembler::ldarh(const Register& rt, const Register& rn) {
  DCHECK(rt.Is32Bits());
  DCHECK(rn.Is64Bits());
1382
  Emit(LDAR_h | Rs(x31) | Rt2(x31) | RnSP(rn) | Rt(rt));
1383 1384 1385 1386 1387
}

void Assembler::ldaxrh(const Register& rt, const Register& rn) {
  DCHECK(rt.Is32Bits());
  DCHECK(rn.Is64Bits());
1388
  Emit(LDAXR_h | Rs(x31) | Rt2(x31) | RnSP(rn) | Rt(rt));
1389 1390 1391 1392 1393
}

void Assembler::stlrh(const Register& rt, const Register& rn) {
  DCHECK(rt.Is32Bits());
  DCHECK(rn.Is64Bits());
1394
  Emit(STLR_h | Rs(x31) | Rt2(x31) | RnSP(rn) | Rt(rt));
1395 1396 1397 1398 1399 1400 1401
}

void Assembler::stlxrh(const Register& rs, const Register& rt,
                       const Register& rn) {
  DCHECK(rs.Is32Bits());
  DCHECK(rt.Is32Bits());
  DCHECK(rn.Is64Bits());
1402
  DCHECK(rs != rt && rs != rn);
1403
  Emit(STLXR_h | Rs(rs) | Rt2(x31) | RnSP(rn) | Rt(rt));
1404
}
1405

1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416
void Assembler::NEON3DifferentL(const VRegister& vd, const VRegister& vn,
                                const VRegister& vm, NEON3DifferentOp vop) {
  DCHECK(AreSameFormat(vn, vm));
  DCHECK((vn.Is1H() && vd.Is1S()) || (vn.Is1S() && vd.Is1D()) ||
         (vn.Is8B() && vd.Is8H()) || (vn.Is4H() && vd.Is4S()) ||
         (vn.Is2S() && vd.Is2D()) || (vn.Is16B() && vd.Is8H()) ||
         (vn.Is8H() && vd.Is4S()) || (vn.Is4S() && vd.Is2D()));
  Instr format, op = vop;
  if (vd.IsScalar()) {
    op |= NEON_Q | NEONScalar;
    format = SFormat(vn);
1417
  } else {
1418
    format = VFormat(vn);
1419
  }
1420 1421 1422 1423 1424 1425 1426 1427 1428 1429 1430 1431 1432 1433 1434 1435 1436 1437 1438 1439 1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462 1463 1464 1465 1466 1467 1468 1469 1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511
  Emit(format | op | Rm(vm) | Rn(vn) | Rd(vd));
}

void Assembler::NEON3DifferentW(const VRegister& vd, const VRegister& vn,
                                const VRegister& vm, NEON3DifferentOp vop) {
  DCHECK(AreSameFormat(vd, vn));
  DCHECK((vm.Is8B() && vd.Is8H()) || (vm.Is4H() && vd.Is4S()) ||
         (vm.Is2S() && vd.Is2D()) || (vm.Is16B() && vd.Is8H()) ||
         (vm.Is8H() && vd.Is4S()) || (vm.Is4S() && vd.Is2D()));
  Emit(VFormat(vm) | vop | Rm(vm) | Rn(vn) | Rd(vd));
}

void Assembler::NEON3DifferentHN(const VRegister& vd, const VRegister& vn,
                                 const VRegister& vm, NEON3DifferentOp vop) {
  DCHECK(AreSameFormat(vm, vn));
  DCHECK((vd.Is8B() && vn.Is8H()) || (vd.Is4H() && vn.Is4S()) ||
         (vd.Is2S() && vn.Is2D()) || (vd.Is16B() && vn.Is8H()) ||
         (vd.Is8H() && vn.Is4S()) || (vd.Is4S() && vn.Is2D()));
  Emit(VFormat(vd) | vop | Rm(vm) | Rn(vn) | Rd(vd));
}

#define NEON_3DIFF_LONG_LIST(V)                                                \
  V(pmull, NEON_PMULL, vn.IsVector() && vn.Is8B())                             \
  V(pmull2, NEON_PMULL2, vn.IsVector() && vn.Is16B())                          \
  V(saddl, NEON_SADDL, vn.IsVector() && vn.IsD())                              \
  V(saddl2, NEON_SADDL2, vn.IsVector() && vn.IsQ())                            \
  V(sabal, NEON_SABAL, vn.IsVector() && vn.IsD())                              \
  V(sabal2, NEON_SABAL2, vn.IsVector() && vn.IsQ())                            \
  V(uabal, NEON_UABAL, vn.IsVector() && vn.IsD())                              \
  V(uabal2, NEON_UABAL2, vn.IsVector() && vn.IsQ())                            \
  V(sabdl, NEON_SABDL, vn.IsVector() && vn.IsD())                              \
  V(sabdl2, NEON_SABDL2, vn.IsVector() && vn.IsQ())                            \
  V(uabdl, NEON_UABDL, vn.IsVector() && vn.IsD())                              \
  V(uabdl2, NEON_UABDL2, vn.IsVector() && vn.IsQ())                            \
  V(smlal, NEON_SMLAL, vn.IsVector() && vn.IsD())                              \
  V(smlal2, NEON_SMLAL2, vn.IsVector() && vn.IsQ())                            \
  V(umlal, NEON_UMLAL, vn.IsVector() && vn.IsD())                              \
  V(umlal2, NEON_UMLAL2, vn.IsVector() && vn.IsQ())                            \
  V(smlsl, NEON_SMLSL, vn.IsVector() && vn.IsD())                              \
  V(smlsl2, NEON_SMLSL2, vn.IsVector() && vn.IsQ())                            \
  V(umlsl, NEON_UMLSL, vn.IsVector() && vn.IsD())                              \
  V(umlsl2, NEON_UMLSL2, vn.IsVector() && vn.IsQ())                            \
  V(smull, NEON_SMULL, vn.IsVector() && vn.IsD())                              \
  V(smull2, NEON_SMULL2, vn.IsVector() && vn.IsQ())                            \
  V(umull, NEON_UMULL, vn.IsVector() && vn.IsD())                              \
  V(umull2, NEON_UMULL2, vn.IsVector() && vn.IsQ())                            \
  V(ssubl, NEON_SSUBL, vn.IsVector() && vn.IsD())                              \
  V(ssubl2, NEON_SSUBL2, vn.IsVector() && vn.IsQ())                            \
  V(uaddl, NEON_UADDL, vn.IsVector() && vn.IsD())                              \
  V(uaddl2, NEON_UADDL2, vn.IsVector() && vn.IsQ())                            \
  V(usubl, NEON_USUBL, vn.IsVector() && vn.IsD())                              \
  V(usubl2, NEON_USUBL2, vn.IsVector() && vn.IsQ())                            \
  V(sqdmlal, NEON_SQDMLAL, vn.Is1H() || vn.Is1S() || vn.Is4H() || vn.Is2S())   \
  V(sqdmlal2, NEON_SQDMLAL2, vn.Is1H() || vn.Is1S() || vn.Is8H() || vn.Is4S()) \
  V(sqdmlsl, NEON_SQDMLSL, vn.Is1H() || vn.Is1S() || vn.Is4H() || vn.Is2S())   \
  V(sqdmlsl2, NEON_SQDMLSL2, vn.Is1H() || vn.Is1S() || vn.Is8H() || vn.Is4S()) \
  V(sqdmull, NEON_SQDMULL, vn.Is1H() || vn.Is1S() || vn.Is4H() || vn.Is2S())   \
  V(sqdmull2, NEON_SQDMULL2, vn.Is1H() || vn.Is1S() || vn.Is8H() || vn.Is4S())

#define DEFINE_ASM_FUNC(FN, OP, AS)                            \
  void Assembler::FN(const VRegister& vd, const VRegister& vn, \
                     const VRegister& vm) {                    \
    DCHECK(AS);                                                \
    NEON3DifferentL(vd, vn, vm, OP);                           \
  }
NEON_3DIFF_LONG_LIST(DEFINE_ASM_FUNC)
#undef DEFINE_ASM_FUNC

#define NEON_3DIFF_HN_LIST(V)        \
  V(addhn, NEON_ADDHN, vd.IsD())     \
  V(addhn2, NEON_ADDHN2, vd.IsQ())   \
  V(raddhn, NEON_RADDHN, vd.IsD())   \
  V(raddhn2, NEON_RADDHN2, vd.IsQ()) \
  V(subhn, NEON_SUBHN, vd.IsD())     \
  V(subhn2, NEON_SUBHN2, vd.IsQ())   \
  V(rsubhn, NEON_RSUBHN, vd.IsD())   \
  V(rsubhn2, NEON_RSUBHN2, vd.IsQ())

#define DEFINE_ASM_FUNC(FN, OP, AS)                            \
  void Assembler::FN(const VRegister& vd, const VRegister& vn, \
                     const VRegister& vm) {                    \
    DCHECK(AS);                                                \
    NEON3DifferentHN(vd, vn, vm, OP);                          \
  }
NEON_3DIFF_HN_LIST(DEFINE_ASM_FUNC)
#undef DEFINE_ASM_FUNC

void Assembler::NEONPerm(const VRegister& vd, const VRegister& vn,
                         const VRegister& vm, NEONPermOp op) {
  DCHECK(AreSameFormat(vd, vn, vm));
  DCHECK(!vd.Is1D());
  Emit(VFormat(vd) | op | Rm(vm) | Rn(vn) | Rd(vd));
1512 1513
}

1514 1515 1516 1517
void Assembler::trn1(const VRegister& vd, const VRegister& vn,
                     const VRegister& vm) {
  NEONPerm(vd, vn, vm, NEON_TRN1);
}
1518

1519 1520 1521
void Assembler::trn2(const VRegister& vd, const VRegister& vn,
                     const VRegister& vm) {
  NEONPerm(vd, vn, vm, NEON_TRN2);
1522 1523
}

1524 1525 1526 1527
void Assembler::uzp1(const VRegister& vd, const VRegister& vn,
                     const VRegister& vm) {
  NEONPerm(vd, vn, vm, NEON_UZP1);
}
1528

1529 1530 1531
void Assembler::uzp2(const VRegister& vd, const VRegister& vn,
                     const VRegister& vm) {
  NEONPerm(vd, vn, vm, NEON_UZP2);
1532 1533
}

1534 1535 1536 1537
void Assembler::zip1(const VRegister& vd, const VRegister& vn,
                     const VRegister& vm) {
  NEONPerm(vd, vn, vm, NEON_ZIP1);
}
1538

1539 1540 1541
void Assembler::zip2(const VRegister& vd, const VRegister& vn,
                     const VRegister& vm) {
  NEONPerm(vd, vn, vm, NEON_ZIP2);
1542 1543
}

1544 1545 1546 1547 1548 1549 1550 1551 1552 1553 1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568 1569 1570 1571 1572 1573 1574 1575 1576 1577 1578 1579 1580 1581 1582 1583 1584 1585 1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607
void Assembler::NEONShiftImmediate(const VRegister& vd, const VRegister& vn,
                                   NEONShiftImmediateOp op, int immh_immb) {
  DCHECK(AreSameFormat(vd, vn));
  Instr q, scalar;
  if (vn.IsScalar()) {
    q = NEON_Q;
    scalar = NEONScalar;
  } else {
    q = vd.IsD() ? 0 : NEON_Q;
    scalar = 0;
  }
  Emit(q | op | scalar | immh_immb | Rn(vn) | Rd(vd));
}

void Assembler::NEONShiftLeftImmediate(const VRegister& vd, const VRegister& vn,
                                       int shift, NEONShiftImmediateOp op) {
  int laneSizeInBits = vn.LaneSizeInBits();
  DCHECK((shift >= 0) && (shift < laneSizeInBits));
  NEONShiftImmediate(vd, vn, op, (laneSizeInBits + shift) << 16);
}

void Assembler::NEONShiftRightImmediate(const VRegister& vd,
                                        const VRegister& vn, int shift,
                                        NEONShiftImmediateOp op) {
  int laneSizeInBits = vn.LaneSizeInBits();
  DCHECK((shift >= 1) && (shift <= laneSizeInBits));
  NEONShiftImmediate(vd, vn, op, ((2 * laneSizeInBits) - shift) << 16);
}

void Assembler::NEONShiftImmediateL(const VRegister& vd, const VRegister& vn,
                                    int shift, NEONShiftImmediateOp op) {
  int laneSizeInBits = vn.LaneSizeInBits();
  DCHECK((shift >= 0) && (shift < laneSizeInBits));
  int immh_immb = (laneSizeInBits + shift) << 16;

  DCHECK((vn.Is8B() && vd.Is8H()) || (vn.Is4H() && vd.Is4S()) ||
         (vn.Is2S() && vd.Is2D()) || (vn.Is16B() && vd.Is8H()) ||
         (vn.Is8H() && vd.Is4S()) || (vn.Is4S() && vd.Is2D()));
  Instr q;
  q = vn.IsD() ? 0 : NEON_Q;
  Emit(q | op | immh_immb | Rn(vn) | Rd(vd));
}

void Assembler::NEONShiftImmediateN(const VRegister& vd, const VRegister& vn,
                                    int shift, NEONShiftImmediateOp op) {
  Instr q, scalar;
  int laneSizeInBits = vd.LaneSizeInBits();
  DCHECK((shift >= 1) && (shift <= laneSizeInBits));
  int immh_immb = (2 * laneSizeInBits - shift) << 16;

  if (vn.IsScalar()) {
    DCHECK((vd.Is1B() && vn.Is1H()) || (vd.Is1H() && vn.Is1S()) ||
           (vd.Is1S() && vn.Is1D()));
    q = NEON_Q;
    scalar = NEONScalar;
  } else {
    DCHECK((vd.Is8B() && vn.Is8H()) || (vd.Is4H() && vn.Is4S()) ||
           (vd.Is2S() && vn.Is2D()) || (vd.Is16B() && vn.Is8H()) ||
           (vd.Is8H() && vn.Is4S()) || (vd.Is4S() && vn.Is2D()));
    scalar = 0;
    q = vd.IsD() ? 0 : NEON_Q;
  }
  Emit(q | op | scalar | immh_immb | Rn(vn) | Rd(vd));
}
1608

1609 1610 1611
void Assembler::shl(const VRegister& vd, const VRegister& vn, int shift) {
  DCHECK(vd.IsVector() || vd.Is1D());
  NEONShiftLeftImmediate(vd, vn, shift, NEON_SHL);
1612 1613
}

1614 1615 1616 1617
void Assembler::sli(const VRegister& vd, const VRegister& vn, int shift) {
  DCHECK(vd.IsVector() || vd.Is1D());
  NEONShiftLeftImmediate(vd, vn, shift, NEON_SLI);
}
1618

1619 1620
void Assembler::sqshl(const VRegister& vd, const VRegister& vn, int shift) {
  NEONShiftLeftImmediate(vd, vn, shift, NEON_SQSHL_imm);
1621 1622
}

1623 1624 1625
void Assembler::sqshlu(const VRegister& vd, const VRegister& vn, int shift) {
  NEONShiftLeftImmediate(vd, vn, shift, NEON_SQSHLU);
}
1626

1627 1628
void Assembler::uqshl(const VRegister& vd, const VRegister& vn, int shift) {
  NEONShiftLeftImmediate(vd, vn, shift, NEON_UQSHL_imm);
1629 1630
}

1631 1632 1633 1634
void Assembler::sshll(const VRegister& vd, const VRegister& vn, int shift) {
  DCHECK(vn.IsD());
  NEONShiftImmediateL(vd, vn, shift, NEON_SSHLL);
}
1635

1636 1637 1638
void Assembler::sshll2(const VRegister& vd, const VRegister& vn, int shift) {
  DCHECK(vn.IsQ());
  NEONShiftImmediateL(vd, vn, shift, NEON_SSHLL);
1639 1640
}

1641 1642 1643
void Assembler::sxtl(const VRegister& vd, const VRegister& vn) {
  sshll(vd, vn, 0);
}
1644

1645 1646
void Assembler::sxtl2(const VRegister& vd, const VRegister& vn) {
  sshll2(vd, vn, 0);
1647 1648
}

1649 1650 1651 1652
void Assembler::ushll(const VRegister& vd, const VRegister& vn, int shift) {
  DCHECK(vn.IsD());
  NEONShiftImmediateL(vd, vn, shift, NEON_USHLL);
}
1653

1654 1655 1656
void Assembler::ushll2(const VRegister& vd, const VRegister& vn, int shift) {
  DCHECK(vn.IsQ());
  NEONShiftImmediateL(vd, vn, shift, NEON_USHLL);
1657 1658
}

1659 1660 1661
void Assembler::uxtl(const VRegister& vd, const VRegister& vn) {
  ushll(vd, vn, 0);
}
1662

1663 1664
void Assembler::uxtl2(const VRegister& vd, const VRegister& vn) {
  ushll2(vd, vn, 0);
1665 1666
}

1667 1668 1669 1670
void Assembler::sri(const VRegister& vd, const VRegister& vn, int shift) {
  DCHECK(vd.IsVector() || vd.Is1D());
  NEONShiftRightImmediate(vd, vn, shift, NEON_SRI);
}
1671

1672 1673 1674
void Assembler::sshr(const VRegister& vd, const VRegister& vn, int shift) {
  DCHECK(vd.IsVector() || vd.Is1D());
  NEONShiftRightImmediate(vd, vn, shift, NEON_SSHR);
1675 1676
}

1677 1678 1679 1680
void Assembler::ushr(const VRegister& vd, const VRegister& vn, int shift) {
  DCHECK(vd.IsVector() || vd.Is1D());
  NEONShiftRightImmediate(vd, vn, shift, NEON_USHR);
}
1681

1682 1683 1684
void Assembler::srshr(const VRegister& vd, const VRegister& vn, int shift) {
  DCHECK(vd.IsVector() || vd.Is1D());
  NEONShiftRightImmediate(vd, vn, shift, NEON_SRSHR);
1685 1686
}

1687 1688 1689 1690
void Assembler::urshr(const VRegister& vd, const VRegister& vn, int shift) {
  DCHECK(vd.IsVector() || vd.Is1D());
  NEONShiftRightImmediate(vd, vn, shift, NEON_URSHR);
}
1691

1692 1693 1694
void Assembler::ssra(const VRegister& vd, const VRegister& vn, int shift) {
  DCHECK(vd.IsVector() || vd.Is1D());
  NEONShiftRightImmediate(vd, vn, shift, NEON_SSRA);
1695 1696
}

1697 1698 1699 1700
void Assembler::usra(const VRegister& vd, const VRegister& vn, int shift) {
  DCHECK(vd.IsVector() || vd.Is1D());
  NEONShiftRightImmediate(vd, vn, shift, NEON_USRA);
}
1701

1702 1703 1704
void Assembler::srsra(const VRegister& vd, const VRegister& vn, int shift) {
  DCHECK(vd.IsVector() || vd.Is1D());
  NEONShiftRightImmediate(vd, vn, shift, NEON_SRSRA);
1705 1706
}

1707 1708 1709 1710
void Assembler::ursra(const VRegister& vd, const VRegister& vn, int shift) {
  DCHECK(vd.IsVector() || vd.Is1D());
  NEONShiftRightImmediate(vd, vn, shift, NEON_URSRA);
}
1711

1712 1713 1714
void Assembler::shrn(const VRegister& vd, const VRegister& vn, int shift) {
  DCHECK(vn.IsVector() && vd.IsD());
  NEONShiftImmediateN(vd, vn, shift, NEON_SHRN);
1715 1716
}

1717 1718 1719 1720
void Assembler::shrn2(const VRegister& vd, const VRegister& vn, int shift) {
  DCHECK(vn.IsVector() && vd.IsQ());
  NEONShiftImmediateN(vd, vn, shift, NEON_SHRN);
}
1721

1722 1723 1724
void Assembler::rshrn(const VRegister& vd, const VRegister& vn, int shift) {
  DCHECK(vn.IsVector() && vd.IsD());
  NEONShiftImmediateN(vd, vn, shift, NEON_RSHRN);
1725 1726
}

1727 1728 1729 1730
void Assembler::rshrn2(const VRegister& vd, const VRegister& vn, int shift) {
  DCHECK(vn.IsVector() && vd.IsQ());
  NEONShiftImmediateN(vd, vn, shift, NEON_RSHRN);
}
1731

1732 1733 1734
void Assembler::sqshrn(const VRegister& vd, const VRegister& vn, int shift) {
  DCHECK(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
  NEONShiftImmediateN(vd, vn, shift, NEON_SQSHRN);
1735 1736
}

1737 1738 1739 1740
void Assembler::sqshrn2(const VRegister& vd, const VRegister& vn, int shift) {
  DCHECK(vn.IsVector() && vd.IsQ());
  NEONShiftImmediateN(vd, vn, shift, NEON_SQSHRN);
}
1741

1742 1743 1744
void Assembler::sqrshrn(const VRegister& vd, const VRegister& vn, int shift) {
  DCHECK(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
  NEONShiftImmediateN(vd, vn, shift, NEON_SQRSHRN);
1745 1746
}

1747 1748 1749 1750
void Assembler::sqrshrn2(const VRegister& vd, const VRegister& vn, int shift) {
  DCHECK(vn.IsVector() && vd.IsQ());
  NEONShiftImmediateN(vd, vn, shift, NEON_SQRSHRN);
}
1751

1752 1753 1754
void Assembler::sqshrun(const VRegister& vd, const VRegister& vn, int shift) {
  DCHECK(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
  NEONShiftImmediateN(vd, vn, shift, NEON_SQSHRUN);
1755 1756
}

1757 1758 1759 1760
void Assembler::sqshrun2(const VRegister& vd, const VRegister& vn, int shift) {
  DCHECK(vn.IsVector() && vd.IsQ());
  NEONShiftImmediateN(vd, vn, shift, NEON_SQSHRUN);
}
1761

1762 1763 1764
void Assembler::sqrshrun(const VRegister& vd, const VRegister& vn, int shift) {
  DCHECK(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
  NEONShiftImmediateN(vd, vn, shift, NEON_SQRSHRUN);
1765 1766
}

1767 1768 1769 1770
void Assembler::sqrshrun2(const VRegister& vd, const VRegister& vn, int shift) {
  DCHECK(vn.IsVector() && vd.IsQ());
  NEONShiftImmediateN(vd, vn, shift, NEON_SQRSHRUN);
}
1771

1772 1773 1774
void Assembler::uqshrn(const VRegister& vd, const VRegister& vn, int shift) {
  DCHECK(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
  NEONShiftImmediateN(vd, vn, shift, NEON_UQSHRN);
1775 1776
}

1777 1778 1779 1780
void Assembler::uqshrn2(const VRegister& vd, const VRegister& vn, int shift) {
  DCHECK(vn.IsVector() && vd.IsQ());
  NEONShiftImmediateN(vd, vn, shift, NEON_UQSHRN);
}
1781

1782 1783 1784
void Assembler::uqrshrn(const VRegister& vd, const VRegister& vn, int shift) {
  DCHECK(vd.IsD() || (vn.IsScalar() && vd.IsScalar()));
  NEONShiftImmediateN(vd, vn, shift, NEON_UQRSHRN);
1785 1786
}

1787 1788 1789 1790
void Assembler::uqrshrn2(const VRegister& vd, const VRegister& vn, int shift) {
  DCHECK(vn.IsVector() && vd.IsQ());
  NEONShiftImmediateN(vd, vn, shift, NEON_UQRSHRN);
}
1791

1792 1793 1794 1795
void Assembler::uaddw(const VRegister& vd, const VRegister& vn,
                      const VRegister& vm) {
  DCHECK(vm.IsD());
  NEON3DifferentW(vd, vn, vm, NEON_UADDW);
1796 1797
}

1798 1799 1800 1801 1802
void Assembler::uaddw2(const VRegister& vd, const VRegister& vn,
                       const VRegister& vm) {
  DCHECK(vm.IsQ());
  NEON3DifferentW(vd, vn, vm, NEON_UADDW2);
}
1803

1804 1805 1806 1807
void Assembler::saddw(const VRegister& vd, const VRegister& vn,
                      const VRegister& vm) {
  DCHECK(vm.IsD());
  NEON3DifferentW(vd, vn, vm, NEON_SADDW);
1808 1809
}

1810 1811 1812 1813 1814
void Assembler::saddw2(const VRegister& vd, const VRegister& vn,
                       const VRegister& vm) {
  DCHECK(vm.IsQ());
  NEON3DifferentW(vd, vn, vm, NEON_SADDW2);
}
1815

1816 1817 1818 1819
void Assembler::usubw(const VRegister& vd, const VRegister& vn,
                      const VRegister& vm) {
  DCHECK(vm.IsD());
  NEON3DifferentW(vd, vn, vm, NEON_USUBW);
1820 1821
}

1822 1823 1824 1825 1826
void Assembler::usubw2(const VRegister& vd, const VRegister& vn,
                       const VRegister& vm) {
  DCHECK(vm.IsQ());
  NEON3DifferentW(vd, vn, vm, NEON_USUBW2);
}
1827

1828 1829 1830 1831
void Assembler::ssubw(const VRegister& vd, const VRegister& vn,
                      const VRegister& vm) {
  DCHECK(vm.IsD());
  NEON3DifferentW(vd, vn, vm, NEON_SSUBW);
1832 1833
}

1834 1835 1836 1837 1838
void Assembler::ssubw2(const VRegister& vd, const VRegister& vn,
                       const VRegister& vm) {
  DCHECK(vm.IsQ());
  NEON3DifferentW(vd, vn, vm, NEON_SSUBW2);
}
1839

1840 1841 1842 1843 1844 1845 1846 1847 1848
void Assembler::mov(const Register& rd, const Register& rm) {
  // Moves involving the stack pointer are encoded as add immediate with
  // second operand of zero. Otherwise, orr with first operand zr is
  // used.
  if (rd.IsSP() || rm.IsSP()) {
    add(rd, rm, 0);
  } else {
    orr(rd, AppropriateZeroRegFor(rd), rm);
  }
1849 1850
}

1851 1852 1853 1854 1855 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1867 1868 1869 1870 1871 1872 1873 1874
void Assembler::ins(const VRegister& vd, int vd_index, const Register& rn) {
  // We support vd arguments of the form vd.VxT() or vd.T(), where x is the
  // number of lanes, and T is b, h, s or d.
  int lane_size = vd.LaneSizeInBytes();
  NEONFormatField format;
  switch (lane_size) {
    case 1:
      format = NEON_16B;
      DCHECK(rn.IsW());
      break;
    case 2:
      format = NEON_8H;
      DCHECK(rn.IsW());
      break;
    case 4:
      format = NEON_4S;
      DCHECK(rn.IsW());
      break;
    default:
      DCHECK_EQ(lane_size, 8);
      DCHECK(rn.IsX());
      format = NEON_2D;
      break;
  }
1875

1876 1877 1878
  DCHECK((0 <= vd_index) &&
         (vd_index < LaneCountFromFormat(static_cast<VectorFormat>(format))));
  Emit(NEON_INS_GENERAL | ImmNEON5(format, vd_index) | Rn(rn) | Rd(vd));
1879 1880
}

1881 1882 1883 1884
void Assembler::mov(const Register& rd, const VRegister& vn, int vn_index) {
  DCHECK_GE(vn.SizeInBytes(), 4);
  umov(rd, vn, vn_index);
}
1885

1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908
void Assembler::smov(const Register& rd, const VRegister& vn, int vn_index) {
  // We support vn arguments of the form vn.VxT() or vn.T(), where x is the
  // number of lanes, and T is b, h, s.
  int lane_size = vn.LaneSizeInBytes();
  NEONFormatField format;
  Instr q = 0;
  switch (lane_size) {
    case 1:
      format = NEON_16B;
      break;
    case 2:
      format = NEON_8H;
      break;
    default:
      DCHECK_EQ(lane_size, 4);
      DCHECK(rd.IsX());
      format = NEON_4S;
      break;
  }
  q = rd.IsW() ? 0 : NEON_Q;
  DCHECK((0 <= vn_index) &&
         (vn_index < LaneCountFromFormat(static_cast<VectorFormat>(format))));
  Emit(q | NEON_SMOV | ImmNEON5(format, vn_index) | Rn(vn) | Rd(rd));
1909 1910
}

1911 1912 1913 1914 1915
void Assembler::cls(const VRegister& vd, const VRegister& vn) {
  DCHECK(AreSameFormat(vd, vn));
  DCHECK(!vd.Is1D() && !vd.Is2D());
  Emit(VFormat(vn) | NEON_CLS | Rn(vn) | Rd(vd));
}
1916

1917 1918 1919 1920
void Assembler::clz(const VRegister& vd, const VRegister& vn) {
  DCHECK(AreSameFormat(vd, vn));
  DCHECK(!vd.Is1D() && !vd.Is2D());
  Emit(VFormat(vn) | NEON_CLZ | Rn(vn) | Rd(vd));
1921 1922
}

1923 1924 1925 1926 1927
void Assembler::cnt(const VRegister& vd, const VRegister& vn) {
  DCHECK(AreSameFormat(vd, vn));
  DCHECK(vd.Is8B() || vd.Is16B());
  Emit(VFormat(vn) | NEON_CNT | Rn(vn) | Rd(vd));
}
1928

1929 1930 1931 1932
void Assembler::rev16(const VRegister& vd, const VRegister& vn) {
  DCHECK(AreSameFormat(vd, vn));
  DCHECK(vd.Is8B() || vd.Is16B());
  Emit(VFormat(vn) | NEON_REV16 | Rn(vn) | Rd(vd));
1933 1934
}

1935 1936 1937 1938 1939
void Assembler::rev32(const VRegister& vd, const VRegister& vn) {
  DCHECK(AreSameFormat(vd, vn));
  DCHECK(vd.Is8B() || vd.Is16B() || vd.Is4H() || vd.Is8H());
  Emit(VFormat(vn) | NEON_REV32 | Rn(vn) | Rd(vd));
}
1940

1941 1942 1943 1944
void Assembler::rev64(const VRegister& vd, const VRegister& vn) {
  DCHECK(AreSameFormat(vd, vn));
  DCHECK(!vd.Is1D() && !vd.Is2D());
  Emit(VFormat(vn) | NEON_REV64 | Rn(vn) | Rd(vd));
1945 1946
}

1947 1948 1949 1950 1951
void Assembler::ursqrte(const VRegister& vd, const VRegister& vn) {
  DCHECK(AreSameFormat(vd, vn));
  DCHECK(vd.Is2S() || vd.Is4S());
  Emit(VFormat(vn) | NEON_URSQRTE | Rn(vn) | Rd(vd));
}
1952

1953 1954 1955 1956
void Assembler::urecpe(const VRegister& vd, const VRegister& vn) {
  DCHECK(AreSameFormat(vd, vn));
  DCHECK(vd.Is2S() || vd.Is4S());
  Emit(VFormat(vn) | NEON_URECPE | Rn(vn) | Rd(vd));
1957 1958
}

1959 1960 1961 1962
void Assembler::NEONAddlp(const VRegister& vd, const VRegister& vn,
                          NEON2RegMiscOp op) {
  DCHECK((op == NEON_SADDLP) || (op == NEON_UADDLP) || (op == NEON_SADALP) ||
         (op == NEON_UADALP));
1963

1964 1965 1966 1967
  DCHECK((vn.Is8B() && vd.Is4H()) || (vn.Is4H() && vd.Is2S()) ||
         (vn.Is2S() && vd.Is1D()) || (vn.Is16B() && vd.Is8H()) ||
         (vn.Is8H() && vd.Is4S()) || (vn.Is4S() && vd.Is2D()));
  Emit(VFormat(vn) | op | Rn(vn) | Rd(vd));
1968 1969
}

1970 1971 1972
void Assembler::saddlp(const VRegister& vd, const VRegister& vn) {
  NEONAddlp(vd, vn, NEON_SADDLP);
}
1973

1974 1975
void Assembler::uaddlp(const VRegister& vd, const VRegister& vn) {
  NEONAddlp(vd, vn, NEON_UADDLP);
1976 1977
}

1978 1979 1980
void Assembler::sadalp(const VRegister& vd, const VRegister& vn) {
  NEONAddlp(vd, vn, NEON_SADALP);
}
1981

1982 1983
void Assembler::uadalp(const VRegister& vd, const VRegister& vn) {
  NEONAddlp(vd, vn, NEON_UADALP);
1984 1985
}

1986 1987 1988 1989 1990 1991 1992
void Assembler::NEONAcrossLanesL(const VRegister& vd, const VRegister& vn,
                                 NEONAcrossLanesOp op) {
  DCHECK((vn.Is8B() && vd.Is1H()) || (vn.Is16B() && vd.Is1H()) ||
         (vn.Is4H() && vd.Is1S()) || (vn.Is8H() && vd.Is1S()) ||
         (vn.Is4S() && vd.Is1D()));
  Emit(VFormat(vn) | op | Rn(vn) | Rd(vd));
}
1993

1994 1995
void Assembler::saddlv(const VRegister& vd, const VRegister& vn) {
  NEONAcrossLanesL(vd, vn, NEON_SADDLV);
1996 1997
}

1998 1999 2000
void Assembler::uaddlv(const VRegister& vd, const VRegister& vn) {
  NEONAcrossLanesL(vd, vn, NEON_UADDLV);
}
2001

2002 2003 2004 2005 2006 2007 2008
void Assembler::NEONAcrossLanes(const VRegister& vd, const VRegister& vn,
                                NEONAcrossLanesOp op) {
  DCHECK((vn.Is8B() && vd.Is1B()) || (vn.Is16B() && vd.Is1B()) ||
         (vn.Is4H() && vd.Is1H()) || (vn.Is8H() && vd.Is1H()) ||
         (vn.Is4S() && vd.Is1S()));
  if ((op & NEONAcrossLanesFPFMask) == NEONAcrossLanesFPFixed) {
    Emit(FPFormat(vn) | op | Rn(vn) | Rd(vd));
2009
  } else {
2010
    Emit(VFormat(vn) | op | Rn(vn) | Rd(vd));
2011
  }
2012 2013
}

2014 2015 2016 2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031 2032 2033 2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062
#define NEON_ACROSSLANES_LIST(V)      \
  V(fmaxv, NEON_FMAXV, vd.Is1S())     \
  V(fminv, NEON_FMINV, vd.Is1S())     \
  V(fmaxnmv, NEON_FMAXNMV, vd.Is1S()) \
  V(fminnmv, NEON_FMINNMV, vd.Is1S()) \
  V(addv, NEON_ADDV, true)            \
  V(smaxv, NEON_SMAXV, true)          \
  V(sminv, NEON_SMINV, true)          \
  V(umaxv, NEON_UMAXV, true)          \
  V(uminv, NEON_UMINV, true)

#define DEFINE_ASM_FUNC(FN, OP, AS)                              \
  void Assembler::FN(const VRegister& vd, const VRegister& vn) { \
    DCHECK(AS);                                                  \
    NEONAcrossLanes(vd, vn, OP);                                 \
  }
NEON_ACROSSLANES_LIST(DEFINE_ASM_FUNC)
#undef DEFINE_ASM_FUNC

void Assembler::mov(const VRegister& vd, int vd_index, const Register& rn) {
  ins(vd, vd_index, rn);
}

void Assembler::umov(const Register& rd, const VRegister& vn, int vn_index) {
  // We support vn arguments of the form vn.VxT() or vn.T(), where x is the
  // number of lanes, and T is b, h, s or d.
  int lane_size = vn.LaneSizeInBytes();
  NEONFormatField format;
  Instr q = 0;
  switch (lane_size) {
    case 1:
      format = NEON_16B;
      DCHECK(rd.IsW());
      break;
    case 2:
      format = NEON_8H;
      DCHECK(rd.IsW());
      break;
    case 4:
      format = NEON_4S;
      DCHECK(rd.IsW());
      break;
    default:
      DCHECK_EQ(lane_size, 8);
      DCHECK(rd.IsX());
      format = NEON_2D;
      q = NEON_Q;
      break;
  }
2063

2064 2065 2066
  DCHECK((0 <= vn_index) &&
         (vn_index < LaneCountFromFormat(static_cast<VectorFormat>(format))));
  Emit(q | NEON_UMOV | ImmNEON5(format, vn_index) | Rn(vn) | Rd(rd));
2067
}
2068

2069 2070 2071 2072
void Assembler::mov(const VRegister& vd, const VRegister& vn, int vn_index) {
  DCHECK(vd.IsScalar());
  dup(vd, vn, vn_index);
}
2073

2074 2075 2076 2077 2078
void Assembler::dup(const VRegister& vd, const Register& rn) {
  DCHECK(!vd.Is1D());
  DCHECK_EQ(vd.Is2D(), rn.IsX());
  Instr q = vd.IsD() ? 0 : NEON_Q;
  Emit(q | NEON_DUP_GENERAL | ImmNEON5(VFormat(vd), 0) | Rn(rn) | Rd(vd));
2079
}
2080

2081 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 2097 2098 2099 2100 2101 2102
void Assembler::ins(const VRegister& vd, int vd_index, const VRegister& vn,
                    int vn_index) {
  DCHECK(AreSameFormat(vd, vn));
  // We support vd arguments of the form vd.VxT() or vd.T(), where x is the
  // number of lanes, and T is b, h, s or d.
  int lane_size = vd.LaneSizeInBytes();
  NEONFormatField format;
  switch (lane_size) {
    case 1:
      format = NEON_16B;
      break;
    case 2:
      format = NEON_8H;
      break;
    case 4:
      format = NEON_4S;
      break;
    default:
      DCHECK_EQ(lane_size, 8);
      format = NEON_2D;
      break;
  }
2103

2104 2105 2106 2107 2108 2109
  DCHECK((0 <= vd_index) &&
         (vd_index < LaneCountFromFormat(static_cast<VectorFormat>(format))));
  DCHECK((0 <= vn_index) &&
         (vn_index < LaneCountFromFormat(static_cast<VectorFormat>(format))));
  Emit(NEON_INS_ELEMENT | ImmNEON5(format, vd_index) |
       ImmNEON4(format, vn_index) | Rn(vn) | Rd(vd));
2110
}
2111

2112 2113 2114 2115 2116 2117 2118
void Assembler::NEONTable(const VRegister& vd, const VRegister& vn,
                          const VRegister& vm, NEONTableOp op) {
  DCHECK(vd.Is16B() || vd.Is8B());
  DCHECK(vn.Is16B());
  DCHECK(AreSameFormat(vd, vm));
  Emit(op | (vd.IsQ() ? NEON_Q : 0) | Rm(vm) | Rn(vn) | Rd(vd));
}
2119

2120 2121 2122
void Assembler::tbl(const VRegister& vd, const VRegister& vn,
                    const VRegister& vm) {
  NEONTable(vd, vn, vm, NEON_TBL_1v);
2123
}
2124

2125 2126 2127 2128 2129 2130 2131
void Assembler::tbl(const VRegister& vd, const VRegister& vn,
                    const VRegister& vn2, const VRegister& vm) {
  USE(vn2);
  DCHECK(AreSameFormat(vn, vn2));
  DCHECK(AreConsecutive(vn, vn2));
  NEONTable(vd, vn, vm, NEON_TBL_2v);
}
2132

2133 2134 2135 2136 2137 2138 2139 2140
void Assembler::tbl(const VRegister& vd, const VRegister& vn,
                    const VRegister& vn2, const VRegister& vn3,
                    const VRegister& vm) {
  USE(vn2);
  USE(vn3);
  DCHECK(AreSameFormat(vn, vn2, vn3));
  DCHECK(AreConsecutive(vn, vn2, vn3));
  NEONTable(vd, vn, vm, NEON_TBL_3v);
2141
}
2142

2143 2144 2145 2146 2147 2148 2149 2150 2151 2152
void Assembler::tbl(const VRegister& vd, const VRegister& vn,
                    const VRegister& vn2, const VRegister& vn3,
                    const VRegister& vn4, const VRegister& vm) {
  USE(vn2);
  USE(vn3);
  USE(vn4);
  DCHECK(AreSameFormat(vn, vn2, vn3, vn4));
  DCHECK(AreConsecutive(vn, vn2, vn3, vn4));
  NEONTable(vd, vn, vm, NEON_TBL_4v);
}
2153

2154 2155 2156
void Assembler::tbx(const VRegister& vd, const VRegister& vn,
                    const VRegister& vm) {
  NEONTable(vd, vn, vm, NEON_TBX_1v);
2157
}
2158

2159 2160 2161 2162 2163 2164 2165
void Assembler::tbx(const VRegister& vd, const VRegister& vn,
                    const VRegister& vn2, const VRegister& vm) {
  USE(vn2);
  DCHECK(AreSameFormat(vn, vn2));
  DCHECK(AreConsecutive(vn, vn2));
  NEONTable(vd, vn, vm, NEON_TBX_2v);
}
2166

2167 2168 2169 2170 2171 2172 2173 2174
void Assembler::tbx(const VRegister& vd, const VRegister& vn,
                    const VRegister& vn2, const VRegister& vn3,
                    const VRegister& vm) {
  USE(vn2);
  USE(vn3);
  DCHECK(AreSameFormat(vn, vn2, vn3));
  DCHECK(AreConsecutive(vn, vn2, vn3));
  NEONTable(vd, vn, vm, NEON_TBX_3v);
2175
}
2176

2177 2178 2179 2180 2181 2182 2183 2184 2185 2186
void Assembler::tbx(const VRegister& vd, const VRegister& vn,
                    const VRegister& vn2, const VRegister& vn3,
                    const VRegister& vn4, const VRegister& vm) {
  USE(vn2);
  USE(vn3);
  USE(vn4);
  DCHECK(AreSameFormat(vn, vn2, vn3, vn4));
  DCHECK(AreConsecutive(vn, vn2, vn3, vn4));
  NEONTable(vd, vn, vm, NEON_TBX_4v);
}
2187

2188 2189 2190
void Assembler::mov(const VRegister& vd, int vd_index, const VRegister& vn,
                    int vn_index) {
  ins(vd, vd_index, vn, vn_index);
2191 2192
}

2193 2194 2195
void Assembler::mvn(const Register& rd, const Operand& operand) {
  orn(rd, AppropriateZeroRegFor(rd), operand);
}
2196

2197 2198 2199 2200 2201 2202 2203 2204
void Assembler::mrs(const Register& rt, SystemRegister sysreg) {
  DCHECK(rt.Is64Bits());
  Emit(MRS | ImmSystemRegister(sysreg) | Rt(rt));
}

void Assembler::msr(SystemRegister sysreg, const Register& rt) {
  DCHECK(rt.Is64Bits());
  Emit(MSR | Rt(rt) | ImmSystemRegister(sysreg));
2205
}
2206

2207
void Assembler::hint(SystemHint code) { Emit(HINT | ImmHint(code) | Rt(xzr)); }
2208

2209 2210 2211 2212 2213 2214 2215 2216 2217 2218 2219 2220 2221 2222 2223 2224
// NEON structure loads and stores.
Instr Assembler::LoadStoreStructAddrModeField(const MemOperand& addr) {
  Instr addr_field = RnSP(addr.base());

  if (addr.IsPostIndex()) {
    static_assert(NEONLoadStoreMultiStructPostIndex ==
                      static_cast<NEONLoadStoreMultiStructPostIndexOp>(
                          NEONLoadStoreSingleStructPostIndex),
                  "Opcodes must match for NEON post index memop.");

    addr_field |= NEONLoadStoreMultiStructPostIndex;
    if (addr.offset() == 0) {
      addr_field |= RmNot31(addr.regoffset());
    } else {
      // The immediate post index addressing mode is indicated by rm = 31.
      // The immediate is implied by the number of vector registers used.
2225
      addr_field |= (0x1F << Rm_offset);
2226
    }
2227
  } else {
2228
    DCHECK(addr.IsImmediateOffset() && (addr.offset() == 0));
2229
  }
2230
  return addr_field;
2231
}
2232

2233 2234 2235 2236 2237 2238 2239 2240
void Assembler::LoadStoreStructVerify(const VRegister& vt,
                                      const MemOperand& addr, Instr op) {
#ifdef DEBUG
  // Assert that addressing mode is either offset (with immediate 0), post
  // index by immediate of the size of the register list, or post index by a
  // value in a core register.
  if (addr.IsImmediateOffset()) {
    DCHECK_EQ(addr.offset(), 0);
2241
  } else {
2242 2243 2244 2245 2246 2247 2248 2249 2250 2251 2252
    int offset = vt.SizeInBytes();
    switch (op) {
      case NEON_LD1_1v:
      case NEON_ST1_1v:
        offset *= 1;
        break;
      case NEONLoadStoreSingleStructLoad1:
      case NEONLoadStoreSingleStructStore1:
      case NEON_LD1R:
        offset = (offset / vt.LaneCount()) * 1;
        break;
2253

2254 2255 2256 2257 2258 2259 2260 2261 2262 2263 2264
      case NEON_LD1_2v:
      case NEON_ST1_2v:
      case NEON_LD2:
      case NEON_ST2:
        offset *= 2;
        break;
      case NEONLoadStoreSingleStructLoad2:
      case NEONLoadStoreSingleStructStore2:
      case NEON_LD2R:
        offset = (offset / vt.LaneCount()) * 2;
        break;
2265

2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 2286 2287 2288 2289 2290 2291
      case NEON_LD1_3v:
      case NEON_ST1_3v:
      case NEON_LD3:
      case NEON_ST3:
        offset *= 3;
        break;
      case NEONLoadStoreSingleStructLoad3:
      case NEONLoadStoreSingleStructStore3:
      case NEON_LD3R:
        offset = (offset / vt.LaneCount()) * 3;
        break;

      case NEON_LD1_4v:
      case NEON_ST1_4v:
      case NEON_LD4:
      case NEON_ST4:
        offset *= 4;
        break;
      case NEONLoadStoreSingleStructLoad4:
      case NEONLoadStoreSingleStructStore4:
      case NEON_LD4R:
        offset = (offset / vt.LaneCount()) * 4;
        break;
      default:
        UNREACHABLE();
    }
2292
    DCHECK(addr.regoffset() != NoReg || addr.offset() == offset);
2293
  }
2294 2295 2296 2297 2298
#else
  USE(vt);
  USE(addr);
  USE(op);
#endif
2299 2300
}

2301 2302 2303 2304 2305 2306
void Assembler::LoadStoreStruct(const VRegister& vt, const MemOperand& addr,
                                NEONLoadStoreMultiStructOp op) {
  LoadStoreStructVerify(vt, addr, op);
  DCHECK(vt.IsVector() || vt.Is1D());
  Emit(op | LoadStoreStructAddrModeField(addr) | LSVFormat(vt) | Rt(vt));
}
2307

2308 2309 2310 2311 2312
void Assembler::LoadStoreStructSingleAllLanes(const VRegister& vt,
                                              const MemOperand& addr,
                                              NEONLoadStoreSingleStructOp op) {
  LoadStoreStructVerify(vt, addr, op);
  Emit(op | LoadStoreStructAddrModeField(addr) | LSVFormat(vt) | Rt(vt));
2313 2314
}

2315 2316 2317
void Assembler::ld1(const VRegister& vt, const MemOperand& src) {
  LoadStoreStruct(vt, src, NEON_LD1_1v);
}
2318

2319 2320 2321 2322 2323 2324 2325
void Assembler::ld1(const VRegister& vt, const VRegister& vt2,
                    const MemOperand& src) {
  USE(vt2);
  DCHECK(AreSameFormat(vt, vt2));
  DCHECK(AreConsecutive(vt, vt2));
  LoadStoreStruct(vt, src, NEON_LD1_2v);
}
2326

2327 2328 2329 2330 2331 2332 2333
void Assembler::ld1(const VRegister& vt, const VRegister& vt2,
                    const VRegister& vt3, const MemOperand& src) {
  USE(vt2);
  USE(vt3);
  DCHECK(AreSameFormat(vt, vt2, vt3));
  DCHECK(AreConsecutive(vt, vt2, vt3));
  LoadStoreStruct(vt, src, NEON_LD1_3v);
2334 2335
}

2336 2337 2338 2339 2340 2341 2342 2343 2344 2345
void Assembler::ld1(const VRegister& vt, const VRegister& vt2,
                    const VRegister& vt3, const VRegister& vt4,
                    const MemOperand& src) {
  USE(vt2);
  USE(vt3);
  USE(vt4);
  DCHECK(AreSameFormat(vt, vt2, vt3, vt4));
  DCHECK(AreConsecutive(vt, vt2, vt3, vt4));
  LoadStoreStruct(vt, src, NEON_LD1_4v);
}
2346

2347 2348 2349 2350 2351 2352 2353 2354 2355 2356 2357 2358 2359 2360 2361 2362 2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393 2394 2395 2396 2397 2398 2399 2400 2401 2402 2403 2404 2405 2406 2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2423 2424 2425 2426 2427 2428 2429 2430 2431 2432 2433 2434 2435 2436 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2459 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2471 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498 2499 2500 2501 2502 2503 2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523 2524 2525 2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541 2542 2543 2544 2545 2546 2547 2548 2549 2550 2551 2552 2553 2554 2555 2556 2557 2558 2559 2560 2561 2562 2563 2564 2565 2566 2567 2568 2569 2570 2571 2572 2573 2574 2575 2576 2577 2578 2579 2580 2581 2582 2583 2584
void Assembler::ld2(const VRegister& vt, const VRegister& vt2,
                    const MemOperand& src) {
  USE(vt2);
  DCHECK(AreSameFormat(vt, vt2));
  DCHECK(AreConsecutive(vt, vt2));
  LoadStoreStruct(vt, src, NEON_LD2);
}

void Assembler::ld2(const VRegister& vt, const VRegister& vt2, int lane,
                    const MemOperand& src) {
  USE(vt2);
  DCHECK(AreSameFormat(vt, vt2));
  DCHECK(AreConsecutive(vt, vt2));
  LoadStoreStructSingle(vt, lane, src, NEONLoadStoreSingleStructLoad2);
}

void Assembler::ld2r(const VRegister& vt, const VRegister& vt2,
                     const MemOperand& src) {
  USE(vt2);
  DCHECK(AreSameFormat(vt, vt2));
  DCHECK(AreConsecutive(vt, vt2));
  LoadStoreStructSingleAllLanes(vt, src, NEON_LD2R);
}

void Assembler::ld3(const VRegister& vt, const VRegister& vt2,
                    const VRegister& vt3, const MemOperand& src) {
  USE(vt2);
  USE(vt3);
  DCHECK(AreSameFormat(vt, vt2, vt3));
  DCHECK(AreConsecutive(vt, vt2, vt3));
  LoadStoreStruct(vt, src, NEON_LD3);
}

void Assembler::ld3(const VRegister& vt, const VRegister& vt2,
                    const VRegister& vt3, int lane, const MemOperand& src) {
  USE(vt2);
  USE(vt3);
  DCHECK(AreSameFormat(vt, vt2, vt3));
  DCHECK(AreConsecutive(vt, vt2, vt3));
  LoadStoreStructSingle(vt, lane, src, NEONLoadStoreSingleStructLoad3);
}

void Assembler::ld3r(const VRegister& vt, const VRegister& vt2,
                     const VRegister& vt3, const MemOperand& src) {
  USE(vt2);
  USE(vt3);
  DCHECK(AreSameFormat(vt, vt2, vt3));
  DCHECK(AreConsecutive(vt, vt2, vt3));
  LoadStoreStructSingleAllLanes(vt, src, NEON_LD3R);
}

void Assembler::ld4(const VRegister& vt, const VRegister& vt2,
                    const VRegister& vt3, const VRegister& vt4,
                    const MemOperand& src) {
  USE(vt2);
  USE(vt3);
  USE(vt4);
  DCHECK(AreSameFormat(vt, vt2, vt3, vt4));
  DCHECK(AreConsecutive(vt, vt2, vt3, vt4));
  LoadStoreStruct(vt, src, NEON_LD4);
}

void Assembler::ld4(const VRegister& vt, const VRegister& vt2,
                    const VRegister& vt3, const VRegister& vt4, int lane,
                    const MemOperand& src) {
  USE(vt2);
  USE(vt3);
  USE(vt4);
  DCHECK(AreSameFormat(vt, vt2, vt3, vt4));
  DCHECK(AreConsecutive(vt, vt2, vt3, vt4));
  LoadStoreStructSingle(vt, lane, src, NEONLoadStoreSingleStructLoad4);
}

void Assembler::ld4r(const VRegister& vt, const VRegister& vt2,
                     const VRegister& vt3, const VRegister& vt4,
                     const MemOperand& src) {
  USE(vt2);
  USE(vt3);
  USE(vt4);
  DCHECK(AreSameFormat(vt, vt2, vt3, vt4));
  DCHECK(AreConsecutive(vt, vt2, vt3, vt4));
  LoadStoreStructSingleAllLanes(vt, src, NEON_LD4R);
}

void Assembler::st1(const VRegister& vt, const MemOperand& src) {
  LoadStoreStruct(vt, src, NEON_ST1_1v);
}

void Assembler::st1(const VRegister& vt, const VRegister& vt2,
                    const MemOperand& src) {
  USE(vt2);
  DCHECK(AreSameFormat(vt, vt2));
  DCHECK(AreConsecutive(vt, vt2));
  LoadStoreStruct(vt, src, NEON_ST1_2v);
}

void Assembler::st1(const VRegister& vt, const VRegister& vt2,
                    const VRegister& vt3, const MemOperand& src) {
  USE(vt2);
  USE(vt3);
  DCHECK(AreSameFormat(vt, vt2, vt3));
  DCHECK(AreConsecutive(vt, vt2, vt3));
  LoadStoreStruct(vt, src, NEON_ST1_3v);
}

void Assembler::st1(const VRegister& vt, const VRegister& vt2,
                    const VRegister& vt3, const VRegister& vt4,
                    const MemOperand& src) {
  USE(vt2);
  USE(vt3);
  USE(vt4);
  DCHECK(AreSameFormat(vt, vt2, vt3, vt4));
  DCHECK(AreConsecutive(vt, vt2, vt3, vt4));
  LoadStoreStruct(vt, src, NEON_ST1_4v);
}

void Assembler::st2(const VRegister& vt, const VRegister& vt2,
                    const MemOperand& dst) {
  USE(vt2);
  DCHECK(AreSameFormat(vt, vt2));
  DCHECK(AreConsecutive(vt, vt2));
  LoadStoreStruct(vt, dst, NEON_ST2);
}

void Assembler::st2(const VRegister& vt, const VRegister& vt2, int lane,
                    const MemOperand& dst) {
  USE(vt2);
  DCHECK(AreSameFormat(vt, vt2));
  DCHECK(AreConsecutive(vt, vt2));
  LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore2);
}

void Assembler::st3(const VRegister& vt, const VRegister& vt2,
                    const VRegister& vt3, const MemOperand& dst) {
  USE(vt2);
  USE(vt3);
  DCHECK(AreSameFormat(vt, vt2, vt3));
  DCHECK(AreConsecutive(vt, vt2, vt3));
  LoadStoreStruct(vt, dst, NEON_ST3);
}

void Assembler::st3(const VRegister& vt, const VRegister& vt2,
                    const VRegister& vt3, int lane, const MemOperand& dst) {
  USE(vt2);
  USE(vt3);
  DCHECK(AreSameFormat(vt, vt2, vt3));
  DCHECK(AreConsecutive(vt, vt2, vt3));
  LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore3);
}

void Assembler::st4(const VRegister& vt, const VRegister& vt2,
                    const VRegister& vt3, const VRegister& vt4,
                    const MemOperand& dst) {
  USE(vt2);
  USE(vt3);
  USE(vt4);
  DCHECK(AreSameFormat(vt, vt2, vt3, vt4));
  DCHECK(AreConsecutive(vt, vt2, vt3, vt4));
  LoadStoreStruct(vt, dst, NEON_ST4);
}

void Assembler::st4(const VRegister& vt, const VRegister& vt2,
                    const VRegister& vt3, const VRegister& vt4, int lane,
                    const MemOperand& dst) {
  USE(vt2);
  USE(vt3);
  USE(vt4);
  DCHECK(AreSameFormat(vt, vt2, vt3, vt4));
  DCHECK(AreConsecutive(vt, vt2, vt3, vt4));
  LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore4);
}

void Assembler::LoadStoreStructSingle(const VRegister& vt, uint32_t lane,
                                      const MemOperand& addr,
                                      NEONLoadStoreSingleStructOp op) {
  LoadStoreStructVerify(vt, addr, op);

  // We support vt arguments of the form vt.VxT() or vt.T(), where x is the
  // number of lanes, and T is b, h, s or d.
  unsigned lane_size = vt.LaneSizeInBytes();
  DCHECK_LT(lane, kQRegSize / lane_size);

  // Lane size is encoded in the opcode field. Lane index is encoded in the Q,
  // S and size fields.
  lane *= lane_size;

  // Encodings for S[0]/D[0] and S[2]/D[1] are distinguished using the least-
  // significant bit of the size field, so we increment lane here to account for
  // that.
  if (lane_size == 8) lane++;

  Instr size = (lane << NEONLSSize_offset) & NEONLSSize_mask;
  Instr s = (lane << (NEONS_offset - 2)) & NEONS_mask;
  Instr q = (lane << (NEONQ_offset - 3)) & NEONQ_mask;

  Instr instr = op;
  switch (lane_size) {
    case 1:
      instr |= NEONLoadStoreSingle_b;
      break;
    case 2:
      instr |= NEONLoadStoreSingle_h;
      break;
    case 4:
      instr |= NEONLoadStoreSingle_s;
      break;
    default:
      DCHECK_EQ(lane_size, 8U);
      instr |= NEONLoadStoreSingle_d;
  }

  Emit(instr | LoadStoreStructAddrModeField(addr) | q | size | s | Rt(vt));
}

void Assembler::ld1(const VRegister& vt, int lane, const MemOperand& src) {
  LoadStoreStructSingle(vt, lane, src, NEONLoadStoreSingleStructLoad1);
}

void Assembler::ld1r(const VRegister& vt, const MemOperand& src) {
  LoadStoreStructSingleAllLanes(vt, src, NEON_LD1R);
}

void Assembler::st1(const VRegister& vt, int lane, const MemOperand& dst) {
  LoadStoreStructSingle(vt, lane, dst, NEONLoadStoreSingleStructStore1);
}

void Assembler::dmb(BarrierDomain domain, BarrierType type) {
  Emit(DMB | ImmBarrierDomain(domain) | ImmBarrierType(type));
}

void Assembler::dsb(BarrierDomain domain, BarrierType type) {
  Emit(DSB | ImmBarrierDomain(domain) | ImmBarrierType(type));
}

void Assembler::isb() {
  Emit(ISB | ImmBarrierDomain(FullSystem) | ImmBarrierType(BarrierAll));
}

2585 2586
void Assembler::csdb() { hint(CSDB); }

2587 2588 2589 2590 2591 2592 2593
void Assembler::fmov(const VRegister& vd, double imm) {
  if (vd.IsScalar()) {
    DCHECK(vd.Is1D());
    Emit(FMOV_d_imm | Rd(vd) | ImmFP(imm));
  } else {
    DCHECK(vd.Is2D());
    Instr op = NEONModifiedImmediate_MOVI | NEONModifiedImmediateOpBit;
2594
    Emit(NEON_Q | op | ImmNEONFP(imm) | NEONCmode(0xF) | Rd(vd));
2595 2596 2597 2598 2599 2600 2601 2602 2603 2604 2605
  }
}

void Assembler::fmov(const VRegister& vd, float imm) {
  if (vd.IsScalar()) {
    DCHECK(vd.Is1S());
    Emit(FMOV_s_imm | Rd(vd) | ImmFP(imm));
  } else {
    DCHECK(vd.Is2S() | vd.Is4S());
    Instr op = NEONModifiedImmediate_MOVI;
    Instr q = vd.Is4S() ? NEON_Q : 0;
2606
    Emit(q | op | ImmNEONFP(imm) | NEONCmode(0xF) | Rd(vd));
2607 2608 2609 2610 2611 2612 2613 2614 2615 2616 2617 2618 2619 2620 2621 2622 2623 2624 2625 2626 2627 2628 2629 2630 2631 2632 2633 2634 2635 2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648 2649 2650 2651 2652 2653 2654 2655 2656 2657 2658 2659 2660 2661 2662 2663 2664 2665 2666 2667 2668 2669 2670 2671 2672 2673 2674 2675 2676 2677 2678 2679 2680 2681 2682 2683 2684 2685 2686 2687 2688 2689 2690 2691 2692 2693 2694 2695 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2737 2738 2739 2740 2741 2742 2743 2744 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763
  }
}

void Assembler::fmov(const Register& rd, const VRegister& fn) {
  DCHECK_EQ(rd.SizeInBits(), fn.SizeInBits());
  FPIntegerConvertOp op = rd.Is32Bits() ? FMOV_ws : FMOV_xd;
  Emit(op | Rd(rd) | Rn(fn));
}

void Assembler::fmov(const VRegister& vd, const Register& rn) {
  DCHECK_EQ(vd.SizeInBits(), rn.SizeInBits());
  FPIntegerConvertOp op = vd.Is32Bits() ? FMOV_sw : FMOV_dx;
  Emit(op | Rd(vd) | Rn(rn));
}

void Assembler::fmov(const VRegister& vd, const VRegister& vn) {
  DCHECK_EQ(vd.SizeInBits(), vn.SizeInBits());
  Emit(FPType(vd) | FMOV | Rd(vd) | Rn(vn));
}

void Assembler::fmov(const VRegister& vd, int index, const Register& rn) {
  DCHECK((index == 1) && vd.Is1D() && rn.IsX());
  USE(index);
  Emit(FMOV_d1_x | Rd(vd) | Rn(rn));
}

void Assembler::fmov(const Register& rd, const VRegister& vn, int index) {
  DCHECK((index == 1) && vn.Is1D() && rd.IsX());
  USE(index);
  Emit(FMOV_x_d1 | Rd(rd) | Rn(vn));
}

void Assembler::fmadd(const VRegister& fd, const VRegister& fn,
                      const VRegister& fm, const VRegister& fa) {
  FPDataProcessing3Source(fd, fn, fm, fa, fd.Is32Bits() ? FMADD_s : FMADD_d);
}

void Assembler::fmsub(const VRegister& fd, const VRegister& fn,
                      const VRegister& fm, const VRegister& fa) {
  FPDataProcessing3Source(fd, fn, fm, fa, fd.Is32Bits() ? FMSUB_s : FMSUB_d);
}

void Assembler::fnmadd(const VRegister& fd, const VRegister& fn,
                       const VRegister& fm, const VRegister& fa) {
  FPDataProcessing3Source(fd, fn, fm, fa, fd.Is32Bits() ? FNMADD_s : FNMADD_d);
}

void Assembler::fnmsub(const VRegister& fd, const VRegister& fn,
                       const VRegister& fm, const VRegister& fa) {
  FPDataProcessing3Source(fd, fn, fm, fa, fd.Is32Bits() ? FNMSUB_s : FNMSUB_d);
}

void Assembler::fnmul(const VRegister& vd, const VRegister& vn,
                      const VRegister& vm) {
  DCHECK(AreSameSizeAndType(vd, vn, vm));
  Instr op = vd.Is1S() ? FNMUL_s : FNMUL_d;
  Emit(FPType(vd) | op | Rm(vm) | Rn(vn) | Rd(vd));
}

void Assembler::fcmp(const VRegister& fn, const VRegister& fm) {
  DCHECK_EQ(fn.SizeInBits(), fm.SizeInBits());
  Emit(FPType(fn) | FCMP | Rm(fm) | Rn(fn));
}

void Assembler::fcmp(const VRegister& fn, double value) {
  USE(value);
  // Although the fcmp instruction can strictly only take an immediate value of
  // +0.0, we don't need to check for -0.0 because the sign of 0.0 doesn't
  // affect the result of the comparison.
  DCHECK_EQ(value, 0.0);
  Emit(FPType(fn) | FCMP_zero | Rn(fn));
}

void Assembler::fccmp(const VRegister& fn, const VRegister& fm,
                      StatusFlags nzcv, Condition cond) {
  DCHECK_EQ(fn.SizeInBits(), fm.SizeInBits());
  Emit(FPType(fn) | FCCMP | Rm(fm) | Cond(cond) | Rn(fn) | Nzcv(nzcv));
}

void Assembler::fcsel(const VRegister& fd, const VRegister& fn,
                      const VRegister& fm, Condition cond) {
  DCHECK_EQ(fd.SizeInBits(), fn.SizeInBits());
  DCHECK_EQ(fd.SizeInBits(), fm.SizeInBits());
  Emit(FPType(fd) | FCSEL | Rm(fm) | Cond(cond) | Rn(fn) | Rd(fd));
}

void Assembler::NEONFPConvertToInt(const Register& rd, const VRegister& vn,
                                   Instr op) {
  Emit(SF(rd) | FPType(vn) | op | Rn(vn) | Rd(rd));
}

void Assembler::NEONFPConvertToInt(const VRegister& vd, const VRegister& vn,
                                   Instr op) {
  if (vn.IsScalar()) {
    DCHECK((vd.Is1S() && vn.Is1S()) || (vd.Is1D() && vn.Is1D()));
    op |= NEON_Q | NEONScalar;
  }
  Emit(FPFormat(vn) | op | Rn(vn) | Rd(vd));
}

void Assembler::fcvt(const VRegister& vd, const VRegister& vn) {
  FPDataProcessing1SourceOp op;
  if (vd.Is1D()) {
    DCHECK(vn.Is1S() || vn.Is1H());
    op = vn.Is1S() ? FCVT_ds : FCVT_dh;
  } else if (vd.Is1S()) {
    DCHECK(vn.Is1D() || vn.Is1H());
    op = vn.Is1D() ? FCVT_sd : FCVT_sh;
  } else {
    DCHECK(vd.Is1H());
    DCHECK(vn.Is1D() || vn.Is1S());
    op = vn.Is1D() ? FCVT_hd : FCVT_hs;
  }
  FPDataProcessing1Source(vd, vn, op);
}

void Assembler::fcvtl(const VRegister& vd, const VRegister& vn) {
  DCHECK((vd.Is4S() && vn.Is4H()) || (vd.Is2D() && vn.Is2S()));
  Instr format = vd.Is2D() ? (1 << NEONSize_offset) : 0;
  Emit(format | NEON_FCVTL | Rn(vn) | Rd(vd));
}

void Assembler::fcvtl2(const VRegister& vd, const VRegister& vn) {
  DCHECK((vd.Is4S() && vn.Is8H()) || (vd.Is2D() && vn.Is4S()));
  Instr format = vd.Is2D() ? (1 << NEONSize_offset) : 0;
  Emit(NEON_Q | format | NEON_FCVTL | Rn(vn) | Rd(vd));
}

void Assembler::fcvtn(const VRegister& vd, const VRegister& vn) {
  DCHECK((vn.Is4S() && vd.Is4H()) || (vn.Is2D() && vd.Is2S()));
  Instr format = vn.Is2D() ? (1 << NEONSize_offset) : 0;
  Emit(format | NEON_FCVTN | Rn(vn) | Rd(vd));
}

void Assembler::fcvtn2(const VRegister& vd, const VRegister& vn) {
  DCHECK((vn.Is4S() && vd.Is8H()) || (vn.Is2D() && vd.Is4S()));
  Instr format = vn.Is2D() ? (1 << NEONSize_offset) : 0;
  Emit(NEON_Q | format | NEON_FCVTN | Rn(vn) | Rd(vd));
}

void Assembler::fcvtxn(const VRegister& vd, const VRegister& vn) {
  Instr format = 1 << NEONSize_offset;
  if (vd.IsScalar()) {
    DCHECK(vd.Is1S() && vn.Is1D());
    Emit(format | NEON_FCVTXN_scalar | Rn(vn) | Rd(vd));
  } else {
    DCHECK(vd.Is2S() && vn.Is2D());
    Emit(format | NEON_FCVTXN | Rn(vn) | Rd(vd));
  }
}

void Assembler::fcvtxn2(const VRegister& vd, const VRegister& vn) {
  DCHECK(vd.Is4S() && vn.Is2D());
  Instr format = 1 << NEONSize_offset;
  Emit(NEON_Q | format | NEON_FCVTXN | Rn(vn) | Rd(vd));
}

2764 2765 2766 2767 2768
void Assembler::fjcvtzs(const Register& rd, const VRegister& vn) {
  DCHECK(rd.IsW() && vn.Is1D());
  Emit(FJCVTZS | Rn(vn) | Rd(rd));
}

2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2780 2781 2782 2783 2784 2785 2786 2787 2788 2789 2790 2791 2792 2793 2794 2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 2920 2921 2922 2923 2924 2925 2926 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 2974 2975 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2996 2997 2998 2999 3000 3001 3002 3003 3004 3005 3006 3007 3008 3009 3010 3011 3012 3013 3014 3015 3016 3017 3018 3019 3020 3021 3022 3023 3024 3025 3026 3027 3028 3029 3030 3031 3032 3033 3034 3035 3036 3037 3038 3039 3040 3041 3042 3043 3044 3045 3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093
#define NEON_FP2REGMISC_FCVT_LIST(V) \
  V(fcvtnu, NEON_FCVTNU, FCVTNU)     \
  V(fcvtns, NEON_FCVTNS, FCVTNS)     \
  V(fcvtpu, NEON_FCVTPU, FCVTPU)     \
  V(fcvtps, NEON_FCVTPS, FCVTPS)     \
  V(fcvtmu, NEON_FCVTMU, FCVTMU)     \
  V(fcvtms, NEON_FCVTMS, FCVTMS)     \
  V(fcvtau, NEON_FCVTAU, FCVTAU)     \
  V(fcvtas, NEON_FCVTAS, FCVTAS)

#define DEFINE_ASM_FUNCS(FN, VEC_OP, SCA_OP)                     \
  void Assembler::FN(const Register& rd, const VRegister& vn) {  \
    NEONFPConvertToInt(rd, vn, SCA_OP);                          \
  }                                                              \
  void Assembler::FN(const VRegister& vd, const VRegister& vn) { \
    NEONFPConvertToInt(vd, vn, VEC_OP);                          \
  }
NEON_FP2REGMISC_FCVT_LIST(DEFINE_ASM_FUNCS)
#undef DEFINE_ASM_FUNCS

void Assembler::scvtf(const VRegister& vd, const VRegister& vn, int fbits) {
  DCHECK_GE(fbits, 0);
  if (fbits == 0) {
    NEONFP2RegMisc(vd, vn, NEON_SCVTF);
  } else {
    DCHECK(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S());
    NEONShiftRightImmediate(vd, vn, fbits, NEON_SCVTF_imm);
  }
}

void Assembler::ucvtf(const VRegister& vd, const VRegister& vn, int fbits) {
  DCHECK_GE(fbits, 0);
  if (fbits == 0) {
    NEONFP2RegMisc(vd, vn, NEON_UCVTF);
  } else {
    DCHECK(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S());
    NEONShiftRightImmediate(vd, vn, fbits, NEON_UCVTF_imm);
  }
}

void Assembler::scvtf(const VRegister& vd, const Register& rn, int fbits) {
  DCHECK_GE(fbits, 0);
  if (fbits == 0) {
    Emit(SF(rn) | FPType(vd) | SCVTF | Rn(rn) | Rd(vd));
  } else {
    Emit(SF(rn) | FPType(vd) | SCVTF_fixed | FPScale(64 - fbits) | Rn(rn) |
         Rd(vd));
  }
}

void Assembler::ucvtf(const VRegister& fd, const Register& rn, int fbits) {
  DCHECK_GE(fbits, 0);
  if (fbits == 0) {
    Emit(SF(rn) | FPType(fd) | UCVTF | Rn(rn) | Rd(fd));
  } else {
    Emit(SF(rn) | FPType(fd) | UCVTF_fixed | FPScale(64 - fbits) | Rn(rn) |
         Rd(fd));
  }
}

void Assembler::NEON3Same(const VRegister& vd, const VRegister& vn,
                          const VRegister& vm, NEON3SameOp vop) {
  DCHECK(AreSameFormat(vd, vn, vm));
  DCHECK(vd.IsVector() || !vd.IsQ());

  Instr format, op = vop;
  if (vd.IsScalar()) {
    op |= NEON_Q | NEONScalar;
    format = SFormat(vd);
  } else {
    format = VFormat(vd);
  }

  Emit(format | op | Rm(vm) | Rn(vn) | Rd(vd));
}

void Assembler::NEONFP3Same(const VRegister& vd, const VRegister& vn,
                            const VRegister& vm, Instr op) {
  DCHECK(AreSameFormat(vd, vn, vm));
  Emit(FPFormat(vd) | op | Rm(vm) | Rn(vn) | Rd(vd));
}

#define NEON_FP2REGMISC_LIST(V)                 \
  V(fabs, NEON_FABS, FABS)                      \
  V(fneg, NEON_FNEG, FNEG)                      \
  V(fsqrt, NEON_FSQRT, FSQRT)                   \
  V(frintn, NEON_FRINTN, FRINTN)                \
  V(frinta, NEON_FRINTA, FRINTA)                \
  V(frintp, NEON_FRINTP, FRINTP)                \
  V(frintm, NEON_FRINTM, FRINTM)                \
  V(frintx, NEON_FRINTX, FRINTX)                \
  V(frintz, NEON_FRINTZ, FRINTZ)                \
  V(frinti, NEON_FRINTI, FRINTI)                \
  V(frsqrte, NEON_FRSQRTE, NEON_FRSQRTE_scalar) \
  V(frecpe, NEON_FRECPE, NEON_FRECPE_scalar)

#define DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP)                      \
  void Assembler::FN(const VRegister& vd, const VRegister& vn) { \
    Instr op;                                                    \
    if (vd.IsScalar()) {                                         \
      DCHECK(vd.Is1S() || vd.Is1D());                            \
      op = SCA_OP;                                               \
    } else {                                                     \
      DCHECK(vd.Is2S() || vd.Is2D() || vd.Is4S());               \
      op = VEC_OP;                                               \
    }                                                            \
    NEONFP2RegMisc(vd, vn, op);                                  \
  }
NEON_FP2REGMISC_LIST(DEFINE_ASM_FUNC)
#undef DEFINE_ASM_FUNC

void Assembler::shll(const VRegister& vd, const VRegister& vn, int shift) {
  DCHECK((vd.Is8H() && vn.Is8B() && shift == 8) ||
         (vd.Is4S() && vn.Is4H() && shift == 16) ||
         (vd.Is2D() && vn.Is2S() && shift == 32));
  USE(shift);
  Emit(VFormat(vn) | NEON_SHLL | Rn(vn) | Rd(vd));
}

void Assembler::shll2(const VRegister& vd, const VRegister& vn, int shift) {
  USE(shift);
  DCHECK((vd.Is8H() && vn.Is16B() && shift == 8) ||
         (vd.Is4S() && vn.Is8H() && shift == 16) ||
         (vd.Is2D() && vn.Is4S() && shift == 32));
  Emit(VFormat(vn) | NEON_SHLL | Rn(vn) | Rd(vd));
}

void Assembler::NEONFP2RegMisc(const VRegister& vd, const VRegister& vn,
                               NEON2RegMiscOp vop, double value) {
  DCHECK(AreSameFormat(vd, vn));
  DCHECK_EQ(value, 0.0);
  USE(value);

  Instr op = vop;
  if (vd.IsScalar()) {
    DCHECK(vd.Is1S() || vd.Is1D());
    op |= NEON_Q | NEONScalar;
  } else {
    DCHECK(vd.Is2S() || vd.Is2D() || vd.Is4S());
  }

  Emit(FPFormat(vd) | op | Rn(vn) | Rd(vd));
}

void Assembler::fcmeq(const VRegister& vd, const VRegister& vn, double value) {
  NEONFP2RegMisc(vd, vn, NEON_FCMEQ_zero, value);
}

void Assembler::fcmge(const VRegister& vd, const VRegister& vn, double value) {
  NEONFP2RegMisc(vd, vn, NEON_FCMGE_zero, value);
}

void Assembler::fcmgt(const VRegister& vd, const VRegister& vn, double value) {
  NEONFP2RegMisc(vd, vn, NEON_FCMGT_zero, value);
}

void Assembler::fcmle(const VRegister& vd, const VRegister& vn, double value) {
  NEONFP2RegMisc(vd, vn, NEON_FCMLE_zero, value);
}

void Assembler::fcmlt(const VRegister& vd, const VRegister& vn, double value) {
  NEONFP2RegMisc(vd, vn, NEON_FCMLT_zero, value);
}

void Assembler::frecpx(const VRegister& vd, const VRegister& vn) {
  DCHECK(vd.IsScalar());
  DCHECK(AreSameFormat(vd, vn));
  DCHECK(vd.Is1S() || vd.Is1D());
  Emit(FPFormat(vd) | NEON_FRECPX_scalar | Rn(vn) | Rd(vd));
}

void Assembler::fcvtzs(const Register& rd, const VRegister& vn, int fbits) {
  DCHECK(vn.Is1S() || vn.Is1D());
  DCHECK((fbits >= 0) && (fbits <= rd.SizeInBits()));
  if (fbits == 0) {
    Emit(SF(rd) | FPType(vn) | FCVTZS | Rn(vn) | Rd(rd));
  } else {
    Emit(SF(rd) | FPType(vn) | FCVTZS_fixed | FPScale(64 - fbits) | Rn(vn) |
         Rd(rd));
  }
}

void Assembler::fcvtzs(const VRegister& vd, const VRegister& vn, int fbits) {
  DCHECK_GE(fbits, 0);
  if (fbits == 0) {
    NEONFP2RegMisc(vd, vn, NEON_FCVTZS);
  } else {
    DCHECK(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S());
    NEONShiftRightImmediate(vd, vn, fbits, NEON_FCVTZS_imm);
  }
}

void Assembler::fcvtzu(const Register& rd, const VRegister& vn, int fbits) {
  DCHECK(vn.Is1S() || vn.Is1D());
  DCHECK((fbits >= 0) && (fbits <= rd.SizeInBits()));
  if (fbits == 0) {
    Emit(SF(rd) | FPType(vn) | FCVTZU | Rn(vn) | Rd(rd));
  } else {
    Emit(SF(rd) | FPType(vn) | FCVTZU_fixed | FPScale(64 - fbits) | Rn(vn) |
         Rd(rd));
  }
}

void Assembler::fcvtzu(const VRegister& vd, const VRegister& vn, int fbits) {
  DCHECK_GE(fbits, 0);
  if (fbits == 0) {
    NEONFP2RegMisc(vd, vn, NEON_FCVTZU);
  } else {
    DCHECK(vd.Is1D() || vd.Is1S() || vd.Is2D() || vd.Is2S() || vd.Is4S());
    NEONShiftRightImmediate(vd, vn, fbits, NEON_FCVTZU_imm);
  }
}

void Assembler::NEONFP2RegMisc(const VRegister& vd, const VRegister& vn,
                               Instr op) {
  DCHECK(AreSameFormat(vd, vn));
  Emit(FPFormat(vd) | op | Rn(vn) | Rd(vd));
}

void Assembler::NEON2RegMisc(const VRegister& vd, const VRegister& vn,
                             NEON2RegMiscOp vop, int value) {
  DCHECK(AreSameFormat(vd, vn));
  DCHECK_EQ(value, 0);
  USE(value);

  Instr format, op = vop;
  if (vd.IsScalar()) {
    op |= NEON_Q | NEONScalar;
    format = SFormat(vd);
  } else {
    format = VFormat(vd);
  }

  Emit(format | op | Rn(vn) | Rd(vd));
}

void Assembler::cmeq(const VRegister& vd, const VRegister& vn, int value) {
  DCHECK(vd.IsVector() || vd.Is1D());
  NEON2RegMisc(vd, vn, NEON_CMEQ_zero, value);
}

void Assembler::cmge(const VRegister& vd, const VRegister& vn, int value) {
  DCHECK(vd.IsVector() || vd.Is1D());
  NEON2RegMisc(vd, vn, NEON_CMGE_zero, value);
}

void Assembler::cmgt(const VRegister& vd, const VRegister& vn, int value) {
  DCHECK(vd.IsVector() || vd.Is1D());
  NEON2RegMisc(vd, vn, NEON_CMGT_zero, value);
}

void Assembler::cmle(const VRegister& vd, const VRegister& vn, int value) {
  DCHECK(vd.IsVector() || vd.Is1D());
  NEON2RegMisc(vd, vn, NEON_CMLE_zero, value);
}

void Assembler::cmlt(const VRegister& vd, const VRegister& vn, int value) {
  DCHECK(vd.IsVector() || vd.Is1D());
  NEON2RegMisc(vd, vn, NEON_CMLT_zero, value);
}

#define NEON_3SAME_LIST(V)                                         \
  V(add, NEON_ADD, vd.IsVector() || vd.Is1D())                     \
  V(addp, NEON_ADDP, vd.IsVector() || vd.Is1D())                   \
  V(sub, NEON_SUB, vd.IsVector() || vd.Is1D())                     \
  V(cmeq, NEON_CMEQ, vd.IsVector() || vd.Is1D())                   \
  V(cmge, NEON_CMGE, vd.IsVector() || vd.Is1D())                   \
  V(cmgt, NEON_CMGT, vd.IsVector() || vd.Is1D())                   \
  V(cmhi, NEON_CMHI, vd.IsVector() || vd.Is1D())                   \
  V(cmhs, NEON_CMHS, vd.IsVector() || vd.Is1D())                   \
  V(cmtst, NEON_CMTST, vd.IsVector() || vd.Is1D())                 \
  V(sshl, NEON_SSHL, vd.IsVector() || vd.Is1D())                   \
  V(ushl, NEON_USHL, vd.IsVector() || vd.Is1D())                   \
  V(srshl, NEON_SRSHL, vd.IsVector() || vd.Is1D())                 \
  V(urshl, NEON_URSHL, vd.IsVector() || vd.Is1D())                 \
  V(sqdmulh, NEON_SQDMULH, vd.IsLaneSizeH() || vd.IsLaneSizeS())   \
  V(sqrdmulh, NEON_SQRDMULH, vd.IsLaneSizeH() || vd.IsLaneSizeS()) \
  V(shadd, NEON_SHADD, vd.IsVector() && !vd.IsLaneSizeD())         \
  V(uhadd, NEON_UHADD, vd.IsVector() && !vd.IsLaneSizeD())         \
  V(srhadd, NEON_SRHADD, vd.IsVector() && !vd.IsLaneSizeD())       \
  V(urhadd, NEON_URHADD, vd.IsVector() && !vd.IsLaneSizeD())       \
  V(shsub, NEON_SHSUB, vd.IsVector() && !vd.IsLaneSizeD())         \
  V(uhsub, NEON_UHSUB, vd.IsVector() && !vd.IsLaneSizeD())         \
  V(smax, NEON_SMAX, vd.IsVector() && !vd.IsLaneSizeD())           \
  V(smaxp, NEON_SMAXP, vd.IsVector() && !vd.IsLaneSizeD())         \
  V(smin, NEON_SMIN, vd.IsVector() && !vd.IsLaneSizeD())           \
  V(sminp, NEON_SMINP, vd.IsVector() && !vd.IsLaneSizeD())         \
  V(umax, NEON_UMAX, vd.IsVector() && !vd.IsLaneSizeD())           \
  V(umaxp, NEON_UMAXP, vd.IsVector() && !vd.IsLaneSizeD())         \
  V(umin, NEON_UMIN, vd.IsVector() && !vd.IsLaneSizeD())           \
  V(uminp, NEON_UMINP, vd.IsVector() && !vd.IsLaneSizeD())         \
  V(saba, NEON_SABA, vd.IsVector() && !vd.IsLaneSizeD())           \
  V(sabd, NEON_SABD, vd.IsVector() && !vd.IsLaneSizeD())           \
  V(uaba, NEON_UABA, vd.IsVector() && !vd.IsLaneSizeD())           \
  V(uabd, NEON_UABD, vd.IsVector() && !vd.IsLaneSizeD())           \
  V(mla, NEON_MLA, vd.IsVector() && !vd.IsLaneSizeD())             \
  V(mls, NEON_MLS, vd.IsVector() && !vd.IsLaneSizeD())             \
  V(mul, NEON_MUL, vd.IsVector() && !vd.IsLaneSizeD())             \
  V(and_, NEON_AND, vd.Is8B() || vd.Is16B())                       \
  V(orr, NEON_ORR, vd.Is8B() || vd.Is16B())                        \
  V(orn, NEON_ORN, vd.Is8B() || vd.Is16B())                        \
  V(eor, NEON_EOR, vd.Is8B() || vd.Is16B())                        \
  V(bic, NEON_BIC, vd.Is8B() || vd.Is16B())                        \
  V(bit, NEON_BIT, vd.Is8B() || vd.Is16B())                        \
  V(bif, NEON_BIF, vd.Is8B() || vd.Is16B())                        \
  V(bsl, NEON_BSL, vd.Is8B() || vd.Is16B())                        \
  V(pmul, NEON_PMUL, vd.Is8B() || vd.Is16B())                      \
  V(uqadd, NEON_UQADD, true)                                       \
  V(sqadd, NEON_SQADD, true)                                       \
  V(uqsub, NEON_UQSUB, true)                                       \
  V(sqsub, NEON_SQSUB, true)                                       \
  V(sqshl, NEON_SQSHL, true)                                       \
  V(uqshl, NEON_UQSHL, true)                                       \
  V(sqrshl, NEON_SQRSHL, true)                                     \
  V(uqrshl, NEON_UQRSHL, true)

#define DEFINE_ASM_FUNC(FN, OP, AS)                            \
  void Assembler::FN(const VRegister& vd, const VRegister& vn, \
                     const VRegister& vm) {                    \
    DCHECK(AS);                                                \
    NEON3Same(vd, vn, vm, OP);                                 \
  }
NEON_3SAME_LIST(DEFINE_ASM_FUNC)
#undef DEFINE_ASM_FUNC

3094
#define NEON_FP3SAME_LIST_V2(V)                 \
3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133
  V(fadd, NEON_FADD, FADD)                      \
  V(fsub, NEON_FSUB, FSUB)                      \
  V(fmul, NEON_FMUL, FMUL)                      \
  V(fdiv, NEON_FDIV, FDIV)                      \
  V(fmax, NEON_FMAX, FMAX)                      \
  V(fmaxnm, NEON_FMAXNM, FMAXNM)                \
  V(fmin, NEON_FMIN, FMIN)                      \
  V(fminnm, NEON_FMINNM, FMINNM)                \
  V(fmulx, NEON_FMULX, NEON_FMULX_scalar)       \
  V(frecps, NEON_FRECPS, NEON_FRECPS_scalar)    \
  V(frsqrts, NEON_FRSQRTS, NEON_FRSQRTS_scalar) \
  V(fabd, NEON_FABD, NEON_FABD_scalar)          \
  V(fmla, NEON_FMLA, 0)                         \
  V(fmls, NEON_FMLS, 0)                         \
  V(facge, NEON_FACGE, NEON_FACGE_scalar)       \
  V(facgt, NEON_FACGT, NEON_FACGT_scalar)       \
  V(fcmeq, NEON_FCMEQ, NEON_FCMEQ_scalar)       \
  V(fcmge, NEON_FCMGE, NEON_FCMGE_scalar)       \
  V(fcmgt, NEON_FCMGT, NEON_FCMGT_scalar)       \
  V(faddp, NEON_FADDP, 0)                       \
  V(fmaxp, NEON_FMAXP, 0)                       \
  V(fminp, NEON_FMINP, 0)                       \
  V(fmaxnmp, NEON_FMAXNMP, 0)                   \
  V(fminnmp, NEON_FMINNMP, 0)

#define DEFINE_ASM_FUNC(FN, VEC_OP, SCA_OP)                    \
  void Assembler::FN(const VRegister& vd, const VRegister& vn, \
                     const VRegister& vm) {                    \
    Instr op;                                                  \
    if ((SCA_OP != 0) && vd.IsScalar()) {                      \
      DCHECK(vd.Is1S() || vd.Is1D());                          \
      op = SCA_OP;                                             \
    } else {                                                   \
      DCHECK(vd.IsVector());                                   \
      DCHECK(vd.Is2S() || vd.Is2D() || vd.Is4S());             \
      op = VEC_OP;                                             \
    }                                                          \
    NEONFP3Same(vd, vn, vm, op);                               \
  }
3134
NEON_FP3SAME_LIST_V2(DEFINE_ASM_FUNC)
3135 3136 3137 3138 3139 3140 3141 3142 3143 3144 3145 3146 3147 3148 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 3161 3162 3163 3164 3165 3166 3167 3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186 3187 3188 3189 3190 3191
#undef DEFINE_ASM_FUNC

void Assembler::addp(const VRegister& vd, const VRegister& vn) {
  DCHECK((vd.Is1D() && vn.Is2D()));
  Emit(SFormat(vd) | NEON_ADDP_scalar | Rn(vn) | Rd(vd));
}

void Assembler::faddp(const VRegister& vd, const VRegister& vn) {
  DCHECK((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()));
  Emit(FPFormat(vd) | NEON_FADDP_scalar | Rn(vn) | Rd(vd));
}

void Assembler::fmaxp(const VRegister& vd, const VRegister& vn) {
  DCHECK((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()));
  Emit(FPFormat(vd) | NEON_FMAXP_scalar | Rn(vn) | Rd(vd));
}

void Assembler::fminp(const VRegister& vd, const VRegister& vn) {
  DCHECK((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()));
  Emit(FPFormat(vd) | NEON_FMINP_scalar | Rn(vn) | Rd(vd));
}

void Assembler::fmaxnmp(const VRegister& vd, const VRegister& vn) {
  DCHECK((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()));
  Emit(FPFormat(vd) | NEON_FMAXNMP_scalar | Rn(vn) | Rd(vd));
}

void Assembler::fminnmp(const VRegister& vd, const VRegister& vn) {
  DCHECK((vd.Is1S() && vn.Is2S()) || (vd.Is1D() && vn.Is2D()));
  Emit(FPFormat(vd) | NEON_FMINNMP_scalar | Rn(vn) | Rd(vd));
}

void Assembler::orr(const VRegister& vd, const int imm8, const int left_shift) {
  NEONModifiedImmShiftLsl(vd, imm8, left_shift, NEONModifiedImmediate_ORR);
}

void Assembler::mov(const VRegister& vd, const VRegister& vn) {
  DCHECK(AreSameFormat(vd, vn));
  if (vd.IsD()) {
    orr(vd.V8B(), vn.V8B(), vn.V8B());
  } else {
    DCHECK(vd.IsQ());
    orr(vd.V16B(), vn.V16B(), vn.V16B());
  }
}

void Assembler::bic(const VRegister& vd, const int imm8, const int left_shift) {
  NEONModifiedImmShiftLsl(vd, imm8, left_shift, NEONModifiedImmediate_BIC);
}

void Assembler::movi(const VRegister& vd, const uint64_t imm, Shift shift,
                     const int shift_amount) {
  DCHECK((shift == LSL) || (shift == MSL));
  if (vd.Is2D() || vd.Is1D()) {
    DCHECK_EQ(shift_amount, 0);
    int imm8 = 0;
    for (int i = 0; i < 8; ++i) {
3192 3193 3194
      int byte = (imm >> (i * 8)) & 0xFF;
      DCHECK((byte == 0) || (byte == 0xFF));
      if (byte == 0xFF) {
3195 3196 3197 3198 3199
        imm8 |= (1 << i);
      }
    }
    Instr q = vd.Is2D() ? NEON_Q : 0;
    Emit(q | NEONModImmOp(1) | NEONModifiedImmediate_MOVI |
3200
         ImmNEONabcdefgh(imm8) | NEONCmode(0xE) | Rd(vd));
3201 3202 3203 3204 3205 3206 3207 3208 3209 3210 3211 3212 3213 3214 3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 3231 3232 3233 3234 3235 3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258 3259 3260 3261 3262 3263 3264 3265 3266 3267 3268 3269 3270 3271 3272 3273 3274 3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285 3286 3287 3288 3289 3290 3291 3292 3293 3294 3295 3296 3297 3298 3299 3300 3301 3302 3303 3304 3305 3306 3307 3308 3309 3310 3311 3312 3313 3314 3315 3316 3317 3318 3319 3320 3321 3322 3323 3324 3325 3326 3327 3328 3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367 3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378 3379 3380 3381 3382 3383 3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397 3398 3399 3400 3401 3402 3403 3404 3405 3406 3407 3408 3409 3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422 3423 3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438 3439 3440 3441 3442 3443 3444 3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455 3456 3457 3458 3459 3460 3461 3462 3463 3464 3465 3466 3467 3468 3469 3470 3471 3472 3473 3474 3475 3476 3477 3478 3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490 3491 3492 3493 3494
  } else if (shift == LSL) {
    NEONModifiedImmShiftLsl(vd, static_cast<int>(imm), shift_amount,
                            NEONModifiedImmediate_MOVI);
  } else {
    NEONModifiedImmShiftMsl(vd, static_cast<int>(imm), shift_amount,
                            NEONModifiedImmediate_MOVI);
  }
}

void Assembler::mvn(const VRegister& vd, const VRegister& vn) {
  DCHECK(AreSameFormat(vd, vn));
  if (vd.IsD()) {
    not_(vd.V8B(), vn.V8B());
  } else {
    DCHECK(vd.IsQ());
    not_(vd.V16B(), vn.V16B());
  }
}

void Assembler::mvni(const VRegister& vd, const int imm8, Shift shift,
                     const int shift_amount) {
  DCHECK((shift == LSL) || (shift == MSL));
  if (shift == LSL) {
    NEONModifiedImmShiftLsl(vd, imm8, shift_amount, NEONModifiedImmediate_MVNI);
  } else {
    NEONModifiedImmShiftMsl(vd, imm8, shift_amount, NEONModifiedImmediate_MVNI);
  }
}

void Assembler::NEONFPByElement(const VRegister& vd, const VRegister& vn,
                                const VRegister& vm, int vm_index,
                                NEONByIndexedElementOp vop) {
  DCHECK(AreSameFormat(vd, vn));
  DCHECK((vd.Is2S() && vm.Is1S()) || (vd.Is4S() && vm.Is1S()) ||
         (vd.Is1S() && vm.Is1S()) || (vd.Is2D() && vm.Is1D()) ||
         (vd.Is1D() && vm.Is1D()));
  DCHECK((vm.Is1S() && (vm_index < 4)) || (vm.Is1D() && (vm_index < 2)));

  Instr op = vop;
  int index_num_bits = vm.Is1S() ? 2 : 1;
  if (vd.IsScalar()) {
    op |= NEON_Q | NEONScalar;
  }

  Emit(FPFormat(vd) | op | ImmNEONHLM(vm_index, index_num_bits) | Rm(vm) |
       Rn(vn) | Rd(vd));
}

void Assembler::NEONByElement(const VRegister& vd, const VRegister& vn,
                              const VRegister& vm, int vm_index,
                              NEONByIndexedElementOp vop) {
  DCHECK(AreSameFormat(vd, vn));
  DCHECK((vd.Is4H() && vm.Is1H()) || (vd.Is8H() && vm.Is1H()) ||
         (vd.Is1H() && vm.Is1H()) || (vd.Is2S() && vm.Is1S()) ||
         (vd.Is4S() && vm.Is1S()) || (vd.Is1S() && vm.Is1S()));
  DCHECK((vm.Is1H() && (vm.code() < 16) && (vm_index < 8)) ||
         (vm.Is1S() && (vm_index < 4)));

  Instr format, op = vop;
  int index_num_bits = vm.Is1H() ? 3 : 2;
  if (vd.IsScalar()) {
    op |= NEONScalar | NEON_Q;
    format = SFormat(vn);
  } else {
    format = VFormat(vn);
  }
  Emit(format | op | ImmNEONHLM(vm_index, index_num_bits) | Rm(vm) | Rn(vn) |
       Rd(vd));
}

void Assembler::NEONByElementL(const VRegister& vd, const VRegister& vn,
                               const VRegister& vm, int vm_index,
                               NEONByIndexedElementOp vop) {
  DCHECK((vd.Is4S() && vn.Is4H() && vm.Is1H()) ||
         (vd.Is4S() && vn.Is8H() && vm.Is1H()) ||
         (vd.Is1S() && vn.Is1H() && vm.Is1H()) ||
         (vd.Is2D() && vn.Is2S() && vm.Is1S()) ||
         (vd.Is2D() && vn.Is4S() && vm.Is1S()) ||
         (vd.Is1D() && vn.Is1S() && vm.Is1S()));

  DCHECK((vm.Is1H() && (vm.code() < 16) && (vm_index < 8)) ||
         (vm.Is1S() && (vm_index < 4)));

  Instr format, op = vop;
  int index_num_bits = vm.Is1H() ? 3 : 2;
  if (vd.IsScalar()) {
    op |= NEONScalar | NEON_Q;
    format = SFormat(vn);
  } else {
    format = VFormat(vn);
  }
  Emit(format | op | ImmNEONHLM(vm_index, index_num_bits) | Rm(vm) | Rn(vn) |
       Rd(vd));
}

#define NEON_BYELEMENT_LIST(V)              \
  V(mul, NEON_MUL_byelement, vn.IsVector()) \
  V(mla, NEON_MLA_byelement, vn.IsVector()) \
  V(mls, NEON_MLS_byelement, vn.IsVector()) \
  V(sqdmulh, NEON_SQDMULH_byelement, true)  \
  V(sqrdmulh, NEON_SQRDMULH_byelement, true)

#define DEFINE_ASM_FUNC(FN, OP, AS)                            \
  void Assembler::FN(const VRegister& vd, const VRegister& vn, \
                     const VRegister& vm, int vm_index) {      \
    DCHECK(AS);                                                \
    NEONByElement(vd, vn, vm, vm_index, OP);                   \
  }
NEON_BYELEMENT_LIST(DEFINE_ASM_FUNC)
#undef DEFINE_ASM_FUNC

#define NEON_FPBYELEMENT_LIST(V) \
  V(fmul, NEON_FMUL_byelement)   \
  V(fmla, NEON_FMLA_byelement)   \
  V(fmls, NEON_FMLS_byelement)   \
  V(fmulx, NEON_FMULX_byelement)

#define DEFINE_ASM_FUNC(FN, OP)                                \
  void Assembler::FN(const VRegister& vd, const VRegister& vn, \
                     const VRegister& vm, int vm_index) {      \
    NEONFPByElement(vd, vn, vm, vm_index, OP);                 \
  }
NEON_FPBYELEMENT_LIST(DEFINE_ASM_FUNC)
#undef DEFINE_ASM_FUNC

#define NEON_BYELEMENT_LONG_LIST(V)                              \
  V(sqdmull, NEON_SQDMULL_byelement, vn.IsScalar() || vn.IsD())  \
  V(sqdmull2, NEON_SQDMULL_byelement, vn.IsVector() && vn.IsQ()) \
  V(sqdmlal, NEON_SQDMLAL_byelement, vn.IsScalar() || vn.IsD())  \
  V(sqdmlal2, NEON_SQDMLAL_byelement, vn.IsVector() && vn.IsQ()) \
  V(sqdmlsl, NEON_SQDMLSL_byelement, vn.IsScalar() || vn.IsD())  \
  V(sqdmlsl2, NEON_SQDMLSL_byelement, vn.IsVector() && vn.IsQ()) \
  V(smull, NEON_SMULL_byelement, vn.IsVector() && vn.IsD())      \
  V(smull2, NEON_SMULL_byelement, vn.IsVector() && vn.IsQ())     \
  V(umull, NEON_UMULL_byelement, vn.IsVector() && vn.IsD())      \
  V(umull2, NEON_UMULL_byelement, vn.IsVector() && vn.IsQ())     \
  V(smlal, NEON_SMLAL_byelement, vn.IsVector() && vn.IsD())      \
  V(smlal2, NEON_SMLAL_byelement, vn.IsVector() && vn.IsQ())     \
  V(umlal, NEON_UMLAL_byelement, vn.IsVector() && vn.IsD())      \
  V(umlal2, NEON_UMLAL_byelement, vn.IsVector() && vn.IsQ())     \
  V(smlsl, NEON_SMLSL_byelement, vn.IsVector() && vn.IsD())      \
  V(smlsl2, NEON_SMLSL_byelement, vn.IsVector() && vn.IsQ())     \
  V(umlsl, NEON_UMLSL_byelement, vn.IsVector() && vn.IsD())      \
  V(umlsl2, NEON_UMLSL_byelement, vn.IsVector() && vn.IsQ())

#define DEFINE_ASM_FUNC(FN, OP, AS)                            \
  void Assembler::FN(const VRegister& vd, const VRegister& vn, \
                     const VRegister& vm, int vm_index) {      \
    DCHECK(AS);                                                \
    NEONByElementL(vd, vn, vm, vm_index, OP);                  \
  }
NEON_BYELEMENT_LONG_LIST(DEFINE_ASM_FUNC)
#undef DEFINE_ASM_FUNC

void Assembler::suqadd(const VRegister& vd, const VRegister& vn) {
  NEON2RegMisc(vd, vn, NEON_SUQADD);
}

void Assembler::usqadd(const VRegister& vd, const VRegister& vn) {
  NEON2RegMisc(vd, vn, NEON_USQADD);
}

void Assembler::abs(const VRegister& vd, const VRegister& vn) {
  DCHECK(vd.IsVector() || vd.Is1D());
  NEON2RegMisc(vd, vn, NEON_ABS);
}

void Assembler::sqabs(const VRegister& vd, const VRegister& vn) {
  NEON2RegMisc(vd, vn, NEON_SQABS);
}

void Assembler::neg(const VRegister& vd, const VRegister& vn) {
  DCHECK(vd.IsVector() || vd.Is1D());
  NEON2RegMisc(vd, vn, NEON_NEG);
}

void Assembler::sqneg(const VRegister& vd, const VRegister& vn) {
  NEON2RegMisc(vd, vn, NEON_SQNEG);
}

void Assembler::NEONXtn(const VRegister& vd, const VRegister& vn,
                        NEON2RegMiscOp vop) {
  Instr format, op = vop;
  if (vd.IsScalar()) {
    DCHECK((vd.Is1B() && vn.Is1H()) || (vd.Is1H() && vn.Is1S()) ||
           (vd.Is1S() && vn.Is1D()));
    op |= NEON_Q | NEONScalar;
    format = SFormat(vd);
  } else {
    DCHECK((vd.Is8B() && vn.Is8H()) || (vd.Is4H() && vn.Is4S()) ||
           (vd.Is2S() && vn.Is2D()) || (vd.Is16B() && vn.Is8H()) ||
           (vd.Is8H() && vn.Is4S()) || (vd.Is4S() && vn.Is2D()));
    format = VFormat(vd);
  }
  Emit(format | op | Rn(vn) | Rd(vd));
}

void Assembler::xtn(const VRegister& vd, const VRegister& vn) {
  DCHECK(vd.IsVector() && vd.IsD());
  NEONXtn(vd, vn, NEON_XTN);
}

void Assembler::xtn2(const VRegister& vd, const VRegister& vn) {
  DCHECK(vd.IsVector() && vd.IsQ());
  NEONXtn(vd, vn, NEON_XTN);
}

void Assembler::sqxtn(const VRegister& vd, const VRegister& vn) {
  DCHECK(vd.IsScalar() || vd.IsD());
  NEONXtn(vd, vn, NEON_SQXTN);
}

void Assembler::sqxtn2(const VRegister& vd, const VRegister& vn) {
  DCHECK(vd.IsVector() && vd.IsQ());
  NEONXtn(vd, vn, NEON_SQXTN);
}

void Assembler::sqxtun(const VRegister& vd, const VRegister& vn) {
  DCHECK(vd.IsScalar() || vd.IsD());
  NEONXtn(vd, vn, NEON_SQXTUN);
}

void Assembler::sqxtun2(const VRegister& vd, const VRegister& vn) {
  DCHECK(vd.IsVector() && vd.IsQ());
  NEONXtn(vd, vn, NEON_SQXTUN);
}

void Assembler::uqxtn(const VRegister& vd, const VRegister& vn) {
  DCHECK(vd.IsScalar() || vd.IsD());
  NEONXtn(vd, vn, NEON_UQXTN);
}

void Assembler::uqxtn2(const VRegister& vd, const VRegister& vn) {
  DCHECK(vd.IsVector() && vd.IsQ());
  NEONXtn(vd, vn, NEON_UQXTN);
}

// NEON NOT and RBIT are distinguised by bit 22, the bottom bit of "size".
void Assembler::not_(const VRegister& vd, const VRegister& vn) {
  DCHECK(AreSameFormat(vd, vn));
  DCHECK(vd.Is8B() || vd.Is16B());
  Emit(VFormat(vd) | NEON_RBIT_NOT | Rn(vn) | Rd(vd));
}

void Assembler::rbit(const VRegister& vd, const VRegister& vn) {
  DCHECK(AreSameFormat(vd, vn));
  DCHECK(vd.Is8B() || vd.Is16B());
  Emit(VFormat(vn) | (1 << NEONSize_offset) | NEON_RBIT_NOT | Rn(vn) | Rd(vd));
}

void Assembler::ext(const VRegister& vd, const VRegister& vn,
                    const VRegister& vm, int index) {
  DCHECK(AreSameFormat(vd, vn, vm));
  DCHECK(vd.Is8B() || vd.Is16B());
  DCHECK((0 <= index) && (index < vd.LaneCount()));
  Emit(VFormat(vd) | NEON_EXT | Rm(vm) | ImmNEONExt(index) | Rn(vn) | Rd(vd));
}

void Assembler::dup(const VRegister& vd, const VRegister& vn, int vn_index) {
  Instr q, scalar;

  // We support vn arguments of the form vn.VxT() or vn.T(), where x is the
  // number of lanes, and T is b, h, s or d.
  int lane_size = vn.LaneSizeInBytes();
  NEONFormatField format;
  switch (lane_size) {
    case 1:
      format = NEON_16B;
      break;
    case 2:
      format = NEON_8H;
      break;
    case 4:
      format = NEON_4S;
      break;
    default:
      DCHECK_EQ(lane_size, 8);
      format = NEON_2D;
      break;
  }

  if (vd.IsScalar()) {
    q = NEON_Q;
    scalar = NEONScalar;
  } else {
    DCHECK(!vd.Is1D());
    q = vd.IsD() ? 0 : NEON_Q;
    scalar = 0;
  }
  Emit(q | scalar | NEON_DUP_ELEMENT | ImmNEON5(format, vn_index) | Rn(vn) |
       Rd(vd));
}

void Assembler::dcptr(Label* label) {
3495
  BlockPoolsScope no_pool_inbetween(this);
3496 3497 3498 3499 3500 3501 3502 3503
  RecordRelocInfo(RelocInfo::INTERNAL_REFERENCE);
  if (label->is_bound()) {
    // The label is bound, so it does not need to be updated and the internal
    // reference should be emitted.
    //
    // In this case, label->pos() returns the offset of the label from the
    // start of the buffer.
    internal_reference_positions_.push_back(pc_offset());
3504
    dc64(reinterpret_cast<uintptr_t>(buffer_start_ + label->pos()));
3505 3506 3507 3508 3509 3510 3511 3512 3513
  } else {
    int32_t offset;
    if (label->is_linked()) {
      // The label is linked, so the internal reference should be added
      // onto the end of the label's link chain.
      //
      // In this case, label->pos() returns the offset of the last linked
      // instruction from the start of the buffer.
      offset = label->pos() - pc_offset();
3514
      DCHECK_NE(offset, kStartOfLabelLinkChain);
3515 3516 3517 3518 3519 3520 3521 3522 3523 3524 3525 3526 3527
    } else {
      // The label is unused, so it now becomes linked and the internal
      // reference is at the start of the new link chain.
      offset = kStartOfLabelLinkChain;
    }
    // The instruction at pc is now the last link in the label's chain.
    label->link_to(pc_offset());

    // Traditionally the offset to the previous instruction in the chain is
    // encoded in the instruction payload (e.g. branch range) but internal
    // references are not instructions so while unbound they are encoded as
    // two consecutive brk instructions. The two 16-bit immediates are used
    // to encode the offset.
3528
    offset >>= kInstrSizeLog2;
3529 3530 3531 3532 3533 3534 3535 3536 3537 3538 3539 3540 3541 3542 3543 3544 3545 3546 3547 3548 3549
    DCHECK(is_int32(offset));
    uint32_t high16 = unsigned_bitextract_32(31, 16, offset);
    uint32_t low16 = unsigned_bitextract_32(15, 0, offset);

    brk(high16);
    brk(low16);
  }
}

// Below, a difference in case for the same letter indicates a
// negated bit. If b is 1, then B is 0.
uint32_t Assembler::FPToImm8(double imm) {
  DCHECK(IsImmFP64(imm));
  // bits: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
  //       0000.0000.0000.0000.0000.0000.0000.0000
  uint64_t bits = bit_cast<uint64_t>(imm);
  // bit7: a000.0000
  uint64_t bit7 = ((bits >> 63) & 0x1) << 7;
  // bit6: 0b00.0000
  uint64_t bit6 = ((bits >> 61) & 0x1) << 6;
  // bit5_to_0: 00cd.efgh
3550
  uint64_t bit5_to_0 = (bits >> 48) & 0x3F;
3551 3552 3553 3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564 3565 3566 3567

  return static_cast<uint32_t>(bit7 | bit6 | bit5_to_0);
}

Instr Assembler::ImmFP(double imm) { return FPToImm8(imm) << ImmFP_offset; }
Instr Assembler::ImmNEONFP(double imm) {
  return ImmNEONabcdefgh(FPToImm8(imm));
}

// Code generation helpers.
void Assembler::MoveWide(const Register& rd, uint64_t imm, int shift,
                         MoveWideImmediateOp mov_op) {
  // Ignore the top 32 bits of an immediate if we're moving to a W register.
  if (rd.Is32Bits()) {
    // Check that the top 32 bits are zero (a positive 32-bit number) or top
    // 33 bits are one (a negative 32-bit number, sign extended to 64 bits).
    DCHECK(((imm >> kWRegSizeInBits) == 0) ||
3568
           ((imm >> (kWRegSizeInBits - 1)) == 0x1FFFFFFFF));
3569 3570
    imm &= kWRegMask;
  }
3571

3572 3573
  if (shift >= 0) {
    // Explicit shift specified.
3574 3575
    DCHECK((shift == 0) || (shift == 16) || (shift == 32) || (shift == 48));
    DCHECK(rd.Is64Bits() || (shift == 0) || (shift == 16));
3576 3577 3578 3579 3580
    shift /= 16;
  } else {
    // Calculate a new immediate and shift combination to encode the immediate
    // argument.
    shift = 0;
3581
    if ((imm & ~0xFFFFULL) == 0) {
3582
      // Nothing to do.
3583
    } else if ((imm & ~(0xFFFFULL << 16)) == 0) {
3584 3585
      imm >>= 16;
      shift = 1;
3586
    } else if ((imm & ~(0xFFFFULL << 32)) == 0) {
3587
      DCHECK(rd.Is64Bits());
3588 3589
      imm >>= 32;
      shift = 2;
3590
    } else if ((imm & ~(0xFFFFULL << 48)) == 0) {
3591
      DCHECK(rd.Is64Bits());
3592 3593 3594 3595 3596
      imm >>= 48;
      shift = 3;
    }
  }

3597
  DCHECK(is_uint16(imm));
3598

3599 3600
  Emit(SF(rd) | MoveWideImmediateFixed | mov_op | Rd(rd) |
       ImmMoveWide(static_cast<int>(imm)) | ShiftMoveWide(shift));
3601 3602
}

3603 3604 3605
void Assembler::AddSub(const Register& rd, const Register& rn,
                       const Operand& operand, FlagsUpdate S, AddSubOp op) {
  DCHECK_EQ(rd.SizeInBits(), rn.SizeInBits());
3606
  DCHECK(!operand.NeedsRelocation(this));
3607
  if (operand.IsImmediate()) {
3608
    int64_t immediate = operand.ImmediateValue();
3609
    DCHECK(IsImmAddSub(immediate));
3610 3611
    Instr dest_reg = (S == SetFlags) ? Rd(rd) : RdSP(rd);
    Emit(SF(rd) | AddSubImmediateFixed | op | Flags(S) |
3612
         ImmAddSub(static_cast<int>(immediate)) | dest_reg | RnSP(rn));
3613
  } else if (operand.IsShiftedRegister()) {
3614 3615
    DCHECK_EQ(operand.reg().SizeInBits(), rd.SizeInBits());
    DCHECK_NE(operand.shift(), ROR);
3616 3617 3618 3619 3620 3621 3622 3623 3624

    // For instructions of the form:
    //   add/sub   wsp, <Wn>, <Wm> [, LSL #0-3 ]
    //   add/sub   <Wd>, wsp, <Wm> [, LSL #0-3 ]
    //   add/sub   wsp, wsp, <Wm> [, LSL #0-3 ]
    //   adds/subs <Wd>, wsp, <Wm> [, LSL #0-3 ]
    // or their 64-bit register equivalents, convert the operand from shifted to
    // extended register mode, and emit an add/sub extended instruction.
    if (rn.IsSP() || rd.IsSP()) {
3625
      DCHECK(!(rd.IsSP() && (S == SetFlags)));
3626 3627 3628 3629 3630 3631
      DataProcExtendedRegister(rd, rn, operand.ToExtendedRegister(), S,
                               AddSubExtendedFixed | op);
    } else {
      DataProcShiftedRegister(rd, rn, operand, S, AddSubShiftedFixed | op);
    }
  } else {
3632
    DCHECK(operand.IsExtendedRegister());
3633 3634 3635 3636
    DataProcExtendedRegister(rd, rn, operand, S, AddSubExtendedFixed | op);
  }
}

3637 3638
void Assembler::AddSubWithCarry(const Register& rd, const Register& rn,
                                const Operand& operand, FlagsUpdate S,
3639
                                AddSubWithCarryOp op) {
3640 3641
  DCHECK_EQ(rd.SizeInBits(), rn.SizeInBits());
  DCHECK_EQ(rd.SizeInBits(), operand.reg().SizeInBits());
3642 3643
  DCHECK(operand.IsShiftedRegister() && (operand.shift_amount() == 0));
  DCHECK(!operand.NeedsRelocation(this));
3644 3645 3646 3647
  Emit(SF(rd) | op | Flags(S) | Rm(operand.reg()) | Rn(rn) | Rd(rd));
}

void Assembler::hlt(int code) {
3648
  DCHECK(is_uint16(code));
3649 3650 3651 3652
  Emit(HLT | ImmException(code));
}

void Assembler::brk(int code) {
3653
  DCHECK(is_uint16(code));
3654 3655 3656
  Emit(BRK | ImmException(code));
}

3657 3658
void Assembler::EmitStringData(const char* string) {
  size_t len = strlen(string) + 1;
3659
  DCHECK_LE(RoundUp(len, kInstrSize), static_cast<size_t>(kGap));
3660
  EmitData(string, static_cast<int>(len));
3661
  // Pad with nullptr characters until pc_ is aligned.
3662
  const char pad[] = {'\0', '\0', '\0', '\0'};
3663
  static_assert(sizeof(pad) == kInstrSize,
3664
                "Size of padding must match instruction size.");
3665
  EmitData(pad, RoundUp(pc_offset(), kInstrSize) - pc_offset());
3666 3667
}

3668
void Assembler::debug(const char* message, uint32_t code, Instr params) {
3669
  if (options().enable_simulator_code) {
3670
    // The arguments to the debug marker need to be contiguous in memory, so
3671 3672
    // make sure we don't try to emit pools.
    BlockPoolsScope scope(this);
3673 3674 3675 3676

    Label start;
    bind(&start);

3677
    // Refer to instructions-arm64.h for a description of the marker and its
3678 3679
    // arguments.
    hlt(kImmExceptionIsDebug);
3680
    DCHECK_EQ(SizeOfCodeGeneratedSince(&start), kDebugCodeOffset);
3681
    dc32(code);
3682
    DCHECK_EQ(SizeOfCodeGeneratedSince(&start), kDebugParamsOffset);
3683
    dc32(params);
3684
    DCHECK_EQ(SizeOfCodeGeneratedSince(&start), kDebugMessageOffset);
3685 3686 3687 3688
    EmitStringData(message);
    hlt(kImmExceptionIsUnreachable);

    return;
3689
  }
3690 3691

  if (params & BREAK) {
3692
    brk(0);
3693 3694 3695
  }
}

3696 3697
void Assembler::Logical(const Register& rd, const Register& rn,
                        const Operand& operand, LogicalOp op) {
3698 3699
  DCHECK(rd.SizeInBits() == rn.SizeInBits());
  DCHECK(!operand.NeedsRelocation(this));
3700
  if (operand.IsImmediate()) {
3701
    int64_t immediate = operand.ImmediateValue();
3702 3703
    unsigned reg_size = rd.SizeInBits();

3704 3705
    DCHECK_NE(immediate, 0);
    DCHECK_NE(immediate, -1);
3706
    DCHECK(rd.Is64Bits() || is_uint32(immediate));
3707 3708 3709 3710 3711 3712 3713 3714 3715 3716 3717 3718 3719 3720 3721 3722

    // If the operation is NOT, invert the operation and immediate.
    if ((op & NOT) == NOT) {
      op = static_cast<LogicalOp>(op & ~NOT);
      immediate = rd.Is64Bits() ? ~immediate : (~immediate & kWRegMask);
    }

    unsigned n, imm_s, imm_r;
    if (IsImmLogical(immediate, reg_size, &n, &imm_s, &imm_r)) {
      // Immediate can be encoded in the instruction.
      LogicalImmediate(rd, rn, n, imm_s, imm_r, op);
    } else {
      // This case is handled in the macro assembler.
      UNREACHABLE();
    }
  } else {
3723 3724
    DCHECK(operand.IsShiftedRegister());
    DCHECK(operand.reg().SizeInBits() == rd.SizeInBits());
3725 3726 3727 3728 3729
    Instr dp_op = static_cast<Instr>(op | LogicalShiftedFixed);
    DataProcShiftedRegister(rd, rn, operand, LeaveFlags, dp_op);
  }
}

3730 3731
void Assembler::LogicalImmediate(const Register& rd, const Register& rn,
                                 unsigned n, unsigned imm_s, unsigned imm_r,
3732 3733 3734 3735 3736 3737 3738 3739
                                 LogicalOp op) {
  unsigned reg_size = rd.SizeInBits();
  Instr dest_reg = (op == ANDS) ? Rd(rd) : RdSP(rd);
  Emit(SF(rd) | LogicalImmediateFixed | op | BitN(n, reg_size) |
       ImmSetBits(imm_s, reg_size) | ImmRotate(imm_r, reg_size) | dest_reg |
       Rn(rn));
}

3740 3741
void Assembler::ConditionalCompare(const Register& rn, const Operand& operand,
                                   StatusFlags nzcv, Condition cond,
3742 3743
                                   ConditionalCompareOp op) {
  Instr ccmpop;
3744
  DCHECK(!operand.NeedsRelocation(this));
3745
  if (operand.IsImmediate()) {
3746
    int64_t immediate = operand.ImmediateValue();
3747
    DCHECK(IsImmConditionalCompare(immediate));
3748 3749
    ccmpop = ConditionalCompareImmediateFixed | op |
             ImmCondCmp(static_cast<unsigned>(immediate));
3750
  } else {
3751
    DCHECK(operand.IsShiftedRegister() && (operand.shift_amount() == 0));
3752 3753 3754 3755 3756
    ccmpop = ConditionalCompareRegisterFixed | op | Rm(operand.reg());
  }
  Emit(SF(rn) | ccmpop | Cond(cond) | Rn(rn) | Nzcv(nzcv));
}

3757
void Assembler::DataProcessing1Source(const Register& rd, const Register& rn,
3758
                                      DataProcessing1SourceOp op) {
3759
  DCHECK(rd.SizeInBits() == rn.SizeInBits());
3760 3761 3762
  Emit(SF(rn) | op | Rn(rn) | Rd(rd));
}

3763 3764
void Assembler::FPDataProcessing1Source(const VRegister& vd,
                                        const VRegister& vn,
3765
                                        FPDataProcessing1SourceOp op) {
3766
  Emit(FPType(vn) | op | Rn(vn) | Rd(vd));
3767 3768
}

3769 3770 3771
void Assembler::FPDataProcessing2Source(const VRegister& fd,
                                        const VRegister& fn,
                                        const VRegister& fm,
3772
                                        FPDataProcessing2SourceOp op) {
3773 3774
  DCHECK(fd.SizeInBits() == fn.SizeInBits());
  DCHECK(fd.SizeInBits() == fm.SizeInBits());
3775 3776 3777
  Emit(FPType(fd) | op | Rm(fm) | Rn(fn) | Rd(fd));
}

3778 3779 3780 3781
void Assembler::FPDataProcessing3Source(const VRegister& fd,
                                        const VRegister& fn,
                                        const VRegister& fm,
                                        const VRegister& fa,
3782
                                        FPDataProcessing3SourceOp op) {
3783
  DCHECK(AreSameSizeAndType(fd, fn, fm, fa));
3784 3785 3786
  Emit(FPType(fd) | op | Rm(fm) | Rn(fn) | Rd(fd) | Ra(fa));
}

3787 3788 3789 3790 3791 3792 3793 3794 3795 3796 3797 3798 3799 3800 3801 3802 3803 3804 3805 3806 3807 3808 3809 3810 3811 3812 3813 3814 3815 3816 3817 3818 3819 3820 3821 3822 3823 3824 3825
void Assembler::NEONModifiedImmShiftLsl(const VRegister& vd, const int imm8,
                                        const int left_shift,
                                        NEONModifiedImmediateOp op) {
  DCHECK(vd.Is8B() || vd.Is16B() || vd.Is4H() || vd.Is8H() || vd.Is2S() ||
         vd.Is4S());
  DCHECK((left_shift == 0) || (left_shift == 8) || (left_shift == 16) ||
         (left_shift == 24));
  DCHECK(is_uint8(imm8));

  int cmode_1, cmode_2, cmode_3;
  if (vd.Is8B() || vd.Is16B()) {
    DCHECK_EQ(op, NEONModifiedImmediate_MOVI);
    cmode_1 = 1;
    cmode_2 = 1;
    cmode_3 = 1;
  } else {
    cmode_1 = (left_shift >> 3) & 1;
    cmode_2 = left_shift >> 4;
    cmode_3 = 0;
    if (vd.Is4H() || vd.Is8H()) {
      DCHECK((left_shift == 0) || (left_shift == 8));
      cmode_3 = 1;
    }
  }
  int cmode = (cmode_3 << 3) | (cmode_2 << 2) | (cmode_1 << 1);

  Instr q = vd.IsQ() ? NEON_Q : 0;

  Emit(q | op | ImmNEONabcdefgh(imm8) | NEONCmode(cmode) | Rd(vd));
}

void Assembler::NEONModifiedImmShiftMsl(const VRegister& vd, const int imm8,
                                        const int shift_amount,
                                        NEONModifiedImmediateOp op) {
  DCHECK(vd.Is2S() || vd.Is4S());
  DCHECK((shift_amount == 8) || (shift_amount == 16));
  DCHECK(is_uint8(imm8));

  int cmode_0 = (shift_amount >> 4) & 1;
3826
  int cmode = 0xC | cmode_0;
3827 3828 3829 3830 3831

  Instr q = vd.IsQ() ? NEON_Q : 0;

  Emit(q | op | ImmNEONabcdefgh(imm8) | NEONCmode(cmode) | Rd(vd));
}
3832

3833
void Assembler::EmitShift(const Register& rd, const Register& rn, Shift shift,
3834 3835 3836 3837 3838 3839 3840 3841 3842 3843 3844 3845 3846 3847 3848 3849 3850 3851 3852
                          unsigned shift_amount) {
  switch (shift) {
    case LSL:
      lsl(rd, rn, shift_amount);
      break;
    case LSR:
      lsr(rd, rn, shift_amount);
      break;
    case ASR:
      asr(rd, rn, shift_amount);
      break;
    case ROR:
      ror(rd, rn, shift_amount);
      break;
    default:
      UNREACHABLE();
  }
}

3853 3854
void Assembler::EmitExtendShift(const Register& rd, const Register& rn,
                                Extend extend, unsigned left_shift) {
3855
  DCHECK(rd.SizeInBits() >= rn.SizeInBits());
3856 3857 3858 3859 3860 3861 3862 3863 3864 3865 3866 3867
  unsigned reg_size = rd.SizeInBits();
  // Use the correct size of register.
  Register rn_ = Register::Create(rn.code(), rd.SizeInBits());
  // Bits extracted are high_bit:0.
  unsigned high_bit = (8 << (extend & 0x3)) - 1;
  // Number of bits left in the result that are not introduced by the shift.
  unsigned non_shift_bits = (reg_size - left_shift) & (reg_size - 1);

  if ((non_shift_bits > high_bit) || (non_shift_bits == 0)) {
    switch (extend) {
      case UXTB:
      case UXTH:
3868 3869 3870
      case UXTW:
        ubfm(rd, rn_, non_shift_bits, high_bit);
        break;
3871 3872
      case SXTB:
      case SXTH:
3873 3874 3875
      case SXTW:
        sbfm(rd, rn_, non_shift_bits, high_bit);
        break;
3876 3877
      case UXTX:
      case SXTX: {
3878
        DCHECK_EQ(rn.SizeInBits(), kXRegSizeInBits);
3879 3880 3881 3882
        // Nothing to extend. Just shift.
        lsl(rd, rn_, left_shift);
        break;
      }
3883 3884
      default:
        UNREACHABLE();
3885 3886 3887 3888 3889 3890 3891
    }
  } else {
    // No need to extend as the extended bits would be shifted away.
    lsl(rd, rn_, left_shift);
  }
}

3892 3893
void Assembler::DataProcShiftedRegister(const Register& rd, const Register& rn,
                                        const Operand& operand, FlagsUpdate S,
3894
                                        Instr op) {
3895 3896 3897
  DCHECK(operand.IsShiftedRegister());
  DCHECK(rn.Is64Bits() || (rn.Is32Bits() && is_uint5(operand.shift_amount())));
  DCHECK(!operand.NeedsRelocation(this));
3898 3899 3900
  Emit(SF(rd) | op | Flags(S) | ShiftDP(operand.shift()) |
       ImmDPShift(operand.shift_amount()) | Rm(operand.reg()) | Rn(rn) |
       Rd(rd));
3901 3902
}

3903 3904
void Assembler::DataProcExtendedRegister(const Register& rd, const Register& rn,
                                         const Operand& operand, FlagsUpdate S,
3905
                                         Instr op) {
3906
  DCHECK(!operand.NeedsRelocation(this));
3907 3908 3909 3910 3911 3912 3913 3914
  Instr dest_reg = (S == SetFlags) ? Rd(rd) : RdSP(rd);
  Emit(SF(rd) | op | Flags(S) | Rm(operand.reg()) |
       ExtendMode(operand.extend()) | ImmExtendShift(operand.shift_amount()) |
       dest_reg | RnSP(rn));
}

bool Assembler::IsImmAddSub(int64_t immediate) {
  return is_uint12(immediate) ||
3915
         (is_uint12(immediate >> 12) && ((immediate & 0xFFF) == 0));
3916 3917
}

3918
void Assembler::LoadStore(const CPURegister& rt, const MemOperand& addr,
3919 3920 3921 3922
                          LoadStoreOp op) {
  Instr memop = op | Rt(rt) | RnSP(addr.base());

  if (addr.IsImmediateOffset()) {
3923
    unsigned size = CalcLSDataSize(op);
3924 3925
    if (IsImmLSScaled(addr.offset(), size)) {
      int offset = static_cast<int>(addr.offset());
3926 3927 3928
      // Use the scaled addressing mode.
      Emit(LoadStoreUnsignedOffsetFixed | memop |
           ImmLSUnsigned(offset >> size));
3929 3930
    } else if (IsImmLSUnscaled(addr.offset())) {
      int offset = static_cast<int>(addr.offset());
3931 3932 3933 3934 3935 3936 3937 3938 3939 3940 3941 3942 3943 3944 3945 3946 3947 3948
      // Use the unscaled addressing mode.
      Emit(LoadStoreUnscaledOffsetFixed | memop | ImmLS(offset));
    } else {
      // This case is handled in the macro assembler.
      UNREACHABLE();
    }
  } else if (addr.IsRegisterOffset()) {
    Extend ext = addr.extend();
    Shift shift = addr.shift();
    unsigned shift_amount = addr.shift_amount();

    // LSL is encoded in the option field as UXTX.
    if (shift == LSL) {
      ext = UXTX;
    }

    // Shifts are encoded in one bit, indicating a left shift by the memory
    // access size.
3949
    DCHECK((shift_amount == 0) ||
3950 3951 3952 3953 3954
           (shift_amount == static_cast<unsigned>(CalcLSDataSize(op))));
    Emit(LoadStoreRegisterOffsetFixed | memop | Rm(addr.regoffset()) |
         ExtendMode(ext) | ImmShiftLS((shift_amount > 0) ? 1 : 0));
  } else {
    // Pre-index and post-index modes.
3955
    DCHECK_NE(rt, addr.base());
3956 3957
    if (IsImmLSUnscaled(addr.offset())) {
      int offset = static_cast<int>(addr.offset());
3958 3959 3960
      if (addr.IsPreIndex()) {
        Emit(LoadStorePreIndexFixed | memop | ImmLS(offset));
      } else {
3961
        DCHECK(addr.IsPostIndex());
3962 3963 3964 3965 3966 3967 3968 3969 3970
        Emit(LoadStorePostIndexFixed | memop | ImmLS(offset));
      }
    } else {
      // This case is handled in the macro assembler.
      UNREACHABLE();
    }
  }
}

3971
bool Assembler::IsImmLSUnscaled(int64_t offset) { return is_int9(offset); }
3972

3973
bool Assembler::IsImmLSScaled(int64_t offset, unsigned size) {
3974 3975 3976
  bool offset_is_size_multiple =
      (static_cast<int64_t>(static_cast<uint64_t>(offset >> size) << size) ==
       offset);
3977 3978 3979
  return offset_is_size_multiple && is_uint12(offset >> size);
}

3980
bool Assembler::IsImmLSPair(int64_t offset, unsigned size) {
3981 3982 3983
  bool offset_is_size_multiple =
      (static_cast<int64_t>(static_cast<uint64_t>(offset >> size) << size) ==
       offset);
3984 3985 3986
  return offset_is_size_multiple && is_int7(offset >> size);
}

3987
bool Assembler::IsImmLLiteral(int64_t offset) {
3988
  int inst_size = static_cast<int>(kInstrSizeLog2);
3989
  bool offset_is_inst_multiple =
3990 3991
      (static_cast<int64_t>(static_cast<uint64_t>(offset >> inst_size)
                            << inst_size) == offset);
3992 3993
  DCHECK_GT(offset, 0);
  offset >>= kLoadLiteralScaleLog2;
3994 3995 3996
  return offset_is_inst_multiple && is_intn(offset, ImmLLiteral_width);
}

3997 3998 3999 4000 4001 4002 4003
// Test if a given value can be encoded in the immediate field of a logical
// instruction.
// If it can be encoded, the function returns true, and values pointed to by n,
// imm_s and imm_r are updated with immediates encoded in the format required
// by the corresponding fields in the logical instruction.
// If it can not be encoded, the function returns false, and the values pointed
// to by n, imm_s and imm_r are undefined.
4004 4005
bool Assembler::IsImmLogical(uint64_t value, unsigned width, unsigned* n,
                             unsigned* imm_s, unsigned* imm_r) {
4006
  DCHECK((n != nullptr) && (imm_s != nullptr) && (imm_r != nullptr));
4007
  DCHECK((width == kWRegSizeInBits) || (width == kXRegSizeInBits));
4008

4009 4010
  bool negate = false;

4011 4012 4013
  // Logical immediates are encoded using parameters n, imm_s and imm_r using
  // the following table:
  //
4014 4015 4016 4017 4018 4019 4020
  //    N   imms    immr    size        S             R
  //    1  ssssss  rrrrrr    64    UInt(ssssss)  UInt(rrrrrr)
  //    0  0sssss  xrrrrr    32    UInt(sssss)   UInt(rrrrr)
  //    0  10ssss  xxrrrr    16    UInt(ssss)    UInt(rrrr)
  //    0  110sss  xxxrrr     8    UInt(sss)     UInt(rrr)
  //    0  1110ss  xxxxrr     4    UInt(ss)      UInt(rr)
  //    0  11110s  xxxxxr     2    UInt(s)       UInt(r)
4021 4022
  // (s bits must not be all set)
  //
4023 4024 4025
  // A pattern is constructed of size bits, where the least significant S+1 bits
  // are set. The pattern is rotated right by R, and repeated across a 32 or
  // 64-bit value, depending on destination register width.
4026
  //
4027 4028 4029 4030 4031 4032 4033
  // Put another way: the basic format of a logical immediate is a single
  // contiguous stretch of 1 bits, repeated across the whole word at intervals
  // given by a power of 2. To identify them quickly, we first locate the
  // lowest stretch of 1 bits, then the next 1 bit above that; that combination
  // is different for every logical immediate, so it gives us all the
  // information we need to identify the only logical immediate that our input
  // could be, and then we simply check if that's the value we actually have.
4034
  //
4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045
  // (The rotation parameter does give the possibility of the stretch of 1 bits
  // going 'round the end' of the word. To deal with that, we observe that in
  // any situation where that happens the bitwise NOT of the value is also a
  // valid logical immediate. So we simply invert the input whenever its low bit
  // is set, and then we know that the rotated case can't arise.)

  if (value & 1) {
    // If the low bit is 1, negate the value, and set a flag to remember that we
    // did (so that we can adjust the return values appropriately).
    negate = true;
    value = ~value;
4046
  }
4047

4048 4049 4050 4051 4052
  if (width == kWRegSizeInBits) {
    // To handle 32-bit logical immediates, the very easiest thing is to repeat
    // the input value twice to make a 64-bit word. The correct encoding of that
    // as a logical immediate will also be the correct encoding of the 32-bit
    // value.
4053

4054 4055 4056 4057 4058
    // The most-significant 32 bits may not be zero (ie. negate is true) so
    // shift the value left before duplicating it.
    value <<= kWRegSizeInBits;
    value |= value >> kWRegSizeInBits;
  }
4059

4060 4061 4062 4063 4064 4065 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 4076 4077 4078 4079 4080 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093
  // The basic analysis idea: imagine our input word looks like this.
  //
  //    0011111000111110001111100011111000111110001111100011111000111110
  //                                                          c  b    a
  //                                                          |<--d-->|
  //
  // We find the lowest set bit (as an actual power-of-2 value, not its index)
  // and call it a. Then we add a to our original number, which wipes out the
  // bottommost stretch of set bits and replaces it with a 1 carried into the
  // next zero bit. Then we look for the new lowest set bit, which is in
  // position b, and subtract it, so now our number is just like the original
  // but with the lowest stretch of set bits completely gone. Now we find the
  // lowest set bit again, which is position c in the diagram above. Then we'll
  // measure the distance d between bit positions a and c (using CLZ), and that
  // tells us that the only valid logical immediate that could possibly be equal
  // to this number is the one in which a stretch of bits running from a to just
  // below b is replicated every d bits.
  uint64_t a = LargestPowerOf2Divisor(value);
  uint64_t value_plus_a = value + a;
  uint64_t b = LargestPowerOf2Divisor(value_plus_a);
  uint64_t value_plus_a_minus_b = value_plus_a - b;
  uint64_t c = LargestPowerOf2Divisor(value_plus_a_minus_b);

  int d, clz_a, out_n;
  uint64_t mask;

  if (c != 0) {
    // The general case, in which there is more than one stretch of set bits.
    // Compute the repeat distance d, and set up a bitmask covering the basic
    // unit of repetition (i.e. a word with the bottom d bits set). Also, in all
    // of these cases the N bit of the output will be zero.
    clz_a = CountLeadingZeros(a, kXRegSizeInBits);
    int clz_c = CountLeadingZeros(c, kXRegSizeInBits);
    d = clz_a - clz_c;
4094
    mask = ((uint64_t{1} << d) - 1);
4095 4096 4097 4098 4099 4100 4101 4102 4103 4104 4105 4106 4107 4108 4109 4110 4111 4112 4113 4114
    out_n = 0;
  } else {
    // Handle degenerate cases.
    //
    // If any of those 'find lowest set bit' operations didn't find a set bit at
    // all, then the word will have been zero thereafter, so in particular the
    // last lowest_set_bit operation will have returned zero. So we can test for
    // all the special case conditions in one go by seeing if c is zero.
    if (a == 0) {
      // The input was zero (or all 1 bits, which will come to here too after we
      // inverted it at the start of the function), for which we just return
      // false.
      return false;
    } else {
      // Otherwise, if c was zero but a was not, then there's just one stretch
      // of set bits in our word, meaning that we have the trivial case of
      // d == 64 and only one 'repetition'. Set up all the same variables as in
      // the general case above, and set the N bit in the output.
      clz_a = CountLeadingZeros(a, kXRegSizeInBits);
      d = 64;
4115
      mask = ~uint64_t{0};
4116
      out_n = 1;
4117
    }
4118
  }
4119

4120
  // If the repeat period d is not a power of two, it can't be encoded.
4121
  if (!base::bits::IsPowerOfTwo(d)) {
4122 4123
    return false;
  }
4124

4125 4126 4127 4128 4129
  if (((b - a) & ~mask) != 0) {
    // If the bit stretch (b - a) does not fit within the mask derived from the
    // repeat period, then fail.
    return false;
  }
4130

4131 4132 4133 4134 4135 4136 4137 4138
  // The only possible option is b - a repeated every d bits. Now we're going to
  // actually construct the valid logical immediate derived from that
  // specification, and see if it equals our original input.
  //
  // To repeat a value every d bits, we multiply it by a number of the form
  // (1 + 2^d + 2^(2d) + ...), i.e. 0x0001000100010001 or similar. These can
  // be derived using a table lookup on CLZ(d).
  static const uint64_t multipliers[] = {
4139 4140
      0x0000000000000001UL, 0x0000000100000001UL, 0x0001000100010001UL,
      0x0101010101010101UL, 0x1111111111111111UL, 0x5555555555555555UL,
4141 4142 4143
  };
  int multiplier_idx = CountLeadingZeros(d, kXRegSizeInBits) - 57;
  // Ensure that the index to the multipliers array is within bounds.
4144
  DCHECK((multiplier_idx >= 0) &&
4145
         (static_cast<size_t>(multiplier_idx) < arraysize(multipliers)));
4146 4147 4148 4149 4150
  uint64_t multiplier = multipliers[multiplier_idx];
  uint64_t candidate = (b - a) * multiplier;

  if (value != candidate) {
    // The candidate pattern doesn't match our input value, so fail.
4151
    return false;
4152
  }
4153 4154 4155 4156 4157 4158 4159

  // We have a match! This is a valid logical immediate, so now we have to
  // construct the bits and pieces of the instruction encoding that generates
  // it.

  // Count the set bits in our basic stretch. The special case of clz(0) == -1
  // makes the answer come out right for stretches that reach the very top of
4160
  // the word (e.g. numbers like 0xFFFFC00000000000).
4161 4162 4163 4164 4165 4166 4167 4168 4169 4170 4171 4172 4173 4174 4175 4176 4177 4178 4179 4180 4181 4182 4183 4184 4185 4186 4187 4188 4189
  int clz_b = (b == 0) ? -1 : CountLeadingZeros(b, kXRegSizeInBits);
  int s = clz_a - clz_b;

  // Decide how many bits to rotate right by, to put the low bit of that basic
  // stretch in position a.
  int r;
  if (negate) {
    // If we inverted the input right at the start of this function, here's
    // where we compensate: the number of set bits becomes the number of clear
    // bits, and the rotation count is based on position b rather than position
    // a (since b is the location of the 'lowest' 1 bit after inversion).
    s = d - s;
    r = (clz_b + 1) & (d - 1);
  } else {
    r = (clz_a + 1) & (d - 1);
  }

  // Now we're done, except for having to encode the S output in such a way that
  // it gives both the number of set bits and the length of the repeated
  // segment. The s field is encoded like this:
  //
  //     imms    size        S
  //    ssssss    64    UInt(ssssss)
  //    0sssss    32    UInt(sssss)
  //    10ssss    16    UInt(ssss)
  //    110sss     8    UInt(sss)
  //    1110ss     4    UInt(ss)
  //    11110s     2    UInt(s)
  //
4190
  // So we 'or' (-d * 2) with our computed s to form imms.
4191
  *n = out_n;
4192
  *imm_s = ((-d * 2) | (s - 1)) & 0x3F;
4193 4194 4195
  *imm_r = r;

  return true;
4196 4197 4198 4199 4200 4201 4202 4203 4204
}

bool Assembler::IsImmConditionalCompare(int64_t immediate) {
  return is_uint5(immediate);
}

bool Assembler::IsImmFP32(float imm) {
  // Valid values will have the form:
  // aBbb.bbbc.defg.h000.0000.0000.0000.0000
4205
  uint32_t bits = bit_cast<uint32_t>(imm);
4206
  // bits[19..0] are cleared.
4207
  if ((bits & 0x7FFFF) != 0) {
4208 4209 4210 4211
    return false;
  }

  // bits[29..25] are all set or all cleared.
4212 4213
  uint32_t b_pattern = (bits >> 16) & 0x3E00;
  if (b_pattern != 0 && b_pattern != 0x3E00) {
4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228
    return false;
  }

  // bit[30] and bit[29] are opposite.
  if (((bits ^ (bits << 1)) & 0x40000000) == 0) {
    return false;
  }

  return true;
}

bool Assembler::IsImmFP64(double imm) {
  // Valid values will have the form:
  // aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
  // 0000.0000.0000.0000.0000.0000.0000.0000
4229
  uint64_t bits = bit_cast<uint64_t>(imm);
4230
  // bits[47..0] are cleared.
4231
  if ((bits & 0xFFFFFFFFFFFFL) != 0) {
4232 4233 4234 4235
    return false;
  }

  // bits[61..54] are all set or all cleared.
4236 4237
  uint32_t b_pattern = (bits >> 48) & 0x3FC0;
  if (b_pattern != 0 && b_pattern != 0x3FC0) {
4238 4239 4240 4241 4242 4243 4244 4245 4246 4247 4248 4249 4250
    return false;
  }

  // bit[62] and bit[61] are opposite.
  if (((bits ^ (bits << 1)) & 0x4000000000000000L) == 0) {
    return false;
  }

  return true;
}

void Assembler::GrowBuffer() {
  // Compute new buffer size.
4251 4252
  int old_size = buffer_->size();
  int new_size = std::min(2 * old_size, old_size + 1 * MB);
4253 4254 4255

  // Some internal data structures overflow for very large buffers,
  // they must ensure that kMaximalBufferSize is not too large.
4256
  if (new_size > kMaximalBufferSize) {
4257
    V8::FatalProcessOutOfMemory(nullptr, "Assembler::GrowBuffer");
4258
  }
4259 4260

  // Set up new buffer.
4261 4262 4263
  std::unique_ptr<AssemblerBuffer> new_buffer = buffer_->Grow(new_size);
  DCHECK_EQ(new_size, new_buffer->size());
  byte* new_start = new_buffer->start();
4264 4265

  // Copy the data.
4266 4267 4268 4269 4270 4271
  intptr_t pc_delta = new_start - buffer_start_;
  intptr_t rc_delta = (new_start + new_size) - (buffer_start_ + old_size);
  size_t reloc_size = (buffer_start_ + old_size) - reloc_info_writer.pos();
  memmove(new_start, buffer_start_, pc_offset());
  memmove(reloc_info_writer.pos() + rc_delta, reloc_info_writer.pos(),
          reloc_size);
4272 4273

  // Switch buffers.
4274 4275 4276
  buffer_ = std::move(new_buffer);
  buffer_start_ = new_start;
  pc_ += pc_delta;
4277 4278 4279 4280 4281 4282 4283
  reloc_info_writer.Reposition(reloc_info_writer.pos() + rc_delta,
                               reloc_info_writer.last_pc() + pc_delta);

  // None of our relocation types are pc relative pointing outside the code
  // buffer nor pc absolute pointing inside the code buffer, so there is no need
  // to relocate any emitted relocation entries.

4284 4285
  // Relocate internal references.
  for (auto pos : internal_reference_positions_) {
4286 4287 4288 4289
    Address address = reinterpret_cast<intptr_t>(buffer_start_) + pos;
    intptr_t internal_ref = ReadUnalignedValue<intptr_t>(address);
    internal_ref += pc_delta;
    WriteUnalignedValue<intptr_t>(address, internal_ref);
4290 4291
  }

4292
  // Pending relocation entries are also relative, no need to relocate.
4293 4294
}

4295 4296
void Assembler::RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data,
                                ConstantPoolMode constant_pool_mode) {
4297
  if ((rmode == RelocInfo::INTERNAL_REFERENCE) ||
4298
      (rmode == RelocInfo::CONST_POOL) || (rmode == RelocInfo::VENEER_POOL) ||
4299 4300
      (rmode == RelocInfo::DEOPT_SCRIPT_OFFSET) ||
      (rmode == RelocInfo::DEOPT_INLINING_ID) ||
4301
      (rmode == RelocInfo::DEOPT_REASON) || (rmode == RelocInfo::DEOPT_ID)) {
4302
    // Adjust code for new modes.
4303 4304
    DCHECK(RelocInfo::IsDeoptReason(rmode) || RelocInfo::IsDeoptId(rmode) ||
           RelocInfo::IsDeoptPosition(rmode) ||
4305
           RelocInfo::IsInternalReference(rmode) ||
4306
           RelocInfo::IsConstPool(rmode) || RelocInfo::IsVeneerPool(rmode));
4307
    // These modes do not need an entry in the constant pool.
4308
  } else if (constant_pool_mode == NEEDS_POOL_ENTRY) {
4309 4310 4311 4312
    if (RelocInfo::IsEmbeddedObjectMode(rmode)) {
      Handle<HeapObject> handle(reinterpret_cast<Address*>(data));
      data = AddEmbeddedObject(handle);
    }
4313 4314 4315 4316 4317 4318 4319 4320 4321 4322 4323
    if (rmode == RelocInfo::COMPRESSED_EMBEDDED_OBJECT) {
      if (constpool_.RecordEntry(static_cast<uint32_t>(data), rmode) ==
          RelocInfoStatus::kMustOmitForDuplicate) {
        return;
      }
    } else {
      if (constpool_.RecordEntry(static_cast<uint64_t>(data), rmode) ==
          RelocInfoStatus::kMustOmitForDuplicate) {
        return;
      }
    }
4324
  }
4325 4326
  // For modes that cannot use the constant pool, a different sequence of
  // instructions will be emitted by this function's caller.
4327

4328 4329
  if (!ShouldRecordRelocInfo(rmode)) return;

4330 4331
  // Callers should ensure that constant pool emission is blocked until the
  // instruction the reloc info is associated with has been emitted.
4332
  DCHECK(constpool_.IsBlocked());
4333

4334
  // We do not try to reuse pool constants.
4335
  RelocInfo rinfo(reinterpret_cast<Address>(pc_), rmode, data, Code());
4336 4337 4338

  DCHECK_GE(buffer_space(), kMaxRelocSize);  // too late to grow buffer here
  reloc_info_writer.Write(&rinfo);
4339 4340
}

4341
void Assembler::near_jump(int offset, RelocInfo::Mode rmode) {
4342
  BlockPoolsScope no_pool_before_b_instr(this);
4343 4344 4345 4346 4347
  if (!RelocInfo::IsNone(rmode)) RecordRelocInfo(rmode, offset, NO_POOL_ENTRY);
  b(offset);
}

void Assembler::near_call(int offset, RelocInfo::Mode rmode) {
4348
  BlockPoolsScope no_pool_before_bl_instr(this);
4349 4350 4351 4352 4353
  if (!RelocInfo::IsNone(rmode)) RecordRelocInfo(rmode, offset, NO_POOL_ENTRY);
  bl(offset);
}

void Assembler::near_call(HeapObjectRequest request) {
4354
  BlockPoolsScope no_pool_before_bl_instr(this);
4355
  RequestHeapObject(request);
4356
  EmbeddedObjectIndex index = AddEmbeddedObject(Handle<Code>());
4357
  RecordRelocInfo(RelocInfo::CODE_TARGET, index, NO_POOL_ENTRY);
4358 4359
  DCHECK(is_int32(index));
  bl(static_cast<int>(index));
4360
}
4361

4362
// Constant Pool
4363

4364 4365 4366 4367 4368 4369 4370 4371 4372 4373 4374
void ConstantPool::EmitPrologue(Alignment require_alignment) {
  // Recorded constant pool size is expressed in number of 32-bits words,
  // and includes prologue and alignment, but not the jump around the pool
  // and the size of the marker itself.
  const int marker_size = 1;
  int word_count =
      ComputeSize(Jump::kOmitted, require_alignment) / kInt32Size - marker_size;
  assm_->Emit(LDR_x_lit | Assembler::ImmLLiteral(word_count) |
              Assembler::Rt(xzr));
  assm_->EmitPoolGuard();
}
4375

4376 4377 4378 4379 4380 4381 4382 4383 4384
int ConstantPool::PrologueSize(Jump require_jump) const {
  // Prologue is:
  //   b   over  ;; if require_jump
  //   ldr xzr, #pool_size
  //   blr xzr
  int prologue_size = require_jump == Jump::kRequired ? kInstrSize : 0;
  prologue_size += 2 * kInstrSize;
  return prologue_size;
}
4385

4386 4387 4388 4389 4390 4391 4392 4393
void ConstantPool::SetLoadOffsetToConstPoolEntry(int load_offset,
                                                 Instruction* entry_offset,
                                                 const ConstantPoolKey& key) {
  Instruction* instr = assm_->InstructionAt(load_offset);
  // Instruction to patch must be 'ldr rd, [pc, #offset]' with offset == 0.
  DCHECK(instr->IsLdrLiteral() && instr->ImmLLiteral() == 0);
  instr->SetImmPCOffsetTarget(assm_->options(), entry_offset);
}
4394

4395 4396 4397 4398 4399 4400 4401 4402
void ConstantPool::Check(Emission force_emit, Jump require_jump,
                         size_t margin) {
  // Some short sequence of instruction must not be broken up by constant pool
  // emission, such sequences are protected by a ConstPool::BlockScope.
  if (IsBlocked()) {
    // Something is wrong if emission is forced and blocked at the same time.
    DCHECK_EQ(force_emit, Emission::kIfNeeded);
    return;
4403
  }
4404

4405 4406 4407 4408 4409 4410 4411 4412 4413 4414 4415 4416 4417 4418 4419 4420 4421 4422 4423
  // We emit a constant pool only if :
  //  * it is not empty
  //  * emission is forced by parameter force_emit (e.g. at function end).
  //  * emission is mandatory or opportune according to {ShouldEmitNow}.
  if (!IsEmpty() && (force_emit == Emission::kForced ||
                     ShouldEmitNow(require_jump, margin))) {
    // Emit veneers for branches that would go out of range during emission of
    // the constant pool.
    int worst_case_size = ComputeSize(Jump::kRequired, Alignment::kRequired);
    assm_->CheckVeneerPool(false, require_jump == Jump::kRequired,
                           assm_->kVeneerDistanceMargin + worst_case_size +
                               static_cast<int>(margin));

    // Check that the code buffer is large enough before emitting the constant
    // pool (this includes the gap to the relocation information).
    int needed_space = worst_case_size + assm_->kGap;
    while (assm_->buffer_space() <= needed_space) {
      assm_->GrowBuffer();
    }
4424

4425 4426 4427
    EmitAndClear(require_jump);
  }
  // Since a constant pool is (now) empty, move the check offset forward by
4428
  // the standard interval.
4429
  SetNextCheckIn(ConstantPool::kCheckInterval);
4430
}
4431

4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445
// Pool entries are accessed with pc relative load therefore this cannot be more
// than 1 * MB. Since constant pool emission checks are interval based, and we
// want to keep entries close to the code, we try to emit every 64KB.
const size_t ConstantPool::kMaxDistToPool32 = 1 * MB;
const size_t ConstantPool::kMaxDistToPool64 = 1 * MB;
const size_t ConstantPool::kCheckInterval = 128 * kInstrSize;
const size_t ConstantPool::kApproxDistToPool32 = 64 * KB;
const size_t ConstantPool::kApproxDistToPool64 = kApproxDistToPool32;

const size_t ConstantPool::kOpportunityDistToPool32 = 64 * KB;
const size_t ConstantPool::kOpportunityDistToPool64 = 64 * KB;
const size_t ConstantPool::kApproxMaxEntryCount = 512;

bool Assembler::ShouldEmitVeneer(int max_reachable_pc, size_t margin) {
4446
  // Account for the branch around the veneers and the guard.
4447
  int protection_offset = 2 * kInstrSize;
4448 4449 4450
  return static_cast<intptr_t>(pc_offset() + margin + protection_offset +
                               unresolved_branches_.size() *
                                   kMaxVeneerCodeSize) >= max_reachable_pc;
4451 4452
}

4453
void Assembler::RecordVeneerPool(int location_offset, int size) {
4454
  Assembler::BlockPoolsScope block_pools(this, PoolEmissionCheck::kSkip);
4455
  RelocInfo rinfo(reinterpret_cast<Address>(buffer_start_) + location_offset,
4456
                  RelocInfo::VENEER_POOL, static_cast<intptr_t>(size), Code());
4457 4458 4459
  reloc_info_writer.Write(&rinfo);
}

4460 4461
void Assembler::EmitVeneers(bool force_emit, bool need_protection,
                            size_t margin) {
4462
  BlockPoolsScope scope(this, PoolEmissionCheck::kSkip);
4463 4464
  RecordComment("[ Veneers");

4465 4466 4467 4468 4469 4470 4471 4472 4473
  // The exact size of the veneer pool must be recorded (see the comment at the
  // declaration site of RecordConstPool()), but computing the number of
  // veneers that will be generated is not obvious. So instead we remember the
  // current position and will record the size after the pool has been
  // generated.
  Label size_check;
  bind(&size_check);
  int veneer_pool_relocinfo_loc = pc_offset();

4474 4475 4476 4477 4478 4479 4480
  Label end;
  if (need_protection) {
    b(&end);
  }

  EmitVeneersGuard();

4481
#ifdef DEBUG
4482
  Label veneer_size_check;
4483
#endif
4484 4485 4486 4487 4488

  std::multimap<int, FarBranchInfo>::iterator it, it_to_delete;

  it = unresolved_branches_.begin();
  while (it != unresolved_branches_.end()) {
4489
    if (force_emit || ShouldEmitVeneer(it->first, margin)) {
4490 4491 4492 4493
      Instruction* branch = InstructionAt(it->second.pc_offset_);
      Label* label = it->second.label_;

#ifdef DEBUG
4494
      bind(&veneer_size_check);
4495 4496 4497 4498 4499
#endif
      // Patch the branch to point to the current position, and emit a branch
      // to the label.
      Instruction* veneer = reinterpret_cast<Instruction*>(pc_);
      RemoveBranchFromLabelLinkChain(branch, label, veneer);
4500
      branch->SetImmPCOffsetTarget(options(), veneer);
4501 4502
      b(label);
#ifdef DEBUG
4503
      DCHECK(SizeOfCodeGeneratedSince(&veneer_size_check) <=
4504
             static_cast<uint64_t>(kMaxVeneerCodeSize));
4505
      veneer_size_check.Unuse();
4506 4507 4508 4509 4510 4511 4512 4513 4514
#endif

      it_to_delete = it++;
      unresolved_branches_.erase(it_to_delete);
    } else {
      ++it;
    }
  }

4515
  // Record the veneer pool size.
4516
  int pool_size = static_cast<int>(SizeOfCodeGeneratedSince(&size_check));
4517 4518
  RecordVeneerPool(veneer_pool_relocinfo_loc, pool_size);

4519 4520 4521 4522
  if (unresolved_branches_.empty()) {
    next_veneer_pool_check_ = kMaxInt;
  } else {
    next_veneer_pool_check_ =
4523
        unresolved_branches_first_limit() - kVeneerDistanceCheckMargin;
4524 4525 4526 4527 4528 4529 4530
  }

  bind(&end);

  RecordComment("]");
}

4531
void Assembler::CheckVeneerPool(bool force_emit, bool require_jump,
4532
                                size_t margin) {
4533
  // There is nothing to do if there are no pending veneer pool entries.
4534
  if (unresolved_branches_.empty()) {
4535
    DCHECK_EQ(next_veneer_pool_check_, kMaxInt);
4536 4537 4538
    return;
  }

4539
  DCHECK(pc_offset() < unresolved_branches_first_limit());
4540 4541 4542 4543 4544

  // Some short sequence of instruction mustn't be broken up by veneer pool
  // emission, such sequences are protected by calls to BlockVeneerPoolFor and
  // BlockVeneerPoolScope.
  if (is_veneer_pool_blocked()) {
4545
    DCHECK(!force_emit);
4546 4547 4548 4549 4550 4551 4552
    return;
  }

  if (!require_jump) {
    // Prefer emitting veneers protected by an existing instruction.
    margin *= kVeneerNoProtectionFactor;
  }
4553 4554
  if (force_emit || ShouldEmitVeneers(margin)) {
    EmitVeneers(force_emit, require_jump, margin);
4555 4556
  } else {
    next_veneer_pool_check_ =
4557
        unresolved_branches_first_limit() - kVeneerDistanceCheckMargin;
4558 4559 4560
  }
}

4561
int Assembler::buffer_space() const {
4562
  return static_cast<int>(reloc_info_writer.pos() - pc_);
4563 4564 4565 4566 4567
}

void Assembler::RecordConstPool(int size) {
  // We only need this for debugger support, to correctly compute offsets in the
  // code.
4568
  Assembler::BlockPoolsScope block_pools(this);
4569 4570 4571
  RecordRelocInfo(RelocInfo::CONST_POOL, static_cast<intptr_t>(size));
}

4572
void PatchingAssembler::PatchAdrFar(int64_t target_offset) {
4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583
  // The code at the current instruction should be:
  //   adr  rd, 0
  //   nop  (adr_far)
  //   nop  (adr_far)
  //   movz scratch, 0

  // Verify the expected code.
  Instruction* expected_adr = InstructionAt(0);
  CHECK(expected_adr->IsAdr() && (expected_adr->ImmPCRel() == 0));
  int rd_code = expected_adr->Rd();
  for (int i = 0; i < kAdrFarPatchableNNops; ++i) {
4584
    CHECK(InstructionAt((i + 1) * kInstrSize)->IsNop(ADR_FAR_NOP));
4585 4586
  }
  Instruction* expected_movz =
4587
      InstructionAt((kAdrFarPatchableNInstrs - 1) * kInstrSize);
4588
  CHECK(expected_movz->IsMovz() && (expected_movz->ImmMoveWide() == 0) &&
4589 4590 4591 4592 4593
        (expected_movz->ShiftMoveWide() == 0));
  int scratch_code = expected_movz->Rd();

  // Patch to load the correct address.
  Register rd = Register::XRegFromCode(rd_code);
4594 4595 4596 4597 4598
  Register scratch = Register::XRegFromCode(scratch_code);
  // Addresses are only 48 bits.
  adr(rd, target_offset & 0xFFFF);
  movz(scratch, (target_offset >> 16) & 0xFFFF, 16);
  movk(scratch, (target_offset >> 32) & 0xFFFF, 32);
4599
  DCHECK_EQ(target_offset >> 48, 0);
4600
  add(rd, rd, scratch);
4601 4602
}

4603 4604 4605 4606 4607 4608 4609 4610 4611
void PatchingAssembler::PatchSubSp(uint32_t immediate) {
  // The code at the current instruction should be:
  //   sub sp, sp, #0

  // Verify the expected code.
  Instruction* expected_adr = InstructionAt(0);
  CHECK(expected_adr->IsAddSubImmediate());
  sub(sp, sp, immediate);
}
4612

4613 4614 4615 4616 4617 4618 4619 4620 4621 4622 4623
#undef NEON_3DIFF_LONG_LIST
#undef NEON_3DIFF_HN_LIST
#undef NEON_ACROSSLANES_LIST
#undef NEON_FP2REGMISC_FCVT_LIST
#undef NEON_FP2REGMISC_LIST
#undef NEON_3SAME_LIST
#undef NEON_FP3SAME_LIST_V2
#undef NEON_BYELEMENT_LIST
#undef NEON_FPBYELEMENT_LIST
#undef NEON_BYELEMENT_LONG_LIST

4624 4625
}  // namespace internal
}  // namespace v8
4626

4627
#endif  // V8_TARGET_ARCH_ARM64