// Copyright (c) 1994-2006 Sun Microsystems Inc.
// All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// - Redistribution in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the
// distribution.
//
// - Neither the name of Sun Microsystems or the names of contributors may
// be used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
// OF THE POSSIBILITY OF SUCH DAMAGE.

// The original source code covered by the above license has been
// modified significantly by Google Inc.
// Copyright 2012 the V8 project authors. All rights reserved.

#include "src/arm/assembler-arm.h"

#if V8_TARGET_ARCH_ARM

#include "src/arm/assembler-arm-inl.h"
#include "src/assembler-inl.h"
#include "src/base/bits.h"
#include "src/base/cpu.h"
#include "src/code-stubs.h"
#include "src/deoptimizer.h"
#include "src/macro-assembler.h"
#include "src/objects-inl.h"

namespace v8 {
namespace internal {

static const unsigned kArmv6 = 0u;
static const unsigned kArmv7 = kArmv6 | (1u << ARMv7);
static const unsigned kArmv7WithSudiv = kArmv7 | (1u << ARMv7_SUDIV);
static const unsigned kArmv8 = kArmv7WithSudiv | (1u << ARMv8);

static unsigned CpuFeaturesFromCommandLine() {
  unsigned result;
  if (strcmp(FLAG_arm_arch, "armv8") == 0) {
    result = kArmv8;
  } else if (strcmp(FLAG_arm_arch, "armv7+sudiv") == 0) {
    result = kArmv7WithSudiv;
  } else if (strcmp(FLAG_arm_arch, "armv7") == 0) {
    result = kArmv7;
  } else if (strcmp(FLAG_arm_arch, "armv6") == 0) {
    result = kArmv6;
  } else {
    fprintf(stderr, "Error: unrecognised value for --arm-arch ('%s').\n",
            FLAG_arm_arch);
    fprintf(stderr,
            "Supported values are:  armv8\n"
            "                       armv7+sudiv\n"
            "                       armv7\n"
            "                       armv6\n");
    FATAL("arm-arch");
  }

  // If any of the old (deprecated) flags are specified, print a warning, but
  // otherwise try to respect them for now.
  // TODO(jbramley): When all the old bots have been updated, remove this.
  if (FLAG_enable_armv7.has_value || FLAG_enable_vfp3.has_value ||
      FLAG_enable_32dregs.has_value || FLAG_enable_neon.has_value ||
      FLAG_enable_sudiv.has_value || FLAG_enable_armv8.has_value) {
    // As an approximation of the old behaviour, set the default values from the
    // arm_arch setting, then apply the flags over the top.
    bool enable_armv7 = (result & (1u << ARMv7)) != 0;
    bool enable_vfp3 = (result & (1u << ARMv7)) != 0;
    bool enable_32dregs = (result & (1u << ARMv7)) != 0;
    bool enable_neon = (result & (1u << ARMv7)) != 0;
    bool enable_sudiv = (result & (1u << ARMv7_SUDIV)) != 0;
    bool enable_armv8 = (result & (1u << ARMv8)) != 0;
    if (FLAG_enable_armv7.has_value) {
      fprintf(stderr,
              "Warning: --enable_armv7 is deprecated. "
              "Use --arm_arch instead.\n");
      enable_armv7 = FLAG_enable_armv7.value;
    }
    if (FLAG_enable_vfp3.has_value) {
      fprintf(stderr,
              "Warning: --enable_vfp3 is deprecated. "
              "Use --arm_arch instead.\n");
      enable_vfp3 = FLAG_enable_vfp3.value;
    }
    if (FLAG_enable_32dregs.has_value) {
      fprintf(stderr,
              "Warning: --enable_32dregs is deprecated. "
              "Use --arm_arch instead.\n");
      enable_32dregs = FLAG_enable_32dregs.value;
    }
    if (FLAG_enable_neon.has_value) {
      fprintf(stderr,
              "Warning: --enable_neon is deprecated. "
              "Use --arm_arch instead.\n");
      enable_neon = FLAG_enable_neon.value;
    }
    if (FLAG_enable_sudiv.has_value) {
      fprintf(stderr,
              "Warning: --enable_sudiv is deprecated. "
              "Use --arm_arch instead.\n");
      enable_sudiv = FLAG_enable_sudiv.value;
    }
    if (FLAG_enable_armv8.has_value) {
      fprintf(stderr,
              "Warning: --enable_armv8 is deprecated. "
              "Use --arm_arch instead.\n");
      enable_armv8 = FLAG_enable_armv8.value;
    }
    // Emulate the old implications.
    if (enable_armv8) {
      enable_vfp3 = true;
      enable_neon = true;
      enable_32dregs = true;
      enable_sudiv = true;
    }
    // Select the best available configuration.
    if (enable_armv7 && enable_vfp3 && enable_32dregs && enable_neon) {
      if (enable_sudiv) {
        if (enable_armv8) {
          result = kArmv8;
        } else {
          result = kArmv7WithSudiv;
        }
      } else {
        result = kArmv7;
      }
    } else {
      result = kArmv6;
    }
  }
  return result;
}
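
// Worked example (illustrative, not part of the original source): running
// with --arm_arch=armv7+sudiv makes CpuFeaturesFromCommandLine() return
// kArmv7WithSudiv, i.e. (1u << ARMv7) | (1u << ARMv7_SUDIV). An unrecognised
// value aborts with the usage message above.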

// Get the CPU features enabled by the build.
// For cross compilation the preprocessor symbols such as
// CAN_USE_ARMV7_INSTRUCTIONS and CAN_USE_VFP3_INSTRUCTIONS can be used to
// enable ARMv7 and VFPv3 instructions when building the snapshot. However,
// these flags should be consistent with a supported ARM configuration:
//  "armv6":       ARMv6 + VFPv2
//  "armv7":       ARMv7 + VFPv3-D32 + NEON
//  "armv7+sudiv": ARMv7 + VFPv4-D32 + NEON + SUDIV
//  "armv8":       ARMv8 (+ all of the above)
static constexpr unsigned CpuFeaturesFromCompiler() {
// TODO(jbramley): Once the build flags are simplified, these tests should
// also be simplified.

// Check *architectural* implications.
#if defined(CAN_USE_ARMV8_INSTRUCTIONS) && !defined(CAN_USE_ARMV7_INSTRUCTIONS)
#error "CAN_USE_ARMV8_INSTRUCTIONS should imply CAN_USE_ARMV7_INSTRUCTIONS"
#endif
#if defined(CAN_USE_ARMV8_INSTRUCTIONS) && !defined(CAN_USE_SUDIV)
#error "CAN_USE_ARMV8_INSTRUCTIONS should imply CAN_USE_SUDIV"
#endif
#if defined(CAN_USE_ARMV7_INSTRUCTIONS) != defined(CAN_USE_VFP3_INSTRUCTIONS)
// V8 requires VFP, and all ARMv7 devices with VFP have VFPv3. Similarly,
// VFPv3 isn't available before ARMv7.
#error "CAN_USE_ARMV7_INSTRUCTIONS should match CAN_USE_VFP3_INSTRUCTIONS"
#endif
#if defined(CAN_USE_NEON) && !defined(CAN_USE_ARMV7_INSTRUCTIONS)
#error "CAN_USE_NEON should imply CAN_USE_ARMV7_INSTRUCTIONS"
#endif

// Find compiler-implied features.
#if defined(CAN_USE_ARMV8_INSTRUCTIONS) &&                           \
    defined(CAN_USE_ARMV7_INSTRUCTIONS) && defined(CAN_USE_SUDIV) && \
    defined(CAN_USE_NEON) && defined(CAN_USE_VFP3_INSTRUCTIONS)
  return kArmv8;
#elif defined(CAN_USE_ARMV7_INSTRUCTIONS) && defined(CAN_USE_SUDIV) && \
    defined(CAN_USE_NEON) && defined(CAN_USE_VFP3_INSTRUCTIONS)
  return kArmv7WithSudiv;
#elif defined(CAN_USE_ARMV7_INSTRUCTIONS) && defined(CAN_USE_NEON) && \
    defined(CAN_USE_VFP3_INSTRUCTIONS)
  return kArmv7;
#else
  return kArmv6;
#endif
}
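
// Worked example (illustrative): a snapshot build that defines
// CAN_USE_ARMV7_INSTRUCTIONS, CAN_USE_VFP3_INSTRUCTIONS and CAN_USE_NEON,
// but not CAN_USE_SUDIV, takes the kArmv7 branch above; defining
// CAN_USE_SUDIV as well upgrades the result to kArmv7WithSudiv.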


void CpuFeatures::ProbeImpl(bool cross_compile) {
  dcache_line_size_ = 64;

  unsigned command_line = CpuFeaturesFromCommandLine();
  // Only use statically determined features for cross compile (snapshot).
  if (cross_compile) {
    supported_ |= command_line & CpuFeaturesFromCompiler();
    return;
  }

#ifndef __arm__
  // For the simulator build, use whatever the flags specify.
  supported_ |= command_line;

#else  // __arm__
  // Probe for additional features at runtime.
  base::CPU cpu;
  // Runtime detection is slightly fuzzy, and some inferences are necessary.
  unsigned runtime = kArmv6;
  // NEON and VFPv3 imply at least ARMv7-A.
  if (cpu.has_neon() && cpu.has_vfp3_d32()) {
    DCHECK(cpu.has_vfp3());
    runtime |= kArmv7;
    if (cpu.has_idiva()) {
      runtime |= kArmv7WithSudiv;
      if (cpu.architecture() >= 8) {
        runtime |= kArmv8;
      }
    }
  }

  // Use the best of the features found by CPU detection and those inferred from
  // the build system. In both cases, restrict available features using the
  // command-line. Note that the command-line flags are very permissive (kArmv8)
  // by default.
  supported_ |= command_line & CpuFeaturesFromCompiler();
  supported_ |= command_line & runtime;

  // Additional tuning options.

  // ARM Cortex-A9 and Cortex-A5 have 32-byte cache lines.
  if (cpu.implementer() == base::CPU::ARM &&
      (cpu.part() == base::CPU::ARM_CORTEX_A5 ||
       cpu.part() == base::CPU::ARM_CORTEX_A9)) {
    dcache_line_size_ = 32;
  }
#endif

  DCHECK_IMPLIES(IsSupported(ARMv7_SUDIV), IsSupported(ARMv7));
  DCHECK_IMPLIES(IsSupported(ARMv8), IsSupported(ARMv7_SUDIV));
}


void CpuFeatures::PrintTarget() {
  const char* arm_arch = nullptr;
  const char* arm_target_type = "";
  const char* arm_no_probe = "";
  const char* arm_fpu = "";
  const char* arm_thumb = "";
  const char* arm_float_abi = nullptr;

#if !defined __arm__
  arm_target_type = " simulator";
#endif

#if defined ARM_TEST_NO_FEATURE_PROBE
  arm_no_probe = " noprobe";
#endif

#if defined CAN_USE_ARMV8_INSTRUCTIONS
  arm_arch = "arm v8";
#elif defined CAN_USE_ARMV7_INSTRUCTIONS
  arm_arch = "arm v7";
#else
  arm_arch = "arm v6";
#endif

#if defined CAN_USE_NEON
  arm_fpu = " neon";
#elif defined CAN_USE_VFP3_INSTRUCTIONS
#  if defined CAN_USE_VFP32DREGS
  arm_fpu = " vfp3";
#  else
  arm_fpu = " vfp3-d16";
#  endif
#else
  arm_fpu = " vfp2";
#endif

#ifdef __arm__
  arm_float_abi = base::OS::ArmUsingHardFloat() ? "hard" : "softfp";
#elif USE_EABI_HARDFLOAT
  arm_float_abi = "hard";
#else
  arm_float_abi = "softfp";
#endif

#if defined __arm__ && (defined __thumb__ || defined __thumb2__)
  arm_thumb = " thumb";
#endif

  printf("target%s%s %s%s%s %s\n",
         arm_target_type, arm_no_probe, arm_arch, arm_fpu, arm_thumb,
         arm_float_abi);
}


void CpuFeatures::PrintFeatures() {
  printf("ARMv8=%d ARMv7=%d VFPv3=%d VFP32DREGS=%d NEON=%d SUDIV=%d",
         CpuFeatures::IsSupported(ARMv8), CpuFeatures::IsSupported(ARMv7),
         CpuFeatures::IsSupported(VFPv3), CpuFeatures::IsSupported(VFP32DREGS),
         CpuFeatures::IsSupported(NEON), CpuFeatures::IsSupported(SUDIV));
#ifdef __arm__
  bool eabi_hardfloat = base::OS::ArmUsingHardFloat();
#elif USE_EABI_HARDFLOAT
  bool eabi_hardfloat = true;
#else
  bool eabi_hardfloat = false;
#endif
  printf(" USE_EABI_HARDFLOAT=%d\n", eabi_hardfloat);
}


// -----------------------------------------------------------------------------
// Implementation of RelocInfo

// static
const int RelocInfo::kApplyMask =
    RelocInfo::ModeMask(RelocInfo::RELATIVE_CODE_TARGET);

bool RelocInfo::IsCodedSpecially() {
  // The deserializer needs to know whether a pointer is specially coded.  Being
  // specially coded on ARM means that it is a movw/movt instruction. We don't
  // generate those for relocatable pointers.
  return false;
}

bool RelocInfo::IsInConstantPool() {
  return Assembler::is_constant_pool_load(pc_);
}

int RelocInfo::GetDeoptimizationId(Isolate* isolate, DeoptimizeKind kind) {
  DCHECK(IsRuntimeEntry(rmode_));
  return Deoptimizer::GetDeoptimizationId(isolate, target_address(), kind);
}

void RelocInfo::set_js_to_wasm_address(Address address,
                                       ICacheFlushMode icache_flush_mode) {
  DCHECK_EQ(rmode_, JS_TO_WASM_CALL);
  Assembler::set_target_address_at(pc_, constant_pool_, address,
                                   icache_flush_mode);
}

Address RelocInfo::js_to_wasm_address() const {
  DCHECK_EQ(rmode_, JS_TO_WASM_CALL);
  return Assembler::target_address_at(pc_, constant_pool_);
}

uint32_t RelocInfo::wasm_call_tag() const {
  DCHECK(rmode_ == WASM_CALL || rmode_ == WASM_STUB_CALL);
  return static_cast<uint32_t>(
      Assembler::target_address_at(pc_, constant_pool_));
}

// -----------------------------------------------------------------------------
// Implementation of Operand and MemOperand
// See assembler-arm-inl.h for inlined constructors

Operand::Operand(Handle<HeapObject> handle) {
  rm_ = no_reg;
  value_.immediate = static_cast<intptr_t>(handle.address());
  rmode_ = RelocInfo::EMBEDDED_OBJECT;
}


Operand::Operand(Register rm, ShiftOp shift_op, int shift_imm) {
  DCHECK(is_uint5(shift_imm));

  rm_ = rm;
  rs_ = no_reg;
  shift_op_ = shift_op;
  shift_imm_ = shift_imm & 31;

  if ((shift_op == ROR) && (shift_imm == 0)) {
    // ROR #0 is functionally equivalent to LSL #0 and this allows us to
    // encode RRX as ROR #0 (see below).
    shift_op_ = LSL;
  } else if (shift_op == RRX) {
    // RRX is encoded as ROR with shift_imm == 0.
    DCHECK_EQ(shift_imm, 0);
    shift_op_ = ROR;
    shift_imm_ = 0;
  }
}


Operand::Operand(Register rm, ShiftOp shift_op, Register rs) {
  DCHECK(shift_op != RRX);
  rm_ = rm;
  shift_op_ = shift_op;
  rs_ = rs;
}

Operand Operand::EmbeddedNumber(double value) {
  int32_t smi;
  if (DoubleToSmiInteger(value, &smi)) return Operand(Smi::FromInt(smi));
  Operand result(0, RelocInfo::EMBEDDED_OBJECT);
  result.is_heap_object_request_ = true;
  result.value_.heap_object_request = HeapObjectRequest(value);
  return result;
}

Operand Operand::EmbeddedCode(CodeStub* stub) {
  Operand result(0, RelocInfo::CODE_TARGET);
  result.is_heap_object_request_ = true;
  result.value_.heap_object_request = HeapObjectRequest(stub);
  return result;
}

MemOperand::MemOperand(Register rn, int32_t offset, AddrMode am)
    : rn_(rn), rm_(no_reg), offset_(offset), am_(am) {
  // Accesses below the stack pointer are not safe, and are prohibited by the
  // ABI. We can check obvious violations here.
  if (rn == sp) {
    if (am == Offset) DCHECK_LE(0, offset);
    if (am == NegOffset) DCHECK_GE(0, offset);
  }
}

MemOperand::MemOperand(Register rn, Register rm, AddrMode am)
    : rn_(rn), rm_(rm), shift_op_(LSL), shift_imm_(0), am_(am) {}

MemOperand::MemOperand(Register rn, Register rm, ShiftOp shift_op,
                       int shift_imm, AddrMode am)
    : rn_(rn),
      rm_(rm),
      shift_op_(shift_op),
      shift_imm_(shift_imm & 31),
      am_(am) {
  DCHECK(is_uint5(shift_imm));
}

NeonMemOperand::NeonMemOperand(Register rn, AddrMode am, int align)
    : rn_(rn), rm_(am == Offset ? pc : sp) {
  DCHECK((am == Offset) || (am == PostIndex));
  SetAlignment(align);
}

NeonMemOperand::NeonMemOperand(Register rn, Register rm, int align)
    : rn_(rn), rm_(rm) {
  SetAlignment(align);
}

void NeonMemOperand::SetAlignment(int align) {
  switch (align) {
    case 0:
      align_ = 0;
      break;
    case 64:
      align_ = 1;
      break;
    case 128:
      align_ = 2;
      break;
    case 256:
      align_ = 3;
      break;
    default:
      UNREACHABLE();
      break;
  }
}

void Assembler::AllocateAndInstallRequestedHeapObjects(Isolate* isolate) {
  for (auto& request : heap_object_requests_) {
    Handle<HeapObject> object;
    switch (request.kind()) {
      case HeapObjectRequest::kHeapNumber:
        object =
            isolate->factory()->NewHeapNumber(request.heap_number(), TENURED);
        break;
      case HeapObjectRequest::kCodeStub:
        request.code_stub()->set_isolate(isolate);
        object = request.code_stub()->GetCode();
        break;
    }
    Address pc = reinterpret_cast<Address>(buffer_) + request.offset();
    Memory<Address>(constant_pool_entry_address(pc, 0 /* unused */)) =
        object.address();
  }
}

// -----------------------------------------------------------------------------
// Specific instructions, constants, and masks.

// str(r, MemOperand(sp, 4, NegPreIndex), al) instruction (aka push(r))
// register r is not encoded.
const Instr kPushRegPattern = al | B26 | 4 | NegPreIndex | sp.code() * B16;
// ldr(r, MemOperand(sp, 4, PostIndex), al) instruction (aka pop(r))
// register r is not encoded.
const Instr kPopRegPattern = al | B26 | L | 4 | PostIndex | sp.code() * B16;
// ldr rd, [pc, #offset]
const Instr kLdrPCImmedMask = 15 * B24 | 7 * B20 | 15 * B16;
const Instr kLdrPCImmedPattern = 5 * B24 | L | pc.code() * B16;
// vldr dd, [pc, #offset]
const Instr kVldrDPCMask = 15 * B24 | 3 * B20 | 15 * B16 | 15 * B8;
const Instr kVldrDPCPattern = 13 * B24 | L | pc.code() * B16 | 11 * B8;
// blxcc rm
const Instr kBlxRegMask =
    15 * B24 | 15 * B20 | 15 * B16 | 15 * B12 | 15 * B8 | 15 * B4;
const Instr kBlxRegPattern =
    B24 | B21 | 15 * B16 | 15 * B12 | 15 * B8 | BLX;
const Instr kBlxIp = al | kBlxRegPattern | ip.code();
const Instr kMovMvnMask = 0x6D * B21 | 0xF * B16;
const Instr kMovMvnPattern = 0xD * B21;
const Instr kMovMvnFlip = B22;
const Instr kMovLeaveCCMask = 0xDFF * B16;
const Instr kMovLeaveCCPattern = 0x1A0 * B16;
const Instr kMovwPattern = 0x30 * B20;
const Instr kMovtPattern = 0x34 * B20;
const Instr kMovwLeaveCCFlip = 0x5 * B21;
const Instr kMovImmedMask = 0x7F * B21;
const Instr kMovImmedPattern = 0x1D * B21;
const Instr kOrrImmedMask = 0x7F * B21;
const Instr kOrrImmedPattern = 0x1C * B21;
const Instr kCmpCmnMask = 0xDD * B20 | 0xF * B12;
const Instr kCmpCmnPattern = 0x15 * B20;
const Instr kCmpCmnFlip = B21;
const Instr kAddSubFlip = 0x6 * B21;
const Instr kAndBicFlip = 0xE * B21;

// A mask for the Rd register for push, pop, ldr, str instructions.
const Instr kLdrRegFpOffsetPattern = al | B26 | L | Offset | fp.code() * B16;
const Instr kStrRegFpOffsetPattern = al | B26 | Offset | fp.code() * B16;
const Instr kLdrRegFpNegOffsetPattern =
    al | B26 | L | NegOffset | fp.code() * B16;
const Instr kStrRegFpNegOffsetPattern = al | B26 | NegOffset | fp.code() * B16;
const Instr kLdrStrInstrTypeMask = 0xFFFF0000;

Assembler::Assembler(const AssemblerOptions& options, void* buffer,
                     int buffer_size)
    : AssemblerBase(options, buffer, buffer_size),
      pending_32_bit_constants_(),
      pending_64_bit_constants_(),
      scratch_register_list_(ip.bit()) {
  pending_32_bit_constants_.reserve(kMinNumPendingConstants);
  pending_64_bit_constants_.reserve(kMinNumPendingConstants);
  reloc_info_writer.Reposition(buffer_ + buffer_size_, pc_);
  next_buffer_check_ = 0;
  const_pool_blocked_nesting_ = 0;
  no_const_pool_before_ = 0;
  first_const_pool_32_use_ = -1;
  first_const_pool_64_use_ = -1;
  last_bound_pos_ = 0;
  if (CpuFeatures::IsSupported(VFP32DREGS)) {
    // Register objects tend to be abstracted and survive between scopes, so
    // it's awkward to use CpuFeatures::VFP32DREGS with CpuFeatureScope. To make
    // its use consistent with other features, we always enable it if we can.
    EnableCpuFeature(VFP32DREGS);
    // Make sure we pick two D registers which alias a Q register. This way, we
    // can use a Q as a scratch if NEON is supported.
    scratch_vfp_register_list_ = d14.ToVfpRegList() | d15.ToVfpRegList();
  } else {
    // When VFP32DREGS is not supported, d15 becomes allocatable. Therefore we
    // cannot use it as a scratch.
    scratch_vfp_register_list_ = d14.ToVfpRegList();
  }
}

Assembler::~Assembler() {
  DCHECK_EQ(const_pool_blocked_nesting_, 0);
}

void Assembler::GetCode(Isolate* isolate, CodeDesc* desc) {
  // Emit constant pool if necessary.
  int constant_pool_offset = 0;
  CheckConstPool(true, false);
  DCHECK(pending_32_bit_constants_.empty());
  DCHECK(pending_64_bit_constants_.empty());

  AllocateAndInstallRequestedHeapObjects(isolate);

  // Set up code descriptor.
  desc->buffer = buffer_;
  desc->buffer_size = buffer_size_;
  desc->instr_size = pc_offset();
  desc->reloc_size = (buffer_ + buffer_size_) - reloc_info_writer.pos();
  desc->constant_pool_size =
      (constant_pool_offset ? desc->instr_size - constant_pool_offset : 0);
  desc->origin = this;
  desc->unwinding_info_size = 0;
  desc->unwinding_info = nullptr;
}


void Assembler::Align(int m) {
  DCHECK(m >= 4 && base::bits::IsPowerOfTwo(m));
  DCHECK_EQ(pc_offset() & (kInstrSize - 1), 0);
  while ((pc_offset() & (m - 1)) != 0) {
    nop();
  }
}


void Assembler::CodeTargetAlign() {
  // Preferred alignment of jump targets on some ARM chips.
  Align(8);
}


Condition Assembler::GetCondition(Instr instr) {
  return Instruction::ConditionField(instr);
}

bool Assembler::IsLdrRegisterImmediate(Instr instr) {
  return (instr & (B27 | B26 | B25 | B22 | B20)) == (B26 | B20);
}


bool Assembler::IsVldrDRegisterImmediate(Instr instr) {
  return (instr & (15 * B24 | 3 * B20 | 15 * B8)) == (13 * B24 | B20 | 11 * B8);
}


int Assembler::GetLdrRegisterImmediateOffset(Instr instr) {
  DCHECK(IsLdrRegisterImmediate(instr));
  bool positive = (instr & B23) == B23;
  int offset = instr & kOff12Mask;  // Zero extended offset.
  return positive ? offset : -offset;
}


int Assembler::GetVldrDRegisterImmediateOffset(Instr instr) {
  DCHECK(IsVldrDRegisterImmediate(instr));
  bool positive = (instr & B23) == B23;
  int offset = instr & kOff8Mask;  // Zero extended offset.
  offset <<= 2;
  return positive ? offset : -offset;
}


Instr Assembler::SetLdrRegisterImmediateOffset(Instr instr, int offset) {
  DCHECK(IsLdrRegisterImmediate(instr));
  bool positive = offset >= 0;
  if (!positive) offset = -offset;
  DCHECK(is_uint12(offset));
  // Set bit indicating whether the offset should be added.
  instr = (instr & ~B23) | (positive ? B23 : 0);
  // Set the actual offset.
  return (instr & ~kOff12Mask) | offset;
}


Instr Assembler::SetVldrDRegisterImmediateOffset(Instr instr, int offset) {
  DCHECK(IsVldrDRegisterImmediate(instr));
  DCHECK((offset & ~3) == offset);  // Must be 64-bit aligned.
  bool positive = offset >= 0;
  if (!positive) offset = -offset;
  DCHECK(is_uint10(offset));
  // Set bit indicating whether the offset should be added.
  instr = (instr & ~B23) | (positive ? B23 : 0);
  // Set the actual offset. Its bottom 2 bits are zero.
  return (instr & ~kOff8Mask) | (offset >> 2);
}


bool Assembler::IsStrRegisterImmediate(Instr instr) {
  return (instr & (B27 | B26 | B25 | B22 | B20)) == B26;
}


Instr Assembler::SetStrRegisterImmediateOffset(Instr instr, int offset) {
  DCHECK(IsStrRegisterImmediate(instr));
  bool positive = offset >= 0;
  if (!positive) offset = -offset;
  DCHECK(is_uint12(offset));
  // Set bit indicating whether the offset should be added.
  instr = (instr & ~B23) | (positive ? B23 : 0);
  // Set the actual offset.
  return (instr & ~kOff12Mask) | offset;
}


bool Assembler::IsAddRegisterImmediate(Instr instr) {
  return (instr & (B27 | B26 | B25 | B24 | B23 | B22 | B21)) == (B25 | B23);
}


Instr Assembler::SetAddRegisterImmediateOffset(Instr instr, int offset) {
  DCHECK(IsAddRegisterImmediate(instr));
  DCHECK_GE(offset, 0);
  DCHECK(is_uint12(offset));
  // Set the offset.
  return (instr & ~kOff12Mask) | offset;
}


Register Assembler::GetRd(Instr instr) {
  return Register::from_code(Instruction::RdValue(instr));
}


Register Assembler::GetRn(Instr instr) {
  return Register::from_code(Instruction::RnValue(instr));
}


Register Assembler::GetRm(Instr instr) {
  return Register::from_code(Instruction::RmValue(instr));
}


bool Assembler::IsPush(Instr instr) {
  return ((instr & ~kRdMask) == kPushRegPattern);
}


bool Assembler::IsPop(Instr instr) {
  return ((instr & ~kRdMask) == kPopRegPattern);
}


bool Assembler::IsStrRegFpOffset(Instr instr) {
  return ((instr & kLdrStrInstrTypeMask) == kStrRegFpOffsetPattern);
}


bool Assembler::IsLdrRegFpOffset(Instr instr) {
  return ((instr & kLdrStrInstrTypeMask) == kLdrRegFpOffsetPattern);
}


bool Assembler::IsStrRegFpNegOffset(Instr instr) {
  return ((instr & kLdrStrInstrTypeMask) == kStrRegFpNegOffsetPattern);
}


bool Assembler::IsLdrRegFpNegOffset(Instr instr) {
  return ((instr & kLdrStrInstrTypeMask) == kLdrRegFpNegOffsetPattern);
}


bool Assembler::IsLdrPcImmediateOffset(Instr instr) {
  // Check the instruction is indeed a
  // ldr<cond> <Rd>, [pc +/- offset_12].
  return (instr & kLdrPCImmedMask) == kLdrPCImmedPattern;
}


bool Assembler::IsVldrDPcImmediateOffset(Instr instr) {
  // Check the instruction is indeed a
  // vldr<cond> <Dd>, [pc +/- offset_10].
  return (instr & kVldrDPCMask) == kVldrDPCPattern;
}


bool Assembler::IsBlxReg(Instr instr) {
  // Check the instruction is indeed a
  // blxcc <Rm>
  return (instr & kBlxRegMask) == kBlxRegPattern;
}


bool Assembler::IsBlxIp(Instr instr) {
  // Check the instruction is indeed a
  // blx ip
  return instr == kBlxIp;
}


bool Assembler::IsTstImmediate(Instr instr) {
  return (instr & (B27 | B26 | I | kOpCodeMask | S | kRdMask)) ==
      (I | TST | S);
}


bool Assembler::IsCmpRegister(Instr instr) {
  return (instr & (B27 | B26 | I | kOpCodeMask | S | kRdMask | B4)) ==
      (CMP | S);
}


bool Assembler::IsCmpImmediate(Instr instr) {
  return (instr & (B27 | B26 | I | kOpCodeMask | S | kRdMask)) ==
      (I | CMP | S);
}


Register Assembler::GetCmpImmediateRegister(Instr instr) {
  DCHECK(IsCmpImmediate(instr));
  return GetRn(instr);
}


int Assembler::GetCmpImmediateRawImmediate(Instr instr) {
  DCHECK(IsCmpImmediate(instr));
  return instr & kOff12Mask;
}


// Labels refer to positions in the (to be) generated code.
// There are bound, linked, and unused labels.
//
// Bound labels refer to known positions in the already
// generated code. pos() is the position the label refers to.
//
// Linked labels refer to unknown positions in the code
// to be generated; pos() is the position of the last
// instruction using the label.
//
// The linked labels form a link chain by making the branch offset
// in the instruction stream point to the previous branch
// instruction using the same label.
//
// The link chain is terminated by a branch offset pointing to the
// same position.
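//
// Worked example (illustrative): if an unbound label is used by branches at
// offsets 12, 24 and 36, then pos() is 36, the branch at 36 encodes an offset
// back to 24, the branch at 24 encodes an offset back to 12, and the branch
// at 12 points to itself, terminating the chain. bind_to() walks the chain
// with target_at() and patches each branch via target_at_put().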


int Assembler::target_at(int pos) {
  Instr instr = instr_at(pos);
  if (is_uint24(instr)) {
    // Emitted link to a label, not part of a branch.
    return instr;
  }
  DCHECK_EQ(5 * B25, instr & 7 * B25);  // b, bl, or blx imm24
  int imm26 = ((instr & kImm24Mask) << 8) >> 6;
  if ((Instruction::ConditionField(instr) == kSpecialCondition) &&
      ((instr & B24) != 0)) {
    // blx uses bit 24 to encode bit 2 of imm26
    imm26 += 2;
  }
  return pos + Instruction::kPcLoadDelta + imm26;
}


void Assembler::target_at_put(int pos, int target_pos) {
  Instr instr = instr_at(pos);
  if (is_uint24(instr)) {
    DCHECK(target_pos == pos || target_pos >= 0);
    // Emitted link to a label, not part of a branch.
    // Load the position of the label relative to the generated code object
    // pointer in a register.

    // The existing code must be a single 24-bit label chain link, followed by
    // nops encoding the destination register. See mov_label_offset.

    // Extract the destination register from the first nop instructions.
    Register dst =
        Register::from_code(Instruction::RmValue(instr_at(pos + kInstrSize)));
    // In addition to the 24-bit label chain link, we expect to find one nop for
    // ARMv7 and above, or two nops for ARMv6. See mov_label_offset.
    DCHECK(IsNop(instr_at(pos + kInstrSize), dst.code()));
    if (!CpuFeatures::IsSupported(ARMv7)) {
      DCHECK(IsNop(instr_at(pos + 2 * kInstrSize), dst.code()));
    }

    // Here are the instructions we need to emit:
    //   For ARMv7: target24 => target16_1:target16_0
    //      movw dst, #target16_0
    //      movt dst, #target16_1
    //   For ARMv6: target24 => target8_2:target8_1:target8_0
    //      mov dst, #target8_0
    //      orr dst, dst, #target8_1 << 8
    //      orr dst, dst, #target8_2 << 16

    uint32_t target24 = target_pos + (Code::kHeaderSize - kHeapObjectTag);
    DCHECK(is_uint24(target24));
    if (is_uint8(target24)) {
      // If the target fits in a byte then only patch with a mov
      // instruction.
      PatchingAssembler patcher(options(),
                                reinterpret_cast<byte*>(buffer_ + pos), 1);
      patcher.mov(dst, Operand(target24));
    } else {
      uint16_t target16_0 = target24 & kImm16Mask;
      uint16_t target16_1 = target24 >> 16;
      if (CpuFeatures::IsSupported(ARMv7)) {
        // Patch with movw/movt.
        if (target16_1 == 0) {
          PatchingAssembler patcher(options(),
                                    reinterpret_cast<byte*>(buffer_ + pos), 1);
          CpuFeatureScope scope(&patcher, ARMv7);
          patcher.movw(dst, target16_0);
        } else {
          PatchingAssembler patcher(options(),
                                    reinterpret_cast<byte*>(buffer_ + pos), 2);
          CpuFeatureScope scope(&patcher, ARMv7);
          patcher.movw(dst, target16_0);
          patcher.movt(dst, target16_1);
        }
      } else {
        // Patch with a sequence of mov/orr/orr instructions.
        uint8_t target8_0 = target16_0 & kImm8Mask;
        uint8_t target8_1 = target16_0 >> 8;
        uint8_t target8_2 = target16_1 & kImm8Mask;
        if (target8_2 == 0) {
          PatchingAssembler patcher(options(),
                                    reinterpret_cast<byte*>(buffer_ + pos), 2);
          patcher.mov(dst, Operand(target8_0));
          patcher.orr(dst, dst, Operand(target8_1 << 8));
        } else {
          PatchingAssembler patcher(options(),
                                    reinterpret_cast<byte*>(buffer_ + pos), 3);
          patcher.mov(dst, Operand(target8_0));
          patcher.orr(dst, dst, Operand(target8_1 << 8));
          patcher.orr(dst, dst, Operand(target8_2 << 16));
        }
      }
    }
    return;
  }
  int imm26 = target_pos - (pos + Instruction::kPcLoadDelta);
  DCHECK_EQ(5 * B25, instr & 7 * B25);  // b, bl, or blx imm24
  if (Instruction::ConditionField(instr) == kSpecialCondition) {
    // blx uses bit 24 to encode bit 2 of imm26
    DCHECK_EQ(0, imm26 & 1);
    instr = (instr & ~(B24 | kImm24Mask)) | ((imm26 & 2) >> 1) * B24;
  } else {
    DCHECK_EQ(0, imm26 & 3);
    instr &= ~kImm24Mask;
  }
  int imm24 = imm26 >> 2;
  DCHECK(is_int24(imm24));
  instr_at_put(pos, instr | (imm24 & kImm24Mask));
}

void Assembler::print(const Label* L) {
  if (L->is_unused()) {
    PrintF("unused label\n");
  } else if (L->is_bound()) {
    PrintF("bound label to %d\n", L->pos());
  } else if (L->is_linked()) {
    Label l;
    l.link_to(L->pos());
    PrintF("unbound label");
    while (l.is_linked()) {
      PrintF("@ %d ", l.pos());
      Instr instr = instr_at(l.pos());
      if ((instr & ~kImm24Mask) == 0) {
        PrintF("value\n");
      } else {
        DCHECK_EQ(instr & 7 * B25, 5 * B25);  // b, bl, or blx
        Condition cond = Instruction::ConditionField(instr);
        const char* b;
        const char* c;
        if (cond == kSpecialCondition) {
          b = "blx";
          c = "";
        } else {
          if ((instr & B24) != 0)
            b = "bl";
          else
            b = "b";

          switch (cond) {
            case eq: c = "eq"; break;
            case ne: c = "ne"; break;
            case hs: c = "hs"; break;
            case lo: c = "lo"; break;
            case mi: c = "mi"; break;
            case pl: c = "pl"; break;
            case vs: c = "vs"; break;
            case vc: c = "vc"; break;
            case hi: c = "hi"; break;
            case ls: c = "ls"; break;
            case ge: c = "ge"; break;
            case lt: c = "lt"; break;
            case gt: c = "gt"; break;
            case le: c = "le"; break;
            case al: c = ""; break;
            default:
              c = "";
              UNREACHABLE();
          }
        }
        PrintF("%s%s\n", b, c);
      }
      next(&l);
    }
  } else {
    PrintF("label in inconsistent state (pos = %d)\n", L->pos_);
  }
}


void Assembler::bind_to(Label* L, int pos) {
  DCHECK(0 <= pos && pos <= pc_offset());  // must have a valid binding position
  while (L->is_linked()) {
    int fixup_pos = L->pos();
    next(L);  // call next before overwriting link with target at fixup_pos
    target_at_put(fixup_pos, pos);
  }
  L->bind_to(pos);

  // Keep track of the last bound label so we don't eliminate any instructions
  // before a bound label.
  if (pos > last_bound_pos_)
    last_bound_pos_ = pos;
}


void Assembler::bind(Label* L) {
  DCHECK(!L->is_bound());  // label can only be bound once
  bind_to(L, pc_offset());
}


void Assembler::next(Label* L) {
  DCHECK(L->is_linked());
  int link = target_at(L->pos());
  if (link == L->pos()) {
    // Branch target points to the same instruction. This is the end of the link
    // chain.
    L->Unuse();
  } else {
    DCHECK_GE(link, 0);
    L->link_to(link);
  }
}

namespace {

// Low-level code emission routines depending on the addressing mode.
// If this returns true then you have to use the rotate_imm and immed_8
// that it returns, because it may have already changed the instruction
// to match them!
bool FitsShifter(uint32_t imm32, uint32_t* rotate_imm, uint32_t* immed_8,
                 Instr* instr) {
  // imm32 must be unsigned.
  for (int rot = 0; rot < 16; rot++) {
    uint32_t imm8 = base::bits::RotateLeft32(imm32, 2 * rot);
    if ((imm8 <= 0xFF)) {
      *rotate_imm = rot;
      *immed_8 = imm8;
      return true;
    }
  }
  // If the opcode is one with a complementary version and the complementary
  // immediate fits, change the opcode.
  if (instr != nullptr) {
    if ((*instr & kMovMvnMask) == kMovMvnPattern) {
      if (FitsShifter(~imm32, rotate_imm, immed_8, nullptr)) {
        *instr ^= kMovMvnFlip;
        return true;
      } else if ((*instr & kMovLeaveCCMask) == kMovLeaveCCPattern) {
        if (CpuFeatures::IsSupported(ARMv7)) {
          if (imm32 < 0x10000) {
            *instr ^= kMovwLeaveCCFlip;
            *instr |= Assembler::EncodeMovwImmediate(imm32);
            *rotate_imm = *immed_8 = 0;  // Not used for movw.
            return true;
          }
        }
      }
    } else if ((*instr & kCmpCmnMask) == kCmpCmnPattern) {
      if (FitsShifter(-static_cast<int>(imm32), rotate_imm, immed_8, nullptr)) {
        *instr ^= kCmpCmnFlip;
        return true;
      }
    } else {
      Instr alu_insn = (*instr & kALUMask);
      if (alu_insn == ADD ||
          alu_insn == SUB) {
        if (FitsShifter(-static_cast<int>(imm32), rotate_imm, immed_8,
                        nullptr)) {
          *instr ^= kAddSubFlip;
          return true;
        }
      } else if (alu_insn == AND ||
                 alu_insn == BIC) {
        if (FitsShifter(~imm32, rotate_imm, immed_8, nullptr)) {
          *instr ^= kAndBicFlip;
          return true;
        }
      }
    }
  }
  return false;
}
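
// Worked example (illustrative): FitsShifter(0x3FC00, &rot, &imm8, nullptr)
// succeeds with rot == 11 and imm8 == 0xFF, since rotating 0x3FC00 left by
// 2 * 11 = 22 bits yields 0xFF; the encoded operand reproduces the value as
// 0xFF rotated right by 22 bits. A value such as 0x101 fails for every
// rotation, so callers fall back to movw/movt or a constant pool load.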

// We have to use the temporary register for things that can be relocated even
// if they can be encoded in the ARM's 12 bits of immediate-offset instruction
// space.  There is no guarantee that the relocated location can be similarly
// encoded.
bool MustOutputRelocInfo(RelocInfo::Mode rmode, const Assembler* assembler) {
  if (RelocInfo::IsOnlyForSerializer(rmode)) {
    if (assembler->predictable_code_size()) return true;
    return assembler->options().record_reloc_info_for_serialization;
  } else if (RelocInfo::IsNone(rmode)) {
    return false;
  }
  return true;
}

bool UseMovImmediateLoad(const Operand& x, const Assembler* assembler) {
  DCHECK_NOT_NULL(assembler);
  if (x.MustOutputRelocInfo(assembler)) {
    // Prefer constant pool if data is likely to be patched.
    return false;
  } else {
    // Otherwise, use immediate load if movw / movt is available.
    return CpuFeatures::IsSupported(ARMv7);
  }
}

}  // namespace

bool Operand::MustOutputRelocInfo(const Assembler* assembler) const {
  return v8::internal::MustOutputRelocInfo(rmode_, assembler);
}

int Operand::InstructionsRequired(const Assembler* assembler,
                                  Instr instr) const {
  DCHECK_NOT_NULL(assembler);
  if (rm_.is_valid()) return 1;
  uint32_t dummy1, dummy2;
  if (MustOutputRelocInfo(assembler) ||
      !FitsShifter(immediate(), &dummy1, &dummy2, &instr)) {
    // The immediate operand cannot be encoded as a shifter operand, or use of
    // the constant pool is required. First account for the instructions
    // required for the constant pool or immediate load.
    int instructions;
    if (UseMovImmediateLoad(*this, assembler)) {
      DCHECK(CpuFeatures::IsSupported(ARMv7));
      // A movw / movt immediate load.
      instructions = 2;
    } else {
      // A small constant pool load.
      instructions = 1;
    }
    if ((instr & ~kCondMask) != 13 * B21) {  // mov, S not set
      // For a mov or mvn instruction which doesn't set the condition
      // code, the constant pool or immediate load is enough, otherwise we need
      // to account for the actual instruction being requested.
      instructions += 1;
    }
    return instructions;
  } else {
    // No use of constant pool and the immediate operand can be encoded as a
    // shifter operand.
    return 1;
  }
}

void Assembler::Move32BitImmediate(Register rd, const Operand& x,
                                   Condition cond) {
  if (UseMovImmediateLoad(x, this)) {
    CpuFeatureScope scope(this, ARMv7);
    // UseMovImmediateLoad should return false when we need to output
    // relocation info, since we prefer the constant pool for values that
    // can be patched.
    DCHECK(!x.MustOutputRelocInfo(this));
    UseScratchRegisterScope temps(this);
    // Re-use the destination register as a scratch if possible.
    Register target = rd != pc ? rd : temps.Acquire();
    uint32_t imm32 = static_cast<uint32_t>(x.immediate());
    movw(target, imm32 & 0xFFFF, cond);
    movt(target, imm32 >> 16, cond);
    if (target.code() != rd.code()) {
      mov(rd, target, LeaveCC, cond);
    }
  } else {
    int32_t immediate;
    if (x.IsHeapObjectRequest()) {
      RequestHeapObject(x.heap_object_request());
      immediate = 0;
    } else {
      immediate = x.immediate();
    }
    ConstantPoolAddEntry(pc_offset(), x.rmode_, immediate);
    ldr_pcrel(rd, 0, cond);
  }
}
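
// Worked example (illustrative): on ARMv7, moving the immediate 0x12345678
// into r0 through Move32BitImmediate emits
//   movw r0, #0x5678
//   movt r0, #0x1234
// whereas on ARMv6, or when relocation info must be recorded, the constant
// goes into the constant pool and is loaded with a pc-relative ldr.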

void Assembler::AddrMode1(Instr instr, Register rd, Register rn,
                          const Operand& x) {
  CheckBuffer();
  uint32_t opcode = instr & kOpCodeMask;
  bool set_flags = (instr & S) != 0;
  DCHECK((opcode == ADC) || (opcode == ADD) || (opcode == AND) ||
         (opcode == BIC) || (opcode == EOR) || (opcode == ORR) ||
         (opcode == RSB) || (opcode == RSC) || (opcode == SBC) ||
         (opcode == SUB) || (opcode == CMN) || (opcode == CMP) ||
         (opcode == TEQ) || (opcode == TST) || (opcode == MOV) ||
         (opcode == MVN));
  // For comparison instructions, rd is not defined.
  DCHECK(rd.is_valid() || (opcode == CMN) || (opcode == CMP) ||
         (opcode == TEQ) || (opcode == TST));
  // For move instructions, rn is not defined.
  DCHECK(rn.is_valid() || (opcode == MOV) || (opcode == MVN));
  DCHECK(rd.is_valid() || rn.is_valid());
  DCHECK_EQ(instr & ~(kCondMask | kOpCodeMask | S), 0);
  if (!AddrMode1TryEncodeOperand(&instr, x)) {
    DCHECK(x.IsImmediate());
    // Upon failure to encode, the opcode should not have changed.
    DCHECK(opcode == (instr & kOpCodeMask));
    UseScratchRegisterScope temps(this);
    Condition cond = Instruction::ConditionField(instr);
    if ((opcode == MOV) && !set_flags) {
      // Generate a sequence of mov instructions or a load from the constant
      // pool only for a MOV instruction which does not set the flags.
      DCHECK(!rn.is_valid());
      Move32BitImmediate(rd, x, cond);
    } else if ((opcode == ADD) && !set_flags && (rd == rn) &&
               !temps.CanAcquire()) {
      // Split the operation into a sequence of additions if we cannot use a
      // scratch register. In this case, we cannot re-use rn and the assembler
      // does not have any scratch registers to spare.
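      // Worked example (illustrative): for imm == 0x12345 this loop emits
      //   add rd, rd, #0x45      (mask 0xFF << 0)
      //   add rd, rd, #0x2300    (mask 0xFF << 8)
      // leaving imm == 0x10000, which is encodable and added by the final
      // add below.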
      uint32_t imm = x.immediate();
      do {
        // The immediate encoding format is composed of 8 bits of data and 4
        // bits encoding a rotation. Each of the 16 possible rotations accounts
        // for a rotation by an even number.
        //   4 bits -> 16 rotations possible
        //          -> 16 rotations of 2 bits each fits in a 32-bit value.
        // This means that finding the even number of trailing zeroes of the
        // immediate allows us to more efficiently split it:
        int trailing_zeroes = base::bits::CountTrailingZeros(imm) & ~1u;
        uint32_t mask = (0xFF << trailing_zeroes);
        add(rd, rd, Operand(imm & mask), LeaveCC, cond);
        imm = imm & ~mask;
      } while (!ImmediateFitsAddrMode1Instruction(imm));
      add(rd, rd, Operand(imm), LeaveCC, cond);
    } else {
      // The immediate operand cannot be encoded as a shifter operand, so load
      // it first to a scratch register and change the original instruction to
      // use it.
      // Re-use the destination register if possible.
      Register scratch =
          (rd.is_valid() && rd != rn && rd != pc) ? rd : temps.Acquire();
      mov(scratch, x, LeaveCC, cond);
      AddrMode1(instr, rd, rn, Operand(scratch));
    }
    return;
  }
  if (!rd.is_valid()) {
    // Emit a comparison instruction.
    emit(instr | rn.code() * B16);
  } else if (!rn.is_valid()) {
    // Emit a move instruction. If the operand is a register-shifted register,
    // then prevent the destination from being PC as this is unpredictable.
    DCHECK(!x.IsRegisterShiftedRegister() || rd != pc);
    emit(instr | rd.code() * B12);
  } else {
    emit(instr | rn.code() * B16 | rd.code() * B12);
  }
  if (rn == pc || x.rm_ == pc) {
    // Block constant pool emission for one instruction after reading pc.
    BlockConstPoolFor(1);
  }
}

bool Assembler::AddrMode1TryEncodeOperand(Instr* instr, const Operand& x) {
  if (x.IsImmediate()) {
    // Immediate.
    uint32_t rotate_imm;
    uint32_t immed_8;
    if (x.MustOutputRelocInfo(this) ||
        !FitsShifter(x.immediate(), &rotate_imm, &immed_8, instr)) {
      // Let the caller handle generating multiple instructions.
      return false;
    }
    *instr |= I | rotate_imm * B8 | immed_8;
  } else if (x.IsImmediateShiftedRegister()) {
    *instr |= x.shift_imm_ * B7 | x.shift_op_ | x.rm_.code();
  } else {
    DCHECK(x.IsRegisterShiftedRegister());
    // It is unpredictable to use the PC in this case.
    DCHECK(x.rm_ != pc && x.rs_ != pc);
    *instr |= x.rs_.code() * B8 | x.shift_op_ | B4 | x.rm_.code();
  }

  return true;
}

void Assembler::AddrMode2(Instr instr, Register rd, const MemOperand& x) {
  DCHECK((instr & ~(kCondMask | B | L)) == B26);
  // This method does not handle pc-relative addresses. ldr_pcrel() should be
  // used instead.
  DCHECK(x.rn_ != pc);
  int am = x.am_;
  if (!x.rm_.is_valid()) {
    // Immediate offset.
    int offset_12 = x.offset_;
    if (offset_12 < 0) {
      offset_12 = -offset_12;
      am ^= U;
    }
    if (!is_uint12(offset_12)) {
      // Immediate offset cannot be encoded, load it first to a scratch
      // register.
      UseScratchRegisterScope temps(this);
      // Allow re-using rd for load instructions if possible.
      bool is_load = (instr & L) == L;
      Register scratch =
          (is_load && rd != x.rn_ && rd != pc) ? rd : temps.Acquire();
      mov(scratch, Operand(x.offset_), LeaveCC,
          Instruction::ConditionField(instr));
      AddrMode2(instr, rd, MemOperand(x.rn_, scratch, x.am_));
      return;
    }
    DCHECK_GE(offset_12, 0);  // no masking needed
    instr |= offset_12;
  } else {
    // Register offset (shift_imm_ and shift_op_ are 0) or scaled register
    // offset; the constructors make sure that both shift_imm_ and shift_op_
    // are initialized.
    DCHECK(x.rm_ != pc);
    instr |= B25 | x.shift_imm_*B7 | x.shift_op_ | x.rm_.code();
  }
  DCHECK((am & (P | W)) == P || x.rn_ != pc);  // no pc base with writeback
  emit(instr | am | x.rn_.code()*B16 | rd.code()*B12);
}

void Assembler::AddrMode3(Instr instr, Register rd, const MemOperand& x) {
  DCHECK((instr & ~(kCondMask | L | S6 | H)) == (B4 | B7));
  DCHECK(x.rn_.is_valid());
  // This method does not handle pc-relative addresses. ldr_pcrel() should be
  // used instead.
  DCHECK(x.rn_ != pc);
  int am = x.am_;
  bool is_load = (instr & L) == L;
  if (!x.rm_.is_valid()) {
    // Immediate offset.
    int offset_8 = x.offset_;
    if (offset_8 < 0) {
      offset_8 = -offset_8;
      am ^= U;
    }
    if (!is_uint8(offset_8)) {
      // Immediate offset cannot be encoded, load it first to a scratch
      // register.
      UseScratchRegisterScope temps(this);
      // Allow re-using rd for load instructions if possible.
      Register scratch =
          (is_load && rd != x.rn_ && rd != pc) ? rd : temps.Acquire();
      mov(scratch, Operand(x.offset_), LeaveCC,
          Instruction::ConditionField(instr));
      AddrMode3(instr, rd, MemOperand(x.rn_, scratch, x.am_));
      return;
    }
    DCHECK_GE(offset_8, 0);  // no masking needed
    instr |= B | (offset_8 >> 4) * B8 | (offset_8 & 0xF);
  } else if (x.shift_imm_ != 0) {
    // Scaled register offsets are not supported, compute the offset separately
    // to a scratch register.
    UseScratchRegisterScope temps(this);
    // Allow re-using rd for load instructions if possible.
    Register scratch =
        (is_load && rd != x.rn_ && rd != pc) ? rd : temps.Acquire();
    mov(scratch, Operand(x.rm_, x.shift_op_, x.shift_imm_), LeaveCC,
        Instruction::ConditionField(instr));
    AddrMode3(instr, rd, MemOperand(x.rn_, scratch, x.am_));
    return;
  } else {
    // Register offset.
    DCHECK((am & (P | W)) == P || x.rm_ != pc);  // no pc index with writeback
    instr |= x.rm_.code();
  }
  DCHECK((am & (P | W)) == P || x.rn_ != pc);  // no pc base with writeback
  emit(instr | am | x.rn_.code()*B16 | rd.code()*B12);
}

void Assembler::AddrMode4(Instr instr, Register rn, RegList rl) {
  DCHECK((instr & ~(kCondMask | P | U | W | L)) == B27);
  DCHECK_NE(rl, 0);
  DCHECK(rn != pc);
  emit(instr | rn.code()*B16 | rl);
}

void Assembler::AddrMode5(Instr instr, CRegister crd, const MemOperand& x) {
  // Unindexed addressing is not encoded by this function.
  DCHECK_EQ((B27 | B26),
            (instr & ~(kCondMask | kCoprocessorMask | P | U | N | W | L)));
  DCHECK(x.rn_.is_valid() && !x.rm_.is_valid());
  int am = x.am_;
  int offset_8 = x.offset_;
  DCHECK_EQ(offset_8 & 3, 0);  // offset must be an aligned word offset
  offset_8 >>= 2;
  if (offset_8 < 0) {
    offset_8 = -offset_8;
    am ^= U;
  }
  DCHECK(is_uint8(offset_8));  // unsigned word offset must fit in a byte
  DCHECK((am & (P | W)) == P || x.rn_ != pc);  // no pc base with writeback

  // Post-indexed addressing requires W == 1; different than in AddrMode2/3.
  if ((am & P) == 0)
    am |= W;

  DCHECK_GE(offset_8, 0);  // no masking needed
  emit(instr | am | x.rn_.code()*B16 | crd.code()*B12 | offset_8);
}


int Assembler::branch_offset(Label* L) {
  int target_pos;
  if (L->is_bound()) {
    target_pos = L->pos();
  } else {
    if (L->is_linked()) {
      // Point to previous instruction that uses the link.
      target_pos = L->pos();
    } else {
      // First entry of the link chain points to itself.
      target_pos = pc_offset();
    }
    L->link_to(pc_offset());
  }

  // Block the emission of the constant pool, since the branch instruction must
  // be emitted at the pc offset recorded by the label.
  if (!is_const_pool_blocked()) BlockConstPoolFor(1);

  return target_pos - (pc_offset() + Instruction::kPcLoadDelta);
}
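
// Worked example (illustrative): Instruction::kPcLoadDelta is 8 on ARM
// because a branch reads pc two instructions ahead, so a label bound at the
// very next instruction yields branch_offset(L) == -4, which b() encodes as
// imm24 == -1.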


// Branch instructions.
void Assembler::b(int branch_offset, Condition cond, RelocInfo::Mode rmode) {
  RecordRelocInfo(rmode);
  DCHECK_EQ(branch_offset & 3, 0);
  int imm24 = branch_offset >> 2;
  const bool b_imm_check = is_int24(imm24);
  CHECK(b_imm_check);
  emit(cond | B27 | B25 | (imm24 & kImm24Mask));

  if (cond == al) {
    // Dead code is a good location to emit the constant pool.
    CheckConstPool(false, false);
  }
}

void Assembler::bl(int branch_offset, Condition cond, RelocInfo::Mode rmode) {
  RecordRelocInfo(rmode);
  DCHECK_EQ(branch_offset & 3, 0);
  int imm24 = branch_offset >> 2;
  const bool bl_imm_check = is_int24(imm24);
  CHECK(bl_imm_check);
  emit(cond | B27 | B25 | B24 | (imm24 & kImm24Mask));
}

void Assembler::blx(int branch_offset) {
  DCHECK_EQ(branch_offset & 1, 0);
  int h = ((branch_offset & 2) >> 1)*B24;
  int imm24 = branch_offset >> 2;
  const bool blx_imm_check = is_int24(imm24);
  CHECK(blx_imm_check);
  emit(kSpecialCondition | B27 | B25 | h | (imm24 & kImm24Mask));
}
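
// Worked example (illustrative): blx(0x102) sets h == B24 (capturing bit 1
// of the offset) and imm24 == 0x40, letting blx reach the halfword-aligned
// Thumb targets that b and bl cannot encode.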

void Assembler::blx(Register target, Condition cond) {
  DCHECK(target != pc);
  emit(cond | B24 | B21 | 15*B16 | 15*B12 | 15*B8 | BLX | target.code());
}

void Assembler::bx(Register target, Condition cond) {
  DCHECK(target != pc);  // use of pc is actually allowed, but discouraged
  emit(cond | B24 | B21 | 15*B16 | 15*B12 | 15*B8 | BX | target.code());
}


void Assembler::b(Label* L, Condition cond) {
  CheckBuffer();
  b(branch_offset(L), cond);
}


void Assembler::bl(Label* L, Condition cond) {
  CheckBuffer();
  bl(branch_offset(L), cond);
}


void Assembler::blx(Label* L) {
  CheckBuffer();
  blx(branch_offset(L));
}


// Data-processing instructions.

void Assembler::and_(Register dst, Register src1, const Operand& src2,
                     SBit s, Condition cond) {
  AddrMode1(cond | AND | s, dst, src1, src2);
}

void Assembler::and_(Register dst, Register src1, Register src2, SBit s,
                     Condition cond) {
  and_(dst, src1, Operand(src2), s, cond);
}

void Assembler::eor(Register dst, Register src1, const Operand& src2,
                    SBit s, Condition cond) {
  AddrMode1(cond | EOR | s, dst, src1, src2);
1496 1497 1498 1499 1500
}


void Assembler::sub(Register dst, Register src1, const Operand& src2,
                    SBit s, Condition cond) {
1501
  AddrMode1(cond | SUB | s, dst, src1, src2);
1502 1503
}

1504 1505 1506 1507
void Assembler::sub(Register dst, Register src1, Register src2, SBit s,
                    Condition cond) {
  sub(dst, src1, Operand(src2), s, cond);
}
1508 1509 1510

void Assembler::rsb(Register dst, Register src1, const Operand& src2,
                    SBit s, Condition cond) {
1511
  AddrMode1(cond | RSB | s, dst, src1, src2);
1512 1513 1514 1515 1516
}


void Assembler::add(Register dst, Register src1, const Operand& src2,
                    SBit s, Condition cond) {
1517
  AddrMode1(cond | ADD | s, dst, src1, src2);
1518 1519
}

1520 1521 1522 1523
void Assembler::add(Register dst, Register src1, Register src2, SBit s,
                    Condition cond) {
  add(dst, src1, Operand(src2), s, cond);
}
1524 1525 1526

void Assembler::adc(Register dst, Register src1, const Operand& src2,
                    SBit s, Condition cond) {
1527
  AddrMode1(cond | ADC | s, dst, src1, src2);
1528 1529 1530 1531 1532
}


void Assembler::sbc(Register dst, Register src1, const Operand& src2,
                    SBit s, Condition cond) {
1533
  AddrMode1(cond | SBC | s, dst, src1, src2);
1534 1535 1536 1537 1538
}


void Assembler::rsc(Register dst, Register src1, const Operand& src2,
                    SBit s, Condition cond) {
1539
  AddrMode1(cond | RSC | s, dst, src1, src2);
1540 1541 1542 1543
}


void Assembler::tst(Register src1, const Operand& src2, Condition cond) {
1544
  AddrMode1(cond | TST | S, no_reg, src1, src2);
1545 1546
}

1547 1548 1549
void Assembler::tst(Register src1, Register src2, Condition cond) {
  tst(src1, Operand(src2), cond);
}
1550 1551

void Assembler::teq(Register src1, const Operand& src2, Condition cond) {
1552
  AddrMode1(cond | TEQ | S, no_reg, src1, src2);
1553 1554 1555 1556
}


void Assembler::cmp(Register src1, const Operand& src2, Condition cond) {
1557
  AddrMode1(cond | CMP | S, no_reg, src1, src2);
1558 1559
}

1560 1561 1562
void Assembler::cmp(Register src1, Register src2, Condition cond) {
  cmp(src1, Operand(src2), cond);
}
1563

1564 1565
void Assembler::cmp_raw_immediate(
    Register src, int raw_immediate, Condition cond) {
1566
  DCHECK(is_uint12(raw_immediate));
1567 1568 1569 1570
  emit(cond | I | CMP | S | src.code() << 16 | raw_immediate);
}


1571
void Assembler::cmn(Register src1, const Operand& src2, Condition cond) {
1572
  AddrMode1(cond | CMN | S, no_reg, src1, src2);
1573 1574 1575 1576 1577
}


void Assembler::orr(Register dst, Register src1, const Operand& src2,
                    SBit s, Condition cond) {
1578
  AddrMode1(cond | ORR | s, dst, src1, src2);
1579 1580
}

1581 1582 1583 1584
void Assembler::orr(Register dst, Register src1, Register src2, SBit s,
                    Condition cond) {
  orr(dst, src1, Operand(src2), s, cond);
}
1585 1586

void Assembler::mov(Register dst, const Operand& src, SBit s, Condition cond) {
1587
  // Don't allow nop instructions in the form mov rn, rn to be generated using
1588
  // the mov instruction. They must be generated using nop(int/NopMarkerTypes).
1589
  DCHECK(!(src.IsRegister() && src.rm() == dst && s == LeaveCC && cond == al));
1590
  AddrMode1(cond | MOV | s, dst, no_reg, src);
1591 1592
}

1593 1594 1595
void Assembler::mov(Register dst, Register src, SBit s, Condition cond) {
  mov(dst, Operand(src), s, cond);
}
1596

1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624
void Assembler::mov_label_offset(Register dst, Label* label) {
  if (label->is_bound()) {
    mov(dst, Operand(label->pos() + (Code::kHeaderSize - kHeapObjectTag)));
  } else {
    // Emit the link to the label in the code stream followed by extra nop
    // instructions.
    // If the label is not linked, then start a new link chain by linking it to
    // itself, emitting pc_offset().
    int link = label->is_linked() ? label->pos() : pc_offset();
    label->link_to(pc_offset());

    // When the label is bound, these instructions will be patched with a
    // sequence of movw/movt or mov/orr/orr instructions. They will load the
    // destination register with the position of the label from the beginning
    // of the code.
    //
    // The link will be extracted from the first instruction and the destination
    // register from the second.
    //   For ARMv7:
    //      link
    //      mov dst, dst
    //   For ARMv6:
    //      link
    //      mov dst, dst
    //      mov dst, dst
    //
    // When the label gets bound: target_at extracts the link and target_at_put
    // patches the instructions.
    CHECK(is_uint24(link));
    BlockConstPoolScope block_const_pool(this);
    emit(link);
    nop(dst.code());
    if (!CpuFeatures::IsSupported(ARMv7)) {
      nop(dst.code());
    }
  }
}
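
// Illustration (not part of the original source): for an unbound label,
//   mov_label_offset(r4, &label);
// emits the link word plus one nop (ARMv7) or two nops (pre-ARMv7); once the
// label is bound, target_at_put rewrites these instructions so that r4 is
// loaded with the label's position from the start of the code.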


void Assembler::movw(Register reg, uint32_t immediate, Condition cond) {
  DCHECK(IsEnabled(ARMv7));
  emit(cond | 0x30*B20 | reg.code()*B12 | EncodeMovwImmediate(immediate));
}


void Assembler::movt(Register reg, uint32_t immediate, Condition cond) {
  DCHECK(IsEnabled(ARMv7));
  emit(cond | 0x34*B20 | reg.code()*B12 | EncodeMovwImmediate(immediate));
}
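
// Illustration (not part of the original source): movw/movt together load an
// arbitrary 32-bit constant without a constant-pool entry, e.g. for
// 0xDEADBEEF:
//   movw(r0, 0xBEEF);  // r0 = 0x0000BEEF
//   movt(r0, 0xDEAD);  // r0 = 0xDEADBEEF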


void Assembler::bic(Register dst, Register src1, const Operand& src2,
                    SBit s, Condition cond) {
  AddrMode1(cond | BIC | s, dst, src1, src2);
}


void Assembler::mvn(Register dst, const Operand& src, SBit s, Condition cond) {
  AddrMode1(cond | MVN | s, dst, no_reg, src);
}

void Assembler::asr(Register dst, Register src1, const Operand& src2, SBit s,
                    Condition cond) {
  if (src2.IsRegister()) {
    mov(dst, Operand(src1, ASR, src2.rm()), s, cond);
  } else {
    mov(dst, Operand(src1, ASR, src2.immediate()), s, cond);
  }
}

void Assembler::lsl(Register dst, Register src1, const Operand& src2, SBit s,
                    Condition cond) {
  if (src2.IsRegister()) {
    mov(dst, Operand(src1, LSL, src2.rm()), s, cond);
  } else {
    mov(dst, Operand(src1, LSL, src2.immediate()), s, cond);
  }
}

void Assembler::lsr(Register dst, Register src1, const Operand& src2, SBit s,
                    Condition cond) {
  if (src2.IsRegister()) {
    mov(dst, Operand(src1, LSR, src2.rm()), s, cond);
  } else {
    mov(dst, Operand(src1, LSR, src2.immediate()), s, cond);
  }
}
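
// Illustration (not part of the original source): these shift helpers are
// aliases for mov with a shifted-register operand, e.g.
//   lsl(r0, r1, Operand(2));   // same as mov(r0, Operand(r1, LSL, 2))
//   asr(r0, r1, Operand(r2));  // same as mov(r0, Operand(r1, ASR, r2))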
// Multiply instructions.
void Assembler::mla(Register dst, Register src1, Register src2, Register srcA,
                    SBit s, Condition cond) {
  DCHECK(dst != pc && src1 != pc && src2 != pc && srcA != pc);
  emit(cond | A | s | dst.code()*B16 | srcA.code()*B12 |
       src2.code()*B8 | B7 | B4 | src1.code());
}


void Assembler::mls(Register dst, Register src1, Register src2, Register srcA,
                    Condition cond) {
  DCHECK(dst != pc && src1 != pc && src2 != pc && srcA != pc);
  DCHECK(IsEnabled(ARMv7));
  emit(cond | B22 | B21 | dst.code()*B16 | srcA.code()*B12 |
       src2.code()*B8 | B7 | B4 | src1.code());
}


void Assembler::sdiv(Register dst, Register src1, Register src2,
                     Condition cond) {
  DCHECK(dst != pc && src1 != pc && src2 != pc);
  DCHECK(IsEnabled(SUDIV));
  emit(cond | B26 | B25 | B24 | B20 | dst.code() * B16 | 0xF * B12 |
       src2.code() * B8 | B4 | src1.code());
}


void Assembler::udiv(Register dst, Register src1, Register src2,
                     Condition cond) {
  DCHECK(dst != pc && src1 != pc && src2 != pc);
  DCHECK(IsEnabled(SUDIV));
  emit(cond | B26 | B25 | B24 | B21 | B20 | dst.code() * B16 | 0xF * B12 |
       src2.code() * B8 | B4 | src1.code());
}


void Assembler::mul(Register dst, Register src1, Register src2, SBit s,
                    Condition cond) {
  DCHECK(dst != pc && src1 != pc && src2 != pc);
  // dst goes in bits 16-19 for this instruction!
  emit(cond | s | dst.code() * B16 | src2.code() * B8 | B7 | B4 | src1.code());
}


void Assembler::smmla(Register dst, Register src1, Register src2, Register srcA,
                      Condition cond) {
  DCHECK(dst != pc && src1 != pc && src2 != pc && srcA != pc);
  emit(cond | B26 | B25 | B24 | B22 | B20 | dst.code() * B16 |
       srcA.code() * B12 | src2.code() * B8 | B4 | src1.code());
}


void Assembler::smmul(Register dst, Register src1, Register src2,
                      Condition cond) {
  DCHECK(dst != pc && src1 != pc && src2 != pc);
  emit(cond | B26 | B25 | B24 | B22 | B20 | dst.code() * B16 | 0xF * B12 |
       src2.code() * B8 | B4 | src1.code());
}


void Assembler::smlal(Register dstL,
                      Register dstH,
                      Register src1,
                      Register src2,
                      SBit s,
                      Condition cond) {
  DCHECK(dstL != pc && dstH != pc && src1 != pc && src2 != pc);
  DCHECK(dstL != dstH);
  emit(cond | B23 | B22 | A | s | dstH.code()*B16 | dstL.code()*B12 |
       src2.code()*B8 | B7 | B4 | src1.code());
}


void Assembler::smull(Register dstL,
                      Register dstH,
                      Register src1,
                      Register src2,
                      SBit s,
                      Condition cond) {
  DCHECK(dstL != pc && dstH != pc && src1 != pc && src2 != pc);
  DCHECK(dstL != dstH);
  emit(cond | B23 | B22 | s | dstH.code()*B16 | dstL.code()*B12 |
       src2.code()*B8 | B7 | B4 | src1.code());
}


void Assembler::umlal(Register dstL,
                      Register dstH,
                      Register src1,
                      Register src2,
                      SBit s,
                      Condition cond) {
  DCHECK(dstL != pc && dstH != pc && src1 != pc && src2 != pc);
  DCHECK(dstL != dstH);
  emit(cond | B23 | A | s | dstH.code()*B16 | dstL.code()*B12 |
       src2.code()*B8 | B7 | B4 | src1.code());
}


void Assembler::umull(Register dstL,
                      Register dstH,
                      Register src1,
                      Register src2,
                      SBit s,
                      Condition cond) {
  DCHECK(dstL != pc && dstH != pc && src1 != pc && src2 != pc);
  DCHECK(dstL != dstH);
  emit(cond | B23 | s | dstH.code()*B16 | dstL.code()*B12 |
       src2.code()*B8 | B7 | B4 | src1.code());
}


// Miscellaneous arithmetic instructions.
void Assembler::clz(Register dst, Register src, Condition cond) {
  DCHECK(dst != pc && src != pc);
  emit(cond | B24 | B22 | B21 | 15*B16 | dst.code()*B12 |
       15*B8 | CLZ | src.code());
}


// Saturating instructions.

// Unsigned saturate.
void Assembler::usat(Register dst,
                     int satpos,
                     const Operand& src,
                     Condition cond) {
  DCHECK(dst != pc && src.rm_ != pc);
  DCHECK((satpos >= 0) && (satpos <= 31));
  DCHECK(src.IsImmediateShiftedRegister());
  DCHECK((src.shift_op_ == ASR) || (src.shift_op_ == LSL));

  int sh = 0;
  if (src.shift_op_ == ASR) {
      sh = 1;
  }

  emit(cond | 0x6 * B24 | 0xE * B20 | satpos * B16 | dst.code() * B12 |
       src.shift_imm_ * B7 | sh * B6 | 0x1 * B4 | src.rm_.code());
}
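
// Illustration (not part of the original source): usat clamps a signed value
// into an unsigned range, e.g.
//   usat(r0, 8, Operand(r1));  // r0 = clamp(r1, 0, 255)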


// Bitfield manipulation instructions.

// Unsigned bit field extract.
// Extracts #width adjacent bits from position #lsb in a register, and
// writes them to the low bits of a destination register.
//   ubfx dst, src, #lsb, #width
void Assembler::ubfx(Register dst,
                     Register src,
                     int lsb,
                     int width,
                     Condition cond) {
  DCHECK(IsEnabled(ARMv7));
  DCHECK(dst != pc && src != pc);
  DCHECK((lsb >= 0) && (lsb <= 31));
  DCHECK((width >= 1) && (width <= (32 - lsb)));
  emit(cond | 0xF * B23 | B22 | B21 | (width - 1) * B16 | dst.code() * B12 |
       lsb * B7 | B6 | B4 | src.code());
}
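
// Illustration (not part of the original source):
//   ubfx(r0, r1, 4, 8);  // r0 = (r1 >> 4) & 0xFF, i.e. bits 11:4 of r1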


// Signed bit field extract.
// Extracts #width adjacent bits from position #lsb in a register, and
// writes them to the low bits of a destination register. The extracted
// value is sign extended to fill the destination register.
//   sbfx dst, src, #lsb, #width
void Assembler::sbfx(Register dst,
                     Register src,
                     int lsb,
                     int width,
                     Condition cond) {
  DCHECK(IsEnabled(ARMv7));
  DCHECK(dst != pc && src != pc);
  DCHECK((lsb >= 0) && (lsb <= 31));
  DCHECK((width >= 1) && (width <= (32 - lsb)));
  emit(cond | 0xF * B23 | B21 | (width - 1) * B16 | dst.code() * B12 |
       lsb * B7 | B6 | B4 | src.code());
}


// Bit field clear.
// Sets #width adjacent bits at position #lsb in the destination register
// to zero, preserving the value of the other bits.
//   bfc dst, #lsb, #width
void Assembler::bfc(Register dst, int lsb, int width, Condition cond) {
  DCHECK(IsEnabled(ARMv7));
  DCHECK(dst != pc);
  DCHECK((lsb >= 0) && (lsb <= 31));
  DCHECK((width >= 1) && (width <= (32 - lsb)));
  int msb = lsb + width - 1;
  emit(cond | 0x1F * B22 | msb * B16 | dst.code() * B12 | lsb * B7 | B4 | 0xF);
}


// Bit field insert.
// Inserts #width adjacent bits from the low bits of the source register
// into position #lsb of the destination register.
//   bfi dst, src, #lsb, #width
void Assembler::bfi(Register dst,
                    Register src,
                    int lsb,
                    int width,
                    Condition cond) {
  DCHECK(IsEnabled(ARMv7));
  DCHECK(dst != pc && src != pc);
  DCHECK((lsb >= 0) && (lsb <= 31));
  DCHECK((width >= 1) && (width <= (32 - lsb)));
  int msb = lsb + width - 1;
  emit(cond | 0x1F * B22 | msb * B16 | dst.code() * B12 | lsb * B7 | B4 |
       src.code());
}
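
// Illustration (not part of the original source):
//   bfc(r0, 8, 8);      // clears bits 15:8 of r0
//   bfi(r0, r1, 8, 8);  // replaces bits 15:8 of r0 with bits 7:0 of r1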


void Assembler::pkhbt(Register dst,
                      Register src1,
                      const Operand& src2,
                      Condition cond ) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.125.
  // cond(31-28) | 01101000(27-20) | Rn(19-16) |
  // Rd(15-12) | imm5(11-7) | 0(6) | 01(5-4) | Rm(3-0)
  DCHECK(dst != pc);
  DCHECK(src1 != pc);
  DCHECK(src2.IsImmediateShiftedRegister());
  DCHECK(src2.rm() != pc);
  DCHECK((src2.shift_imm_ >= 0) && (src2.shift_imm_ <= 31));
  DCHECK(src2.shift_op() == LSL);
  emit(cond | 0x68*B20 | src1.code()*B16 | dst.code()*B12 |
       src2.shift_imm_*B7 | B4 | src2.rm().code());
}
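
// Illustration (not part of the original source):
//   pkhbt(r0, r1, Operand(r2, LSL, 16));
// packs the bottom halfword of r1 with the bottom halfword of r2 (shifted
// into the top half), so r0[15:0] = r1[15:0] and r0[31:16] = r2[15:0].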


void Assembler::pkhtb(Register dst,
                      Register src1,
                      const Operand& src2,
                      Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.125.
  // cond(31-28) | 01101000(27-20) | Rn(19-16) |
  // Rd(15-12) | imm5(11-7) | 1(6) | 01(5-4) | Rm(3-0)
  DCHECK(dst != pc);
  DCHECK(src1 != pc);
  DCHECK(src2.IsImmediateShiftedRegister());
  DCHECK(src2.rm() != pc);
  DCHECK((src2.shift_imm_ >= 1) && (src2.shift_imm_ <= 32));
  DCHECK(src2.shift_op() == ASR);
  int asr = (src2.shift_imm_ == 32) ? 0 : src2.shift_imm_;
  emit(cond | 0x68*B20 | src1.code()*B16 | dst.code()*B12 |
       asr*B7 | B6 | B4 | src2.rm().code());
}


void Assembler::sxtb(Register dst, Register src, int rotate, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.233.
  // cond(31-28) | 01101010(27-20) | 1111(19-16) |
  // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
  DCHECK(dst != pc);
  DCHECK(src != pc);
  DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
  emit(cond | 0x6A * B20 | 0xF * B16 | dst.code() * B12 |
       ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src.code());
}


void Assembler::sxtab(Register dst, Register src1, Register src2, int rotate,
                      Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.233.
  // cond(31-28) | 01101010(27-20) | Rn(19-16) |
  // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
  DCHECK(dst != pc);
  DCHECK(src1 != pc);
  DCHECK(src2 != pc);
  DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
  emit(cond | 0x6A * B20 | src1.code() * B16 | dst.code() * B12 |
       ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src2.code());
}


void Assembler::sxth(Register dst, Register src, int rotate, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.235.
  // cond(31-28) | 01101011(27-20) | 1111(19-16) |
  // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
  DCHECK(dst != pc);
  DCHECK(src != pc);
  DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
  emit(cond | 0x6B * B20 | 0xF * B16 | dst.code() * B12 |
       ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src.code());
}


void Assembler::sxtah(Register dst, Register src1, Register src2, int rotate,
                      Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.235.
  // cond(31-28) | 01101011(27-20) | Rn(19-16) |
  // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
  DCHECK(dst != pc);
  DCHECK(src1 != pc);
  DCHECK(src2 != pc);
  DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
  emit(cond | 0x6B * B20 | src1.code() * B16 | dst.code() * B12 |
       ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src2.code());
}


void Assembler::uxtb(Register dst, Register src, int rotate, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.274.
  // cond(31-28) | 01101110(27-20) | 1111(19-16) |
  // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
  DCHECK(dst != pc);
  DCHECK(src != pc);
  DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
  emit(cond | 0x6E * B20 | 0xF * B16 | dst.code() * B12 |
       ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src.code());
}
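
// Illustration (not part of the original source): the rotation selects the
// extracted byte, e.g.
//   uxtb(r0, r1);     // r0 = r1 & 0xFF
//   uxtb(r0, r1, 8);  // r0 = (r1 >> 8) & 0xFF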


void Assembler::uxtab(Register dst, Register src1, Register src2, int rotate,
                      Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.271.
  // cond(31-28) | 01101110(27-20) | Rn(19-16) |
  // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
  DCHECK(dst != pc);
  DCHECK(src1 != pc);
  DCHECK(src2 != pc);
  DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
  emit(cond | 0x6E * B20 | src1.code() * B16 | dst.code() * B12 |
       ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src2.code());
}


void Assembler::uxtb16(Register dst, Register src, int rotate, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.275.
  // cond(31-28) | 01101100(27-20) | 1111(19-16) |
  // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
  DCHECK(dst != pc);
  DCHECK(src != pc);
  DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
  emit(cond | 0x6C * B20 | 0xF * B16 | dst.code() * B12 |
       ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src.code());
}


void Assembler::uxth(Register dst, Register src, int rotate, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.276.
  // cond(31-28) | 01101111(27-20) | 1111(19-16) |
  // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
  DCHECK(dst != pc);
  DCHECK(src != pc);
  DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
  emit(cond | 0x6F * B20 | 0xF * B16 | dst.code() * B12 |
       ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src.code());
}


void Assembler::uxtah(Register dst, Register src1, Register src2, int rotate,
                      Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.273.
  // cond(31-28) | 01101111(27-20) | Rn(19-16) |
  // Rd(15-12) | rotate(11-10) | 00(9-8)| 0111(7-4) | Rm(3-0)
  DCHECK(dst != pc);
  DCHECK(src1 != pc);
  DCHECK(src2 != pc);
  DCHECK(rotate == 0 || rotate == 8 || rotate == 16 || rotate == 24);
  emit(cond | 0x6F * B20 | src1.code() * B16 | dst.code() * B12 |
       ((rotate >> 1) & 0xC) * B8 | 7 * B4 | src2.code());
}


void Assembler::rbit(Register dst, Register src, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.144.
  // cond(31-28) | 011011111111(27-16) | Rd(15-12) | 11110011(11-4) | Rm(3-0)
  DCHECK(IsEnabled(ARMv7));
  DCHECK(dst != pc);
  DCHECK(src != pc);
  emit(cond | 0x6FF * B16 | dst.code() * B12 | 0xF3 * B4 | src.code());
}

void Assembler::rev(Register dst, Register src, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.144.
  // cond(31-28) | 011010111111(27-16) | Rd(15-12) | 11110011(11-4) | Rm(3-0)
  DCHECK(dst != pc);
  DCHECK(src != pc);
  emit(cond | 0x6BF * B16 | dst.code() * B12 | 0xF3 * B4 | src.code());
}

// Status register access instructions.
void Assembler::mrs(Register dst, SRegister s, Condition cond) {
  DCHECK(dst != pc);
  emit(cond | B24 | s | 15*B16 | dst.code()*B12);
}


void Assembler::msr(SRegisterFieldMask fields, const Operand& src,
                    Condition cond) {
  DCHECK_NE(fields & 0x000F0000, 0);  // At least one field must be set.
  DCHECK(((fields & 0xFFF0FFFF) == CPSR) || ((fields & 0xFFF0FFFF) == SPSR));
  Instr instr;
  if (src.IsImmediate()) {
    // Immediate.
    uint32_t rotate_imm;
    uint32_t immed_8;
    if (src.MustOutputRelocInfo(this) ||
        !FitsShifter(src.immediate(), &rotate_imm, &immed_8, nullptr)) {
      UseScratchRegisterScope temps(this);
      Register scratch = temps.Acquire();
      // Immediate operand cannot be encoded, load it first to a scratch
      // register.
      Move32BitImmediate(scratch, src);
      msr(fields, Operand(scratch), cond);
      return;
    }
    instr = I | rotate_imm*B8 | immed_8;
  } else {
    DCHECK(src.IsRegister());  // Only rm is allowed.
    instr = src.rm_.code();
  }
  emit(cond | instr | B24 | B21 | fields | 15*B12);
}


// Load/Store instructions.
void Assembler::ldr(Register dst, const MemOperand& src, Condition cond) {
  AddrMode2(cond | B26 | L, dst, src);
}


void Assembler::str(Register src, const MemOperand& dst, Condition cond) {
  AddrMode2(cond | B26, src, dst);
}


void Assembler::ldrb(Register dst, const MemOperand& src, Condition cond) {
  AddrMode2(cond | B26 | B | L, dst, src);
}


void Assembler::strb(Register src, const MemOperand& dst, Condition cond) {
  AddrMode2(cond | B26 | B, src, dst);
}


void Assembler::ldrh(Register dst, const MemOperand& src, Condition cond) {
  AddrMode3(cond | L | B7 | H | B4, dst, src);
}


void Assembler::strh(Register src, const MemOperand& dst, Condition cond) {
  AddrMode3(cond | B7 | H | B4, src, dst);
}


void Assembler::ldrsb(Register dst, const MemOperand& src, Condition cond) {
  AddrMode3(cond | L | B7 | S6 | B4, dst, src);
}


void Assembler::ldrsh(Register dst, const MemOperand& src, Condition cond) {
  AddrMode3(cond | L | B7 | S6 | H | B4, dst, src);
}


void Assembler::ldrd(Register dst1, Register dst2,
                     const MemOperand& src, Condition cond) {
  DCHECK(src.rm() == no_reg);
  DCHECK(dst1 != lr);  // r14.
  DCHECK_EQ(0, dst1.code() % 2);
  DCHECK_EQ(dst1.code() + 1, dst2.code());
  AddrMode3(cond | B7 | B6 | B4, dst1, src);
}


void Assembler::strd(Register src1, Register src2,
                     const MemOperand& dst, Condition cond) {
  DCHECK(dst.rm() == no_reg);
  DCHECK(src1 != lr);  // r14.
  DCHECK_EQ(0, src1.code() % 2);
  DCHECK_EQ(src1.code() + 1, src2.code());
  AddrMode3(cond | B7 | B6 | B5 | B4, src1, dst);
}

void Assembler::ldr_pcrel(Register dst, int imm12, Condition cond) {
  AddrMode am = Offset;
  if (imm12 < 0) {
    imm12 = -imm12;
    am = NegOffset;
  }
  DCHECK(is_uint12(imm12));
  emit(cond | B26 | am | L | pc.code() * B16 | dst.code() * B12 | imm12);
}

// Load/Store exclusive instructions.
void Assembler::ldrex(Register dst, Register src, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.75.
  // cond(31-28) | 00011001(27-20) | Rn(19-16) | Rt(15-12) | 111110011111(11-0)
  DCHECK(dst != pc);
  DCHECK(src != pc);
  emit(cond | B24 | B23 | B20 | src.code() * B16 | dst.code() * B12 | 0xF9F);
}

void Assembler::strex(Register src1, Register src2, Register dst,
                      Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.212.
  // cond(31-28) | 00011000(27-20) | Rn(19-16) | Rd(15-12) | 11111001(11-4) |
  // Rt(3-0)
  DCHECK(dst != pc);
  DCHECK(src1 != pc);
  DCHECK(src2 != pc);
  DCHECK(src1 != dst);
  DCHECK(src1 != src2);
  emit(cond | B24 | B23 | dst.code() * B16 | src1.code() * B12 | 0xF9 * B4 |
       src2.code());
}
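
// Illustration (not part of the original source): a typical exclusive
// load/store retry loop, here atomically incrementing the word at [r0]:
//   Label retry;
//   bind(&retry);
//   ldrex(r1, r0);            // r1 = [r0], and mark the address exclusive
//   add(r1, r1, Operand(1));
//   strex(r2, r1, r0);        // try [r0] = r1; r2 = 0 on success
//   cmp(r2, Operand(0));
//   b(&retry, ne);            // retry if another agent broke exclusivity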

void Assembler::ldrexb(Register dst, Register src, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.76.
  // cond(31-28) | 00011101(27-20) | Rn(19-16) | Rt(15-12) | 111110011111(11-0)
  DCHECK(dst != pc);
  DCHECK(src != pc);
  emit(cond | B24 | B23 | B22 | B20 | src.code() * B16 | dst.code() * B12 |
       0xF9F);
}

void Assembler::strexb(Register src1, Register src2, Register dst,
                       Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.213.
  // cond(31-28) | 00011100(27-20) | Rn(19-16) | Rd(15-12) | 11111001(11-4) |
  // Rt(3-0)
  DCHECK(dst != pc);
  DCHECK(src1 != pc);
  DCHECK(src2 != pc);
  DCHECK(src1 != dst);
  DCHECK(src1 != src2);
  emit(cond | B24 | B23 | B22 | dst.code() * B16 | src1.code() * B12 |
       0xF9 * B4 | src2.code());
}

void Assembler::ldrexh(Register dst, Register src, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.78.
  // cond(31-28) | 00011111(27-20) | Rn(19-16) | Rt(15-12) | 111110011111(11-0)
  DCHECK(dst != pc);
  DCHECK(src != pc);
  emit(cond | B24 | B23 | B22 | B21 | B20 | src.code() * B16 |
       dst.code() * B12 | 0xF9F);
}

void Assembler::strexh(Register src1, Register src2, Register dst,
                       Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.215.
  // cond(31-28) | 00011110(27-20) | Rn(19-16) | Rd(15-12) | 11111001(11-4) |
  // Rt(3-0)
  DCHECK(dst != pc);
  DCHECK(src1 != pc);
  DCHECK(src2 != pc);
  DCHECK(src1 != dst);
  DCHECK(src1 != src2);
  emit(cond | B24 | B23 | B22 | B21 | dst.code() * B16 | src1.code() * B12 |
       0xF9 * B4 | src2.code());
}

void Assembler::ldrexd(Register dst1, Register dst2, Register src,
                       Condition cond) {
  // cond(31-28) | 00011011(27-20) | Rn(19-16) | Rt(15-12) | 111110011111(11-0)
  DCHECK(dst1 != lr);  // r14.
  // The pair of destination registers is restricted to being an even-numbered
  // register and the odd-numbered register that immediately follows it.
  DCHECK_EQ(0, dst1.code() % 2);
  DCHECK_EQ(dst1.code() + 1, dst2.code());
  emit(cond | B24 | B23 | B21 | B20 | src.code() * B16 | dst1.code() * B12 |
       0xF9F);
}

void Assembler::strexd(Register res, Register src1, Register src2, Register dst,
                       Condition cond) {
  // cond(31-28) | 00011010(27-20) | Rn(19-16) | Rt(15-12) | 111110011111(11-0)
  DCHECK(src1 != lr);  // r14.
  // The pair of source registers is restricted to being an even-numbered
  // register and the odd-numbered register that immediately follows it.
  DCHECK_EQ(0, src1.code() % 2);
  DCHECK_EQ(src1.code() + 1, src2.code());
  emit(cond | B24 | B23 | B21 | dst.code() * B16 | res.code() * B12 |
       0xF9 * B4 | src1.code());
}

// Preload instructions.
void Assembler::pld(const MemOperand& address) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.128.
  // 1111(31-28) | 0111(27-24) | U(23) | R(22) | 01(21-20) | Rn(19-16) |
  // 1111(15-12) | imm5(11-07) | type(6-5) | 0(4)| Rm(3-0) |
  DCHECK(address.rm() == no_reg);
  DCHECK(address.am() == Offset);
  int U = B23;
  int offset = address.offset();
  if (offset < 0) {
    offset = -offset;
    U = 0;
  }
  DCHECK_LT(offset, 4096);
  emit(kSpecialCondition | B26 | B24 | U | B22 | B20 |
       address.rn().code() * B16 | 0xF * B12 | offset);
}


// Load/Store multiple instructions.
void Assembler::ldm(BlockAddrMode am,
                    Register base,
                    RegList dst,
                    Condition cond) {
  // ABI stack constraint: ldmxx base, {..sp..}  base != sp  is not restartable.
  DCHECK(base == sp || (dst & sp.bit()) == 0);

  AddrMode4(cond | B27 | am | L, base, dst);

  // Emit the constant pool after a function return implemented by ldm ..{..pc}.
  if (cond == al && (dst & pc.bit()) != 0) {
    // There is a slight chance that the ldm instruction was actually a call,
    // in which case it would be wrong to return into the constant pool; we
    // recognize this case by checking if the emission of the pool was blocked
    // at the pc of the ldm instruction by a mov lr, pc instruction; if this is
    // the case, we emit a jump over the pool.
    CheckConstPool(true, no_const_pool_before_ == pc_offset() - kInstrSize);
  }
}


void Assembler::stm(BlockAddrMode am,
                    Register base,
                    RegList src,
                    Condition cond) {
  AddrMode4(cond | B27 | am, base, src);
}


// Exception-generating instructions and debugging support.
// Stops with a non-negative code less than kNumOfWatchedStops support
// enabling/disabling and a counter feature. See simulator-arm.h.
void Assembler::stop(const char* msg, Condition cond, int32_t code) {
#ifndef __arm__
  DCHECK_GE(code, kDefaultStopCode);
  {
    BlockConstPoolScope block_const_pool(this);
    if (code >= 0) {
      svc(kStopCode + code, cond);
    } else {
      svc(kStopCode + kMaxStopCode, cond);
    }
  }
#else  // def __arm__
  if (cond != al) {
    Label skip;
    b(&skip, NegateCondition(cond));
    bkpt(0);
    bind(&skip);
  } else {
    bkpt(0);
  }
#endif  // def __arm__
}

void Assembler::bkpt(uint32_t imm16) {
  DCHECK(is_uint16(imm16));
  emit(al | B24 | B21 | (imm16 >> 4) * B8 | BKPT | (imm16 & 0xF));
}


void Assembler::svc(uint32_t imm24, Condition cond) {
  DCHECK(is_uint24(imm24));
  emit(cond | 15*B24 | imm24);
}


void Assembler::dmb(BarrierOption option) {
  if (CpuFeatures::IsSupported(ARMv7)) {
    // Details available in ARM DDI 0406C.b, A8-378.
    emit(kSpecialCondition | 0x57FF * B12 | 5 * B4 | option);
  } else {
    // Details available in ARM DDI 0406C.b, B3-1750.
    // CP15DMB: CRn=c7, opc1=0, CRm=c10, opc2=5, Rt is ignored.
    mcr(p15, 0, r0, cr7, cr10, 5);
  }
}


void Assembler::dsb(BarrierOption option) {
  if (CpuFeatures::IsSupported(ARMv7)) {
    // Details available in ARM DDI 0406C.b, A8-380.
    emit(kSpecialCondition | 0x57FF * B12 | 4 * B4 | option);
  } else {
    // Details available in ARM DDI 0406C.b, B3-1750.
    // CP15DSB: CRn=c7, opc1=0, CRm=c10, opc2=4, Rt is ignored.
    mcr(p15, 0, r0, cr7, cr10, 4);
  }
}


void Assembler::isb(BarrierOption option) {
  if (CpuFeatures::IsSupported(ARMv7)) {
    // Details available in ARM DDI 0406C.b, A8-389.
    emit(kSpecialCondition | 0x57FF * B12 | 6 * B4 | option);
  } else {
    // Details available in ARM DDI 0406C.b, B3-1750.
    // CP15ISB: CRn=c7, opc1=0, CRm=c5, opc2=4, Rt is ignored.
    mcr(p15, 0, r0, cr7, cr5, 4);
  }
}

void Assembler::csdb() {
  // Details available in Arm Cache Speculation Side-channels white paper,
  // version 1.1, page 4.
  emit(0xE320F014);
}

// Coprocessor instructions.
void Assembler::cdp(Coprocessor coproc,
                    int opcode_1,
                    CRegister crd,
                    CRegister crn,
                    CRegister crm,
                    int opcode_2,
                    Condition cond) {
  DCHECK(is_uint4(opcode_1) && is_uint3(opcode_2));
  emit(cond | B27 | B26 | B25 | (opcode_1 & 15)*B20 | crn.code()*B16 |
       crd.code()*B12 | coproc*B8 | (opcode_2 & 7)*B5 | crm.code());
}

void Assembler::cdp2(Coprocessor coproc, int opcode_1, CRegister crd,
                     CRegister crn, CRegister crm, int opcode_2) {
  cdp(coproc, opcode_1, crd, crn, crm, opcode_2, kSpecialCondition);
}


void Assembler::mcr(Coprocessor coproc,
                    int opcode_1,
                    Register rd,
                    CRegister crn,
                    CRegister crm,
                    int opcode_2,
                    Condition cond) {
  DCHECK(is_uint3(opcode_1) && is_uint3(opcode_2));
  emit(cond | B27 | B26 | B25 | (opcode_1 & 7)*B21 | crn.code()*B16 |
       rd.code()*B12 | coproc*B8 | (opcode_2 & 7)*B5 | B4 | crm.code());
}

void Assembler::mcr2(Coprocessor coproc, int opcode_1, Register rd,
                     CRegister crn, CRegister crm, int opcode_2) {
  mcr(coproc, opcode_1, rd, crn, crm, opcode_2, kSpecialCondition);
}


void Assembler::mrc(Coprocessor coproc,
                    int opcode_1,
                    Register rd,
                    CRegister crn,
                    CRegister crm,
                    int opcode_2,
                    Condition cond) {
  DCHECK(is_uint3(opcode_1) && is_uint3(opcode_2));
  emit(cond | B27 | B26 | B25 | (opcode_1 & 7)*B21 | L | crn.code()*B16 |
       rd.code()*B12 | coproc*B8 | (opcode_2 & 7)*B5 | B4 | crm.code());
}

void Assembler::mrc2(Coprocessor coproc, int opcode_1, Register rd,
                     CRegister crn, CRegister crm, int opcode_2) {
  mrc(coproc, opcode_1, rd, crn, crm, opcode_2, kSpecialCondition);
}


void Assembler::ldc(Coprocessor coproc,
                    CRegister crd,
                    const MemOperand& src,
                    LFlag l,
                    Condition cond) {
  AddrMode5(cond | B27 | B26 | l | L | coproc * B8, crd, src);
}


void Assembler::ldc(Coprocessor coproc,
                    CRegister crd,
                    Register rn,
                    int option,
                    LFlag l,
                    Condition cond) {
  // Unindexed addressing.
  DCHECK(is_uint8(option));
  emit(cond | B27 | B26 | U | l | L | rn.code()*B16 | crd.code()*B12 |
       coproc*B8 | (option & 255));
}

void Assembler::ldc2(Coprocessor coproc, CRegister crd, const MemOperand& src,
                     LFlag l) {
  ldc(coproc, crd, src, l, kSpecialCondition);
}

void Assembler::ldc2(Coprocessor coproc, CRegister crd, Register rn, int option,
                     LFlag l) {
  ldc(coproc, crd, rn, option, l, kSpecialCondition);
}


// Support for VFP.

void Assembler::vldr(const DwVfpRegister dst,
                     const Register base,
                     int offset,
                     const Condition cond) {
  // Ddst = MEM(Rbase + offset).
  // Instruction details available in ARM DDI 0406C.b, A8-924.
  // cond(31-28) | 1101(27-24)| U(23) | D(22) | 01(21-20) | Rbase(19-16) |
  // Vd(15-12) | 1011(11-8) | offset
  DCHECK(VfpRegisterIsAvailable(dst));
  int u = 1;
  if (offset < 0) {
    CHECK_NE(offset, kMinInt);
    offset = -offset;
    u = 0;
  }
  int vd, d;
  dst.split_code(&vd, &d);

  DCHECK_GE(offset, 0);
  if ((offset % 4) == 0 && (offset / 4) < 256) {
    emit(cond | 0xD*B24 | u*B23 | d*B22 | B20 | base.code()*B16 | vd*B12 |
         0xB*B8 | ((offset / 4) & 255));
  } else {
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    // Larger offsets must be handled by computing the correct address in a
    // scratch register.
    DCHECK(base != scratch);
    if (u == 1) {
      add(scratch, base, Operand(offset));
    } else {
      sub(scratch, base, Operand(offset));
    }
    emit(cond | 0xD * B24 | d * B22 | B20 | scratch.code() * B16 | vd * B12 |
         0xB * B8);
  }
}
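
// Note (illustrative, not part of the original source): the immediate form
// encodes imm8 * 4 bytes, so only word-multiple offsets up to 1020 fit
// directly; vldr(d0, r0, 1020) encodes in one instruction, while
// vldr(d0, r0, 1024) takes the scratch-register path above.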


void Assembler::vldr(const DwVfpRegister dst,
                     const MemOperand& operand,
                     const Condition cond) {
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(operand.am_ == Offset);
  if (operand.rm().is_valid()) {
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    add(scratch, operand.rn(),
        Operand(operand.rm(), operand.shift_op_, operand.shift_imm_));
    vldr(dst, scratch, 0, cond);
  } else {
    vldr(dst, operand.rn(), operand.offset(), cond);
  }
}


void Assembler::vldr(const SwVfpRegister dst,
                     const Register base,
                     int offset,
                     const Condition cond) {
  // Sdst = MEM(Rbase + offset).
  // Instruction details available in ARM DDI 0406A, A8-628.
  // cond(31-28) | 1101(27-24)| U001(23-20) | Rbase(19-16) |
  // Vdst(15-12) | 1010(11-8) | offset
  int u = 1;
  if (offset < 0) {
    offset = -offset;
    u = 0;
  }
  int sd, d;
  dst.split_code(&sd, &d);
  DCHECK_GE(offset, 0);

  if ((offset % 4) == 0 && (offset / 4) < 256) {
    emit(cond | u*B23 | d*B22 | 0xD1*B20 | base.code()*B16 | sd*B12 |
         0xA*B8 | ((offset / 4) & 255));
  } else {
    // Larger offsets must be handled by computing the correct address in a
    // scratch register.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    DCHECK(base != scratch);
    if (u == 1) {
      add(scratch, base, Operand(offset));
    } else {
      sub(scratch, base, Operand(offset));
    }
    emit(cond | d * B22 | 0xD1 * B20 | scratch.code() * B16 | sd * B12 |
         0xA * B8);
  }
}


void Assembler::vldr(const SwVfpRegister dst,
                     const MemOperand& operand,
                     const Condition cond) {
  DCHECK(operand.am_ == Offset);
  if (operand.rm().is_valid()) {
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    add(scratch, operand.rn(),
        Operand(operand.rm(), operand.shift_op_, operand.shift_imm_));
    vldr(dst, scratch, 0, cond);
  } else {
    vldr(dst, operand.rn(), operand.offset(), cond);
  }
}


void Assembler::vstr(const DwVfpRegister src,
                     const Register base,
                     int offset,
                     const Condition cond) {
  // MEM(Rbase + offset) = Dsrc.
  // Instruction details available in ARM DDI 0406C.b, A8-1082.
  // cond(31-28) | 1101(27-24)| U(23) | D(22) | 00(21-20) | Rbase(19-16) |
  // Vd(15-12) | 1011(11-8) | (offset/4)
  DCHECK(VfpRegisterIsAvailable(src));
  int u = 1;
  if (offset < 0) {
    CHECK_NE(offset, kMinInt);
    offset = -offset;
    u = 0;
  }
  DCHECK_GE(offset, 0);
  int vd, d;
  src.split_code(&vd, &d);

  if ((offset % 4) == 0 && (offset / 4) < 256) {
    emit(cond | 0xD*B24 | u*B23 | d*B22 | base.code()*B16 | vd*B12 | 0xB*B8 |
         ((offset / 4) & 255));
  } else {
    // Larger offsets must be handled by computing the correct address in a
    // scratch register.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    DCHECK(base != scratch);
    if (u == 1) {
      add(scratch, base, Operand(offset));
    } else {
      sub(scratch, base, Operand(offset));
    }
    emit(cond | 0xD * B24 | d * B22 | scratch.code() * B16 | vd * B12 |
         0xB * B8);
  }
}


void Assembler::vstr(const DwVfpRegister src,
                     const MemOperand& operand,
                     const Condition cond) {
  DCHECK(VfpRegisterIsAvailable(src));
  DCHECK(operand.am_ == Offset);
  if (operand.rm().is_valid()) {
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    add(scratch, operand.rn(),
        Operand(operand.rm(), operand.shift_op_, operand.shift_imm_));
    vstr(src, scratch, 0, cond);
  } else {
    vstr(src, operand.rn(), operand.offset(), cond);
  }
}


void Assembler::vstr(const SwVfpRegister src,
                     const Register base,
                     int offset,
                     const Condition cond) {
  // MEM(Rbase + offset) = SSrc.
  // Instruction details available in ARM DDI 0406A, A8-786.
  // cond(31-28) | 1101(27-24)| U000(23-20) | Rbase(19-16) |
  // Vdst(15-12) | 1010(11-8) | (offset/4)
  int u = 1;
  if (offset < 0) {
    CHECK_NE(offset, kMinInt);
    offset = -offset;
    u = 0;
  }
  int sd, d;
  src.split_code(&sd, &d);
  DCHECK_GE(offset, 0);
  if ((offset % 4) == 0 && (offset / 4) < 256) {
    emit(cond | u*B23 | d*B22 | 0xD0*B20 | base.code()*B16 | sd*B12 |
         0xA*B8 | ((offset / 4) & 255));
  } else {
    // Larger offsets must be handled by computing the correct address in a
    // scratch register.
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    DCHECK(base != scratch);
    if (u == 1) {
      add(scratch, base, Operand(offset));
    } else {
      sub(scratch, base, Operand(offset));
    }
    emit(cond | d * B22 | 0xD0 * B20 | scratch.code() * B16 | sd * B12 |
         0xA * B8);
  }
}


void Assembler::vstr(const SwVfpRegister src,
                     const MemOperand& operand,
                     const Condition cond) {
  DCHECK(operand.am_ == Offset);
  if (operand.rm().is_valid()) {
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    add(scratch, operand.rn(),
        Operand(operand.rm(), operand.shift_op_, operand.shift_imm_));
    vstr(src, scratch, 0, cond);
  } else {
    vstr(src, operand.rn(), operand.offset(), cond);
  }
}

void Assembler::vldm(BlockAddrMode am, Register base, DwVfpRegister first,
                     DwVfpRegister last, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-922.
  // cond(31-28) | 110(27-25)| PUDW1(24-20) | Rbase(19-16) |
  // first(15-12) | 1011(11-8) | (count * 2)
  DCHECK_LE(first.code(), last.code());
  DCHECK(VfpRegisterIsAvailable(last));
  DCHECK(am == ia || am == ia_w || am == db_w);
  DCHECK(base != pc);

  int sd, d;
  first.split_code(&sd, &d);
  int count = last.code() - first.code() + 1;
  DCHECK_LE(count, 16);
  emit(cond | B27 | B26 | am | d*B22 | B20 | base.code()*B16 | sd*B12 |
       0xB*B8 | count*2);
}

void Assembler::vstm(BlockAddrMode am, Register base, DwVfpRegister first,
                     DwVfpRegister last, Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-1080.
  // cond(31-28) | 110(27-25)| PUDW0(24-20) | Rbase(19-16) |
  // first(15-12) | 1011(11-8) | (count * 2)
  DCHECK_LE(first.code(), last.code());
  DCHECK(VfpRegisterIsAvailable(last));
  DCHECK(am == ia || am == ia_w || am == db_w);
  DCHECK(base != pc);

  int sd, d;
  first.split_code(&sd, &d);
  int count = last.code() - first.code() + 1;
  DCHECK_LE(count, 16);
  emit(cond | B27 | B26 | am | d*B22 | base.code()*B16 | sd*B12 |
       0xB*B8 | count*2);
}

void Assembler::vldm(BlockAddrMode am, Register base, SwVfpRegister first,
                     SwVfpRegister last, Condition cond) {
  // Instruction details available in ARM DDI 0406A, A8-626.
  // cond(31-28) | 110(27-25)| PUDW1(24-20) | Rbase(19-16) |
  // first(15-12) | 1010(11-8) | count
  DCHECK_LE(first.code(), last.code());
  DCHECK(am == ia || am == ia_w || am == db_w);
  DCHECK(base != pc);

  int sd, d;
  first.split_code(&sd, &d);
  int count = last.code() - first.code() + 1;
  emit(cond | B27 | B26 | am | d*B22 | B20 | base.code()*B16 | sd*B12 |
       0xA*B8 | count);
}

void Assembler::vstm(BlockAddrMode am, Register base, SwVfpRegister first,
                     SwVfpRegister last, Condition cond) {
  // Instruction details available in ARM DDI 0406A, A8-784.
  // cond(31-28) | 110(27-25)| PUDW0(24-20) | Rbase(19-16) |
  // first(15-12) | 1010(11-8) | count
  DCHECK_LE(first.code(), last.code());
  DCHECK(am == ia || am == ia_w || am == db_w);
  DCHECK(base != pc);

  int sd, d;
  first.split_code(&sd, &d);
  int count = last.code() - first.code() + 1;
  emit(cond | B27 | B26 | am | d*B22 | base.code()*B16 | sd*B12 |
       0xA*B8 | count);
}

static void DoubleAsTwoUInt32(Double d, uint32_t* lo, uint32_t* hi) {
  uint64_t i = d.AsUint64();

  *lo = i & 0xFFFFFFFF;
  *hi = i >> 32;
}

// Only works for little endian floating point formats.
// We don't support VFP on the mixed endian floating point platform.
static bool FitsVmovFPImmediate(Double d, uint32_t* encoding) {
  // VMOV can accept an immediate of the form:
  //
  //  +/- m * 2^(-n) where 16 <= m <= 31 and 0 <= n <= 7
  //
  // The immediate is encoded using an 8-bit quantity, comprised of two
  // 4-bit fields. For an 8-bit immediate of the form:
  //
  //  [abcdefgh]
  //
  // where a is the MSB and h is the LSB, an immediate 64-bit double can be
  // created of the form:
  //
  //  [aBbbbbbb,bbcdefgh,00000000,00000000,
  //      00000000,00000000,00000000,00000000]
  //
  // where B = ~b.
  //

  uint32_t lo, hi;
  DoubleAsTwoUInt32(d, &lo, &hi);

  // The most obvious constraint is the long block of zeroes.
  if ((lo != 0) || ((hi & 0xFFFF) != 0)) {
    return false;
  }

  // Bits 61:54 must be all clear or all set.
  if (((hi & 0x3FC00000) != 0) && ((hi & 0x3FC00000) != 0x3FC00000)) {
    return false;
  }

  // Bit 62 must be NOT bit 61.
  if (((hi ^ (hi << 1)) & (0x40000000)) == 0) {
    return false;
  }

  // Create the encoded immediate in the form:
  //  [00000000,0000abcd,00000000,0000efgh]
  *encoding = (hi >> 16) & 0xF;       // Low nybble.
  *encoding |= (hi >> 4) & 0x70000;   // Low three bits of the high nybble.
  *encoding |= (hi >> 12) & 0x80000;  // Top bit of the high nybble.

  return true;
}
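
// Worked example (not part of the original source): 1.0 is
// 0x3FF0000000000000, so lo = 0 and hi = 0x3FF00000. The checks pass
// (bits 61:54 are all set, and bit 62 is the inverse of bit 61), giving
// abcd = 0b0111 and efgh = 0b0000, i.e. *encoding = 0x70000, matching the
// ARM modified immediate 0x70 for the double 1.0.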

void Assembler::vmov(const SwVfpRegister dst, Float32 imm) {
  uint32_t enc;
  if (CpuFeatures::IsSupported(VFPv3) &&
      FitsVmovFPImmediate(Double(imm.get_scalar()), &enc)) {
    CpuFeatureScope scope(this, VFPv3);
    // The float can be encoded in the instruction.
    //
    // Sd = immediate
    // Instruction details available in ARM DDI 0406C.b, A8-936.
    // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | imm4H(19-16) |
    // Vd(15-12) | 101(11-9) | sz=0(8) | imm4L(3-0)
    int vd, d;
    dst.split_code(&vd, &d);
    emit(al | 0x1D * B23 | d * B22 | 0x3 * B20 | vd * B12 | 0x5 * B9 | enc);
  } else {
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();
    mov(scratch, Operand(imm.get_bits()));
    vmov(dst, scratch);
  }
}

void Assembler::vmov(const DwVfpRegister dst, Double imm,
                     const Register extra_scratch) {
  DCHECK(VfpRegisterIsAvailable(dst));
  uint32_t enc;
  if (CpuFeatures::IsSupported(VFPv3) && FitsVmovFPImmediate(imm, &enc)) {
    CpuFeatureScope scope(this, VFPv3);
    // The double can be encoded in the instruction.
    //
    // Dd = immediate
    // Instruction details available in ARM DDI 0406C.b, A8-936.
    // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | imm4H(19-16) |
    // Vd(15-12) | 101(11-9) | sz=1(8) | imm4L(3-0)
    int vd, d;
    dst.split_code(&vd, &d);
    emit(al | 0x1D*B23 | d*B22 | 0x3*B20 | vd*B12 | 0x5*B9 | B8 | enc);
  } else {
    // Synthesise the double from ARM immediates.
    uint32_t lo, hi;
    DoubleAsTwoUInt32(imm, &lo, &hi);
    UseScratchRegisterScope temps(this);
    Register scratch = temps.Acquire();

    if (lo == hi) {
      // Move the low and high parts of the double to a D register in one
      // instruction.
      mov(scratch, Operand(lo));
      vmov(dst, scratch, scratch);
    } else if (extra_scratch == no_reg) {
      // We only have one spare scratch register.
      mov(scratch, Operand(lo));
      vmov(NeonS32, dst, 0, scratch);
      if (((lo & 0xFFFF) == (hi & 0xFFFF)) && CpuFeatures::IsSupported(ARMv7)) {
        CpuFeatureScope scope(this, ARMv7);
        movt(scratch, hi >> 16);
      } else {
        mov(scratch, Operand(hi));
      }
      vmov(NeonS32, dst, 1, scratch);
    } else {
      // Move the low and high parts of the double to a D register in one
      // instruction.
      mov(scratch, Operand(lo));
      mov(extra_scratch, Operand(hi));
      vmov(dst, scratch, extra_scratch);
    }
  }
}

void Assembler::vmov(const SwVfpRegister dst,
                     const SwVfpRegister src,
                     const Condition cond) {
  // Sd = Sm
  // Instruction details available in ARM DDI 0406B, A8-642.
  int sd, d, sm, m;
  dst.split_code(&sd, &d);
  src.split_code(&sm, &m);
  emit(cond | 0xE*B24 | d*B22 | 0xB*B20 | sd*B12 | 0xA*B8 | B6 | m*B5 | sm);
}


void Assembler::vmov(const DwVfpRegister dst,
                     const DwVfpRegister src,
                     const Condition cond) {
  // Dd = Dm
  // Instruction details available in ARM DDI 0406C.b, A8-938.
  // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 0000(19-16) | Vd(15-12) |
  // 101(11-9) | sz=1(8) | 0(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D*B23 | d*B22 | 0x3*B20 | vd*B12 | 0x5*B9 | B8 | B6 | m*B5 |
       vm);
}

void Assembler::vmov(const DwVfpRegister dst,
                     const Register src1,
                     const Register src2,
                     const Condition cond) {
  // Dm = <Rt,Rt2>.
  // Instruction details available in ARM DDI 0406C.b, A8-948.
  // cond(31-28) | 1100(27-24)| 010(23-21) | op=0(20) | Rt2(19-16) |
  // Rt(15-12) | 1011(11-8) | 00(7-6) | M(5) | 1(4) | Vm
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(src1 != pc && src2 != pc);
  int vm, m;
  dst.split_code(&vm, &m);
  emit(cond | 0xC*B24 | B22 | src2.code()*B16 |
       src1.code()*B12 | 0xB*B8 | m*B5 | B4 | vm);
}


void Assembler::vmov(const Register dst1,
                     const Register dst2,
                     const DwVfpRegister src,
                     const Condition cond) {
  // <Rt,Rt2> = Dm.
  // Instruction details available in ARM DDI 0406C.b, A8-948.
  // cond(31-28) | 1100(27-24)| 010(23-21) | op=1(20) | Rt2(19-16) |
  // Rt(15-12) | 1011(11-8) | 00(7-6) | M(5) | 1(4) | Vm
  DCHECK(VfpRegisterIsAvailable(src));
  DCHECK(dst1 != pc && dst2 != pc);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0xC*B24 | B22 | B20 | dst2.code()*B16 |
       dst1.code()*B12 | 0xB*B8 | m*B5 | B4 | vm);
}


void Assembler::vmov(const SwVfpRegister dst,
                     const Register src,
                     const Condition cond) {
  // Sn = Rt.
  // Instruction details available in ARM DDI 0406A, A8-642.
  // cond(31-28) | 1110(27-24)| 000(23-21) | op=0(20) | Vn(19-16) |
  // Rt(15-12) | 1010(11-8) | N(7)=0 | 00(6-5) | 1(4) | 0000(3-0)
  DCHECK(src != pc);
  int sn, n;
  dst.split_code(&sn, &n);
  emit(cond | 0xE*B24 | sn*B16 | src.code()*B12 | 0xA*B8 | n*B7 | B4);
}


void Assembler::vmov(const Register dst,
                     const SwVfpRegister src,
                     const Condition cond) {
  // Rt = Sn.
  // Instruction details available in ARM DDI 0406A, A8-642.
  // cond(31-28) | 1110(27-24)| 000(23-21) | op=1(20) | Vn(19-16) |
  // Rt(15-12) | 1010(11-8) | N(7)=0 | 00(6-5) | 1(4) | 0000(3-0)
  DCHECK(dst != pc);
  int sn, n;
  src.split_code(&sn, &n);
  emit(cond | 0xE*B24 | B20 | sn*B16 | dst.code()*B12 | 0xA*B8 | n*B7 | B4);
2982 2983
}

// Type of data to read from or write to VFP register.
// Used as specifier in generic vcvt instruction.
enum VFPType { S32, U32, F32, F64 };


static bool IsSignedVFPType(VFPType type) {
  switch (type) {
    case S32:
      return true;
    case U32:
      return false;
    default:
      UNREACHABLE();
  }
}


static bool IsIntegerVFPType(VFPType type) {
  switch (type) {
    case S32:
    case U32:
      return true;
    case F32:
    case F64:
      return false;
    default:
      UNREACHABLE();
  }
}


static bool IsDoubleVFPType(VFPType type) {
  switch (type) {
    case F32:
      return false;
    case F64:
      return true;
    default:
      UNREACHABLE();
  }
}


// Split five bit reg_code based on size of reg_type.
//  32-bit register codes are Vm:M
//  64-bit register codes are M:Vm
// where Vm is four bits, and M is a single bit.
static void SplitRegCode(VFPType reg_type,
                         int reg_code,
                         int* vm,
                         int* m) {
  DCHECK((reg_code >= 0) && (reg_code <= 31));
  if (IsIntegerVFPType(reg_type) || !IsDoubleVFPType(reg_type)) {
    SwVfpRegister::split_code(reg_code, vm, m);
  } else {
    DwVfpRegister::split_code(reg_code, vm, m);
  }
}
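
// Worked example (illustrative comment, not part of the original source):
// reg_code 17 (0b10001) splits as a 32-bit register into vm = 17 >> 1 = 8 and
// m = 17 & 1 = 1 (S17 encodes as Vm:M), but as a 64-bit register into m = 1
// and vm = 1 (D17 encodes as M:Vm).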


// Encode vcvt.src_type.dst_type instruction.
static Instr EncodeVCVT(const VFPType dst_type,
                        const int dst_code,
                        const VFPType src_type,
                        const int src_code,
                        VFPConversionMode mode,
                        const Condition cond) {
  DCHECK(src_type != dst_type);
  int D, Vd, M, Vm;
  SplitRegCode(src_type, src_code, &Vm, &M);
  SplitRegCode(dst_type, dst_code, &Vd, &D);

  if (IsIntegerVFPType(dst_type) || IsIntegerVFPType(src_type)) {
    // Conversion between IEEE floating point and 32-bit integer.
    // Instruction details available in ARM DDI 0406B, A8.6.295.
    // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 1(19) | opc2(18-16) |
    // Vd(15-12) | 101(11-9) | sz(8) | op(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
    DCHECK(!IsIntegerVFPType(dst_type) || !IsIntegerVFPType(src_type));

    int sz, opc2, op;

    if (IsIntegerVFPType(dst_type)) {
      opc2 = IsSignedVFPType(dst_type) ? 0x5 : 0x4;
      sz = IsDoubleVFPType(src_type) ? 0x1 : 0x0;
      op = mode;
    } else {
      DCHECK(IsIntegerVFPType(src_type));
      opc2 = 0x0;
      sz = IsDoubleVFPType(dst_type) ? 0x1 : 0x0;
      op = IsSignedVFPType(src_type) ? 0x1 : 0x0;
    }

    return (cond | 0xE*B24 | B23 | D*B22 | 0x3*B20 | B19 | opc2*B16 |
            Vd*B12 | 0x5*B9 | sz*B8 | op*B7 | B6 | M*B5 | Vm);
  } else {
    // Conversion between IEEE double and single precision.
    // Instruction details available in ARM DDI 0406B, A8.6.298.
    // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0111(19-16) |
    // Vd(15-12) | 101(11-9) | sz(8) | 1(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
    int sz = IsDoubleVFPType(src_type) ? 0x1 : 0x0;
    return (cond | 0xE*B24 | B23 | D*B22 | 0x3*B20 | 0x7*B16 |
            Vd*B12 | 0x5*B9 | sz*B8 | B7 | B6 | M*B5 | Vm);
  }
}
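
// Worked example (illustrative comment, not part of the original source):
// for vcvt.f64.s32 d0, s0 with cond = al, dst_type = F64 and src_type = S32
// give opc2 = 0x0, sz = 1 and op = 1, so the expression above evaluates to
// the encoding 0xEEB80BC0.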


void Assembler::vcvt_f64_s32(const DwVfpRegister dst,
                             const SwVfpRegister src,
                             VFPConversionMode mode,
                             const Condition cond) {
  DCHECK(VfpRegisterIsAvailable(dst));
  emit(EncodeVCVT(F64, dst.code(), S32, src.code(), mode, cond));
}


void Assembler::vcvt_f32_s32(const SwVfpRegister dst,
                             const SwVfpRegister src,
                             VFPConversionMode mode,
                             const Condition cond) {
  emit(EncodeVCVT(F32, dst.code(), S32, src.code(), mode, cond));
}


void Assembler::vcvt_f64_u32(const DwVfpRegister dst,
                             const SwVfpRegister src,
                             VFPConversionMode mode,
                             const Condition cond) {
  DCHECK(VfpRegisterIsAvailable(dst));
  emit(EncodeVCVT(F64, dst.code(), U32, src.code(), mode, cond));
}


void Assembler::vcvt_f32_u32(const SwVfpRegister dst, const SwVfpRegister src,
                             VFPConversionMode mode, const Condition cond) {
  emit(EncodeVCVT(F32, dst.code(), U32, src.code(), mode, cond));
}


void Assembler::vcvt_s32_f32(const SwVfpRegister dst, const SwVfpRegister src,
                             VFPConversionMode mode, const Condition cond) {
  emit(EncodeVCVT(S32, dst.code(), F32, src.code(), mode, cond));
}


void Assembler::vcvt_u32_f32(const SwVfpRegister dst, const SwVfpRegister src,
                             VFPConversionMode mode, const Condition cond) {
  emit(EncodeVCVT(U32, dst.code(), F32, src.code(), mode, cond));
}


void Assembler::vcvt_s32_f64(const SwVfpRegister dst,
                             const DwVfpRegister src,
                             VFPConversionMode mode,
                             const Condition cond) {
  DCHECK(VfpRegisterIsAvailable(src));
  emit(EncodeVCVT(S32, dst.code(), F64, src.code(), mode, cond));
}


void Assembler::vcvt_u32_f64(const SwVfpRegister dst,
                             const DwVfpRegister src,
                             VFPConversionMode mode,
                             const Condition cond) {
  DCHECK(VfpRegisterIsAvailable(src));
  emit(EncodeVCVT(U32, dst.code(), F64, src.code(), mode, cond));
}


void Assembler::vcvt_f64_f32(const DwVfpRegister dst,
                             const SwVfpRegister src,
                             VFPConversionMode mode,
                             const Condition cond) {
  DCHECK(VfpRegisterIsAvailable(dst));
  emit(EncodeVCVT(F64, dst.code(), F32, src.code(), mode, cond));
}


void Assembler::vcvt_f32_f64(const SwVfpRegister dst,
                             const DwVfpRegister src,
                             VFPConversionMode mode,
                             const Condition cond) {
  DCHECK(VfpRegisterIsAvailable(src));
  emit(EncodeVCVT(F32, dst.code(), F64, src.code(), mode, cond));
}


void Assembler::vcvt_f64_s32(const DwVfpRegister dst,
                             int fraction_bits,
                             const Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-874.
  // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 1010(19-16) | Vd(15-12) |
  // 101(11-9) | sf=1(8) | sx=1(7) | 1(6) | i(5) | 0(4) | imm4(3-0)
  DCHECK(IsEnabled(VFPv3));
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(fraction_bits > 0 && fraction_bits <= 32);
  int vd, d;
  dst.split_code(&vd, &d);
  int imm5 = 32 - fraction_bits;
  int i = imm5 & 1;
  int imm4 = (imm5 >> 1) & 0xF;
  emit(cond | 0xE*B24 | B23 | d*B22 | 0x3*B20 | B19 | 0x2*B16 |
       vd*B12 | 0x5*B9 | B8 | B7 | B6 | i*B5 | imm4);
}
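
// Worked example (illustrative comment, not part of the original source):
// fraction_bits = 16 gives imm5 = 32 - 16 = 0b10000, which is split into
// i = 0 (bit 5 of the instruction) and imm4 = 0x8 (bits 3-0).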


void Assembler::vneg(const DwVfpRegister dst,
                     const DwVfpRegister src,
                     const Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-968.
  // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 0001(19-16) | Vd(15-12) |
  // 101(11-9) | sz=1(8) | 0(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);

  emit(cond | 0x1D*B23 | d*B22 | 0x3*B20 | B16 | vd*B12 | 0x5*B9 | B8 | B6 |
       m*B5 | vm);
}


void Assembler::vneg(const SwVfpRegister dst, const SwVfpRegister src,
                     const Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-968.
  // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 0001(19-16) | Vd(15-12) |
  // 101(11-9) | sz=0(8) | 0(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);

  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | B16 | vd * B12 | 0x5 * B9 |
       B6 | m * B5 | vm);
}


void Assembler::vabs(const DwVfpRegister dst,
                     const DwVfpRegister src,
                     const Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-524.
  // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 0000(19-16) | Vd(15-12) |
  // 101(11-9) | sz=1(8) | 1(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D*B23 | d*B22 | 0x3*B20 | vd*B12 | 0x5*B9 | B8 | B7 | B6 |
       m*B5 | vm);
}


void Assembler::vabs(const SwVfpRegister dst, const SwVfpRegister src,
                     const Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-524.
  // cond(31-28) | 11101(27-23) | D(22) | 11(21-20) | 0000(19-16) | Vd(15-12) |
  // 101(11-9) | sz=0(8) | 1(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | vd * B12 | 0x5 * B9 | B7 | B6 |
       m * B5 | vm);
}


void Assembler::vadd(const DwVfpRegister dst,
                     const DwVfpRegister src1,
                     const DwVfpRegister src2,
                     const Condition cond) {
  // Dd = vadd(Dn, Dm) double precision floating point addition.
  // Dd = D:Vd; Dm=M:Vm; Dn=N:Vn.
  // Instruction details available in ARM DDI 0406C.b, A8-830.
  // cond(31-28) | 11100(27-23)| D(22) | 11(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK(VfpRegisterIsAvailable(src2));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C*B23 | d*B22 | 0x3*B20 | vn*B16 | vd*B12 | 0x5*B9 | B8 |
       n*B7 | m*B5 | vm);
}


void Assembler::vadd(const SwVfpRegister dst, const SwVfpRegister src1,
                     const SwVfpRegister src2, const Condition cond) {
  // Sd = vadd(Sn, Sm) single precision floating point addition.
  // Sd = Vd:D; Sm=Vm:M; Sn=Vn:N.
  // Instruction details available in ARM DDI 0406C.b, A8-830.
  // cond(31-28) | 11100(27-23)| D(22) | 11(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 |
       0x5 * B9 | n * B7 | m * B5 | vm);
}


void Assembler::vsub(const DwVfpRegister dst,
                     const DwVfpRegister src1,
                     const DwVfpRegister src2,
                     const Condition cond) {
  // Dd = vsub(Dn, Dm) double precision floating point subtraction.
  // Dd = D:Vd; Dm=M:Vm; Dn=N:Vn.
  // Instruction details available in ARM DDI 0406C.b, A8-1086.
  // cond(31-28) | 11100(27-23)| D(22) | 11(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK(VfpRegisterIsAvailable(src2));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C*B23 | d*B22 | 0x3*B20 | vn*B16 | vd*B12 | 0x5*B9 | B8 |
       n*B7 | B6 | m*B5 | vm);
}


void Assembler::vsub(const SwVfpRegister dst, const SwVfpRegister src1,
                     const SwVfpRegister src2, const Condition cond) {
  // Sd = vsub(Sn, Sm) single precision floating point subtraction.
  // Sd = Vd:D; Sm=Vm:M; Sn=Vn:N.
  // Instruction details available in ARM DDI 0406C.b, A8-1086.
  // cond(31-28) | 11100(27-23)| D(22) | 11(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 |
       0x5 * B9 | n * B7 | B6 | m * B5 | vm);
}


void Assembler::vmul(const DwVfpRegister dst,
                     const DwVfpRegister src1,
                     const DwVfpRegister src2,
                     const Condition cond) {
  // Dd = vmul(Dn, Dm) double precision floating point multiplication.
  // Dd = D:Vd; Dm=M:Vm; Dn=N:Vn.
  // Instruction details available in ARM DDI 0406C.b, A8-960.
  // cond(31-28) | 11100(27-23)| D(22) | 10(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK(VfpRegisterIsAvailable(src2));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C*B23 | d*B22 | 0x2*B20 | vn*B16 | vd*B12 | 0x5*B9 | B8 |
       n*B7 | m*B5 | vm);
}


void Assembler::vmul(const SwVfpRegister dst, const SwVfpRegister src1,
                     const SwVfpRegister src2, const Condition cond) {
  // Sd = vmul(Sn, Sm) single precision floating point multiplication.
  // Sd = Vd:D; Sm=Vm:M; Sn=Vn:N.
  // Instruction details available in ARM DDI 0406C.b, A8-960.
  // cond(31-28) | 11100(27-23)| D(22) | 10(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C * B23 | d * B22 | 0x2 * B20 | vn * B16 | vd * B12 |
       0x5 * B9 | n * B7 | m * B5 | vm);
}


void Assembler::vmla(const DwVfpRegister dst,
                     const DwVfpRegister src1,
                     const DwVfpRegister src2,
                     const Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-932.
  // cond(31-28) | 11100(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | op=0(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK(VfpRegisterIsAvailable(src2));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C*B23 | d*B22 | vn*B16 | vd*B12 | 0x5*B9 | B8 | n*B7 | m*B5 |
       vm);
}


void Assembler::vmla(const SwVfpRegister dst, const SwVfpRegister src1,
                     const SwVfpRegister src2, const Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-932.
  // cond(31-28) | 11100(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | op=0(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C * B23 | d * B22 | vn * B16 | vd * B12 | 0x5 * B9 | n * B7 |
       m * B5 | vm);
}


void Assembler::vmls(const DwVfpRegister dst,
                     const DwVfpRegister src1,
                     const DwVfpRegister src2,
                     const Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-932.
  // cond(31-28) | 11100(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | op=1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK(VfpRegisterIsAvailable(src2));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C*B23 | d*B22 | vn*B16 | vd*B12 | 0x5*B9 | B8 | n*B7 | B6 |
       m*B5 | vm);
}


void Assembler::vmls(const SwVfpRegister dst, const SwVfpRegister src1,
                     const SwVfpRegister src2, const Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-932.
  // cond(31-28) | 11100(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | op=1(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1C * B23 | d * B22 | vn * B16 | vd * B12 | 0x5 * B9 | n * B7 |
       B6 | m * B5 | vm);
}


void Assembler::vdiv(const DwVfpRegister dst,
                     const DwVfpRegister src1,
                     const DwVfpRegister src2,
                     const Condition cond) {
  // Dd = vdiv(Dn, Dm) double precision floating point division.
  // Dd = D:Vd; Dm=M:Vm; Dn=N:Vn.
  // Instruction details available in ARM DDI 0406C.b, A8-882.
  // cond(31-28) | 11101(27-23)| D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK(VfpRegisterIsAvailable(src2));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1D*B23 | d*B22 | vn*B16 | vd*B12 | 0x5*B9 | B8 | n*B7 | m*B5 |
       vm);
}


void Assembler::vdiv(const SwVfpRegister dst, const SwVfpRegister src1,
                     const SwVfpRegister src2, const Condition cond) {
  // Sd = vdiv(Sn, Sm) single precision floating point division.
  // Sd = Vd:D; Sm=Vm:M; Sn=Vn:N.
  // Instruction details available in ARM DDI 0406C.b, A8-882.
  // cond(31-28) | 11101(27-23)| D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | vn * B16 | vd * B12 | 0x5 * B9 | n * B7 |
       m * B5 | vm);
}


void Assembler::vcmp(const DwVfpRegister src1,
                     const DwVfpRegister src2,
                     const Condition cond) {
  // vcmp(Dd, Dm) double precision floating point comparison.
  // Instruction details available in ARM DDI 0406C.b, A8-864.
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0100(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | E=0(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK(VfpRegisterIsAvailable(src2));
  int vd, d;
  src1.split_code(&vd, &d);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1D*B23 | d*B22 | 0x3*B20 | 0x4*B16 | vd*B12 | 0x5*B9 | B8 | B6 |
       m*B5 | vm);
}


void Assembler::vcmp(const SwVfpRegister src1, const SwVfpRegister src2,
                     const Condition cond) {
  // vcmp(Sd, Sm) single precision floating point comparison.
  // Instruction details available in ARM DDI 0406C.b, A8-864.
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0100(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | E=0(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  src1.split_code(&vd, &d);
  int vm, m;
  src2.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | 0x4 * B16 | vd * B12 |
       0x5 * B9 | B6 | m * B5 | vm);
}


void Assembler::vcmp(const DwVfpRegister src1,
                     const double src2,
                     const Condition cond) {
  // vcmp(Dd, #0.0) double precision floating point comparison.
  // Instruction details available in ARM DDI 0406C.b, A8-864.
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0101(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | E=0(7) | 1(6) | 0(5) | 0(4) | 0000(3-0)
  DCHECK(VfpRegisterIsAvailable(src1));
  DCHECK_EQ(src2, 0.0);
  int vd, d;
  src1.split_code(&vd, &d);
  emit(cond | 0x1D*B23 | d*B22 | 0x3*B20 | 0x5*B16 | vd*B12 | 0x5*B9 | B8 | B6);
}


void Assembler::vcmp(const SwVfpRegister src1, const float src2,
                     const Condition cond) {
  // vcmp(Sd, #0.0) single precision floating point comparison.
  // Instruction details available in ARM DDI 0406C.b, A8-864.
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0101(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | E=0(7) | 1(6) | 0(5) | 0(4) | 0000(3-0)
  DCHECK_EQ(src2, 0.0);
  int vd, d;
  src1.split_code(&vd, &d);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | 0x5 * B16 | vd * B12 |
       0x5 * B9 | B6);
}

void Assembler::vmaxnm(const DwVfpRegister dst, const DwVfpRegister src1,
                       const DwVfpRegister src2) {
  // kSpecialCondition(31-28) | 11101(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);

  emit(kSpecialCondition | 0x1D * B23 | d * B22 | vn * B16 | vd * B12 |
       0x5 * B9 | B8 | n * B7 | m * B5 | vm);
}

void Assembler::vmaxnm(const SwVfpRegister dst, const SwVfpRegister src1,
                       const SwVfpRegister src2) {
  // kSpecialCondition(31-28) | 11101(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | 0(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);

  emit(kSpecialCondition | 0x1D * B23 | d * B22 | vn * B16 | vd * B12 |
       0x5 * B9 | n * B7 | m * B5 | vm);
}

void Assembler::vminnm(const DwVfpRegister dst, const DwVfpRegister src1,
                       const DwVfpRegister src2) {
  // kSpecialCondition(31-28) | 11101(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | N(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);

  emit(kSpecialCondition | 0x1D * B23 | d * B22 | vn * B16 | vd * B12 |
       0x5 * B9 | B8 | n * B7 | B6 | m * B5 | vm);
}

void Assembler::vminnm(const SwVfpRegister dst, const SwVfpRegister src1,
                       const SwVfpRegister src2) {
  // kSpecialCondition(31-28) | 11101(27-23) | D(22) | 00(21-20) | Vn(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | N(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);

  emit(kSpecialCondition | 0x1D * B23 | d * B22 | vn * B16 | vd * B12 |
       0x5 * B9 | n * B7 | B6 | m * B5 | vm);
}

void Assembler::vsel(Condition cond, const DwVfpRegister dst,
                     const DwVfpRegister src1, const DwVfpRegister src2) {
  // cond=kSpecialCondition(31-28) | 11100(27-23) | D(22) |
  // vsel_cond=XX(21-20) | Vn(19-16) | Vd(15-12) | 101(11-9) | sz=1(8) | N(7) |
  // 0(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  int sz = 1;

  // VSEL has a special (restricted) condition encoding.
  //   eq(0b0000)... -> 0b00
  //   ge(0b1010)... -> 0b10
  //   gt(0b1100)... -> 0b11
  //   vs(0b0110)... -> 0b01
  // No other conditions are supported.
  int vsel_cond = (cond >> 30) & 0x3;
  if ((cond != eq) && (cond != ge) && (cond != gt) && (cond != vs)) {
    // We can implement some other conditions by swapping the inputs.
    DCHECK((cond == ne) || (cond == lt) || (cond == le) || (cond == vc));
    std::swap(vn, vm);
    std::swap(n, m);
  }

  emit(kSpecialCondition | 0x1C * B23 | d * B22 | vsel_cond * B20 | vn * B16 |
       vd * B12 | 0x5 * B9 | sz * B8 | n * B7 | m * B5 | vm);
}
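
// Usage note (illustrative comment, not part of the original source):
// vsel(ne, d0, d1, d2) takes the swapping branch above and encodes the eq
// condition (vsel_cond = 0b00), emitting vseleq.f64 d0, d2, d1, which
// computes the same result as the requested vselne.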

void Assembler::vsel(Condition cond, const SwVfpRegister dst,
                     const SwVfpRegister src1, const SwVfpRegister src2) {
  // cond=kSpecialCondition(31-28) | 11100(27-23) | D(22) |
  // vsel_cond=XX(21-20) | Vn(19-16) | Vd(15-12) | 101(11-9) | sz=0(8) | N(7) |
  // 0(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  int sz = 0;

  // VSEL has a special (restricted) condition encoding.
  //   eq(0b0000)... -> 0b00
  //   ge(0b1010)... -> 0b10
  //   gt(0b1100)... -> 0b11
  //   vs(0b0110)... -> 0b01
  // No other conditions are supported.
  int vsel_cond = (cond >> 30) & 0x3;
  if ((cond != eq) && (cond != ge) && (cond != gt) && (cond != vs)) {
    // We can implement some other conditions by swapping the inputs.
    DCHECK((cond == ne) || (cond == lt) || (cond == le) || (cond == vc));
    std::swap(vn, vm);
    std::swap(n, m);
  }

  emit(kSpecialCondition | 0x1C * B23 | d * B22 | vsel_cond * B20 | vn * B16 |
       vd * B12 | 0x5 * B9 | sz * B8 | n * B7 | m * B5 | vm);
}

void Assembler::vsqrt(const DwVfpRegister dst,
                      const DwVfpRegister src,
                      const Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-1058.
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0001(19-16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | 11(7-6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D*B23 | d*B22 | 0x3*B20 | B16 | vd*B12 | 0x5*B9 | B8 | 0x3*B6 |
       m*B5 | vm);
}


void Assembler::vsqrt(const SwVfpRegister dst, const SwVfpRegister src,
                      const Condition cond) {
  // Instruction details available in ARM DDI 0406C.b, A8-1058.
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 0001(19-16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | 11(7-6) | M(5) | 0(4) | Vm(3-0)
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | B16 | vd * B12 | 0x5 * B9 |
       0x3 * B6 | m * B5 | vm);
}


void Assembler::vmsr(Register dst, Condition cond) {
  // Instruction details available in ARM DDI 0406A, A8-652.
  // cond(31-28) | 1110 (27-24) | 1110(23-20)| 0001 (19-16) |
  // Rt(15-12) | 1010 (11-8) | 0(7) | 00 (6-5) | 1(4) | 0000(3-0)
  emit(cond | 0xE * B24 | 0xE * B20 | B16 | dst.code() * B12 | 0xA * B8 | B4);
}


void Assembler::vmrs(Register dst, Condition cond) {
  // Instruction details available in ARM DDI 0406A, A8-652.
  // cond(31-28) | 1110 (27-24) | 1111(23-20)| 0001 (19-16) |
  // Rt(15-12) | 1010 (11-8) | 0(7) | 00 (6-5) | 1(4) | 0000(3-0)
  emit(cond | 0xE * B24 | 0xF * B20 | B16 | dst.code() * B12 | 0xA * B8 | B4);
}


void Assembler::vrinta(const SwVfpRegister dst, const SwVfpRegister src) {
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=00(17-16) |  Vd(15-12) | 101(11-9) | sz=0(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | vd * B12 |
       0x5 * B9 | B6 | m * B5 | vm);
}


void Assembler::vrinta(const DwVfpRegister dst, const DwVfpRegister src) {
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=00(17-16) |  Vd(15-12) | 101(11-9) | sz=1(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | vd * B12 |
       0x5 * B9 | B8 | B6 | m * B5 | vm);
}


void Assembler::vrintn(const SwVfpRegister dst, const SwVfpRegister src) {
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=01(17-16) |  Vd(15-12) | 101(11-9) | sz=0(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x1 * B16 |
       vd * B12 | 0x5 * B9 | B6 | m * B5 | vm);
}


void Assembler::vrintn(const DwVfpRegister dst, const DwVfpRegister src) {
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=01(17-16) |  Vd(15-12) | 101(11-9) | sz=1(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x1 * B16 |
       vd * B12 | 0x5 * B9 | B8 | B6 | m * B5 | vm);
}


void Assembler::vrintp(const SwVfpRegister dst, const SwVfpRegister src) {
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=10(17-16) |  Vd(15-12) | 101(11-9) | sz=0(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x2 * B16 |
       vd * B12 | 0x5 * B9 | B6 | m * B5 | vm);
}


void Assembler::vrintp(const DwVfpRegister dst, const DwVfpRegister src) {
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=10(17-16) |  Vd(15-12) | 101(11-9) | sz=1(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x2 * B16 |
       vd * B12 | 0x5 * B9 | B8 | B6 | m * B5 | vm);
}


void Assembler::vrintm(const SwVfpRegister dst, const SwVfpRegister src) {
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=11(17-16) |  Vd(15-12) | 101(11-9) | sz=0(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x3 * B16 |
       vd * B12 | 0x5 * B9 | B6 | m * B5 | vm);
}


void Assembler::vrintm(const DwVfpRegister dst, const DwVfpRegister src) {
  // cond=kSpecialCondition(31-28) | 11101(27-23)| D(22) | 11(21-20) |
  // 10(19-18) | RM=11(17-16) |  Vd(15-12) | 101(11-9) | sz=1(8) | 01(7-6) |
  // M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(kSpecialCondition | 0x1D * B23 | d * B22 | 0x3 * B20 | B19 | 0x3 * B16 |
       vd * B12 | 0x5 * B9 | B8 | B6 | m * B5 | vm);
}


void Assembler::vrintz(const SwVfpRegister dst, const SwVfpRegister src,
                       const Condition cond) {
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 011(19-17) | 0(16) |
  // Vd(15-12) | 101(11-9) | sz=0(8) | op=1(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | 0x3 * B17 | vd * B12 |
       0x5 * B9 | B7 | B6 | m * B5 | vm);
}


void Assembler::vrintz(const DwVfpRegister dst, const DwVfpRegister src,
                       const Condition cond) {
  // cond(31-28) | 11101(27-23)| D(22) | 11(21-20) | 011(19-17) | 0(16) |
  // Vd(15-12) | 101(11-9) | sz=1(8) | op=1(7) | 1(6) | M(5) | 0(4) | Vm(3-0)
  DCHECK(IsEnabled(ARMv8));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  emit(cond | 0x1D * B23 | d * B22 | 0x3 * B20 | 0x3 * B17 | vd * B12 |
       0x5 * B9 | B8 | B7 | B6 | m * B5 | vm);
}


// Support for NEON.

void Assembler::vld1(NeonSize size,
                     const NeonListOperand& dst,
                     const NeonMemOperand& src) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.320.
  // 1111(31-28) | 01000(27-23) | D(22) | 10(21-20) | Rn(19-16) |
  // Vd(15-12) | type(11-8) | size(7-6) | align(5-4) | Rm(3-0)
  DCHECK(IsEnabled(NEON));
  int vd, d;
  dst.base().split_code(&vd, &d);
  emit(0xFU*B28 | 4*B24 | d*B22 | 2*B20 | src.rn().code()*B16 | vd*B12 |
       dst.type()*B8 | size*B6 | src.align()*B4 | src.rm().code());
}

void Assembler::vst1(NeonSize size, const NeonListOperand& src,
                     const NeonMemOperand& dst) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.404.
  // 1111(31-28) | 01000(27-23) | D(22) | 00(21-20) | Rn(19-16) |
  // Vd(15-12) | type(11-8) | size(7-6) | align(5-4) | Rm(3-0)
  DCHECK(IsEnabled(NEON));
  int vd, d;
  src.base().split_code(&vd, &d);
  emit(0xFU*B28 | 4*B24 | d*B22 | dst.rn().code()*B16 | vd*B12 | src.type()*B8 |
       size*B6 | dst.align()*B4 | dst.rm().code());
}


void Assembler::vmovl(NeonDataType dt, QwNeonRegister dst, DwVfpRegister src) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.346.
  // 1111(31-28) | 001(27-25) | U(24) | 1(23) | D(22) | imm3(21-19) |
  // 000(18-16) | Vd(15-12) | 101000(11-6) | M(5) | 1(4) | Vm(3-0)
  DCHECK(IsEnabled(NEON));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  int U = NeonU(dt);
  int imm3 = 1 << NeonSz(dt);
  emit(0xFU * B28 | B25 | U * B24 | B23 | d * B22 | imm3 * B19 | vd * B12 |
       0xA * B8 | m * B5 | B4 | vm);
}
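
// Illustrative note (not part of the original source): imm3 is the one-hot
// encoding of the source element size, e.g. NeonS8 gives imm3 = 0b001,
// NeonU16 gives imm3 = 0b010 and NeonS32 gives imm3 = 0b100.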

void Assembler::vqmovn(NeonDataType dt, DwVfpRegister dst, QwNeonRegister src) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.1004.
  // vqmovn.<type><size> Dd, Qm. ARM vector narrowing move with saturation.
  DCHECK(IsEnabled(NEON));
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  int size = NeonSz(dt);
  int u = NeonU(dt);
  int op = u != 0 ? 3 : 2;
  emit(0x1E7U * B23 | d * B22 | 0x3 * B20 | size * B18 | 0x2 * B16 | vd * B12 |
       0x2 * B8 | op * B6 | m * B5 | vm);
}

static int EncodeScalar(NeonDataType dt, int index) {
  int opc1_opc2 = 0;
  DCHECK_LE(0, index);
  switch (dt) {
    case NeonS8:
    case NeonU8:
      DCHECK_GT(8, index);
      opc1_opc2 = 0x8 | index;
      break;
    case NeonS16:
    case NeonU16:
      DCHECK_GT(4, index);
      opc1_opc2 = 0x1 | (index << 1);
      break;
    case NeonS32:
    case NeonU32:
      DCHECK_GT(2, index);
      opc1_opc2 = index << 2;
      break;
    default:
      UNREACHABLE();
      break;
  }
  return (opc1_opc2 >> 2) * B21 | (opc1_opc2 & 0x3) * B5;
}
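
// Worked example (illustrative comment, not part of the original source):
// EncodeScalar(NeonS16, 2) computes opc1_opc2 = 0x1 | (2 << 1) = 0x5, which
// is placed as opc1 = 0b01 (B21) and opc2 = 0b01 (B5) in the instruction.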

void Assembler::vmov(NeonDataType dt, DwVfpRegister dst, int index,
                     Register src) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.940.
  // vmov ARM core register to scalar.
  DCHECK(dt == NeonS32 || dt == NeonU32 || IsEnabled(NEON));
  int vd, d;
  dst.split_code(&vd, &d);
  int opc1_opc2 = EncodeScalar(dt, index);
  emit(0xEEu * B24 | vd * B16 | src.code() * B12 | 0xB * B8 | d * B7 | B4 |
       opc1_opc2);
}

void Assembler::vmov(NeonDataType dt, Register dst, DwVfpRegister src,
                     int index) {
  // Instruction details available in ARM DDI 0406C.b, A8.8.942.
  // vmov Arm scalar to core register.
  DCHECK(dt == NeonS32 || dt == NeonU32 || IsEnabled(NEON));
  int vn, n;
  src.split_code(&vn, &n);
  int opc1_opc2 = EncodeScalar(dt, index);
  int u = NeonU(dt);
  emit(0xEEu * B24 | u * B23 | B20 | vn * B16 | dst.code() * B12 | 0xB * B8 |
       n * B7 | B4 | opc1_opc2);
}

void Assembler::vmov(QwNeonRegister dst, QwNeonRegister src) {
  // Instruction details available in ARM DDI 0406C.b, A8-938.
  // vmov is encoded as vorr.
  vorr(dst, src, src);
}

void Assembler::vdup(NeonSize size, QwNeonRegister dst, Register src) {
  DCHECK(IsEnabled(NEON));
  // Instruction details available in ARM DDI 0406C.b, A8-886.
  int B = 0, E = 0;
  switch (size) {
    case Neon8:
      B = 1;
      break;
    case Neon16:
      E = 1;
      break;
    case Neon32:
      break;
    default:
      UNREACHABLE();
      break;
  }
  int vd, d;
  dst.split_code(&vd, &d);

  emit(al | 0x1D * B23 | B * B22 | B21 | vd * B16 | src.code() * B12 |
       0xB * B8 | d * B7 | E * B5 | B4);
}

enum NeonRegType { NEON_D, NEON_Q };

void NeonSplitCode(NeonRegType type, int code, int* vm, int* m, int* encoding) {
  if (type == NEON_D) {
    DwVfpRegister::split_code(code, vm, m);
  } else {
    DCHECK_EQ(type, NEON_Q);
    QwNeonRegister::split_code(code, vm, m);
    *encoding |= B6;
  }
}
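
// Worked example (illustrative comment, assuming QwNeonRegister::split_code
// maps q-register n to the d-register code 2n): for q8 the doubled code is
// 16 (0b10000), so m = 1, vm = 0 and B6 (the quadword bit) is ORed into the
// encoding; for d5, m = 0 and vm = 5 with no extra bit.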

static Instr EncodeNeonDupOp(NeonSize size, NeonRegType reg_type, int dst_code,
                             DwVfpRegister src, int index) {
  DCHECK_NE(Neon64, size);
  int sz = static_cast<int>(size);
  DCHECK_LE(0, index);
  DCHECK_GT(kSimd128Size / (1 << sz), index);
  int imm4 = (1 << sz) | ((index << (sz + 1)) & 0xF);
  int qbit = 0;
  int vd, d;
  NeonSplitCode(reg_type, dst_code, &vd, &d, &qbit);
  int vm, m;
  src.split_code(&vm, &m);

  return 0x1E7U * B23 | d * B22 | 0x3 * B20 | imm4 * B16 | vd * B12 |
         0x18 * B7 | qbit | m * B5 | vm;
}
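
// Worked example (illustrative comment, not part of the original source):
// a 16-bit vdup (sz = 1) from lane index 2 yields
// imm4 = (1 << 1) | ((2 << 2) & 0xF) = 0b1010: the low bits 0b10 select the
// element size and the high bits carry the lane index.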

void Assembler::vdup(NeonSize size, DwVfpRegister dst, DwVfpRegister src,
                     int index) {
  DCHECK(IsEnabled(NEON));
  // Instruction details available in ARM DDI 0406C.b, A8-884.
  emit(EncodeNeonDupOp(size, NEON_D, dst.code(), src, index));
}

void Assembler::vdup(NeonSize size, QwNeonRegister dst, DwVfpRegister src,
                     int index) {
  // Instruction details available in ARM DDI 0406C.b, A8-884.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonDupOp(size, NEON_Q, dst.code(), src, index));
}

// Encode NEON vcvt.src_type.dst_type instruction.
static Instr EncodeNeonVCVT(VFPType dst_type, QwNeonRegister dst,
                            VFPType src_type, QwNeonRegister src) {
  DCHECK(src_type != dst_type);
  DCHECK(src_type == F32 || dst_type == F32);
  // Instruction details available in ARM DDI 0406C.b, A8.8.868.
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);

  int op = 0;
  if (src_type == F32) {
    DCHECK(dst_type == S32 || dst_type == U32);
    op = dst_type == U32 ? 3 : 2;
  } else {
    DCHECK(src_type == S32 || src_type == U32);
    op = src_type == U32 ? 1 : 0;
  }

  return 0x1E7U * B23 | d * B22 | 0x3B * B16 | vd * B12 | 0x3 * B9 | op * B7 |
         B6 | m * B5 | vm;
}
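
// Illustrative note (not part of the original source): op selects the
// conversion direction and signedness, e.g. F32 -> U32 gives op = 3,
// F32 -> S32 gives op = 2, U32 -> F32 gives op = 1 and S32 -> F32 gives
// op = 0.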

void Assembler::vcvt_f32_s32(QwNeonRegister dst, QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  emit(EncodeNeonVCVT(F32, dst, S32, src));
}

void Assembler::vcvt_f32_u32(QwNeonRegister dst, QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  emit(EncodeNeonVCVT(F32, dst, U32, src));
}

void Assembler::vcvt_s32_f32(QwNeonRegister dst, QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  emit(EncodeNeonVCVT(S32, dst, F32, src));
}

void Assembler::vcvt_u32_f32(QwNeonRegister dst, QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  DCHECK(VfpRegisterIsAvailable(dst));
  DCHECK(VfpRegisterIsAvailable(src));
  emit(EncodeNeonVCVT(U32, dst, F32, src));
}

enum UnaryOp { VMVN, VSWP, VABS, VABSF, VNEG, VNEGF };

static Instr EncodeNeonUnaryOp(UnaryOp op, NeonRegType reg_type, NeonSize size,
                               int dst_code, int src_code) {
  int op_encoding = 0;
  switch (op) {
    case VMVN:
      DCHECK_EQ(Neon8, size);  // size == 0 for vmvn
      op_encoding = B10 | 0x3 * B7;
      break;
    case VSWP:
      DCHECK_EQ(Neon8, size);  // size == 0 for vswp
      op_encoding = B17;
      break;
    case VABS:
      op_encoding = B16 | 0x6 * B7;
      break;
    case VABSF:
      DCHECK_EQ(Neon32, size);
      op_encoding = B16 | B10 | 0x6 * B7;
      break;
    case VNEG:
      op_encoding = B16 | 0x7 * B7;
      break;
    case VNEGF:
      DCHECK_EQ(Neon32, size);
      op_encoding = B16 | B10 | 0x7 * B7;
      break;
    default:
      UNREACHABLE();
      break;
  }
  int vd, d;
  NeonSplitCode(reg_type, dst_code, &vd, &d, &op_encoding);
  int vm, m;
  NeonSplitCode(reg_type, src_code, &vm, &m, &op_encoding);

  return 0x1E7U * B23 | d * B22 | 0x3 * B20 | size * B18 | vd * B12 | m * B5 |
         vm | op_encoding;
}

void Assembler::vmvn(QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vmvn(Qm) SIMD bitwise negate.
  // Instruction details available in ARM DDI 0406C.b, A8-966.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonUnaryOp(VMVN, NEON_Q, Neon8, dst.code(), src.code()));
}

void Assembler::vswp(DwVfpRegister dst, DwVfpRegister src) {
  // Dd = vswp(Dn, Dm) SIMD d-register swap.
  // Instruction details available in ARM DDI 0406C.b, A8.8.418.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonUnaryOp(VSWP, NEON_D, Neon8, dst.code(), src.code()));
}

void Assembler::vswp(QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vswp(Qn, Qm) SIMD q-register swap.
  // Instruction details available in ARM DDI 0406C.b, A8.8.418.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonUnaryOp(VSWP, NEON_Q, Neon8, dst.code(), src.code()));
}

void Assembler::vabs(QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vabs.f<size>(Qm) SIMD floating point absolute value.
  // Instruction details available in ARM DDI 0406C.b, A8.8.824.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonUnaryOp(VABSF, NEON_Q, Neon32, dst.code(), src.code()));
}

void Assembler::vabs(NeonSize size, QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vabs.s<size>(Qm) SIMD integer absolute value.
  // Instruction details available in ARM DDI 0406C.b, A8.8.824.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonUnaryOp(VABS, NEON_Q, size, dst.code(), src.code()));
}

void Assembler::vneg(QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vneg.f<size>(Qm) SIMD floating point negate.
  // Instruction details available in ARM DDI 0406C.b, A8.8.968.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonUnaryOp(VNEGF, NEON_Q, Neon32, dst.code(), src.code()));
}

void Assembler::vneg(NeonSize size, QwNeonRegister dst, QwNeonRegister src) {
  // Qd = vneg.s<size>(Qm) SIMD integer negate.
  // Instruction details available in ARM DDI 0406C.b, A8.8.968.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonUnaryOp(VNEG, NEON_Q, size, dst.code(), src.code()));
}

enum BinaryBitwiseOp { VAND, VBIC, VBIF, VBIT, VBSL, VEOR, VORR, VORN };

static Instr EncodeNeonBinaryBitwiseOp(BinaryBitwiseOp op, NeonRegType reg_type,
                                       int dst_code, int src_code1,
                                       int src_code2) {
  int op_encoding = 0;
  switch (op) {
    case VBIC:
      op_encoding = 0x1 * B20;
      break;
    case VBIF:
      op_encoding = B24 | 0x3 * B20;
      break;
    case VBIT:
      op_encoding = B24 | 0x2 * B20;
      break;
    case VBSL:
      op_encoding = B24 | 0x1 * B20;
      break;
    case VEOR:
      op_encoding = B24;
      break;
    case VORR:
      op_encoding = 0x2 * B20;
      break;
    case VORN:
      op_encoding = 0x3 * B20;
      break;
    case VAND:
      // op_encoding is 0.
      break;
    default:
      UNREACHABLE();
      break;
  }
  int vd, d;
  NeonSplitCode(reg_type, dst_code, &vd, &d, &op_encoding);
  int vn, n;
  NeonSplitCode(reg_type, src_code1, &vn, &n, &op_encoding);
  int vm, m;
  NeonSplitCode(reg_type, src_code2, &vm, &m, &op_encoding);

  return 0x1E4U * B23 | op_encoding | d * B22 | vn * B16 | vd * B12 | B8 |
         n * B7 | m * B5 | B4 | vm;
}

void Assembler::vand(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  // Qd = vand(Qn, Qm) SIMD AND.
  // Instruction details available in ARM DDI 0406C.b, A8.8.836.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonBinaryBitwiseOp(VAND, NEON_Q, dst.code(), src1.code(),
                                 src2.code()));
}

void Assembler::vbsl(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  // Qd = vbsl(Qn, Qm) SIMD bitwise select.
  // Instruction details available in ARM DDI 0406C.b, A8-844.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonBinaryBitwiseOp(VBSL, NEON_Q, dst.code(), src1.code(),
                                 src2.code()));
}

void Assembler::veor(DwVfpRegister dst, DwVfpRegister src1,
                     DwVfpRegister src2) {
  // Dd = veor(Dn, Dm) SIMD exclusive OR.
  // Instruction details available in ARM DDI 0406C.b, A8.8.888.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonBinaryBitwiseOp(VEOR, NEON_D, dst.code(), src1.code(),
                                 src2.code()));
}

void Assembler::veor(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  // Qd = veor(Qn, Qm) SIMD exclusive OR.
  // Instruction details available in ARM DDI 0406C.b, A8.8.888.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonBinaryBitwiseOp(VEOR, NEON_Q, dst.code(), src1.code(),
                                 src2.code()));
}

void Assembler::vorr(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  // Qd = vorr(Qn, Qm) SIMD OR.
  // Instruction details available in ARM DDI 0406C.b, A8.8.976.
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonBinaryBitwiseOp(VORR, NEON_Q, dst.code(), src1.code(),
                                 src2.code()));
}

enum FPBinOp {
  VADDF,
  VSUBF,
  VMULF,
  VMINF,
  VMAXF,
  VRECPS,
  VRSQRTS,
  VCEQF,
  VCGEF,
  VCGTF
};

static Instr EncodeNeonBinOp(FPBinOp op, QwNeonRegister dst,
                             QwNeonRegister src1, QwNeonRegister src2) {
  int op_encoding = 0;
  switch (op) {
    case VADDF:
      op_encoding = 0xD * B8;
      break;
    case VSUBF:
      op_encoding = B21 | 0xD * B8;
      break;
    case VMULF:
      op_encoding = B24 | 0xD * B8 | B4;
      break;
    case VMINF:
      op_encoding = B21 | 0xF * B8;
      break;
    case VMAXF:
      op_encoding = 0xF * B8;
      break;
    case VRECPS:
      op_encoding = 0xF * B8 | B4;
      break;
    case VRSQRTS:
      op_encoding = B21 | 0xF * B8 | B4;
      break;
    case VCEQF:
      op_encoding = 0xE * B8;
      break;
    case VCGEF:
      op_encoding = B24 | 0xE * B8;
      break;
    case VCGTF:
      op_encoding = B24 | B21 | 0xE * B8;
      break;
    default:
      UNREACHABLE();
      break;
  }
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  return 0x1E4U * B23 | d * B22 | vn * B16 | vd * B12 | n * B7 | B6 | m * B5 |
         vm | op_encoding;
}

enum IntegerBinOp {
  VADD,
  VQADD,
  VSUB,
  VQSUB,
  VMUL,
  VMIN,
  VMAX,
  VTST,
  VCEQ,
  VCGE,
  VCGT
};

static Instr EncodeNeonBinOp(IntegerBinOp op, NeonDataType dt,
                             QwNeonRegister dst, QwNeonRegister src1,
                             QwNeonRegister src2) {
  int op_encoding = 0;
  switch (op) {
    case VADD:
      op_encoding = 0x8 * B8;
      break;
    case VQADD:
      op_encoding = B4;
      break;
    case VSUB:
      op_encoding = B24 | 0x8 * B8;
      break;
    case VQSUB:
      op_encoding = 0x2 * B8 | B4;
      break;
    case VMUL:
      op_encoding = 0x9 * B8 | B4;
      break;
    case VMIN:
      op_encoding = 0x6 * B8 | B4;
      break;
    case VMAX:
      op_encoding = 0x6 * B8;
      break;
    case VTST:
      op_encoding = 0x8 * B8 | B4;
      break;
    case VCEQ:
      op_encoding = B24 | 0x8 * B8 | B4;
      break;
    case VCGE:
      op_encoding = 0x3 * B8 | B4;
      break;
    case VCGT:
      op_encoding = 0x3 * B8;
      break;
    default:
      UNREACHABLE();
      break;
  }
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  int size = NeonSz(dt);
  int u = NeonU(dt);
  return 0x1E4U * B23 | u * B24 | d * B22 | size * B20 | vn * B16 | vd * B12 |
         n * B7 | B6 | m * B5 | vm | op_encoding;
}

static Instr EncodeNeonBinOp(IntegerBinOp op, NeonSize size, QwNeonRegister dst,
                             QwNeonRegister src1, QwNeonRegister src2) {
  // Map NeonSize values to the signed values in NeonDataType, so the U bit
  // will be 0.
  return EncodeNeonBinOp(op, static_cast<NeonDataType>(size), dst, src1, src2);
}
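
// Illustrative note (not part of the original source): this cast relies on
// the NeonDataType layout in which the low bits hold the element size and
// the U bit is separate, so e.g. Neon16 maps to NeonS16 and NeonU(dt) yields
// 0, which is what size-only instructions such as vadd.i16 require.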

void Assembler::vadd(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vadd(Qn, Qm) SIMD floating point addition.
  // Instruction details available in ARM DDI 0406C.b, A8-830.
  emit(EncodeNeonBinOp(VADDF, dst, src1, src2));
}

void Assembler::vadd(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vadd(Qn, Qm) SIMD integer addition.
  // Instruction details available in ARM DDI 0406C.b, A8-828.
  emit(EncodeNeonBinOp(VADD, size, dst, src1, src2));
}

void Assembler::vqadd(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
                      QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vqadd(Qn, Qm) SIMD integer saturating addition.
  // Instruction details available in ARM DDI 0406C.b, A8-996.
  emit(EncodeNeonBinOp(VQADD, dt, dst, src1, src2));
}

void Assembler::vsub(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vsub(Qn, Qm) SIMD floating point subtraction.
  // Instruction details available in ARM DDI 0406C.b, A8-1086.
  emit(EncodeNeonBinOp(VSUBF, dst, src1, src2));
}

void Assembler::vsub(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vsub(Qn, Qm) SIMD integer subtraction.
  // Instruction details available in ARM DDI 0406C.b, A8-1084.
  emit(EncodeNeonBinOp(VSUB, size, dst, src1, src2));
}

void Assembler::vqsub(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
                      QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vqsub(Qn, Qm) SIMD integer saturating subtraction.
  // Instruction details available in ARM DDI 0406C.b, A8-1020.
  emit(EncodeNeonBinOp(VQSUB, dt, dst, src1, src2));
}

void Assembler::vmul(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmul(Qn, Qm) SIMD floating point multiply.
  // Instruction details available in ARM DDI 0406C.b, A8-958.
  emit(EncodeNeonBinOp(VMULF, dst, src1, src2));
}

void Assembler::vmul(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmul(Qn, Qm) SIMD integer multiply.
  // Instruction details available in ARM DDI 0406C.b, A8-960.
  emit(EncodeNeonBinOp(VMUL, size, dst, src1, src2));
}

void Assembler::vmin(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmin(Qn, Qm) SIMD floating point MIN.
  // Instruction details available in ARM DDI 0406C.b, A8-928.
  emit(EncodeNeonBinOp(VMINF, dst, src1, src2));
}

void Assembler::vmin(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmin(Qn, Qm) SIMD integer MIN.
  // Instruction details available in ARM DDI 0406C.b, A8-926.
  emit(EncodeNeonBinOp(VMIN, dt, dst, src1, src2));
}

void Assembler::vmax(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmax(Qn, Qm) SIMD floating point MAX.
  // Instruction details available in ARM DDI 0406C.b, A8-928.
  emit(EncodeNeonBinOp(VMAXF, dst, src1, src2));
}

void Assembler::vmax(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vmax(Qn, Qm) SIMD integer MAX.
  // Instruction details available in ARM DDI 0406C.b, A8-926.
  emit(EncodeNeonBinOp(VMAX, dt, dst, src1, src2));
}

enum NeonShiftOp { VSHL, VSHR, VSLI, VSRI };

static Instr EncodeNeonShiftOp(NeonShiftOp op, NeonSize size, bool is_unsigned,
                               NeonRegType reg_type, int dst_code, int src_code,
                               int shift) {
  int imm6 = 0;
  int size_in_bits = kBitsPerByte << static_cast<int>(size);
  int op_encoding = 0;
  switch (op) {
    case VSHL: {
      DCHECK(shift >= 0 && size_in_bits > shift);
      imm6 = size_in_bits + shift;
      op_encoding = 0x5 * B8;
      break;
    }
    case VSHR: {
      DCHECK(shift > 0 && size_in_bits >= shift);
      imm6 = 2 * size_in_bits - shift;
      if (is_unsigned) op_encoding |= B24;
      break;
    }
    case VSLI: {
      DCHECK(shift >= 0 && size_in_bits > shift);
      imm6 = size_in_bits + shift;
      int L = imm6 >> 6;
      imm6 &= 0x3F;
      op_encoding = B24 | 0x5 * B8 | L * B7;
      break;
    }
    case VSRI: {
      DCHECK(shift > 0 && size_in_bits >= shift);
      imm6 = 2 * size_in_bits - shift;
      int L = imm6 >> 6;
      imm6 &= 0x3F;
      op_encoding = B24 | 0x4 * B8 | L * B7;
      break;
    }
    default:
      UNREACHABLE();
      break;
  }

  int vd, d;
  NeonSplitCode(reg_type, dst_code, &vd, &d, &op_encoding);
  int vm, m;
  NeonSplitCode(reg_type, src_code, &vm, &m, &op_encoding);

  return 0x1E5U * B23 | d * B22 | imm6 * B16 | vd * B12 | m * B5 | B4 | vm |
         op_encoding;
}

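// Worked example (illustrative): for a vshr.u32 by 5, size_in_bits is 32,
// so imm6 = 2 * 32 - 5 = 59 and is_unsigned sets B24; a decoder recovers
// the shift amount as 64 - imm6 = 5.
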
void Assembler::vshl(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src,
                     int shift) {
  DCHECK(IsEnabled(NEON));
  // Qd = vshl(Qm, bits) SIMD shift left immediate.
  // Instruction details available in ARM DDI 0406C.b, A8-1046.
  emit(EncodeNeonShiftOp(VSHL, NeonDataTypeToSize(dt), false, NEON_Q,
                         dst.code(), src.code(), shift));
}

void Assembler::vshr(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src,
                     int shift) {
  DCHECK(IsEnabled(NEON));
  // Qd = vshr(Qm, bits) SIMD shift right immediate.
  // Instruction details available in ARM DDI 0406C.b, A8-1052.
  emit(EncodeNeonShiftOp(VSHR, NeonDataTypeToSize(dt), NeonU(dt), NEON_Q,
                         dst.code(), src.code(), shift));
}

void Assembler::vsli(NeonSize size, DwVfpRegister dst, DwVfpRegister src,
                     int shift) {
  DCHECK(IsEnabled(NEON));
  // Dd = vsli(Dm, bits) SIMD shift left and insert.
  // Instruction details available in ARM DDI 0406C.b, A8-1056.
  emit(EncodeNeonShiftOp(VSLI, size, false, NEON_D, dst.code(), src.code(),
                         shift));
}

void Assembler::vsri(NeonSize size, DwVfpRegister dst, DwVfpRegister src,
                     int shift) {
  DCHECK(IsEnabled(NEON));
  // Dd = vsri(Dm, bits) SIMD shift right and insert.
  // Instruction details available in ARM DDI 0406C.b, A8-1062.
  emit(EncodeNeonShiftOp(VSRI, size, false, NEON_D, dst.code(), src.code(),
                         shift));
}

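// Sketch of the insert semantics (registers arbitrary, not from this file):
// vsli(Neon32, d0, d1, 3) shifts each 32-bit lane of d1 left by 3 and
// writes it into d0 while keeping the low 3 bits of each d0 lane; vsri is
// the mirror image, preserving the high bits instead.
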
static Instr EncodeNeonEstimateOp(bool is_rsqrt, QwNeonRegister dst,
                                  QwNeonRegister src) {
  int vd, d;
  dst.split_code(&vd, &d);
  int vm, m;
  src.split_code(&vm, &m);
  int rsqrt = is_rsqrt ? 1 : 0;
  return 0x1E7U * B23 | d * B22 | 0x3B * B16 | vd * B12 | 0x5 * B8 |
         rsqrt * B7 | B6 | m * B5 | vm;
}

void Assembler::vrecpe(QwNeonRegister dst, QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  // Qd = vrecpe(Qm) SIMD reciprocal estimate.
  // Instruction details available in ARM DDI 0406C.b, A8-1024.
  emit(EncodeNeonEstimateOp(false, dst, src));
}

void Assembler::vrsqrte(QwNeonRegister dst, QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  // Qd = vrsqrte(Qm) SIMD reciprocal square root estimate.
  // Instruction details available in ARM DDI 0406C.b, A8-1038.
  emit(EncodeNeonEstimateOp(true, dst, src));
}

void Assembler::vrecps(QwNeonRegister dst, QwNeonRegister src1,
                       QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vrecps(Qn, Qm) SIMD reciprocal refinement step.
  // Instruction details available in ARM DDI 0406C.b, A8-1026.
  emit(EncodeNeonBinOp(VRECPS, dst, src1, src2));
}

void Assembler::vrsqrts(QwNeonRegister dst, QwNeonRegister src1,
                        QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vrsqrts(Qn, Qm) SIMD reciprocal square root refinement step.
  // Instruction details available in ARM DDI 0406C.b, A8-1040.
  emit(EncodeNeonBinOp(VRSQRTS, dst, src1, src2));
}

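// Typical use (a sketch with arbitrary registers, not from this file): one
// Newton-Raphson step refining a reciprocal estimate:
//   vrecpe(q0, q1);      // q0 ~= 1 / q1, rough estimate.
//   vrecps(q2, q1, q0);  // q2 = 2 - q1 * q0, refinement factor.
//   vmul(q0, q0, q2);    // q0 moves closer to 1 / q1.
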
enum NeonPairwiseOp { VPADD, VPMIN, VPMAX };

static Instr EncodeNeonPairwiseOp(NeonPairwiseOp op, NeonDataType dt,
                                  DwVfpRegister dst, DwVfpRegister src1,
                                  DwVfpRegister src2) {
  int op_encoding = 0;
  switch (op) {
    case VPADD:
      op_encoding = 0xB * B8 | B4;
      break;
    case VPMIN:
      op_encoding = 0xA * B8 | B4;
      break;
    case VPMAX:
      op_encoding = 0xA * B8;
      break;
    default:
      UNREACHABLE();
      break;
  }
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  int size = NeonSz(dt);
  int u = NeonU(dt);
  return 0x1E4U * B23 | u * B24 | d * B22 | size * B20 | vn * B16 | vd * B12 |
         n * B7 | m * B5 | vm | op_encoding;
}

void Assembler::vpadd(DwVfpRegister dst, DwVfpRegister src1,
                      DwVfpRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Dd = vpadd(Dn, Dm) SIMD floating point pairwise ADD.
  // Instruction details available in ARM DDI 0406C.b, A8-982.
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);

  emit(0x1E6U * B23 | d * B22 | vn * B16 | vd * B12 | 0xD * B8 | n * B7 |
       m * B5 | vm);
}

void Assembler::vpadd(NeonSize size, DwVfpRegister dst, DwVfpRegister src1,
                      DwVfpRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Dd = vpadd(Dn, Dm) SIMD integer pairwise ADD.
  // Instruction details available in ARM DDI 0406C.b, A8-980.
  emit(EncodeNeonPairwiseOp(VPADD, NeonSizeToDataType(size), dst, src1, src2));
}

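// Illustrative semantics (registers arbitrary): vpadd(Neon16, d0, d1, d2)
// adds horizontally adjacent lane pairs, so for 16-bit lanes d0 becomes
// [d1[0]+d1[1], d1[2]+d1[3], d2[0]+d2[1], d2[2]+d2[3]].
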
void Assembler::vpmin(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src1,
                      DwVfpRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Dd = vpmin(Dn, Dm) SIMD integer pairwise MIN.
  // Instruction details available in ARM DDI 0406C.b, A8-986.
  emit(EncodeNeonPairwiseOp(VPMIN, dt, dst, src1, src2));
}

void Assembler::vpmax(NeonDataType dt, DwVfpRegister dst, DwVfpRegister src1,
                      DwVfpRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Dd = vpmax(Dn, Dm) SIMD integer pairwise MAX.
  // Instruction details available in ARM DDI 0406C.b, A8-986.
  emit(EncodeNeonPairwiseOp(VPMAX, dt, dst, src1, src2));
}

void Assembler::vtst(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vtst(Qn, Qm) SIMD test integer operands.
  // Instruction details available in ARM DDI 0406C.b, A8-1098.
  emit(EncodeNeonBinOp(VTST, size, dst, src1, src2));
}

void Assembler::vceq(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vceq(Qn, Qm) SIMD floating point compare equal.
  // Instruction details available in ARM DDI 0406C.b, A8-844.
  emit(EncodeNeonBinOp(VCEQF, dst, src1, src2));
}

void Assembler::vceq(NeonSize size, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vceq(Qn, Qm) SIMD integer compare equal.
  // Instruction details available in ARM DDI 0406C.b, A8-844.
  emit(EncodeNeonBinOp(VCEQ, size, dst, src1, src2));
}

void Assembler::vcge(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vcge(Qn, Qm) SIMD floating point compare greater or equal.
  // Instruction details available in ARM DDI 0406C.b, A8-848.
  emit(EncodeNeonBinOp(VCGEF, dst, src1, src2));
}

void Assembler::vcge(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vcge(Qn, Qm) SIMD integer compare greater or equal.
  // Instruction details available in ARM DDI 0406C.b, A8-848.
  emit(EncodeNeonBinOp(VCGE, dt, dst, src1, src2));
}

void Assembler::vcgt(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vcgt(Qn, Qm) SIMD floating point compare greater than.
  // Instruction details available in ARM DDI 0406C.b, A8-852.
  emit(EncodeNeonBinOp(VCGTF, dst, src1, src2));
}

void Assembler::vcgt(NeonDataType dt, QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // Qd = vcgt(Qn, Qm) SIMD integer compare greater than.
  // Instruction details available in ARM DDI 0406C.b, A8-852.
  emit(EncodeNeonBinOp(VCGT, dt, dst, src1, src2));
}

void Assembler::vext(QwNeonRegister dst, QwNeonRegister src1,
                     QwNeonRegister src2, int bytes) {
  DCHECK(IsEnabled(NEON));
  // Qd = vext(Qn, Qm) SIMD byte extract.
  // Instruction details available in ARM DDI 0406C.b, A8-890.
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  src1.split_code(&vn, &n);
  int vm, m;
  src2.split_code(&vm, &m);
  DCHECK_GT(16, bytes);
  emit(0x1E5U * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | bytes * B8 |
       n * B7 | B6 | m * B5 | vm);
}

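// Illustrative example (registers arbitrary): vext(q0, q1, q2, 4) treats
// q1:q2 as one 32-byte value and extracts bytes 4..19, so q0 receives the
// upper 12 bytes of q1 followed by the lowest 4 bytes of q2.
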
enum NeonSizedOp { VZIP, VUZP, VREV16, VREV32, VREV64, VTRN };

static Instr EncodeNeonSizedOp(NeonSizedOp op, NeonRegType reg_type,
                               NeonSize size, int dst_code, int src_code) {
  int op_encoding = 0;
  switch (op) {
    case VZIP:
      op_encoding = 0x2 * B16 | 0x3 * B7;
      break;
    case VUZP:
      op_encoding = 0x2 * B16 | 0x2 * B7;
      break;
    case VREV16:
      op_encoding = 0x2 * B7;
      break;
    case VREV32:
      op_encoding = 0x1 * B7;
      break;
    case VREV64:
      // op_encoding is 0;
      break;
    case VTRN:
      op_encoding = 0x2 * B16 | B7;
      break;
    default:
      UNREACHABLE();
      break;
  }
  int vd, d;
  NeonSplitCode(reg_type, dst_code, &vd, &d, &op_encoding);
  int vm, m;
  NeonSplitCode(reg_type, src_code, &vm, &m, &op_encoding);

  int sz = static_cast<int>(size);
  return 0x1E7U * B23 | d * B22 | 0x3 * B20 | sz * B18 | vd * B12 | m * B5 |
         vm | op_encoding;
}

void Assembler::vzip(NeonSize size, DwVfpRegister src1, DwVfpRegister src2) {
  if (size == Neon32) {  // vzip.32 Dd, Dm is a pseudo-op for vtrn.32 Dd, Dm.
    vtrn(size, src1, src2);
  } else {
    DCHECK(IsEnabled(NEON));
    // vzip.<size>(Dn, Dm) SIMD zip (interleave).
    // Instruction details available in ARM DDI 0406C.b, A8-1102.
    emit(EncodeNeonSizedOp(VZIP, NEON_D, size, src1.code(), src2.code()));
  }
}

void Assembler::vzip(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // vzip.<size>(Qn, Qm) SIMD zip (interleave).
  // Instruction details available in ARM DDI 0406C.b, A8-1102.
  emit(EncodeNeonSizedOp(VZIP, NEON_Q, size, src1.code(), src2.code()));
}

void Assembler::vuzp(NeonSize size, DwVfpRegister src1, DwVfpRegister src2) {
  if (size == Neon32) {  // vuzp.32 Dd, Dm is a pseudo-op for vtrn.32 Dd, Dm.
    vtrn(size, src1, src2);
  } else {
    DCHECK(IsEnabled(NEON));
    // vuzp.<size>(Dn, Dm) SIMD un-zip (de-interleave).
    // Instruction details available in ARM DDI 0406C.b, A8-1100.
    emit(EncodeNeonSizedOp(VUZP, NEON_D, size, src1.code(), src2.code()));
  }
}

void Assembler::vuzp(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // vuzp.<size>(Qn, Qm) SIMD un-zip (de-interleave).
  // Instruction details available in ARM DDI 0406C.b, A8-1100.
  emit(EncodeNeonSizedOp(VUZP, NEON_Q, size, src1.code(), src2.code()));
}

void Assembler::vrev16(NeonSize size, QwNeonRegister dst, QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  // Qd = vrev16.<size>(Qm) SIMD element reverse.
  // Instruction details available in ARM DDI 0406C.b, A8-1028.
  emit(EncodeNeonSizedOp(VREV16, NEON_Q, size, dst.code(), src.code()));
}

void Assembler::vrev32(NeonSize size, QwNeonRegister dst, QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  // Qd = vrev32.<size>(Qm) SIMD element reverse.
  // Instruction details available in ARM DDI 0406C.b, A8-1028.
  emit(EncodeNeonSizedOp(VREV32, NEON_Q, size, dst.code(), src.code()));
}

void Assembler::vrev64(NeonSize size, QwNeonRegister dst, QwNeonRegister src) {
  DCHECK(IsEnabled(NEON));
  // Qd = vrev64.<size>(Qm) SIMD element reverse.
  // Instruction details available in ARM DDI 0406C.b, A8-1028.
  emit(EncodeNeonSizedOp(VREV64, NEON_Q, size, dst.code(), src.code()));
}

void Assembler::vtrn(NeonSize size, DwVfpRegister src1, DwVfpRegister src2) {
  DCHECK(IsEnabled(NEON));
  // vtrn.<size>(Dn, Dm) SIMD element transpose.
  // Instruction details available in ARM DDI 0406C.b, A8-1096.
  emit(EncodeNeonSizedOp(VTRN, NEON_D, size, src1.code(), src2.code()));
}

void Assembler::vtrn(NeonSize size, QwNeonRegister src1, QwNeonRegister src2) {
  DCHECK(IsEnabled(NEON));
  // vtrn.<size>(Qn, Qm) SIMD element transpose.
  // Instruction details available in ARM DDI 0406C.b, A8-1096.
  emit(EncodeNeonSizedOp(VTRN, NEON_Q, size, src1.code(), src2.code()));
}

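// Illustrative semantics for 8-bit lanes (registers arbitrary):
// vzip(Neon8, d0, d1) interleaves the inputs in place, leaving
// d0 = [a0,b0,a1,b1,...] and d1 = [a4,b4,a5,b5,...]; vuzp undoes the
// interleave, and vtrn swaps the odd lanes of d0 with the even lanes of d1.
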
// Encode NEON vtbl / vtbx instruction.
static Instr EncodeNeonVTB(DwVfpRegister dst, const NeonListOperand& list,
                           DwVfpRegister index, bool vtbx) {
  // Dd = vtbl(table, Dm) SIMD vector permute, zero at out of range indices.
  // Instruction details available in ARM DDI 0406C.b, A8-1094.
  // Dd = vtbx(table, Dm) SIMD vector permute, skip out of range indices.
  // Instruction details available in ARM DDI 0406C.b, A8-1094.
  int vd, d;
  dst.split_code(&vd, &d);
  int vn, n;
  list.base().split_code(&vn, &n);
  int vm, m;
  index.split_code(&vm, &m);
  int op = vtbx ? 1 : 0;  // vtbl = 0, vtbx = 1.
  return 0x1E7U * B23 | d * B22 | 0x3 * B20 | vn * B16 | vd * B12 | 0x2 * B10 |
         list.length() * B8 | n * B7 | op * B6 | m * B5 | vm;
}

void Assembler::vtbl(DwVfpRegister dst, const NeonListOperand& list,
                     DwVfpRegister index) {
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonVTB(dst, list, index, false));
}

void Assembler::vtbx(DwVfpRegister dst, const NeonListOperand& list,
                     DwVfpRegister index) {
  DCHECK(IsEnabled(NEON));
  emit(EncodeNeonVTB(dst, list, index, true));
}

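// Illustrative use (registers arbitrary): with a two-register table,
//   vtbl(d0, NeonListOperand(d1, 2), d3);
// looks up every byte of d3 as an index into the 16-byte table d1:d2.
// Out-of-range indices write 0 with vtbl but leave the destination byte
// untouched with vtbx.
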
// Pseudo instructions.
void Assembler::nop(int type) {
  // ARMv6{K/T2} and v7 have an actual NOP instruction but it serializes
  // some of the CPU's pipeline and has to issue. Older ARM chips simply used
  // MOV Rx, Rx as NOP and it performs better even in newer CPUs.
  // We therefore use MOV Rx, Rx, even on newer CPUs, and use Rx to encode
  // a type.
  DCHECK(0 <= type && type <= 14);  // mov pc, pc isn't a nop.
  emit(al | 13 * B21 | type * B12 | type);
}

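// For instance, nop(0) assembles to mov r0, r0 (0xE1A00000); the register
// number doubles as the type tag that IsNop() below checks for.
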
void Assembler::pop() { add(sp, sp, Operand(kPointerSize)); }

bool Assembler::IsMovT(Instr instr) {
  instr &= ~(((kNumberOfConditions - 1) << 28) |  // Mask off conditions
             ((kNumRegisters - 1) * B12) |        // mask out register
             EncodeMovwImmediate(0xFFFF));        // mask out immediate value
  return instr == kMovtPattern;
}


bool Assembler::IsMovW(Instr instr) {
  instr &= ~(((kNumberOfConditions - 1) << 28) |  // Mask off conditions
             ((kNumRegisters - 1) * B12) |        // mask out destination
             EncodeMovwImmediate(0xFFFF));        // mask out immediate value
  return instr == kMovwPattern;
}


Instr Assembler::GetMovTPattern() { return kMovtPattern; }


Instr Assembler::GetMovWPattern() { return kMovwPattern; }


Instr Assembler::EncodeMovwImmediate(uint32_t immediate) {
  DCHECK_LT(immediate, 0x10000);
  return ((immediate & 0xF000) << 4) | (immediate & 0xFFF);
}

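// Worked example: EncodeMovwImmediate(0xABCD) returns 0xA0BCD, placing the
// top nibble in the instruction's imm4 field (bits 19:16) and the low 12
// bits in imm12 (bits 11:0).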

Instr Assembler::PatchMovwImmediate(Instr instruction, uint32_t immediate) {
  instruction &= ~EncodeMovwImmediate(0xFFFF);
  return instruction | EncodeMovwImmediate(immediate);
}


int Assembler::DecodeShiftImm(Instr instr) {
  int rotate = Instruction::RotateValue(instr) * 2;
  int immed8 = Instruction::Immed8Value(instr);
  return base::bits::RotateRight32(immed8, rotate);
}

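// Worked example: a rotate field of 1 with immed8 0xFF decodes to
// RotateRight32(0xFF, 2) == 0xC000003F.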

Instr Assembler::PatchShiftImm(Instr instr, int immed) {
  uint32_t rotate_imm = 0;
  uint32_t immed_8 = 0;
  bool immed_fits = FitsShifter(immed, &rotate_imm, &immed_8, nullptr);
  DCHECK(immed_fits);
  USE(immed_fits);
  return (instr & ~kOff12Mask) | (rotate_imm << 8) | immed_8;
}


bool Assembler::IsNop(Instr instr, int type) {
  DCHECK(0 <= type && type <= 14);  // mov pc, pc isn't a nop.
  // Check for mov rx, rx where x = type.
  return instr == (al | 13 * B21 | type * B12 | type);
}


bool Assembler::IsMovImmed(Instr instr) {
  return (instr & kMovImmedMask) == kMovImmedPattern;
}


bool Assembler::IsOrrImmed(Instr instr) {
  return (instr & kOrrImmedMask) == kOrrImmedPattern;
}


// static
bool Assembler::ImmediateFitsAddrMode1Instruction(int32_t imm32) {
  uint32_t dummy1;
  uint32_t dummy2;
  return FitsShifter(imm32, &dummy1, &dummy2, nullptr);
}

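// For example, 0x00FF0000 fits (0xFF rotated right by 16, rotate_imm 8),
// while 0x00FF00FF cannot be expressed as any rotated 8-bit immediate.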

bool Assembler::ImmediateFitsAddrMode2Instruction(int32_t imm32) {
  return is_uint12(abs(imm32));
}


// Debugging.
void Assembler::RecordConstPool(int size) {
  // We only need this for debugger support, to correctly compute offsets in the
  // code.
  RecordRelocInfo(RelocInfo::CONST_POOL, static_cast<intptr_t>(size));
}


void Assembler::GrowBuffer() {
  if (!own_buffer_) FATAL("external code buffer is too small");

  // Compute new buffer size.
  CodeDesc desc;  // the new buffer
  if (buffer_size_ < 1 * MB) {
    desc.buffer_size = 2 * buffer_size_;
  } else {
    desc.buffer_size = buffer_size_ + 1 * MB;
  }

  // Some internal data structures overflow for very large buffers, so
  // kMaximalBufferSize must be kept small enough to avoid that.
  if (desc.buffer_size > kMaximalBufferSize) {
    V8::FatalProcessOutOfMemory(nullptr, "Assembler::GrowBuffer");
  }

  // Set up new buffer.
  desc.buffer = NewArray<byte>(desc.buffer_size);

  desc.instr_size = pc_offset();
  desc.reloc_size = (buffer_ + buffer_size_) - reloc_info_writer.pos();
  desc.origin = this;

  // Copy the data.
  int pc_delta = desc.buffer - buffer_;
  int rc_delta = (desc.buffer + desc.buffer_size) - (buffer_ + buffer_size_);
  MemMove(desc.buffer, buffer_, desc.instr_size);
  MemMove(reloc_info_writer.pos() + rc_delta, reloc_info_writer.pos(),
          desc.reloc_size);

  // Switch buffers.
  DeleteArray(buffer_);
  buffer_ = desc.buffer;
  buffer_size_ = desc.buffer_size;
  pc_ += pc_delta;
  reloc_info_writer.Reposition(reloc_info_writer.pos() + rc_delta,
                               reloc_info_writer.last_pc() + pc_delta);

  // None of our relocation types are pc relative pointing outside the code
  // buffer nor pc absolute pointing inside the code buffer, so there is no need
  // to relocate any emitted relocation entries.
}

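// Resulting growth schedule (illustrative): 256 KB doubles to 512 KB and
// then 1 MB; past that the buffer grows linearly to 2 MB, 3 MB, and so on,
// until kMaximalBufferSize trips the out-of-memory check above.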

void Assembler::db(uint8_t data) {
  // db is used to write raw data. The constant pool should be emitted or
  // blocked before using db.
  DCHECK(is_const_pool_blocked() || pending_32_bit_constants_.empty());
  DCHECK(is_const_pool_blocked() || pending_64_bit_constants_.empty());
  CheckBuffer();
  *reinterpret_cast<uint8_t*>(pc_) = data;
  pc_ += sizeof(uint8_t);
}


void Assembler::dd(uint32_t data) {
  // dd is used to write raw data. The constant pool should be emitted or
  // blocked before using dd.
  DCHECK(is_const_pool_blocked() || pending_32_bit_constants_.empty());
  DCHECK(is_const_pool_blocked() || pending_64_bit_constants_.empty());
  CheckBuffer();
  *reinterpret_cast<uint32_t*>(pc_) = data;
  pc_ += sizeof(uint32_t);
}


void Assembler::dq(uint64_t value) {
  // dq is used to write raw data. The constant pool should be emitted or
  // blocked before using dq.
  DCHECK(is_const_pool_blocked() || pending_32_bit_constants_.empty());
  DCHECK(is_const_pool_blocked() || pending_64_bit_constants_.empty());
  CheckBuffer();
  *reinterpret_cast<uint64_t*>(pc_) = value;
  pc_ += sizeof(uint64_t);
}

void Assembler::RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data) {
  if (options().disable_reloc_info_for_patching) return;
  if (RelocInfo::IsNone(rmode) ||
      // Don't record external references unless the heap will be serialized.
      (RelocInfo::IsOnlyForSerializer(rmode) &&
       !options().record_reloc_info_for_serialization && !emit_debug_code())) {
    return;
  }
  DCHECK_GE(buffer_space(), kMaxRelocSize);  // too late to grow buffer here
  RelocInfo rinfo(reinterpret_cast<Address>(pc_), rmode, data, nullptr);
  reloc_info_writer.Write(&rinfo);
}

void Assembler::ConstantPoolAddEntry(int position, RelocInfo::Mode rmode,
                                     intptr_t value) {
  DCHECK(rmode != RelocInfo::COMMENT && rmode != RelocInfo::CONST_POOL);
  // We can share CODE_TARGETs because we don't patch the code objects anymore,
  // and we make sure we emit only one reloc info for them (thus delta patching
  // will apply the delta only once). At the moment, we do not dedup code
  // targets if they are wrapped in a heap object request (value == 0).
  bool sharing_ok = RelocInfo::IsShareableRelocMode(rmode) ||
                    (rmode == RelocInfo::CODE_TARGET && value != 0);
  DCHECK_LT(pending_32_bit_constants_.size(), kMaxNumPending32Constants);
  if (pending_32_bit_constants_.empty()) {
    first_const_pool_32_use_ = position;
  }
  ConstantPoolEntry entry(position, value, sharing_ok, rmode);

  bool shared = false;
  if (sharing_ok) {
    // Merge the constant, if possible.
    for (size_t i = 0; i < pending_32_bit_constants_.size(); i++) {
      ConstantPoolEntry& current_entry = pending_32_bit_constants_[i];
      if (!current_entry.sharing_ok()) continue;
      if (entry.value() == current_entry.value() &&
          entry.rmode() == current_entry.rmode()) {
        entry.set_merged_index(i);
        shared = true;
        break;
      }
    }
  }

  pending_32_bit_constants_.push_back(entry);

  // Make sure the constant pool is not emitted in place of the next
  // instruction for which we just recorded relocation info.
  BlockConstPoolFor(1);

  // Emit relocation info.
  if (MustOutputRelocInfo(rmode, this) && !shared) {
    RecordRelocInfo(rmode);
  }
}
void Assembler::BlockConstPoolFor(int instructions) {
  int pc_limit = pc_offset() + instructions * kInstrSize;
  if (no_const_pool_before_ < pc_limit) {
    // Max pool start (if we need a jump and an alignment).
#ifdef DEBUG
    int start = pc_limit + kInstrSize + 2 * kPointerSize;
    DCHECK(pending_32_bit_constants_.empty() ||
           (start - first_const_pool_32_use_ +
                pending_64_bit_constants_.size() * kDoubleSize <
            kMaxDistToIntPool));
    DCHECK(pending_64_bit_constants_.empty() ||
           (start - first_const_pool_64_use_ < kMaxDistToFPPool));
#endif
    no_const_pool_before_ = pc_limit;
  }

  if (next_buffer_check_ < no_const_pool_before_) {
    next_buffer_check_ = no_const_pool_before_;
  }
}

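// Typical use (sketch): a movw/movt pair that must stay contiguous is
// preceded by BlockConstPoolFor(2), so CheckConstPool() cannot place the
// pool between the two halves.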

void Assembler::CheckConstPool(bool force_emit, bool require_jump) {
  // Some short instruction sequences must not be broken up by constant pool
  // emission; such sequences are protected by calls to BlockConstPoolFor and
  // BlockConstPoolScope.
  if (is_const_pool_blocked()) {
    // Something is wrong if emission is forced and blocked at the same time.
    DCHECK(!force_emit);
    return;
  }

  // There is nothing to do if there are no pending constant pool entries.
  if (pending_32_bit_constants_.empty() && pending_64_bit_constants_.empty()) {
    // Calculate the offset of the next check.
    next_buffer_check_ = pc_offset() + kCheckPoolInterval;
    return;
  }

  // Check that the code buffer is large enough before emitting the constant
  // pool (this includes the jump over the pool, the constant pool marker, and
  // the gap to the relocation information).
  int jump_instr = require_jump ? kInstrSize : 0;
  int size_up_to_marker = jump_instr + kInstrSize;
  int estimated_size_after_marker =
      pending_32_bit_constants_.size() * kPointerSize;
  bool has_int_values = !pending_32_bit_constants_.empty();
  bool has_fp_values = !pending_64_bit_constants_.empty();
  bool require_64_bit_align = false;
  if (has_fp_values) {
    require_64_bit_align =
        !IsAligned(reinterpret_cast<intptr_t>(pc_ + size_up_to_marker),
                   kDoubleAlignment);
    if (require_64_bit_align) {
      estimated_size_after_marker += kInstrSize;
    }
    estimated_size_after_marker +=
        pending_64_bit_constants_.size() * kDoubleSize;
  }
  int estimated_size = size_up_to_marker + estimated_size_after_marker;

  // We emit a constant pool when:
  //  * requested to do so by parameter force_emit (e.g. after each function).
  //  * the distance from the first instruction accessing the constant pool to
  //    any of the constant pool entries will exceed its limit the next
  //    time the pool is checked. This is overly restrictive, but we don't emit
  //    constant pool entries in-order so it's conservatively correct.
  //  * the instruction doesn't require a jump after itself to jump over the
  //    constant pool, and we're getting close to running out of range.
  if (!force_emit) {
    DCHECK(has_fp_values || has_int_values);
    bool need_emit = false;
    if (has_fp_values) {
      // The 64-bit constants are always emitted before the 32-bit constants, so
      // we can ignore the effect of the 32-bit constants on estimated_size.
      int dist64 = pc_offset() + estimated_size -
                   pending_32_bit_constants_.size() * kPointerSize -
                   first_const_pool_64_use_;
      if ((dist64 >= kMaxDistToFPPool - kCheckPoolInterval) ||
          (!require_jump && (dist64 >= kMaxDistToFPPool / 2))) {
        need_emit = true;
      }
    }
    if (has_int_values) {
      int dist32 = pc_offset() + estimated_size - first_const_pool_32_use_;
      if ((dist32 >= kMaxDistToIntPool - kCheckPoolInterval) ||
          (!require_jump && (dist32 >= kMaxDistToIntPool / 2))) {
        need_emit = true;
      }
    }
    if (!need_emit) return;
  }

  // Deduplicate constants.
  int size_after_marker = estimated_size_after_marker;
  for (size_t i = 0; i < pending_64_bit_constants_.size(); i++) {
    ConstantPoolEntry& entry = pending_64_bit_constants_[i];
    if (entry.is_merged()) size_after_marker -= kDoubleSize;
  }

  for (size_t i = 0; i < pending_32_bit_constants_.size(); i++) {
    ConstantPoolEntry& entry = pending_32_bit_constants_[i];
    if (entry.is_merged()) size_after_marker -= kPointerSize;
  }

  int size = size_up_to_marker + size_after_marker;

  int needed_space = size + kGap;
  while (buffer_space() <= needed_space) GrowBuffer();

  {
    // Block recursive calls to CheckConstPool.
    BlockConstPoolScope block_const_pool(this);
    RecordComment("[ Constant Pool");
    RecordConstPool(size);
    Label size_check;
    bind(&size_check);

    // Emit jump over constant pool if necessary.
    Label after_pool;
    if (require_jump) {
      b(&after_pool);
    }

    // Put down constant pool marker "Undefined instruction".
    // The data size helps disassembly know what to print.
    emit(kConstantPoolMarker |
         EncodeConstantPoolLength(size_after_marker / kPointerSize));
    if (require_64_bit_align) {
      emit(kConstantPoolMarker);
    }

    // Emit 64-bit constant pool entries first: their range is smaller than
    // 32-bit entries.
    for (size_t i = 0; i < pending_64_bit_constants_.size(); i++) {
      ConstantPoolEntry& entry = pending_64_bit_constants_[i];

      Instr instr = instr_at(entry.position());
      // Instruction to patch must be 'vldr rd, [pc, #offset]' with offset == 0.
      DCHECK((IsVldrDPcImmediateOffset(instr) &&
              GetVldrDRegisterImmediateOffset(instr) == 0));

      int delta = pc_offset() - entry.position() - Instruction::kPcLoadDelta;
      DCHECK(is_uint10(delta));

      if (entry.is_merged()) {
        ConstantPoolEntry& merged =
            pending_64_bit_constants_[entry.merged_index()];
        DCHECK(entry.value64() == merged.value64());
        Instr merged_instr = instr_at(merged.position());
        DCHECK(IsVldrDPcImmediateOffset(merged_instr));
        delta = GetVldrDRegisterImmediateOffset(merged_instr);
        delta += merged.position() - entry.position();
      }
      instr_at_put(entry.position(),
                   SetVldrDRegisterImmediateOffset(instr, delta));
      if (!entry.is_merged()) {
        DCHECK(IsAligned(reinterpret_cast<intptr_t>(pc_), kDoubleAlignment));
        dq(entry.value64());
      }
    }

    // Emit 32-bit constant pool entries.
    for (size_t i = 0; i < pending_32_bit_constants_.size(); i++) {
      ConstantPoolEntry& entry = pending_32_bit_constants_[i];
      Instr instr = instr_at(entry.position());

      // 64-bit loads shouldn't get here.
      DCHECK(!IsVldrDPcImmediateOffset(instr));
      DCHECK(!IsMovW(instr));
      DCHECK(IsLdrPcImmediateOffset(instr) &&
             GetLdrRegisterImmediateOffset(instr) == 0);

      int delta = pc_offset() - entry.position() - Instruction::kPcLoadDelta;
      DCHECK(is_uint12(delta));
      // 0 is the smallest delta:
      //   ldr rd, [pc, #0]
      //   constant pool marker
      //   data

      if (entry.is_merged()) {
        DCHECK(entry.sharing_ok());
        ConstantPoolEntry& merged =
            pending_32_bit_constants_[entry.merged_index()];
        DCHECK(entry.value() == merged.value());
        Instr merged_instr = instr_at(merged.position());
        DCHECK(IsLdrPcImmediateOffset(merged_instr));
        delta = GetLdrRegisterImmediateOffset(merged_instr);
        delta += merged.position() - entry.position();
      }
      instr_at_put(entry.position(),
                   SetLdrRegisterImmediateOffset(instr, delta));
      if (!entry.is_merged()) {
        emit(entry.value());
      }
    }

    pending_32_bit_constants_.clear();
    pending_64_bit_constants_.clear();

    first_const_pool_32_use_ = -1;
    first_const_pool_64_use_ = -1;

    RecordComment("]");

    DCHECK_EQ(size, SizeOfCodeGeneratedSince(&size_check));

    if (after_pool.is_linked()) {
      bind(&after_pool);
    }
  }

  // Since a constant pool was just emitted, move the check offset forward by
  // the standard interval.
  next_buffer_check_ = pc_offset() + kCheckPoolInterval;
}

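// Illustrative layout of an emitted pool when require_jump and the 64-bit
// alignment pad are both needed (a sketch, not an exact dump):
//   b after_pool
//   <pool marker: undefined instruction encoding the pool length>
//   <second marker as 8-byte alignment padding>
//   <64-bit entries> <32-bit entries>
// after_pool:
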
PatchingAssembler::PatchingAssembler(const AssemblerOptions& options,
                                     byte* address, int instructions)
    : Assembler(options, address, instructions * kInstrSize + kGap) {
  DCHECK_EQ(reloc_info_writer.pos(), buffer_ + buffer_size_);
}

PatchingAssembler::~PatchingAssembler() {
  // Check that we don't have any pending constant pools.
  DCHECK(pending_32_bit_constants_.empty());
  DCHECK(pending_64_bit_constants_.empty());

  // Check that the code was patched as expected.
  DCHECK_EQ(pc_, buffer_ + buffer_size_ - kGap);
  DCHECK_EQ(reloc_info_writer.pos(), buffer_ + buffer_size_);
}

void PatchingAssembler::Emit(Address addr) { emit(static_cast<Instr>(addr)); }
UseScratchRegisterScope::UseScratchRegisterScope(Assembler* assembler)
    : assembler_(assembler),
      old_available_(*assembler->GetScratchRegisterList()),
      old_available_vfp_(*assembler->GetScratchVfpRegisterList()) {}

UseScratchRegisterScope::~UseScratchRegisterScope() {
  *assembler_->GetScratchRegisterList() = old_available_;
  *assembler_->GetScratchVfpRegisterList() = old_available_vfp_;
}

Register UseScratchRegisterScope::Acquire() {
  RegList* available = assembler_->GetScratchRegisterList();
  DCHECK_NOT_NULL(available);
  DCHECK_NE(*available, 0);
  int index = static_cast<int>(base::bits::CountTrailingZeros32(*available));
  Register reg = Register::from_code(index);
  *available &= ~reg.bit();
  return reg;
}

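// Typical use (sketch): callers scope a scratch register so it is handed
// back automatically:
//   {
//     UseScratchRegisterScope temps(this);
//     Register scratch = temps.Acquire();
//     // ... use scratch; released when temps goes out of scope.
//   }
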
}  // namespace internal
}  // namespace v8
#endif  // V8_TARGET_ARCH_ARM