assembler-x64.h 52.4 KB
Newer Older
1 2 3
// Copyright (c) 1994-2006 Sun Microsystems Inc.
// All Rights Reserved.
//
4 5 6 7
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
8 9 10 11 12 13
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// - Redistribution in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
14
//
15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
// - Neither the name of Sun Microsystems or the names of contributors may
// be used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// The original source code covered by the license above has been
// modified significantly by Google Inc.
33
// Copyright 2012 the V8 project authors. All rights reserved.
34 35

// A lightweight X64 Assembler.
36

37 38 39
#ifndef V8_X64_ASSEMBLER_X64_H_
#define V8_X64_ASSEMBLER_X64_H_

40 41
#include "serialize.h"

42 43
namespace v8 {
namespace internal {
44

45 46 47
// Utility functions

// Test whether a 64-bit value is in a specific range.
48
// Returns true if the signed 64-bit value x lies in [0, 2^32 - 1].
inline bool is_uint32(int64_t x) {
  // Reinterpreted as unsigned, x is in range exactly when none of the upper
  // 32 bits are set. Negative inputs always have high bits set and fail.
  const uint64_t bits = static_cast<uint64_t>(x);
  return (bits >> 32) == 0;
}

53
// Returns true if x is representable as a signed 32-bit integer, i.e. lies
// in [-2^31, 2^31 - 1].
inline bool is_int32(int64_t x) {
  // Compare against both bounds directly rather than computing
  // x - kMinInt32: that subtraction overflows int64_t (undefined behaviour)
  // for inputs greater than INT64_MAX - 2^31.
  static const int64_t kMinInt32 = -(static_cast<int64_t>(1) << 31);
  static const int64_t kMaxInt32 = (static_cast<int64_t>(1) << 31) - 1;
  return kMinInt32 <= x && x <= kMaxInt32;
}

58
// Returns true if the unsigned 64-bit value x also fits in a non-negative
// signed 32-bit integer, i.e. x <= 2^31 - 1.
inline bool uint_is_int32(uint64_t x) {
  // The value fits exactly when bit 31 and everything above it are clear.
  return (x >> 31) == 0;
}

63
// Returns true if the unsigned 64-bit value x fits in 32 bits, i.e.
// x <= 2^32 - 1.
inline bool is_uint32(uint64_t x) {
  // In range exactly when none of the upper 32 bits are set.
  return (x >> 32) == 0;
}

68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89
// CPU Registers.
//
// 1) We would prefer to use an enum, but enum values are assignment-
// compatible with int, which has caused code-generation bugs.
//
// 2) We would prefer to use a class instead of a struct but we don't like
// the register initialization to depend on the particular initialization
// order (which appears to be different on OS X, Linux, and Windows for the
// installed versions of C++ we tried). Using a struct permits C-style
// "initialization". Also, the Register objects cannot be const as this
// forces initialization stubs in MSVC, making us dependent on initialization
// order.
//
// 3) By not using an enum, we are possibly preventing the compiler from
// doing certain constant folds, which may significantly reduce the
// code generated for some assembly instructions (because they boil down
// to a few constants). If this is a problem, we could change the code
// such that we use an enum in optimized mode, and the struct in debug
// mode. This way we get the compile-time error checking in debug mode
// and best performance in optimized code.
//

90
struct Register {
91 92 93 94 95
  // The non-allocatable registers are:
  //  rsp - stack pointer
  //  rbp - frame pointer
  //  rsi - context register
  //  r10 - fixed scratch register
96
  //  r12 - smi constant register
97
  //  r13 - root register
98 99 100 101
  static const int kMaxNumAllocatableRegisters = 10;
  static int NumAllocatableRegisters() {
    return kMaxNumAllocatableRegisters;
  }
102 103
  static const int kNumRegisters = 16;

104
  static int ToAllocationIndex(Register reg) {
105
    return kAllocationIndexByRegisterCode[reg.code()];
106 107 108
  }

  static Register FromAllocationIndex(int index) {
109
    ASSERT(index >= 0 && index < kMaxNumAllocatableRegisters);
110
    Register result = { kRegisterCodeByAllocationIndex[index] };
111 112 113
    return result;
  }

114
  static const char* AllocationIndexToString(int index) {
115
    ASSERT(index >= 0 && index < kMaxNumAllocatableRegisters);
116 117 118
    const char* const names[] = {
      "rax",
      "rbx",
119 120
      "rdx",
      "rcx",
121 122 123 124
      "rdi",
      "r8",
      "r9",
      "r11",
125
      "r14",
126
      "r15"
127 128 129 130
    };
    return names[index];
  }

131
  static Register from_code(int code) {
132
    Register r = { code };
133 134
    return r;
  }
135
  bool is_valid() const { return 0 <= code_ && code_ < kNumRegisters; }
136
  bool is(Register reg) const { return code_ == reg.code_; }
137 138
  // rax, rbx, rcx and rdx are byte registers, the rest are not.
  bool is_byte_register() const { return code_ <= 3; }
139
  int code() const {
140 141 142
    ASSERT(is_valid());
    return code_;
  }
143
  int bit() const {
144
    return 1 << code_;
145 146
  }

147 148 149 150 151 152 153 154 155 156 157
  // Return the high bit of the register code as a 0 or 1.  Used often
  // when constructing the REX prefix byte.
  int high_bit() const {
    return code_ >> 3;
  }
  // Return the 3 low bits of the register code.  Used when encoding registers
  // in modR/M, SIB, and opcode bytes.
  int low_bits() const {
    return code_ & 0x7;
  }

158 159
  // Unfortunately we can't make this private in a struct when initializing
  // by assignment.
160
  int code_;
161

162
 private:
163
  static const int kRegisterCodeByAllocationIndex[kMaxNumAllocatableRegisters];
164
  static const int kAllocationIndexByRegisterCode[kNumRegisters];
165 166
};

167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201
// Numeric codes of the sixteen general-purpose registers. These are the
// values stored in Register::code_; no_reg uses -1, which Register::is_valid()
// rejects.
const int kRegister_rax_Code = 0;
const int kRegister_rcx_Code = 1;
const int kRegister_rdx_Code = 2;
const int kRegister_rbx_Code = 3;
const int kRegister_rsp_Code = 4;
const int kRegister_rbp_Code = 5;
const int kRegister_rsi_Code = 6;
const int kRegister_rdi_Code = 7;
const int kRegister_r8_Code = 8;
const int kRegister_r9_Code = 9;
const int kRegister_r10_Code = 10;
const int kRegister_r11_Code = 11;
const int kRegister_r12_Code = 12;
const int kRegister_r13_Code = 13;
const int kRegister_r14_Code = 14;
const int kRegister_r15_Code = 15;
const int kRegister_no_reg_Code = -1;

// Register objects, brace-initialized from the codes above (see the
// "CPU Registers" notes on why Register is a plain struct).
const Register rax = { kRegister_rax_Code };
const Register rcx = { kRegister_rcx_Code };
const Register rdx = { kRegister_rdx_Code };
const Register rbx = { kRegister_rbx_Code };
const Register rsp = { kRegister_rsp_Code };
const Register rbp = { kRegister_rbp_Code };
const Register rsi = { kRegister_rsi_Code };
const Register rdi = { kRegister_rdi_Code };
const Register r8 = { kRegister_r8_Code };
const Register r9 = { kRegister_r9_Code };
const Register r10 = { kRegister_r10_Code };
const Register r11 = { kRegister_r11_Code };
const Register r12 = { kRegister_r12_Code };
const Register r13 = { kRegister_r13_Code };
const Register r14 = { kRegister_r14_Code };
const Register r15 = { kRegister_r15_Code };
// Sentinel for "no register".
const Register no_reg = { kRegister_no_reg_Code };
202

203 204
// arg_reg_1 .. arg_reg_4 name the registers that carry the first four
// arguments under the platform's calling convention.
#ifdef _WIN64
// Windows calling convention: rcx, rdx, r8, r9.
const Register arg_reg_1 = { kRegister_rcx_Code };
const Register arg_reg_2 = { kRegister_rdx_Code };
const Register arg_reg_3 = { kRegister_r8_Code };
const Register arg_reg_4 = { kRegister_r9_Code };
#else
// AMD64 calling convention: rdi, rsi, rdx, rcx.
const Register arg_reg_1 = { kRegister_rdi_Code };
const Register arg_reg_2 = { kRegister_rsi_Code };
const Register arg_reg_3 = { kRegister_rdx_Code };
const Register arg_reg_4 = { kRegister_rcx_Code };
#endif  // _WIN64
216

217
struct XMMRegister {
218
  static const int kMaxNumRegisters = 16;
219 220 221 222
  static const int kMaxNumAllocatableRegisters = 15;
  static int NumAllocatableRegisters() {
    return kMaxNumAllocatableRegisters;
  }
223 224 225 226 227 228

  static int ToAllocationIndex(XMMRegister reg) {
    ASSERT(reg.code() != 0);
    return reg.code() - 1;
  }

229
  static XMMRegister FromAllocationIndex(int index) {
230
    ASSERT(0 <= index && index < kMaxNumAllocatableRegisters);
231 232 233 234
    XMMRegister result = { index + 1 };
    return result;
  }

235
  static const char* AllocationIndexToString(int index) {
236
    ASSERT(index >= 0 && index < kMaxNumAllocatableRegisters);
237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256
    const char* const names[] = {
      "xmm1",
      "xmm2",
      "xmm3",
      "xmm4",
      "xmm5",
      "xmm6",
      "xmm7",
      "xmm8",
      "xmm9",
      "xmm10",
      "xmm11",
      "xmm12",
      "xmm13",
      "xmm14",
      "xmm15"
    };
    return names[index];
  }

257 258
  static XMMRegister from_code(int code) {
    ASSERT(code >= 0);
259
    ASSERT(code < kMaxNumRegisters);
260 261 262
    XMMRegister r = { code };
    return r;
  }
263
  bool is_valid() const { return 0 <= code_ && code_ < kMaxNumRegisters; }
264
  bool is(XMMRegister reg) const { return code_ == reg.code_; }
265
  int code() const {
266 267 268 269
    ASSERT(is_valid());
    return code_;
  }

270 271 272 273 274 275 276 277 278 279 280
  // Return the high bit of the register code as a 0 or 1.  Used often
  // when constructing the REX prefix byte.
  int high_bit() const {
    return code_ >> 3;
  }
  // Return the 3 low bits of the register code.  Used when encoding registers
  // in modR/M, SIB, and opcode bytes.
  int low_bits() const {
    return code_ & 0x7;
  }

281 282 283
  int code_;
};

284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299
// The sixteen XMM registers; the initializer is the register code.
// xmm0 is the one register without an allocation index (see
// XMMRegister::ToAllocationIndex).
const XMMRegister xmm0 = { 0 };
const XMMRegister xmm1 = { 1 };
const XMMRegister xmm2 = { 2 };
const XMMRegister xmm3 = { 3 };
const XMMRegister xmm4 = { 4 };
const XMMRegister xmm5 = { 5 };
const XMMRegister xmm6 = { 6 };
const XMMRegister xmm7 = { 7 };
const XMMRegister xmm8 = { 8 };
const XMMRegister xmm9 = { 9 };
const XMMRegister xmm10 = { 10 };
const XMMRegister xmm11 = { 11 };
const XMMRegister xmm12 = { 12 };
const XMMRegister xmm13 = { 13 };
const XMMRegister xmm14 = { 14 };
const XMMRegister xmm15 = { 15 };
300

301 302 303 304

// DoubleRegister is an alias for XMMRegister.
typedef XMMRegister DoubleRegister;


305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325
// Condition codes. Values 0 through 15 are real conditions and are combined
// directly into opcode bytes (e.g. kJccShortPrefix | cc in Assembler).
enum Condition {
  // Any value < 0 is considered no_condition.
  no_condition = -1,

  overflow = 0,
  no_overflow = 1,
  below = 2,
  above_equal = 3,
  equal = 4,
  not_equal = 5,
  below_equal = 6,
  above = 7,
  negative = 8,
  positive = 9,
  parity_even = 10,
  parity_odd = 11,
  less = 12,
  greater_equal = 13,
  less_equal = 14,
  greater = 15,

  // Fake conditions that are handled by the opcodes using them.
  always = 16,
  never = 17,

  // Aliases for the conditions above.
  carry = below,
  not_carry = above_equal,
  zero = equal,
  not_zero = not_equal,
  sign = negative,
  not_sign = positive,

  // Largest real condition value.
  last_condition = greater
};

340 341 342 343 344

// Returns the equivalent of !cc.
// Negation of the default no_condition (-1) results in a non-default
// no_condition value (-2). As long as tests for no_condition check
// for condition < 0, this will work as expected.
345 346 347 348
inline Condition NegateCondition(Condition cc) {
  return static_cast<Condition>(cc ^ 1);
}

349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373

// Returns the condition that holds after the two operands of a comparison
// are swapped. Conditions that do not depend on operand order (and any value
// not listed below) are returned unchanged.
inline Condition ReverseCondition(Condition cc) {
  switch (cc) {
    // below/above family.
    case below:         return above;
    case above:         return below;
    case above_equal:   return below_equal;
    case below_equal:   return above_equal;
    // less/greater family.
    case less:          return greater;
    case greater:       return less;
    case greater_equal: return less_equal;
    case less_equal:    return greater_equal;
    default:            return cc;
  }
}

374

375 376 377 378 379
// -----------------------------------------------------------------------------
// Machine instruction Immediates

class Immediate BASE_EMBEDDED {
 public:
380
  explicit Immediate(int32_t value) : value_(value) {}
381 382

 private:
383
  int32_t value_;
384 385 386 387 388 389 390 391 392

  friend class Assembler;
};


// -----------------------------------------------------------------------------
// Machine instruction Operands

// Scale applied to the index register of a memory operand. The enumerator
// value is log2 of the multiplier.
enum ScaleFactor {
  times_1 = 0,
  times_2 = 1,
  times_4 = 2,
  times_8 = 3,

  // Convenience aliases.
  times_int_size = times_4,
  times_pointer_size = times_8
};


class Operand BASE_EMBEDDED {
 public:
  // [base + disp/r]
405
  Operand(Register base, int32_t disp);
406 407

  // [base + index*scale + disp/r]
408 409 410 411
  Operand(Register base,
          Register index,
          ScaleFactor scale,
          int32_t disp);
412 413

  // [index*scale + disp/r]
414 415 416
  Operand(Register index,
          ScaleFactor scale,
          int32_t disp);
417

418 419 420 421 422
  // Offset from existing memory operand.
  // Offset is added to existing displacement as 32-bit signed values and
  // this must not overflow.
  Operand(const Operand& base, int32_t offset);

423 424 425 426
  // Checks whether either base or index register is the given register.
  // Does not check the "reg" part of the Operand.
  bool AddressUsesRegister(Register reg) const;

lrn@chromium.org's avatar
lrn@chromium.org committed
427 428 429 430 431 432 433
  // Queries related to the size of the generated instruction.
  // Whether the generated instruction will have a REX prefix.
  bool requires_rex() const { return rex_ != 0; }
  // Size of the ModR/M, SIB and displacement parts of the generated
  // instruction.
  int operand_size() const { return len_; }

434 435
 private:
  byte rex_;
436
  byte buf_[6];
437 438
  // The number of bytes of buf_ in use.
  byte len_;
439

440
  // Set the ModR/M byte without an encoded 'reg' register. The
441
  // register is encoded later as part of the emit_operand operation.
442
  // set_modrm can be called before or after set_sib and set_disp*.
443 444
  inline void set_modrm(int mod, Register rm);

445
  // Set the SIB byte if one is needed. Sets the length to 2 rather than 1.
446 447
  inline void set_sib(ScaleFactor scale, Register index, Register base);

448 449 450 451
  // Adds operand displacement fields (offsets added to the memory address).
  // Needs to be called after set_sib, not before it.
  inline void set_disp8(int disp);
  inline void set_disp32(int disp);
452

453
  friend class Assembler;
454 455 456 457
};


// CpuFeatures keeps track of which features are supported by the target CPU.
458
// Supported features must be enabled by a CpuFeatureScope before use.
459
// Example:
460 461
//   if (assembler->IsSupported(SSE3)) {
//     CpuFeatureScope fscope(assembler, SSE3);
462
//     // Generate SSE3 floating point code.
463
//   } else {
464
//     // Generate standard SSE2 floating point code.
465
//   }
466
class CpuFeatures : public AllStatic {
467 468 469
 public:
  // Detect features of the target CPU. Set safe defaults if the serializer
  // is enabled (snapshots must be portable).
470
  static void Probe();
471

472
  // Check whether a feature is supported by the target CPU.
473 474
  static bool IsSupported(CpuFeature f) {
    ASSERT(initialized_);
475
    if (f == SSE3 && !FLAG_enable_sse3) return false;
476
    if (f == SSE4_1 && !FLAG_enable_sse4_1) return false;
477 478 479
    if (f == CMOV && !FLAG_enable_cmov) return false;
    if (f == RDTSC && !FLAG_enable_rdtsc) return false;
    if (f == SAHF && !FLAG_enable_sahf) return false;
480
    return (supported_ & (static_cast<uint64_t>(1) << f)) != 0;
481
  }
482

483
  static bool IsFoundByRuntimeProbingOnly(CpuFeature f) {
484
    ASSERT(initialized_);
485 486
    return (found_by_runtime_probing_only_ &
            (static_cast<uint64_t>(1) << f)) != 0;
487
  }
488

489 490 491 492 493
  static bool IsSafeForSnapshot(CpuFeature f) {
    return (IsSupported(f) &&
            (!Serializer::enabled() || !IsFoundByRuntimeProbingOnly(f)));
  }

494
 private:
495
  // Safe defaults include CMOV for X64. It is always available, if
496
  // anyone checks, but they shouldn't need to check.
497 498
  // The required user mode extensions in X64 are (from AMD64 ABI Table A.1):
  //   fpu, tsc, cx8, cmov, mmx, sse, sse2, fxsr, syscall
499
  static const uint64_t kDefaultCpuFeatures = (1 << CMOV);
500

501 502 503 504
#ifdef DEBUG
  static bool initialized_;
#endif
  static uint64_t supported_;
505
  static uint64_t found_by_runtime_probing_only_;
506

507
  friend class ExternalReference;
508
  DISALLOW_COPY_AND_ASSIGN(CpuFeatures);
509 510 511
};


512
class Assembler : public AssemblerBase {
513
 private:
514 515 516
  // We check before assembling an instruction that there is sufficient
  // space to write an instruction and its relocation information.
  // The relocation writer's position must be kGap bytes above the end of
517
  // the generated instructions. This leaves enough space for the
518 519 520 521 522
  // longest possible x64 instruction, 15 bytes, and the longest possible
  // relocation information encoding, RelocInfoWriter::kMaxLength == 16.
  // (There is a 15 byte limit on x64 instruction length that rules out some
  // otherwise valid instructions.)
  // This allows for a single, fast space check per instruction.
523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538
  static const int kGap = 32;

 public:
  // Create an assembler. Instructions and relocation information are emitted
  // into a buffer, with the instructions starting from the beginning and the
  // relocation information starting from the end of the buffer. See CodeDesc
  // for a detailed comment on the layout (globals.h).
  //
  // If the provided buffer is NULL, the assembler allocates and grows its own
  // buffer, and buffer_size determines the initial buffer size. The buffer is
  // owned by the assembler and deallocated upon destruction of the assembler.
  //
  // If the provided buffer is not NULL, the assembler uses the provided buffer
  // for code generation and assumes its size to be buffer_size. If the buffer
  // is too small, a fatal error occurs. No deallocation of the buffer is done
  // upon destruction of the assembler.
539
  Assembler(Isolate* isolate, void* buffer, int buffer_size);
540
  // Virtual destructor with an empty body; nothing is released here.
  // NOTE(review): buffer deallocation presumably happens in a base class or
  // member destructor -- confirm.
  virtual ~Assembler() { }
541 542 543 544 545 546

  // GetCode emits any pending (non-emitted) code and fills the descriptor
  // desc. GetCode() is idempotent; it returns the same result if no other
  // Assembler functions are invoked in between GetCode() calls.
  void GetCode(CodeDesc* desc);

547 548 549 550 551 552 553
  // Read/Modify the code target in the relative branch/call instruction at pc.
  // On the x64 architecture, we use relative jumps with a 32-bit displacement
  // to jump to other Code objects in the Code space in the heap.
  // Jumps to C functions are done indirectly through a 64-bit register holding
  // the absolute address of the target.
  // These functions convert between absolute Addresses of Code objects and
  // the relative displacements stored in the code.
554 555
  static inline Address target_address_at(Address pc);
  static inline void set_target_address_at(Address pc, Address target);
556

557 558 559 560
  // Return the code target address at a call site from the return address
  // of that call in the instruction stream.
  static inline Address target_address_from_return_address(Address pc);

561
  // This sets the branch destination (which is in the instruction on x64).
562
  // This is for calls and branches within generated code.
563 564
  inline static void deserialization_set_special_target_at(
      Address instruction_payload, Address target) {
565 566
    set_target_address_at(instruction_payload, target);
  }
567 568 569 570 571 572 573 574

  // This sets the branch destination (which is a load instruction on x64).
  // This is for calls and branches to runtime code.
  // The full pointer-sized target is stored at instruction_payload.
  inline static void set_external_target_at(Address instruction_payload,
                                            Address target) {
    Address* const target_slot =
        reinterpret_cast<Address*>(instruction_payload);
    *target_slot = target;
  }

575
  inline Handle<Object> code_target_object_handle_at(Address pc);
576
  inline Address runtime_entry_at(Address pc);
577
  // Number of bytes taken up by the branch target in the code.
578
  static const int kSpecialTargetSize = 4;  // Use 32-bit displacement.
579
  // Distance between the address of the code target in the call instruction
580 581
  // and the return address pushed on the stack.
  static const int kCallTargetAddressOffset = 4;  // Use 32-bit displacement.
582 583 584
  // The length of call(kScratchRegister).
  static const int kCallScratchRegisterInstructionLength = 3;
  // The length of call(Immediate32).
585
  static const int kShortCallInstructionLength = 5;
586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611
  // The length of movq(kScratchRegister, address).
  static const int kMoveAddressIntoScratchRegisterInstructionLength =
      2 + kPointerSize;
  // The length of movq(kScratchRegister, address) and call(kScratchRegister).
  static const int kCallSequenceLength =
      kMoveAddressIntoScratchRegisterInstructionLength +
      kCallScratchRegisterInstructionLength;

  // The js return and debug break slot must be able to contain an indirect
  // call sequence, some x64 JS code is padded with int3 to make it large
  // enough to hold an instruction when the debugger patches it.
  static const int kJSReturnSequenceLength = kCallSequenceLength;
  static const int kDebugBreakSlotLength = kCallSequenceLength;
  static const int kPatchDebugBreakSlotReturnOffset = kCallTargetAddressOffset;
  // Distance between the start of the JS return sequence and where the
  // 32-bit displacement of a short call would be. The short call is from
  // SetDebugBreakAtIC from debug-x64.cc.
  static const int kPatchReturnSequenceAddressOffset =
      kJSReturnSequenceLength - kPatchDebugBreakSlotReturnOffset;
  // Distance between the start of the debug break slot and where the
  // 32-bit displacement of a short call would be. The short call is from
  // SetDebugBreakAtIC from debug-x64.cc.
  static const int kPatchDebugBreakSlotAddressOffset =
      kDebugBreakSlotLength - kPatchDebugBreakSlotReturnOffset;
  static const int kRealPatchReturnSequenceAddressOffset =
      kMoveAddressIntoScratchRegisterInstructionLength - kPointerSize;
612

613 614
  // One byte opcode for test eax,0xXXXXXXXX.
  static const byte kTestEaxByte = 0xA9;
615 616
  // One byte opcode for test al, 0xXX.
  static const byte kTestAlByte = 0xA8;
617 618 619 620 621 622 623
  // One byte opcode for nop.
  static const byte kNopByte = 0x90;

  // One byte prefix for a short conditional jump.
  static const byte kJccShortPrefix = 0x70;
  static const byte kJncShortOpcode = kJccShortPrefix | not_carry;
  static const byte kJcShortOpcode = kJccShortPrefix | carry;
624 625
  static const byte kJnzShortOpcode = kJccShortPrefix | not_zero;
  static const byte kJzShortOpcode = kJccShortPrefix | zero;
626

627

628 629 630
  // ---------------------------------------------------------------------------
  // Code generation
  //
631 632 633 634 635
  // Function names correspond one-to-one to x64 instruction mnemonics.
  // Unless specified otherwise, instructions operate on 64-bit operands.
  //
  // If we need versions of an assembly instruction that operate on different
  // width arguments, we add a single-letter suffix specifying the width.
636 637
  // This is done for the following instructions: mov, cmp, inc, dec,
  // add, sub, and test.
638 639 640 641 642 643 644 645
  // There are no versions of these instructions without the suffix.
  // - Instructions on 8-bit (byte) operands/registers have a trailing 'b'.
  // - Instructions on 16-bit (word) operands/registers have a trailing 'w'.
  // - Instructions on 32-bit (doubleword) operands/registers use 'l'.
  // - Instructions on 64-bit (quadword) operands/registers use 'q'.
  //
  // Some mnemonics, such as "and", are the same as C++ keywords.
  // Naming conflicts with C++ keywords are resolved by adding a trailing '_'.
646 647 648

  // Insert the smallest number of nop instructions
  // possible to align the pc offset to a multiple
649
  // of m, where m must be a power of 2.
650
  void Align(int m);
651
  void Nop(int bytes = 1);
652 653
  // Aligns code to something that's optimal for a jump target for the platform.
  void CodeTargetAlign();
654 655

  // Stack
656 657
  void pushfq();
  void popfq();
658

659
  void push(Immediate value);
660 661 662
  // Push a 32 bit integer, and guarantee that it is actually pushed as a
  // 32 bit value, the normal push will optimize the 8 bit case.
  void push_imm32(int32_t imm32);
663 664 665 666 667 668
  void push(Register src);
  void push(const Operand& src);

  void pop(Register dst);
  void pop(const Operand& dst);

669
  void enter(Immediate size);
670 671 672
  void leave();

  // Moves
673
  void movb(Register dst, const Operand& src);
674
  void movb(Register dst, Immediate imm);
675 676
  void movb(const Operand& dst, Register src);

677 678 679 680
  // Move the low 16 bits of a 64-bit register value to a 16-bit
  // memory location.
  void movw(const Operand& dst, Register src);

681 682 683
  void movl(Register dst, Register src);
  void movl(Register dst, const Operand& src);
  void movl(const Operand& dst, Register src);
684
  void movl(const Operand& dst, Immediate imm);
685 686
  // Load a 32-bit immediate value, zero-extended to 64 bits.
  void movl(Register dst, Immediate imm32);
687

688 689 690
  // Move 64 bit register value to 64-bit memory location.
  void movq(const Operand& dst, Register src);
  // Move 64 bit memory location to 64-bit register value.
691
  void movq(Register dst, const Operand& src);
692
  void movq(Register dst, Register src);
693 694
  // Sign extends immediate 32-bit value to 64 bits.
  void movq(Register dst, Immediate x);
695 696 697
  // Move the offset of the label location relative to the current
  // position (after the move) to the destination.
  void movl(const Operand& dst, Label* src);
698

699 700
  // Move sign extended immediate to memory location.
  void movq(const Operand& dst, Immediate value);
701
  // Instructions to load a 64-bit immediate into a register.
702 703 704 705
  // All 64-bit immediates must have a relocation mode.
  void movq(Register dst, void* ptr, RelocInfo::Mode rmode);
  void movq(Register dst, int64_t value, RelocInfo::Mode rmode);
  void movq(Register dst, const char* s, RelocInfo::Mode rmode);
lrn@chromium.org's avatar
lrn@chromium.org committed
706 707
  // Moves the address of the external reference into the register.
  void movq(Register dst, ExternalReference ext);
708 709
  void movq(Register dst, Handle<Object> handle, RelocInfo::Mode rmode);

710 711
  void movsxbq(Register dst, const Operand& src);
  void movsxwq(Register dst, const Operand& src);
712
  void movsxlq(Register dst, Register src);
713
  void movsxlq(Register dst, const Operand& src);
714
  void movzxbq(Register dst, const Operand& src);
715
  void movzxbl(Register dst, const Operand& src);
716
  void movzxwq(Register dst, const Operand& src);
717
  void movzxwl(Register dst, const Operand& src);
718
  void movzxwl(Register dst, Register src);
719

720 721 722 723 724 725 726
  // Repeated moves.

  void repmovsb();
  void repmovsw();
  void repmovsl();
  void repmovsq();

727
  // Instruction to load from an immediate 64-bit pointer into RAX.
728
  void load_rax(void* ptr, RelocInfo::Mode rmode);
lrn@chromium.org's avatar
lrn@chromium.org committed
729
  void load_rax(ExternalReference ext);
730

731 732 733 734 735
  // Conditional moves.
  void cmovq(Condition cc, Register dst, Register src);
  void cmovq(Condition cc, Register dst, const Operand& src);
  void cmovl(Condition cc, Register dst, Register src);
  void cmovl(Condition cc, Register dst, const Operand& src);
736 737 738 739 740

  // Exchange two registers
  void xchg(Register dst, Register src);

  // Arithmetics
741
  void addl(Register dst, Register src) {
742
    arithmetic_op_32(0x03, dst, src);
743 744
  }

745 746 747 748
  void addl(Register dst, Immediate src) {
    immediate_arithmetic_op_32(0x0, dst, src);
  }

749 750 751 752
  void addl(Register dst, const Operand& src) {
    arithmetic_op_32(0x03, dst, src);
  }

753 754 755 756
  void addl(const Operand& dst, Immediate src) {
    immediate_arithmetic_op_32(0x0, dst, src);
  }

757 758 759 760
  void addl(const Operand& dst, Register src) {
    arithmetic_op_32(0x01, src, dst);
  }

761
  void addq(Register dst, Register src) {
762 763 764
    arithmetic_op(0x03, dst, src);
  }

765 766 767
  void addq(Register dst, const Operand& src) {
    arithmetic_op(0x03, dst, src);
  }
768

769
  void addq(const Operand& dst, Register src) {
770 771 772
    arithmetic_op(0x01, src, dst);
  }

773
  void addq(Register dst, Immediate src) {
774 775 776
    immediate_arithmetic_op(0x0, dst, src);
  }

777
  void addq(const Operand& dst, Immediate src) {
778 779 780
    immediate_arithmetic_op(0x0, dst, src);
  }

781
  // 32-bit subtract with borrow, register from register: emitted through
  // arithmetic_op_32 with opcode 0x1b.
  void sbbl(Register dst, Register src) {
    arithmetic_op_32(0x1b, dst, src);
  }

785 786 787 788
  // 64-bit subtract with borrow, register from register: emitted through
  // arithmetic_op with opcode 0x1b.
  void sbbq(Register dst, Register src) {
    arithmetic_op(0x1b, dst, src);
  }

789 790 791 792
  void cmpb(Register dst, Immediate src) {
    immediate_arithmetic_op_8(0x7, dst, src);
  }

793 794 795 796 797 798 799 800 801 802 803 804 805 806
  void cmpb_al(Immediate src);

  void cmpb(Register dst, Register src) {
    arithmetic_op(0x3A, dst, src);
  }

  void cmpb(Register dst, const Operand& src) {
    arithmetic_op(0x3A, dst, src);
  }

  void cmpb(const Operand& dst, Register src) {
    arithmetic_op(0x38, src, dst);
  }

807 808 809 810
  void cmpb(const Operand& dst, Immediate src) {
    immediate_arithmetic_op_8(0x7, dst, src);
  }

811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830
  // 16-bit compare of dst with src.
  // Immediate forms go through immediate_arithmetic_op_16 with subcode 0x7.
  // Register and memory forms go through arithmetic_op_16: opcode 0x3B when
  // the first operand is a register, opcode 0x39 (with the arguments swapped)
  // when it is a memory operand.

  // Memory vs. immediate.
  void cmpw(const Operand& dst, Immediate src) {
    immediate_arithmetic_op_16(0x7, dst, src);
  }

  // Register vs. immediate.
  void cmpw(Register dst, Immediate src) {
    immediate_arithmetic_op_16(0x7, dst, src);
  }

  // Register vs. memory.
  void cmpw(Register dst, const Operand& src) {
    arithmetic_op_16(0x3B, dst, src);
  }

  // Register vs. register.
  void cmpw(Register dst, Register src) {
    arithmetic_op_16(0x3B, dst, src);
  }

  // Memory vs. register.
  void cmpw(const Operand& dst, Register src) {
    arithmetic_op_16(0x39, src, dst);
  }

831 832 833 834
  void cmpl(Register dst, Register src) {
    arithmetic_op_32(0x3B, dst, src);
  }

835
  void cmpl(Register dst, const Operand& src) {
836
    arithmetic_op_32(0x3B, dst, src);
837 838 839
  }

  void cmpl(const Operand& dst, Register src) {
840
    arithmetic_op_32(0x39, src, dst);
841 842
  }

843 844 845 846
  // 32-bit CMP dst, imm (immediate group subcode /7).
  void cmpl(Register dst, Immediate src) {
    const byte kCmpSubcode = 0x7;
    immediate_arithmetic_op_32(kCmpSubcode, dst, src);
  }

847 848 849 850
  void cmpl(const Operand& dst, Immediate src) {
    immediate_arithmetic_op_32(0x7, dst, src);
  }

851
  // 64-bit CMP dst, src (opcode 0x3B: CMP r, r/m).
  void cmpq(Register dst, Register src) {
    const byte kCmpRegRm = 0x3B;
    arithmetic_op(kCmpRegRm, dst, src);
  }

855
  // 64-bit CMP dst, [src] (opcode 0x3B: CMP r, r/m).
  void cmpq(Register dst, const Operand& src) {
    const byte kCmpRegRm = 0x3B;
    arithmetic_op(kCmpRegRm, dst, src);
  }

859
  void cmpq(const Operand& dst, Register src) {
860 861 862
    arithmetic_op(0x39, src, dst);
  }

863
  // 64-bit CMP dst, imm (immediate group subcode /7).
  void cmpq(Register dst, Immediate src) {
    const byte kCmpSubcode = 0x7;
    immediate_arithmetic_op(kCmpSubcode, dst, src);
  }

867
  void cmpq(const Operand& dst, Immediate src) {
868 869
    immediate_arithmetic_op(0x7, dst, src);
  }
870

871 872 873 874 875 876 877 878 879 880 881 882 883 884 885
  // AND dst, src via the un-suffixed arithmetic_op helper
  // (opcode 0x23: AND r, r/m).
  void and_(Register dst, Register src) {
    const byte kAndRegRm = 0x23;
    arithmetic_op(kAndRegRm, dst, src);
  }

  // AND dst, [src] via the un-suffixed arithmetic_op helper
  // (opcode 0x23: AND r, r/m).
  void and_(Register dst, const Operand& src) {
    const byte kAndRegRm = 0x23;
    arithmetic_op(kAndRegRm, dst, src);
  }

  void and_(const Operand& dst, Register src) {
    arithmetic_op(0x21, src, dst);
  }

  // AND dst, imm (immediate group subcode /4).
  void and_(Register dst, Immediate src) {
    const byte kAndSubcode = 0x4;
    immediate_arithmetic_op(kAndSubcode, dst, src);
  }
886

887 888 889
  void and_(const Operand& dst, Immediate src) {
    immediate_arithmetic_op(0x4, dst, src);
  }
890

891 892 893 894
  // 32-bit AND dst, imm (immediate group subcode /4).
  void andl(Register dst, Immediate src) {
    const byte kAndSubcode = 0x4;
    immediate_arithmetic_op_32(kAndSubcode, dst, src);
  }

895 896 897 898
  // 32-bit AND dst, src (opcode 0x23: AND r, r/m).
  void andl(Register dst, Register src) {
    const byte kAndRegRm = 0x23;
    arithmetic_op_32(kAndRegRm, dst, src);
  }

899 900 901 902
  // 32-bit AND dst, [src] (opcode 0x23: AND r, r/m).
  void andl(Register dst, const Operand& src) {
    const byte kAndRegRm = 0x23;
    arithmetic_op_32(kAndRegRm, dst, src);
  }

903 904 905
  // Byte-sized AND dst, imm (immediate group subcode /4).
  void andb(Register dst, Immediate src) {
    const byte kAndSubcode = 0x4;
    immediate_arithmetic_op_8(kAndSubcode, dst, src);
  }
906

907 908
  void decq(Register dst);
  void decq(const Operand& dst);
909
  void decl(Register dst);
910
  void decl(const Operand& dst);
911 912
  void decb(Register dst);
  void decb(const Operand& dst);
913

914 915
  // Sign-extends rax into rdx:rax.
  void cqo();
916 917
  // Sign-extends eax into edx:eax.
  void cdq();
918

919
  // Divide rdx:rax by src.  Quotient in rax, remainder in rdx.
920 921 922
  void idivq(Register src);
  // Divide edx:eax by lower 32 bits of src.  Quotient in eax, rem. in edx.
  void idivl(Register src);
923

924 925 926 927 928
  // Signed multiply instructions.
  void imul(Register src);                               // rdx:rax = rax * src.
  void imul(Register dst, Register src);                 // dst = dst * src.
  void imul(Register dst, const Operand& src);           // dst = dst * src.
  void imul(Register dst, Register src, Immediate imm);  // dst = src * imm.
929
  // Signed 32-bit multiply instructions.
930
  void imull(Register dst, Register src);                 // dst = dst * src.
931
  void imull(Register dst, const Operand& src);           // dst = dst * src.
932
  void imull(Register dst, Register src, Immediate imm);  // dst = src * imm.
933

934 935
  void incq(Register dst);
  void incq(const Operand& dst);
936
  void incl(Register dst);
937
  void incl(const Operand& dst);
938 939

  void lea(Register dst, const Operand& src);
940
  void leal(Register dst, const Operand& src);
941

942
  // Multiply rax by src, put the result in rdx:rax.
943 944 945
  void mul(Register src);

  void neg(Register dst);
946
  void neg(const Operand& dst);
947
  void negl(Register dst);
948 949

  void not_(Register dst);
950
  void not_(const Operand& dst);
951
  void notl(Register dst);
952

953 954 955 956
  // OR dst, src via the un-suffixed arithmetic_op helper
  // (opcode 0x0B: OR r, r/m).
  void or_(Register dst, Register src) {
    const byte kOrRegRm = 0x0B;
    arithmetic_op(kOrRegRm, dst, src);
  }

957 958 959 960
  // 32-bit OR dst, src (opcode 0x0B: OR r, r/m).
  void orl(Register dst, Register src) {
    const byte kOrRegRm = 0x0B;
    arithmetic_op_32(kOrRegRm, dst, src);
  }

961 962 963 964
  // OR dst, [src] via the un-suffixed arithmetic_op helper
  // (opcode 0x0B: OR r, r/m).
  void or_(Register dst, const Operand& src) {
    const byte kOrRegRm = 0x0B;
    arithmetic_op(kOrRegRm, dst, src);
  }

965 966 967 968
  // 32-bit OR dst, [src] (opcode 0x0B: OR r, r/m).
  void orl(Register dst, const Operand& src) {
    const byte kOrRegRm = 0x0B;
    arithmetic_op_32(kOrRegRm, dst, src);
  }

969 970 971 972 973 974 975 976
  void or_(const Operand& dst, Register src) {
    arithmetic_op(0x09, src, dst);
  }

  // OR dst, imm (immediate group subcode /1).
  void or_(Register dst, Immediate src) {
    const byte kOrSubcode = 0x1;
    immediate_arithmetic_op(kOrSubcode, dst, src);
  }

977 978 979 980
  // 32-bit OR dst, imm (immediate group subcode /1).
  void orl(Register dst, Immediate src) {
    const byte kOrSubcode = 0x1;
    immediate_arithmetic_op_32(kOrSubcode, dst, src);
  }

981 982 983 984
  void or_(const Operand& dst, Immediate src) {
    immediate_arithmetic_op(0x1, dst, src);
  }

985 986 987 988
  void orl(const Operand& dst, Immediate src) {
    immediate_arithmetic_op_32(0x1, dst, src);
  }

989

990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004
  // Rotates dst left through the carry flag by imm8 bits (shift subcode /2).
  void rcl(Register dst, Immediate imm8) {
    const int kRclSubcode = 0x2;
    shift(dst, imm8, kRclSubcode);
  }

  // Rotates dst left by imm8 bits (shift subcode /0).
  void rol(Register dst, Immediate imm8) {
    const int kRolSubcode = 0x0;
    shift(dst, imm8, kRolSubcode);
  }

  // Rotates dst right through the carry flag by imm8 bits (shift subcode /3).
  void rcr(Register dst, Immediate imm8) {
    const int kRcrSubcode = 0x3;
    shift(dst, imm8, kRcrSubcode);
  }

  // Rotates dst right by imm8 bits (shift subcode /1).
  void ror(Register dst, Immediate imm8) {
    const int kRorSubcode = 0x1;
    shift(dst, imm8, kRorSubcode);
  }
1005

1006 1007 1008 1009 1010 1011 1012 1013
  // 32-bit rotate of dst right by imm8 bits (shift subcode /1).
  void rorl(Register dst, Immediate imm8) {
    const int kRorSubcode = 0x1;
    shift_32(dst, imm8, kRorSubcode);
  }

  // 32-bit rotate of dst right by the count held in cl (shift subcode /1).
  void rorl_cl(Register dst) {
    const int kRorSubcode = 0x1;
    shift_32(dst, kRorSubcode);
  }

1014 1015
  // Shifts dst:src left by cl bits, affecting only dst.
  void shld(Register dst, Register src);
1016

1017 1018
  // Shifts src:dst right by cl bits, affecting only dst.
  void shrd(Register dst, Register src);
1019

1020 1021 1022 1023 1024 1025
  // Arithmetic right shift: moves dst right by shift_amount bits while
  // replicating the sign bit (shift subcode /7).
  // A shift amount of 1 is handled efficiently.
  void sar(Register dst, Immediate shift_amount) {
    const int kSarSubcode = 0x7;
    shift(dst, shift_amount, kSarSubcode);
  }

1026 1027 1028 1029 1030 1031
  // 32-bit arithmetic right shift: moves dst right by shift_amount bits
  // while replicating the sign bit (shift subcode /7).
  // A shift amount of 1 is handled efficiently.
  void sarl(Register dst, Immediate shift_amount) {
    const int kSarSubcode = 0x7;
    shift_32(dst, shift_amount, kSarSubcode);
  }

1032
  // Shifts dst right, duplicating sign bit, by cl % 64 bits.
1033
  void sar_cl(Register dst) {
1034 1035 1036
    shift(dst, 0x7);
  }

1037
  // Shifts dst right, duplicating sign bit, by cl % 64 bits.
1038
  void sarl_cl(Register dst) {
1039 1040 1041
    shift_32(dst, 0x7);
  }

1042 1043 1044 1045
  // Shifts dst left by shift_amount bits (shift subcode /4).
  void shl(Register dst, Immediate shift_amount) {
    const int kShlSubcode = 0x4;
    shift(dst, shift_amount, kShlSubcode);
  }

1046
  // Shifts dst left by the count held in cl (shift subcode /4).
  void shl_cl(Register dst) {
    const int kShlSubcode = 0x4;
    shift(dst, kShlSubcode);
  }

1050
  // 32-bit left shift of dst by the count held in cl (shift subcode /4).
  void shll_cl(Register dst) {
    const int kShlSubcode = 0x4;
    shift_32(dst, kShlSubcode);
  }

1054 1055 1056 1057
  // 32-bit left shift of dst by shift_amount bits (shift subcode /4).
  void shll(Register dst, Immediate shift_amount) {
    const int kShlSubcode = 0x4;
    shift_32(dst, shift_amount, kShlSubcode);
  }

1058 1059 1060 1061
  // Logical right shift of dst by shift_amount bits (shift subcode /5).
  void shr(Register dst, Immediate shift_amount) {
    const int kShrSubcode = 0x5;
    shift(dst, shift_amount, kShrSubcode);
  }

1062
  // Logical right shift of dst by the count held in cl (shift subcode /5).
  void shr_cl(Register dst) {
    const int kShrSubcode = 0x5;
    shift(dst, kShrSubcode);
  }
1065

1066
  // 32-bit logical right shift of dst by the count held in cl
  // (shift subcode /5).
  void shrl_cl(Register dst) {
    const int kShrSubcode = 0x5;
    shift_32(dst, kShrSubcode);
  }

1070 1071 1072 1073
  // 32-bit logical right shift of dst by shift_amount bits
  // (shift subcode /5).
  void shrl(Register dst, Immediate shift_amount) {
    const int kShrSubcode = 0x5;
    shift_32(dst, shift_amount, kShrSubcode);
  }

lrn@chromium.org's avatar
lrn@chromium.org committed
1074 1075 1076
  void store_rax(void* dst, RelocInfo::Mode mode);
  void store_rax(ExternalReference ref);

1077
  // 64-bit SUB dst, src (opcode 0x2B: SUB r, r/m).
  void subq(Register dst, Register src) {
    const byte kSubRegRm = 0x2B;
    arithmetic_op(kSubRegRm, dst, src);
  }

1081
  // 64-bit SUB dst, [src] (opcode 0x2B: SUB r, r/m).
  void subq(Register dst, const Operand& src) {
    const byte kSubRegRm = 0x2B;
    arithmetic_op(kSubRegRm, dst, src);
  }

1085
  void subq(const Operand& dst, Register src) {
1086 1087 1088
    arithmetic_op(0x29, src, dst);
  }

1089
  // 64-bit SUB dst, imm (immediate group subcode /5).
  void subq(Register dst, Immediate src) {
    const byte kSubSubcode = 0x5;
    immediate_arithmetic_op(kSubSubcode, dst, src);
  }

1093
  void subq(const Operand& dst, Immediate src) {
1094 1095 1096
    immediate_arithmetic_op(0x5, dst, src);
  }

1097 1098 1099 1100
  // 32-bit SUB dst, src (opcode 0x2B: SUB r, r/m).
  void subl(Register dst, Register src) {
    const byte kSubRegRm = 0x2B;
    arithmetic_op_32(kSubRegRm, dst, src);
  }

1101 1102 1103 1104
  // 32-bit SUB dst, [src] (opcode 0x2B: SUB r, r/m).
  void subl(Register dst, const Operand& src) {
    const byte kSubRegRm = 0x2B;
    arithmetic_op_32(kSubRegRm, dst, src);
  }

1105 1106 1107 1108
  void subl(const Operand& dst, Immediate src) {
    immediate_arithmetic_op_32(0x5, dst, src);
  }

1109 1110 1111 1112
  // 32-bit SUB dst, imm (immediate group subcode /5).
  void subl(Register dst, Immediate src) {
    const byte kSubSubcode = 0x5;
    immediate_arithmetic_op_32(kSubSubcode, dst, src);
  }

1113 1114 1115 1116
  // Byte-sized SUB dst, imm (immediate group subcode /5).
  void subb(Register dst, Immediate src) {
    const byte kSubSubcode = 0x5;
    immediate_arithmetic_op_8(kSubSubcode, dst, src);
  }

1117
  void testb(Register dst, Register src);
1118 1119
  void testb(Register reg, Immediate mask);
  void testb(const Operand& op, Immediate mask);
1120
  void testb(const Operand& op, Register reg);
1121
  void testl(Register dst, Register src);
1122 1123
  void testl(Register reg, Immediate mask);
  void testl(const Operand& op, Immediate mask);
1124 1125
  void testq(const Operand& op, Register reg);
  void testq(Register dst, Register src);
1126
  void testq(Register dst, Immediate mask);
1127

1128
  // XOR dst, src (opcode 0x33: XOR r, r/m).  When both operands name the
  // same register the 32-bit encoding is emitted instead of the full-width
  // one.
  // NOTE(review): presumably safe because a 32-bit write zero-extends into
  // the upper half on x64, so clearing a register yields the same value
  // either way -- confirm.
  void xor_(Register dst, Register src) {
    const byte kXorRegRm = 0x33;
    const bool same_register = (dst.code() == src.code());
    if (!same_register) {
      arithmetic_op(kXorRegRm, dst, src);
      return;
    }
    arithmetic_op_32(kXorRegRm, dst, src);
  }

1136 1137 1138 1139
  // 32-bit XOR dst, src (opcode 0x33: XOR r, r/m).
  void xorl(Register dst, Register src) {
    const byte kXorRegRm = 0x33;
    arithmetic_op_32(kXorRegRm, dst, src);
  }

1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151
  // 32-bit XOR dst, [src] (opcode 0x33: XOR r, r/m).
  void xorl(Register dst, const Operand& src) {
    const byte kXorRegRm = 0x33;
    arithmetic_op_32(kXorRegRm, dst, src);
  }

  // 32-bit XOR dst, imm (immediate group subcode /6).
  void xorl(Register dst, Immediate src) {
    const byte kXorSubcode = 0x6;
    immediate_arithmetic_op_32(kXorSubcode, dst, src);
  }

  void xorl(const Operand& dst, Immediate src) {
    immediate_arithmetic_op_32(0x6, dst, src);
  }

1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167
  // XOR dst, [src] via the un-suffixed arithmetic_op helper
  // (opcode 0x33: XOR r, r/m).
  void xor_(Register dst, const Operand& src) {
    const byte kXorRegRm = 0x33;
    arithmetic_op(kXorRegRm, dst, src);
  }

  void xor_(const Operand& dst, Register src) {
    arithmetic_op(0x31, src, dst);
  }

  // XOR dst, imm (immediate group subcode /6).
  void xor_(Register dst, Immediate src) {
    const byte kXorSubcode = 0x6;
    immediate_arithmetic_op(kXorSubcode, dst, src);
  }

  void xor_(const Operand& dst, Immediate src) {
    immediate_arithmetic_op(0x6, dst, src);
  }

1168 1169 1170 1171 1172
  // Bit operations.
  void bt(const Operand& dst, Register src);
  void bts(const Operand& dst, Register src);

  // Miscellaneous
1173
  void clc();
1174
  void cld();
1175
  void cpuid();
1176 1177 1178 1179 1180
  void hlt();
  void int3();
  void nop();
  void rdtsc();
  void ret(int imm16);
1181
  void setcc(Condition cc, Register reg);
1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200

  // Label operations & relative jumps (PPUM Appendix D)
  //
  // Takes a branch opcode (cc) and a label (L) and generates
  // either a backward branch or a forward branch and links it
  // to the label fixup chain. Usage:
  //
  // Label L;    // unbound label
  // j(cc, &L);  // forward branch to unbound label
  // bind(&L);   // bind label to the current pc
  // j(cc, &L);  // backward branch to bound label
  // bind(&L);   // illegal: a label may be bound only once
  //
  // Note: The same Label can be used for forward and backward branches
  // but it may be bound only once.

  void bind(Label* L);  // binds an unbound label L to the current code position

  // Calls
1201
  // Call near relative 32-bit displacement, relative to next instruction.
1202
  void call(Label* L);
1203
  void call(Address entry, RelocInfo::Mode rmode);
1204
  void call(Handle<Code> target,
1205
            RelocInfo::Mode rmode = RelocInfo::CODE_TARGET,
1206
            TypeFeedbackId ast_id = TypeFeedbackId::None());
1207

1208 1209 1210 1211 1212 1213
  // Calls directly to the given address using a relative offset.
  // Should only ever be used in Code objects for calls within the
  // same Code object. Should not be used when generating new code (use labels),
  // but only when patching existing code.
  void call(Address target);

1214 1215
  // Call near absolute indirect, address in register
  void call(Register adr);
1216

lrn@chromium.org's avatar
lrn@chromium.org committed
1217 1218 1219
  // Call near indirect
  void call(const Operand& operand);

1220
  // Jumps
1221
  // Jump short or near relative.
1222
  // Use a 32-bit signed displacement.
1223 1224
  // Unconditional jump to L
  void jmp(Label* L, Label::Distance distance = Label::kFar);
1225
  void jmp(Address entry, RelocInfo::Mode rmode);
1226
  void jmp(Handle<Code> target, RelocInfo::Mode rmode);
1227 1228 1229

  // Jump near absolute indirect (r64)
  void jmp(Register adr);
1230

1231 1232 1233
  // Jump near absolute indirect (m64)
  void jmp(const Operand& src);

1234
  // Conditional jumps
1235 1236 1237
  void j(Condition cc,
         Label* L,
         Label::Distance distance = Label::kFar);
1238
  void j(Condition cc, Address entry, RelocInfo::Mode rmode);
1239
  void j(Condition cc, Handle<Code> target, RelocInfo::Mode rmode);
1240 1241 1242 1243 1244 1245

  // Floating-point operations
  void fld(int i);

  void fld1();
  void fldz();
1246
  void fldpi();
1247
  void fldln2();
1248 1249 1250 1251 1252 1253

  void fld_s(const Operand& adr);
  void fld_d(const Operand& adr);

  void fstp_s(const Operand& adr);
  void fstp_d(const Operand& adr);
1254
  void fstp(int index);
1255 1256 1257 1258 1259 1260 1261 1262 1263 1264

  void fild_s(const Operand& adr);
  void fild_d(const Operand& adr);

  void fist_s(const Operand& adr);

  void fistp_s(const Operand& adr);
  void fistp_d(const Operand& adr);

  void fisttp_s(const Operand& adr);
1265
  void fisttp_d(const Operand& adr);
1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291

  void fabs();
  void fchs();

  void fadd(int i);
  void fsub(int i);
  void fmul(int i);
  void fdiv(int i);

  void fisub_s(const Operand& adr);

  void faddp(int i = 1);
  void fsubp(int i = 1);
  void fsubrp(int i = 1);
  void fmulp(int i = 1);
  void fdivp(int i = 1);
  void fprem();
  void fprem1();

  void fxch(int i = 1);
  void fincstp();
  void ffree(int i = 0);

  void ftst();
  void fucomp(int i);
  void fucompp();
1292
  void fucomi(int i);
1293 1294
  void fucomip();

1295 1296 1297 1298 1299
  void fcompp();
  void fnstsw_ax();
  void fwait();
  void fnclex();

1300 1301
  void fsin();
  void fcos();
1302
  void fptan();
1303
  void fyl2x();
1304 1305 1306
  void f2xm1();
  void fscale();
  void fninit();
1307

1308 1309
  void frndint();

1310 1311 1312
  void sahf();

  // SSE2 instructions
1313
  void movd(XMMRegister dst, Register src);
1314 1315 1316
  void movd(Register dst, XMMRegister src);
  void movq(XMMRegister dst, Register src);
  void movq(Register dst, XMMRegister src);
lrn@chromium.org's avatar
lrn@chromium.org committed
1317
  void movq(XMMRegister dst, XMMRegister src);
1318
  void extractps(Register dst, XMMRegister src, byte imm8);
1319

lrn@chromium.org's avatar
lrn@chromium.org committed
1320 1321 1322 1323
  // Don't use this unless it's important to keep the
  // top half of the destination register unchanged.
  // Use movaps when moving double values and movq for integer
  // values in xmm registers.
1324
  void movsd(XMMRegister dst, XMMRegister src);
lrn@chromium.org's avatar
lrn@chromium.org committed
1325 1326

  void movsd(const Operand& dst, XMMRegister src);
1327
  void movsd(XMMRegister dst, const Operand& src);
1328

1329 1330 1331
  void movdqa(const Operand& dst, XMMRegister src);
  void movdqa(XMMRegister dst, const Operand& src);

1332 1333 1334
  void movdqu(const Operand& dst, XMMRegister src);
  void movdqu(XMMRegister dst, const Operand& src);

lrn@chromium.org's avatar
lrn@chromium.org committed
1335 1336 1337
  void movapd(XMMRegister dst, XMMRegister src);
  void movaps(XMMRegister dst, XMMRegister src);

1338 1339 1340
  void movss(XMMRegister dst, const Operand& src);
  void movss(const Operand& dst, XMMRegister src);

1341
  void cvttss2si(Register dst, const Operand& src);
1342
  void cvttss2si(Register dst, XMMRegister src);
1343
  void cvttsd2si(Register dst, const Operand& src);
1344
  void cvttsd2si(Register dst, XMMRegister src);
1345
  void cvttsd2siq(Register dst, XMMRegister src);
1346

1347 1348 1349 1350
  void cvtlsi2sd(XMMRegister dst, const Operand& src);
  void cvtlsi2sd(XMMRegister dst, Register src);
  void cvtqsi2sd(XMMRegister dst, const Operand& src);
  void cvtqsi2sd(XMMRegister dst, Register src);
1351

1352 1353
  void cvtlsi2ss(XMMRegister dst, Register src);

1354
  void cvtss2sd(XMMRegister dst, XMMRegister src);
1355 1356
  void cvtss2sd(XMMRegister dst, const Operand& src);
  void cvtsd2ss(XMMRegister dst, XMMRegister src);
1357

1358 1359 1360
  void cvtsd2si(Register dst, XMMRegister src);
  void cvtsd2siq(Register dst, XMMRegister src);

1361
  void addsd(XMMRegister dst, XMMRegister src);
1362
  void addsd(XMMRegister dst, const Operand& src);
1363 1364
  void subsd(XMMRegister dst, XMMRegister src);
  void mulsd(XMMRegister dst, XMMRegister src);
1365
  void mulsd(XMMRegister dst, const Operand& src);
1366 1367
  void divsd(XMMRegister dst, XMMRegister src);

1368 1369
  void andpd(XMMRegister dst, XMMRegister src);
  void orpd(XMMRegister dst, XMMRegister src);
1370
  void xorpd(XMMRegister dst, XMMRegister src);
lrn@chromium.org's avatar
lrn@chromium.org committed
1371
  void xorps(XMMRegister dst, XMMRegister src);
1372
  void sqrtsd(XMMRegister dst, XMMRegister src);
1373 1374

  void ucomisd(XMMRegister dst, XMMRegister src);
1375
  void ucomisd(XMMRegister dst, const Operand& src);
1376

1377 1378 1379 1380 1381 1382 1383 1384 1385
  // Rounding modes accepted by roundsd() as its mode argument.  The
  // enumerators are consecutive, starting at zero.
  enum RoundingMode {
    kRoundToNearest = 0,  // 0x0
    kRoundDown,           // 0x1
    kRoundUp,             // 0x2
    kRoundToZero          // 0x3
  };

  void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);

1386
  void movmskpd(Register dst, XMMRegister src);
1387
  void movmskps(Register dst, XMMRegister src);
1388

1389
  // The first argument is the reg field, the second argument is the r/m field.
1390 1391 1392
  void emit_sse_operand(XMMRegister dst, XMMRegister src);
  void emit_sse_operand(XMMRegister reg, const Operand& adr);
  void emit_sse_operand(XMMRegister dst, Register src);
1393
  void emit_sse_operand(Register dst, XMMRegister src);
1394

1395 1396 1397 1398
  // Debugging
  void Print();

  // Check the code size generated from label to here.
1399 1400 1401
  int SizeOfCodeGeneratedSince(Label* label) {
    return pc_offset() - label->pos();
  }
1402 1403 1404 1405

  // Mark address of the ExitJSFrame code.
  void RecordJSReturn();

1406 1407 1408
  // Mark address of a debug break slot.
  void RecordDebugBreakSlot();

1409
  // Record a comment relocation entry that can be used by a disassembler.
1410
  // Use --code-comments to enable.
1411
  void RecordComment(const char* msg, bool force = false);
1412

1413 1414
  // Writes a single byte or word of data in the code stream.
  // Used for inline tables, e.g., jump-tables.
1415
  void db(uint8_t data);
1416 1417
  void dd(uint32_t data);

1418
  PositionsRecorder* positions_recorder() { return &positions_recorder_; }
1419 1420 1421 1422

  // Check if there is less than kGap bytes available in the buffer.
  // If this is the case, we need to grow the buffer before emitting
  // an instruction or relocation information.
1423 1424 1425
  inline bool buffer_overflow() const {
    return pc_ >= reloc_info_writer.pos() - kGap;
  }
1426 1427

  // Get the number of bytes available in the buffer.
1428 1429 1430
  inline int available_space() const {
    return static_cast<int>(reloc_info_writer.pos() - pc_);
  }
1431

1432
  static bool IsNop(Address addr);
1433

1434 1435 1436
  // Upper bound for the size of the code buffer (512 * MB).  Keeping the
  // buffer below this limit avoids overflows for displacements etc.
  static const int kMaximalBufferSize = 512*MB;

1437 1438 1439
  // Reads the byte stored at offset |pos| of the code buffer.
  byte byte_at(int pos) {
    return *(buffer_ + pos);
  }
  // Overwrites the byte at offset |pos| of the code buffer with |value|.
  void set_byte_at(int pos, byte value) {
    *(buffer_ + pos) = value;
  }

1440 1441 1442 1443 1444 1445 1446 1447 1448 1449 1450
 private:
  // Returns the address of offset |pos| inside the code buffer.
  byte* addr_at(int pos) {
    return &buffer_[pos];
  }
  // Reads the 32-bit value located at offset |pos| of the code buffer.
  uint32_t long_at(int pos) {
    const uint32_t* slot = reinterpret_cast<uint32_t*>(addr_at(pos));
    return *slot;
  }
  // Stores the 32-bit value |x| at offset |pos| of the code buffer.
  void long_at_put(int pos, uint32_t x) {
    uint32_t* slot = reinterpret_cast<uint32_t*>(addr_at(pos));
    *slot = x;
  }

  // code emission
  void GrowBuffer();
1451 1452

  // Writes one byte at the emission pointer and advances the pointer.
  void emit(byte x) {
    *pc_ = x;
    ++pc_;
  }
1453
  inline void emitl(uint32_t x);
1454
  inline void emitp(void* x, RelocInfo::Mode rmode);
1455
  inline void emitq(uint64_t x, RelocInfo::Mode rmode);
1456
  inline void emitw(uint16_t x);
1457 1458
  inline void emit_code_target(Handle<Code> target,
                               RelocInfo::Mode rmode,
1459
                               TypeFeedbackId ast_id = TypeFeedbackId::None());
1460
  inline void emit_runtime_entry(Address entry, RelocInfo::Mode rmode);
1461
  // Emits the value carried by the immediate as a 32-bit quantity.
  void emit(Immediate x) {
    const uint32_t bits = x.value_;
    emitl(bits);
  }
1462

1463 1464
  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of both register codes.
1465 1466
  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
  // REX.W is set.
1467
  inline void emit_rex_64(XMMRegister reg, Register rm_reg);
1468 1469
  inline void emit_rex_64(Register reg, XMMRegister rm_reg);
  inline void emit_rex_64(Register reg, Register rm_reg);
1470 1471 1472

  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of the destination, index, and base register codes.
1473 1474 1475
  // The high bit of reg is used for REX.R, the high bit of op's base
  // register is used for REX.B, and the high bit of op's index register
  // is used for REX.X.  REX.W is set.
1476
  inline void emit_rex_64(Register reg, const Operand& op);
1477
  inline void emit_rex_64(XMMRegister reg, const Operand& op);
1478 1479 1480 1481 1482 1483 1484 1485 1486 1487 1488 1489 1490

  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of the register code.
  // The high bit of register is used for REX.B.
  // REX.W is set and REX.R and REX.X are clear.
  inline void emit_rex_64(Register rm_reg);

  // Emits a REX prefix that encodes a 64-bit operand size and
  // the top bit of the index and base register codes.
  // The high bit of op's base register is used for REX.B, and the high
  // bit of op's index register is used for REX.X.
  // REX.W is set and REX.R clear.
  inline void emit_rex_64(const Operand& op);
1491

1492 1493 1494
  // Emit a REX prefix that only sets REX.W to choose a 64-bit operand size.
  void emit_rex_64() { emit(0x48); }

1495
  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
1496
  // REX.W is clear.
1497 1498 1499 1500 1501 1502 1503
  inline void emit_rex_32(Register reg, Register rm_reg);

  // The high bit of reg is used for REX.R, the high bit of op's base
  // register is used for REX.B, and the high bit of op's index register
  // is used for REX.X.  REX.W is cleared.
  inline void emit_rex_32(Register reg, const Operand& op);

1504 1505 1506 1507 1508 1509
  // High bit of rm_reg goes to REX.B.
  // REX.W, REX.R and REX.X are clear.
  inline void emit_rex_32(Register rm_reg);

  // High bit of base goes to REX.B and high bit of index to REX.X.
  // REX.W and REX.R are clear.
1510
  inline void emit_rex_32(const Operand& op);
1511

1512 1513 1514 1515 1516 1517 1518 1519 1520
  // High bit of reg goes to REX.R, high bit of rm_reg goes to REX.B.
  // REX.W is cleared.  If no REX bits are set, no byte is emitted.
  inline void emit_optional_rex_32(Register reg, Register rm_reg);

  // The high bit of reg is used for REX.R, the high bit of op's base
  // register is used for REX.B, and the high bit of op's index register
  // is used for REX.X.  REX.W is cleared.  If no REX bits are set, nothing
  // is emitted.
  inline void emit_optional_rex_32(Register reg, const Operand& op);
1521

1522 1523 1524 1525 1526
  // As for emit_optional_rex_32(Register, Register), except that
  // the registers are XMM registers.
  inline void emit_optional_rex_32(XMMRegister reg, XMMRegister base);

  // As for emit_optional_rex_32(Register, Register), except that
1527
  // one of the registers is an XMM register.
1528 1529
  inline void emit_optional_rex_32(XMMRegister reg, Register base);

1530 1531 1532 1533
  // As for emit_optional_rex_32(Register, Register), except that
  // one of the registers is an XMM register.
  inline void emit_optional_rex_32(Register reg, XMMRegister base);

1534 1535 1536 1537
  // As for emit_optional_rex_32(Register, const Operand&), except that
  // the register is an XMM register.
  inline void emit_optional_rex_32(XMMRegister reg, const Operand& op);

1538 1539 1540 1541 1542 1543 1544 1545 1546
  // Optionally do as emit_rex_32(Register) if the register number has
  // the high bit set.
  inline void emit_optional_rex_32(Register rm_reg);

  // Optionally do as emit_rex_32(const Operand&) if the operand register
  // numbers have a high bit set.
  inline void emit_optional_rex_32(const Operand& op);


1547
  // Emit the ModR/M byte, and optionally the SIB byte and
1548
  // 1- or 4-byte offset for a memory operand.  Also encodes
1549
  // the second operand of the operation, a register or operation
1550
  // subcode, into the reg field of the ModR/M byte.
1551
  void emit_operand(Register reg, const Operand& adr) {
1552
    emit_operand(reg.low_bits(), adr);
1553
  }
1554

1555
  // Emit the ModR/M byte, and optionally the SIB byte and
1556
  // 1- or 4-byte offset for a memory operand.  Also used to encode
1557
  // a three-bit opcode extension into the ModR/M byte.
1558 1559
  void emit_operand(int rm, const Operand& adr);

1560 1561
  // Emits a register-direct ModR/M byte (top two bits set, 0xC0) with the
  // low bits of |reg| in the reg field and those of |rm_reg| in the r/m
  // field.
  void emit_modrm(Register reg, Register rm_reg) {
    const int reg_field = reg.low_bits() << 3;
    const int rm_field = rm_reg.low_bits();
    emit(0xC0 | reg_field | rm_field);
  }

  // Emit a ModR/M byte with an operation subcode in the reg field and
  // a register in the rm_reg field.
  void emit_modrm(int code, Register rm_reg) {
1568 1569
    ASSERT(is_uint3(code));
    emit(0xC0 | code << 3 | rm_reg.low_bits());
1570 1571
  }

1572 1573 1574
  // Emit the code-object-relative offset of the label's position
  inline void emit_code_relative_offset(Label* label);

1575 1576 1577
  // Emit machine code for one of the operations ADD, ADC, SUB, SBB,
  // AND, OR, XOR, or CMP.  The encodings of these operations are all
  // similar, differing just in the opcode or in the reg field of the
1578
  // ModR/M byte.
1579 1580 1581
  void arithmetic_op_16(byte opcode, Register reg, Register rm_reg);
  void arithmetic_op_16(byte opcode, Register reg, const Operand& rm_reg);
  void arithmetic_op_32(byte opcode, Register reg, Register rm_reg);
1582
  void arithmetic_op_32(byte opcode, Register reg, const Operand& rm_reg);
1583
  void arithmetic_op(byte opcode, Register reg, Register rm_reg);
1584
  void arithmetic_op(byte opcode, Register reg, const Operand& rm_reg);
1585 1586
  void immediate_arithmetic_op(byte subcode, Register dst, Immediate src);
  void immediate_arithmetic_op(byte subcode, const Operand& dst, Immediate src);
1587
  // Operate on a byte in memory or register.
1588
  void immediate_arithmetic_op_8(byte subcode,
1589
                                 Register dst,
1590 1591
                                 Immediate src);
  void immediate_arithmetic_op_8(byte subcode,
1592
                                 const Operand& dst,
1593
                                 Immediate src);
1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608
  // Operate on a word in memory or register.
  void immediate_arithmetic_op_16(byte subcode,
                                  Register dst,
                                  Immediate src);
  void immediate_arithmetic_op_16(byte subcode,
                                  const Operand& dst,
                                  Immediate src);
  // Operate on a 32-bit word in memory or register.
  void immediate_arithmetic_op_32(byte subcode,
                                  Register dst,
                                  Immediate src);
  void immediate_arithmetic_op_32(byte subcode,
                                  const Operand& dst,
                                  Immediate src);

1609 1610
  // Emit machine code for a shift operation.
  void shift(Register dst, Immediate shift_amount, int subcode);
1611
  void shift_32(Register dst, Immediate shift_amount, int subcode);
1612 1613
  // Shift dst by cl % 64 bits.
  void shift(Register dst, int subcode);
1614
  void shift_32(Register dst, int subcode);
1615

1616
  void emit_farith(int b1, int b2, int i);
1617 1618

  // labels
1619
  // void print(Label* L);
1620 1621 1622
  void bind_to(Label* L, int pos);

  // record reloc info for current pc_
1623
  void RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data = 0);
1624 1625 1626

  friend class CodePatcher;
  friend class EnsureSpace;
1627
  friend class RegExpMacroAssemblerX64;
1628 1629 1630 1631

  // code generation
  RelocInfoWriter reloc_info_writer;

1632
  List< Handle<Code> > code_targets_;
1633

1634 1635
  PositionsRecorder positions_recorder_;
  friend class PositionsRecorder;
1636 1637 1638 1639 1640 1641 1642 1643 1644 1645
};


// Scope guard used around code emission.  On construction it grows the
// assembler's buffer if the assembler reports a buffer overflow, so that
// instructions and relocation information can be written.  In debug builds
// the destructor additionally asserts that fewer than kGap bytes were
// generated while the guard was alive.
class EnsureSpace BASE_EMBEDDED {
 public:
  explicit EnsureSpace(Assembler* assembler) : assembler_(assembler) {
    if (assembler_->buffer_overflow()) {
      assembler_->GrowBuffer();
    }
#ifdef DEBUG
    // Remember the free space so the destructor can measure consumption.
    space_before_ = assembler_->available_space();
#endif
  }

#ifdef DEBUG
  ~EnsureSpace() {
    const int space_after = assembler_->available_space();
    ASSERT(space_before_ - space_after < assembler_->kGap);
  }
#endif

 private:
  Assembler* assembler_;
#ifdef DEBUG
  int space_before_;  // Free bytes observed right after construction.
#endif
};

1666 1667 1668
} }  // namespace v8::internal

#endif  // V8_X64_ASSEMBLER_X64_H_