assembler-ia32.h 56.3 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
// Copyright (c) 1994-2006 Sun Microsystems Inc.
// All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// - Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// - Redistribution in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// - Neither the name of Sun Microsystems or the names of contributors may
// be used to endorse or promote products derived from this software without
// specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
// IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
// THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// The original source code covered by the license above has been
// modified significantly by Google Inc.
33
// Copyright 2011 the V8 project authors. All rights reserved.
34 35 36

// A light-weight IA32 Assembler.

37 38
#ifndef V8_IA32_ASSEMBLER_IA32_H_
#define V8_IA32_ASSEMBLER_IA32_H_
39

40 41
#include <deque>

42
#include "src/assembler.h"
43
#include "src/isolate.h"
44
#include "src/utils.h"
45

46 47
namespace v8 {
namespace internal {
48

49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76
// X-macro listing every IA32 general-purpose register: V is applied to each
// register name in turn. The order of this list fixes the numeric values of
// the kCode_<name> enumerators generated in Register::Code (eax = 0, ecx = 1,
// ... edi = 7).
#define GENERAL_REGISTERS(V) \
  V(eax)                     \
  V(ecx)                     \
  V(edx)                     \
  V(ebx)                     \
  V(esp)                     \
  V(ebp)                     \
  V(esi)                     \
  V(edi)

// X-macro over a subset of GENERAL_REGISTERS: the same list with esp and ebp
// left out.
// NOTE(review): presumably these are the registers handed to the register
// allocator, with the stack and frame pointers reserved - confirm with users.
#define ALLOCATABLE_GENERAL_REGISTERS(V) \
  V(eax)                                 \
  V(ecx)                                 \
  V(edx)                                 \
  V(ebx)                                 \
  V(esi)                                 \
  V(edi)

// X-macro listing the XMM registers xmm0..xmm7: V is applied to each name in
// turn. The order of this list fixes the numeric values of the kCode_<name>
// enumerators generated in XMMRegister::Code.
#define DOUBLE_REGISTERS(V) \
  V(xmm0)                   \
  V(xmm1)                   \
  V(xmm2)                   \
  V(xmm3)                   \
  V(xmm4)                   \
  V(xmm5)                   \
  V(xmm6)                   \
  V(xmm7)

77
// The float register list is the same X-macro as the double register list.
#define FLOAT_REGISTERS DOUBLE_REGISTERS
78
// The SIMD128 register list is the same X-macro as the double register list.
#define SIMD128_REGISTERS DOUBLE_REGISTERS
79

80 81 82 83 84 85 86 87 88
// X-macro over a subset of DOUBLE_REGISTERS: the same list with xmm0 left out.
// NOTE(review): presumably xmm0 is kept back as a scratch register - confirm
// with the code that uses this list.
#define ALLOCATABLE_DOUBLE_REGISTERS(V) \
  V(xmm1)                               \
  V(xmm2)                               \
  V(xmm3)                               \
  V(xmm4)                               \
  V(xmm5)                               \
  V(xmm6)                               \
  V(xmm7)

89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110
// CPU Registers.
//
// 1) We would prefer to use an enum, but enum values are assignment-
// compatible with int, which has caused code-generation bugs.
//
// 2) We would prefer to use a class instead of a struct but we don't like
// the register initialization to depend on the particular initialization
// order (which appears to be different on OS X, Linux, and Windows for the
// installed versions of C++ we tried). Using a struct permits C-style
// "initialization". Also, the Register objects cannot be const as this
// forces initialization stubs in MSVC, making us dependent on initialization
// order.
//
// 3) By not using an enum, we are possibly preventing the compiler from
// doing certain constant folds, which may significantly reduce the
// code generated for some assembly instructions (because they boil down
// to a few constants). If this is a problem, we could change the code
// such that we use an enum in optimized mode, and the struct in debug
// mode. This way we get the compile-time error checking in debug mode
// and best performance in optimized code.
//
struct Register {
111 112 113 114 115 116 117
  enum Code {
#define REGISTER_CODE(R) kCode_##R,
    GENERAL_REGISTERS(REGISTER_CODE)
#undef REGISTER_CODE
        kAfterLast,
    kCode_no_reg = -1
  };
118

119
  static const int kNumRegisters = Code::kAfterLast;
120 121

  static Register from_code(int code) {
122 123
    DCHECK(code >= 0);
    DCHECK(code < kNumRegisters);
124
    Register r = {code};
125 126
    return r;
  }
127 128
  bool is_valid() const { return 0 <= reg_code && reg_code < kNumRegisters; }
  bool is(Register reg) const { return reg_code == reg.reg_code; }
129
  int code() const {
130
    DCHECK(is_valid());
131
    return reg_code;
132
  }
133
  int bit() const {
134
    DCHECK(is_valid());
135
    return 1 << reg_code;
136 137
  }

138 139
  bool is_byte_register() const { return reg_code <= 3; }

140
  // Unfortunately we can't make this private in a struct.
141
  int reg_code;
142 143
};

144

145 146 147 148
// Defines one namespace-scope constant Register per entry in
// GENERAL_REGISTERS (eax, ecx, ..., edi), each initialized with its
// kCode_<name> value.
#define DECLARE_REGISTER(R) const Register R = {Register::kCode_##R};
GENERAL_REGISTERS(DECLARE_REGISTER)
#undef DECLARE_REGISTER
// Sentinel register carrying kCode_no_reg (-1); is_valid() is false for it.
const Register no_reg = {Register::kCode_no_reg};
149

150 151
// NOTE(review): presumably signals that float, double and SIMD128 registers
// alias each other one-to-one on this target (all three are typedefs of
// XMMRegister in this file) - confirm against the code that reads this flag.
static const bool kSimpleFPAliasing = true;

152
struct XMMRegister {
153 154 155 156 157 158 159 160 161
  enum Code {
#define REGISTER_CODE(R) kCode_##R,
    DOUBLE_REGISTERS(REGISTER_CODE)
#undef REGISTER_CODE
        kAfterLast,
    kCode_no_reg = -1
  };

  static const int kMaxNumRegisters = Code::kAfterLast;
162

163 164
  static XMMRegister from_code(int code) {
    XMMRegister result = {code};
165 166 167
    return result;
  }

168
  bool is_valid() const { return 0 <= reg_code && reg_code < kMaxNumRegisters; }
169

170
  int code() const {
171
    DCHECK(is_valid());
172
    return reg_code;
173
  }
174

175
  bool is(XMMRegister reg) const { return reg_code == reg.reg_code; }
176

177 178
  int reg_code;
};
179

180 181 182 183 184 185
// Float, double and 128-bit SIMD values all use the same register type.
using FloatRegister = XMMRegister;

using DoubleRegister = XMMRegister;

using Simd128Register = XMMRegister;

186 187 188 189 190
// Defines one namespace-scope constant DoubleRegister per entry in
// DOUBLE_REGISTERS (xmm0 ... xmm7), each initialized with its kCode_<name>
// value.
#define DECLARE_REGISTER(R) \
  const DoubleRegister R = {DoubleRegister::kCode_##R};
DOUBLE_REGISTERS(DECLARE_REGISTER)
#undef DECLARE_REGISTER
// Sentinel XMM register carrying kCode_no_reg (-1); is_valid() is false for
// it.
const DoubleRegister no_double_reg = {DoubleRegister::kCode_no_reg};
191

192 193 194 195 196 197 198 199 200 201 202 203
// Condition codes. The non-negative values are OR-ed directly into opcode
// bytes (see the kJ*ShortOpcode constants in Assembler), so they must not be
// renumbered. Each condition and its negation differ only in bit 0, which is
// what NegateCondition() relies on.
enum Condition {
  // Any value below zero is treated as "no condition".
  no_condition = -1,

  // Overflow flag.
  overflow = 0,
  no_overflow = 1,
  // Unsigned comparisons.
  below = 2,
  above_equal = 3,
  equal = 4,
  not_equal = 5,
  below_equal = 6,
  above = 7,
  // Sign flag.
  negative = 8,
  positive = 9,
  // Parity flag.
  parity_even = 10,
  parity_odd = 11,
  // Signed comparisons.
  less = 12,
  greater_equal = 13,
  less_equal = 14,
  greater = 15,

  // Alternative names for some of the conditions above.
  carry = below,
  not_carry = above_equal,
  zero = equal,
  not_zero = not_equal,
  sign = negative,
  not_sign = positive
};

222

223 224 225 226
// Returns the equivalent of !cc.
// Negation of the default no_condition (-1) results in a non-default
// no_condition value (-2). As long as tests for no_condition check
// for condition < 0, this will work as expected.
227 228 229 230
inline Condition NegateCondition(Condition cc) {
  // A condition and its negation differ only in the lowest bit.
  const int flipped = cc ^ 1;
  return static_cast<Condition>(flipped);
}

231

232
// Commute a condition such that {a cond b == b cond' a}.
233
inline Condition CommuteCondition(Condition cc) {
  // Only the ordering comparisons change when the two operands are swapped.
  // Every other condition falls through and is returned unchanged.
  switch (cc) {
    // Unsigned orderings.
    case below:         return above;
    case above:         return below;
    case above_equal:   return below_equal;
    case below_equal:   return above_equal;
    // Signed orderings.
    case less:          return greater;
    case greater:       return less;
    case greater_equal: return less_equal;
    case less_equal:    return greater_equal;
    default:
      break;
  }
  return cc;
}

256

257 258 259 260 261 262 263 264
// Floating-point rounding modes.
// NOTE(review): the values look like a 2-bit hardware rounding-control
// encoding - confirm against the instructions that consume them before
// renumbering.
enum RoundingMode {
  kRoundToNearest = 0,
  kRoundDown = 1,
  kRoundUp = 2,
  kRoundToZero = 3
};


265 266 267 268 269 270 271 272 273
// -----------------------------------------------------------------------------
// Machine instruction Immediates

class Immediate BASE_EMBEDDED {
 public:
  inline explicit Immediate(int x);
  inline explicit Immediate(const ExternalReference& ext);
  inline explicit Immediate(Handle<Object> handle);
  inline explicit Immediate(Smi* value);
274
  inline explicit Immediate(Address addr);
275
  inline explicit Immediate(Address x, RelocInfo::Mode rmode);
276

277 278 279 280
  static Immediate CodeRelativeOffset(Label* label) {
    return Immediate(label);
  }

281
  bool is_zero() const { return x_ == 0 && RelocInfo::IsNone(rmode_); }
282
  bool is_int8() const {
283
    return -128 <= x_ && x_ < 128 && RelocInfo::IsNone(rmode_);
284
  }
285 286 287
  bool is_uint8() const {
    return v8::internal::is_uint8(x_) && RelocInfo::IsNone(rmode_);
  }
288
  bool is_int16() const {
289
    return -32768 <= x_ && x_ < 32768 && RelocInfo::IsNone(rmode_);
290
  }
291 292 293
  bool is_uint16() const {
    return v8::internal::is_uint16(x_) && RelocInfo::IsNone(rmode_);
  }
294 295

 private:
296 297
  inline explicit Immediate(Label* value);

298
  int x_;
299
  RelocInfo::Mode rmode_;
300

301
  friend class Operand;
302
  friend class Assembler;
303
  friend class MacroAssembler;
304 305 306 307 308 309 310 311 312 313
};


// -----------------------------------------------------------------------------
// Machine instruction Operands

// Index scale factors for memory operands. Each value is the base-2 logarithm
// of the multiplier its name describes.
enum ScaleFactor {
  times_1 = 0,
  times_2 = 1,
  times_4 = 2,
  times_8 = 3,

  // Size-based aliases; here both an int and a pointer scale by 4.
  times_int_size = times_4,
  times_half_pointer_size = times_2,
  times_pointer_size = times_4,
  times_twice_pointer_size = times_8
};


class Operand BASE_EMBEDDED {
 public:
324 325 326
  // reg
  INLINE(explicit Operand(Register reg));

327 328 329
  // XMM reg
  INLINE(explicit Operand(XMMRegister xmm_reg));

330
  // [disp/r]
331
  INLINE(explicit Operand(int32_t disp, RelocInfo::Mode rmode));
332 333 334

  // [disp/r]
  INLINE(explicit Operand(Immediate imm));
335 336

  // [base + disp/r]
337
  explicit Operand(Register base, int32_t disp,
338
                   RelocInfo::Mode rmode = RelocInfo::NONE32);
339 340 341 342 343 344

  // [base + index*scale + disp/r]
  explicit Operand(Register base,
                   Register index,
                   ScaleFactor scale,
                   int32_t disp,
345
                   RelocInfo::Mode rmode = RelocInfo::NONE32);
346 347 348 349 350

  // [index*scale + disp/r]
  explicit Operand(Register index,
                   ScaleFactor scale,
                   int32_t disp,
351
                   RelocInfo::Mode rmode = RelocInfo::NONE32);
352

353 354 355 356 357
  static Operand JumpTable(Register index, ScaleFactor scale, Label* table) {
    return Operand(index, scale, reinterpret_cast<int32_t>(table),
                   RelocInfo::INTERNAL_REFERENCE);
  }

358 359
  static Operand StaticVariable(const ExternalReference& ext) {
    return Operand(reinterpret_cast<int32_t>(ext.address()),
360
                   RelocInfo::EXTERNAL_REFERENCE);
361 362 363 364 365 366
  }

  static Operand StaticArray(Register index,
                             ScaleFactor scale,
                             const ExternalReference& arr) {
    return Operand(index, scale, reinterpret_cast<int32_t>(arr.address()),
367
                   RelocInfo::EXTERNAL_REFERENCE);
368 369
  }

370
  static Operand ForCell(Handle<Cell> cell) {
371
    AllowDeferredHandleDereference embedding_raw_address;
372
    return Operand(reinterpret_cast<int32_t>(cell.location()),
373
                   RelocInfo::CELL);
374 375
  }

376 377 378 379
  static Operand ForRegisterPlusImmediate(Register base, Immediate imm) {
    return Operand(base, imm.x_, imm.rmode_);
  }

380 381 382
  // Returns true if this Operand is a wrapper for the specified register.
  bool is_reg(Register reg) const;

383 384 385 386 387 388 389
  // Returns true if this Operand is a wrapper for one register.
  bool is_reg_only() const;

  // Asserts that this Operand is a wrapper for one register and returns the
  // register.
  Register reg() const;

390
 private:
391 392 393 394
  // Set the ModRM byte without an encoded 'reg' register. The
  // register is encoded later as part of the emit_operand operation.
  inline void set_modrm(int mod, Register rm);

395 396
  inline void set_sib(ScaleFactor scale, Register index, Register base);
  inline void set_disp8(int8_t disp);
397
  inline void set_dispr(int32_t disp, RelocInfo::Mode rmode);
398

399 400 401 402 403 404
  byte buf_[6];
  // The number of bytes in buf_.
  unsigned int len_;
  // Only valid if len_ > 4.
  RelocInfo::Mode rmode_;

405
  friend class Assembler;
406
  friend class MacroAssembler;
407 408 409
};


410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425
// -----------------------------------------------------------------------------
// A Displacement describes the 32bit immediate field of an instruction which
// may be used together with a Label in order to refer to a yet unknown code
// position. Displacements stored in the instruction stream are used to describe
// the instruction and to chain a list of instructions using the same Label.
// A Displacement contains 2 different fields:
//
// next field: position of next displacement in the chain (0 = end of list)
// type field: instruction type
//
// A next value of null (0) indicates the end of a chain (note that there can
// be no displacement at position zero, because there is always at least one
// instruction byte before the displacement).
//
// Displacement _data field layout
//
426
// |31.....2|1......0|
427 428 429 430
// |  next  |  type  |

class Displacement BASE_EMBEDDED {
 public:
431
  enum Type { UNCONDITIONAL_JUMP, CODE_RELATIVE, OTHER, CODE_ABSOLUTE };
432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452

  int data() const { return data_; }
  Type type() const { return TypeField::decode(data_); }
  void next(Label* L) const {
    int n = NextField::decode(data_);
    n > 0 ? L->link_to(n) : L->Unuse();
  }
  void link_to(Label* L) { init(L, type()); }

  explicit Displacement(int data) { data_ = data; }

  Displacement(Label* L, Type type) { init(L, type); }

  void print() {
    PrintF("%s (%x) ", (type() == UNCONDITIONAL_JUMP ? "jmp" : "[other]"),
                       NextField::decode(data_));
  }

 private:
  int data_;

453 454
  class TypeField: public BitField<Type, 0, 2> {};
  class NextField: public BitField<int,  2, 32-2> {};
455 456 457 458 459

  void init(Label* L, Type type);
};


460
class Assembler : public AssemblerBase {
461
 private:
462 463 464
  // We check before assembling an instruction that there is sufficient
  // space to write an instruction and its relocation information.
  // The relocation writer's position must be kGap bytes above the end of
465
  // the generated instructions. This leaves enough space for the
466 467 468 469 470
  // longest possible ia32 instruction, 15 bytes, and the longest possible
  // relocation information encoding, RelocInfoWriter::kMaxLength == 16.
  // (There is a 15 byte limit on ia32 instruction length that rules out some
  // otherwise valid instructions.)
  // This allows for a single, fast space check per instruction.
471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486
  static const int kGap = 32;

 public:
  // Create an assembler. Instructions and relocation information are emitted
  // into a buffer, with the instructions starting from the beginning and the
  // relocation information starting from the end of the buffer. See CodeDesc
  // for a detailed comment on the layout (globals.h).
  //
  // If the provided buffer is NULL, the assembler allocates and grows its own
  // buffer, and buffer_size determines the initial buffer size. The buffer is
  // owned by the assembler and deallocated upon destruction of the assembler.
  //
  // If the provided buffer is not NULL, the assembler uses the provided buffer
  // for code generation and assumes its size to be buffer_size. If the buffer
  // is too small, a fatal error occurs. No deallocation of the buffer is done
  // upon destruction of the assembler.
487 488
  // TODO(vitalyr): the assembler does not need an isolate.
  Assembler(Isolate* isolate, void* buffer, int buffer_size);
489
  virtual ~Assembler() { }
490 491 492

  // GetCode emits any pending (non-emitted) code and fills the descriptor
  // desc. GetCode() is idempotent; it returns the same result if no other
493
  // Assembler functions are invoked in between GetCode() calls.
494 495 496
  void GetCode(CodeDesc* desc);

  // Read/Modify the code target in the branch/call instruction at pc.
497 498
  inline static Address target_address_at(Address pc, Address constant_pool);
  inline static void set_target_address_at(
499
      Isolate* isolate, Address pc, Address constant_pool, Address target,
500
      ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED);
501
  // Convenience overload: looks up the constant pool of |code| (none when
  // |code| is null) and defers to the (pc, constant_pool) overload.
  static inline Address target_address_at(Address pc, Code* code) {
    Address pool = NULL;
    if (code) {
      pool = code->constant_pool();
    }
    return target_address_at(pc, pool);
  }
505 506 507
  // Convenience overload: looks up the constant pool of |code| (none when
  // |code| is null) and defers to the (isolate, pc, constant_pool, target,
  // icache_flush_mode) overload.
  static inline void set_target_address_at(
      Isolate* isolate, Address pc, Code* code, Address target,
      ICacheFlushMode icache_flush_mode = FLUSH_ICACHE_IF_NEEDED) {
    Address constant_pool = code ? code->constant_pool() : NULL;
    // Pass the caller's flush mode through. Leaving it out would make the
    // callee use its own default (FLUSH_ICACHE_IF_NEEDED) and silently ignore
    // any other mode the caller asked for.
    set_target_address_at(isolate, pc, constant_pool, target,
                          icache_flush_mode);
  }
511

512 513 514 515
  // Return the code target address at a call site from the return address
  // of that call in the instruction stream.
  inline static Address target_address_from_return_address(Address pc);

516
  // This sets the branch destination (which is in the instruction on x86).
517
  // This is for calls and branches within generated code.
518
  inline static void deserialization_set_special_target_at(
519 520 521
      Isolate* isolate, Address instruction_payload, Code* code,
      Address target) {
    set_target_address_at(isolate, instruction_payload, code, target);
522 523
  }

524 525
  // This sets the internal reference at the pc.
  inline static void deserialization_set_target_internal_reference_at(
526
      Isolate* isolate, Address pc, Address target,
527
      RelocInfo::Mode mode = RelocInfo::INTERNAL_REFERENCE);
528

529
  static const int kSpecialTargetSize = kPointerSize;
530

531 532
  // Distance between the address of the code target in the call instruction
  // and the return address
533
  static const int kCallTargetAddressOffset = kPointerSize;
534

535
  static const int kCallInstructionLength = 5;
536

537 538 539
  // The debug break slot must be able to contain a call instruction.
  static const int kDebugBreakSlotLength = kCallInstructionLength;

540 541 542 543
  // Distance between start of patched debug break slot and the emitted address
  // to jump to.
  static const int kPatchDebugBreakSlotAddressOffset = 1;  // JMP imm32.

544 545
  // One byte opcode for test al, 0xXX.
  static const byte kTestAlByte = 0xA8;
546 547 548 549 550 551 552 553 554
  // One byte opcode for nop.
  static const byte kNopByte = 0x90;

  // One byte opcode for a short unconditional jump.
  static const byte kJmpShortOpcode = 0xEB;
  // One byte prefix for a short conditional jump.
  static const byte kJccShortPrefix = 0x70;
  static const byte kJncShortOpcode = kJccShortPrefix | not_carry;
  static const byte kJcShortOpcode = kJccShortPrefix | carry;
555 556 557
  static const byte kJnzShortOpcode = kJccShortPrefix | not_zero;
  static const byte kJzShortOpcode = kJccShortPrefix | zero;

558

559 560 561 562 563 564 565 566 567 568 569 570 571
  // ---------------------------------------------------------------------------
  // Code generation
  //
  // - function names correspond one-to-one to ia32 instruction mnemonics
  // - unless specified otherwise, instructions operate on 32bit operands
  // - instructions on 8bit (byte) operands/registers have a trailing '_b'
  // - instructions on 16bit (word) operands/registers have a trailing '_w'
  // - naming conflicts with C++ keywords are resolved via a trailing '_'

  // NOTE ON INTERFACE: Currently, the interface is not very consistent
  // in the sense that some operations (e.g. mov()) can be called in more
  // than one way to generate the same instruction: The Register argument
  // can in some cases be replaced with an Operand(Register) argument.
572
  // This should be cleaned up and made more orthogonal. The question
573 574
  // is: should we always use Operands instead of Registers where an
  // Operand is possible, or should we have a Register (overloaded) form
575
  // instead? We must be careful to make sure that the selected instruction
576 577 578 579 580 581 582
  // is obvious from the parameters to avoid hard-to-find code generation
  // bugs.

  // Insert the smallest number of nop instructions
  // possible to align the pc offset to a multiple
  // of m. m must be a power of 2.
  void Align(int m);
583 584 585
  // Insert the smallest number of zero bytes possible to align the pc offset
  // to a multiple of m. m must be a power of 2 (>= 2).
  void DataAlign(int m);
586
  void Nop(int bytes = 1);
587 588
  // Aligns code to something that's optimal for a jump target for the platform.
  void CodeTargetAlign();
589 590 591 592 593 594 595 596 597

  // Stack
  void pushad();
  void popad();

  void pushfd();
  void popfd();

  void push(const Immediate& x);
598
  void push_imm32(int32_t imm32);
599 600 601 602 603 604
  void push(Register src);
  void push(const Operand& src);

  void pop(Register dst);
  void pop(const Operand& dst);

605 606 607
  void enter(const Immediate& size);
  void leave();

608
  // Moves
609
  void mov_b(Register dst, Register src) { mov_b(dst, Operand(src)); }
610
  void mov_b(Register dst, const Operand& src);
611
  void mov_b(Register dst, int8_t imm8) { mov_b(Operand(dst), imm8); }
612 613
  void mov_b(const Operand& dst, int8_t src) { mov_b(dst, Immediate(src)); }
  void mov_b(const Operand& dst, const Immediate& src);
614 615 616
  void mov_b(const Operand& dst, Register src);

  void mov_w(Register dst, const Operand& src);
617 618
  void mov_w(const Operand& dst, int16_t src) { mov_w(dst, Immediate(src)); }
  void mov_w(const Operand& dst, const Immediate& src);
619 620 621
  void mov_w(const Operand& dst, Register src);

  void mov(Register dst, int32_t imm32);
622
  void mov(Register dst, const Immediate& x);
623 624
  void mov(Register dst, Handle<Object> handle);
  void mov(Register dst, const Operand& src);
625
  void mov(Register dst, Register src);
626 627 628 629
  void mov(const Operand& dst, const Immediate& x);
  void mov(const Operand& dst, Handle<Object> handle);
  void mov(const Operand& dst, Register src);

630
  void movsx_b(Register dst, Register src) { movsx_b(dst, Operand(src)); }
631 632
  void movsx_b(Register dst, const Operand& src);

633
  void movsx_w(Register dst, Register src) { movsx_w(dst, Operand(src)); }
634 635
  void movsx_w(Register dst, const Operand& src);

636
  void movzx_b(Register dst, Register src) { movzx_b(dst, Operand(src)); }
637 638
  void movzx_b(Register dst, const Operand& src);

639
  void movzx_w(Register dst, Register src) { movzx_w(dst, Operand(src)); }
640 641 642
  void movzx_w(Register dst, const Operand& src);

  // Conditional moves
643 644 645
  void cmov(Condition cc, Register dst, Register src) {
    cmov(cc, dst, Operand(src));
  }
646 647
  void cmov(Condition cc, Register dst, const Operand& src);

648 649 650
  // Flag management.
  void cld();

651
  // Repetitive string instructions.
652
  void rep_movs();
653
  void rep_stos();
654
  void stos();
655

656
  // Exchange
657
  void xchg(Register dst, Register src);
658
  void xchg(Register dst, const Operand& src);
659 660
  void xchg_b(Register reg, const Operand& op);
  void xchg_w(Register reg, const Operand& op);
661

662 663 664 665 666 667 668 669
  // Lock prefix
  void lock();

  // CompareExchange
  void cmpxchg(const Operand& dst, Register src);
  void cmpxchg_b(const Operand& dst, Register src);
  void cmpxchg_w(const Operand& dst, Register src);

670 671 672 673
  // Arithmetics
  void adc(Register dst, int32_t imm32);
  void adc(Register dst, const Operand& src);

674
  void add(Register dst, Register src) { add(dst, Operand(src)); }
675
  void add(Register dst, const Operand& src);
676
  void add(const Operand& dst, Register src);
677
  void add(Register dst, const Immediate& imm) { add(Operand(dst), imm); }
678 679 680
  void add(const Operand& dst, const Immediate& x);

  void and_(Register dst, int32_t imm32);
681
  void and_(Register dst, const Immediate& x);
682
  void and_(Register dst, Register src) { and_(dst, Operand(src)); }
683
  void and_(Register dst, const Operand& src);
684
  void and_(const Operand& dst, Register src);
685 686
  void and_(const Operand& dst, const Immediate& x);

687 688
  void cmpb(Register reg, Immediate imm8) { cmpb(Operand(reg), imm8); }
  void cmpb(const Operand& op, Immediate imm8);
689 690
  void cmpb(Register reg, const Operand& op);
  void cmpb(const Operand& op, Register reg);
691
  void cmpb(Register dst, Register src) { cmpb(Operand(dst), src); }
692 693
  void cmpb_al(const Operand& op);
  void cmpw_ax(const Operand& op);
694 695 696 697 698
  void cmpw(const Operand& dst, Immediate src);
  void cmpw(Register dst, Immediate src) { cmpw(Operand(dst), src); }
  void cmpw(Register dst, const Operand& src);
  void cmpw(Register dst, Register src) { cmpw(Operand(dst), src); }
  void cmpw(const Operand& dst, Register src);
699 700
  void cmp(Register reg, int32_t imm32);
  void cmp(Register reg, Handle<Object> handle);
701
  void cmp(Register reg0, Register reg1) { cmp(reg0, Operand(reg1)); }
702
  void cmp(Register reg, const Operand& op);
703
  void cmp(Register reg, const Immediate& imm) { cmp(Operand(reg), imm); }
704
  void cmp(const Operand& op, Register reg);
705
  void cmp(const Operand& op, const Immediate& imm);
706
  void cmp(const Operand& op, Handle<Object> handle);
707 708

  void dec_b(Register dst);
709
  void dec_b(const Operand& dst);
710 711 712 713 714 715

  void dec(Register dst);
  void dec(const Operand& dst);

  void cdq();

716 717 718 719
  void idiv(Register src) { idiv(Operand(src)); }
  void idiv(const Operand& src);
  void div(Register src) { div(Operand(src)); }
  void div(const Operand& src);
720

721 722
  // Signed multiply instructions.
  void imul(Register src);                               // edx:eax = eax * src.
723
  void imul(Register dst, Register src) { imul(dst, Operand(src)); }
724 725
  void imul(Register dst, const Operand& src);           // dst = dst * src.
  void imul(Register dst, Register src, int32_t imm32);  // dst = src * imm32.
726
  void imul(Register dst, const Operand& src, int32_t imm32);
727 728 729 730 731 732

  void inc(Register dst);
  void inc(const Operand& dst);

  void lea(Register dst, const Operand& src);

733 734
  // Unsigned multiply instruction.
  void mul(Register src);                                // edx:eax = eax * reg.
735 736

  void neg(Register dst);
737
  void neg(const Operand& dst);
738 739

  void not_(Register dst);
740
  void not_(const Operand& dst);
741 742

  void or_(Register dst, int32_t imm32);
743
  void or_(Register dst, Register src) { or_(dst, Operand(src)); }
744 745
  void or_(Register dst, const Operand& src);
  void or_(const Operand& dst, Register src);
746
  void or_(Register dst, const Immediate& imm) { or_(Operand(dst), imm); }
747 748 749
  void or_(const Operand& dst, const Immediate& x);

  void rcl(Register dst, uint8_t imm8);
750
  void rcr(Register dst, uint8_t imm8);
751 752 753 754 755

  void ror(Register dst, uint8_t imm8) { ror(Operand(dst), imm8); }
  void ror(const Operand& dst, uint8_t imm8);
  void ror_cl(Register dst) { ror_cl(Operand(dst)); }
  void ror_cl(const Operand& dst);
756

757 758 759 760
  void sar(Register dst, uint8_t imm8) { sar(Operand(dst), imm8); }
  void sar(const Operand& dst, uint8_t imm8);
  void sar_cl(Register dst) { sar_cl(Operand(dst)); }
  void sar_cl(const Operand& dst);
761 762 763

  void sbb(Register dst, const Operand& src);

764 765 766 767
  void shl(Register dst, uint8_t imm8) { shl(Operand(dst), imm8); }
  void shl(const Operand& dst, uint8_t imm8);
  void shl_cl(Register dst) { shl_cl(Operand(dst)); }
  void shl_cl(const Operand& dst);
768 769
  void shld(Register dst, Register src, uint8_t shift);
  void shld_cl(Register dst, Register src);
770

771 772 773 774
  void shr(Register dst, uint8_t imm8) { shr(Operand(dst), imm8); }
  void shr(const Operand& dst, uint8_t imm8);
  void shr_cl(Register dst) { shr_cl(Operand(dst)); }
  void shr_cl(const Operand& dst);
775 776 777
  void shrd(Register dst, Register src, uint8_t shift);
  void shrd_cl(Register dst, Register src) { shrd_cl(Operand(dst), src); }
  void shrd_cl(const Operand& dst, Register src);
778

779
  void sub(Register dst, const Immediate& imm) { sub(Operand(dst), imm); }
780
  void sub(const Operand& dst, const Immediate& x);
781
  void sub(Register dst, Register src) { sub(dst, Operand(src)); }
782 783 784 785
  void sub(Register dst, const Operand& src);
  void sub(const Operand& dst, Register src);

  void test(Register reg, const Immediate& imm);
786
  void test(Register reg0, Register reg1) { test(reg0, Operand(reg1)); }
787 788
  void test(Register reg, const Operand& op);
  void test(const Operand& op, const Immediate& imm);
789 790
  void test(const Operand& op, Register reg) { test(reg, op); }
  void test_b(Register reg, const Operand& op);
791 792
  void test_b(Register reg, Immediate imm8);
  void test_b(const Operand& op, Immediate imm8);
793 794 795 796 797 798 799
  void test_b(const Operand& op, Register reg) { test_b(reg, op); }
  void test_b(Register dst, Register src) { test_b(dst, Operand(src)); }
  void test_w(Register reg, const Operand& op);
  void test_w(Register reg, Immediate imm16);
  void test_w(const Operand& op, Immediate imm16);
  void test_w(const Operand& op, Register reg) { test_w(reg, op); }
  void test_w(Register dst, Register src) { test_w(dst, Operand(src)); }
800 801

  void xor_(Register dst, int32_t imm32);
802
  void xor_(Register dst, Register src) { xor_(dst, Operand(src)); }
803
  void xor_(Register dst, const Operand& src);
804 805
  void xor_(const Operand& dst, Register src);
  void xor_(Register dst, const Immediate& imm) { xor_(Operand(dst), imm); }
806 807 808
  void xor_(const Operand& dst, const Immediate& x);

  // Bit operations.
809
  void bt(const Operand& dst, Register src);
810
  void bts(Register dst, Register src) { bts(Operand(dst), src); }
811
  void bts(const Operand& dst, Register src);
812 813
  void bsr(Register dst, Register src) { bsr(dst, Operand(src)); }
  void bsr(Register dst, const Operand& src);
814 815
  void bsf(Register dst, Register src) { bsf(dst, Operand(src)); }
  void bsf(Register dst, const Operand& src);
816 817 818 819 820 821

  // Miscellaneous
  void hlt();
  void int3();
  void nop();
  void ret(int imm16);
822
  void ud2();
823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842

  // Label operations & relative jumps (PPUM Appendix D)
  //
  // Takes a branch opcode (cc) and a label (L) and generates
  // either a backward branch or a forward branch and links it
  // to the label fixup chain. Usage:
  //
  // Label L;    // unbound label
  // j(cc, &L);  // forward branch to unbound label
  // bind(&L);   // bind label to the current pc
  // j(cc, &L);  // backward branch to bound label
  // bind(&L);   // illegal: a label may be bound only once
  //
  // Note: The same Label can be used for forward and backward branches
  // but it may be bound only once.

  void bind(Label* L);  // binds an unbound label L to the current code position

  // Calls
  void call(Label* L);
843
  void call(byte* entry, RelocInfo::Mode rmode);
844
  int CallSize(const Operand& adr);
845
  // Call through a register: wraps reg in an Operand and defers to
  // call(const Operand&).
  void call(Register reg) {
    call(Operand(reg));
  }
846
  void call(const Operand& adr);
847
  int CallSize(Handle<Code> code, RelocInfo::Mode mode);
848
  void call(Handle<Code> code,
849 850
            RelocInfo::Mode rmode,
            TypeFeedbackId id = TypeFeedbackId::None());
851 852

  // Jumps
853 854
  // unconditional jump to L
  void jmp(Label* L, Label::Distance distance = Label::kFar);
855
  void jmp(byte* entry, RelocInfo::Mode rmode);
856
  // Jump through a register: wraps reg in an Operand and defers to
  // jmp(const Operand&).
  void jmp(Register reg) {
    jmp(Operand(reg));
  }
857
  void jmp(const Operand& adr);
858
  void jmp(Handle<Code> code, RelocInfo::Mode rmode);
859 860

  // Conditional jumps
861 862 863
  void j(Condition cc,
         Label* L,
         Label::Distance distance = Label::kFar);
864
  void j(Condition cc, byte* entry, RelocInfo::Mode rmode);
865 866
  void j(Condition cc, Handle<Code> code,
         RelocInfo::Mode rmode = RelocInfo::CODE_TARGET);
867 868 869

  // Floating-point operations
  void fld(int i);
870
  void fstp(int i);
871 872 873

  void fld1();
  void fldz();
874
  void fldpi();
875
  void fldln2();
876 877 878 879 880

  void fld_s(const Operand& adr);
  void fld_d(const Operand& adr);

  void fstp_s(const Operand& adr);
881
  void fst_s(const Operand& adr);
882
  void fstp_d(const Operand& adr);
883
  void fst_d(const Operand& adr);
884 885 886 887 888 889 890 891 892

  void fild_s(const Operand& adr);
  void fild_d(const Operand& adr);

  void fist_s(const Operand& adr);

  void fistp_s(const Operand& adr);
  void fistp_d(const Operand& adr);

893
  // The fisttp instructions require SSE3.
894
  void fisttp_s(const Operand& adr);
895
  void fisttp_d(const Operand& adr);
896

897 898
  void fabs();
  void fchs();
899 900
  void fcos();
  void fsin();
901
  void fptan();
902
  void fyl2x();
903 904 905
  void f2xm1();
  void fscale();
  void fninit();
906 907

  void fadd(int i);
908
  void fadd_i(int i);
909
  void fsub(int i);
910
  void fsub_i(int i);
911
  void fmul(int i);
912
  void fmul_i(int i);
913
  void fdiv(int i);
914
  void fdiv_i(int i);
915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932

  void fisub_s(const Operand& adr);

  void faddp(int i = 1);
  void fsubp(int i = 1);
  void fsubrp(int i = 1);
  void fmulp(int i = 1);
  void fdivp(int i = 1);
  void fprem();
  void fprem1();

  void fxch(int i = 1);
  void fincstp();
  void ffree(int i = 0);

  void ftst();
  void fucomp(int i);
  void fucompp();
933
  void fucomi(int i);
934
  void fucomip();
935 936 937
  void fcompp();
  void fnstsw_ax();
  void fwait();
938
  void fnclex();
939 940 941 942

  void frndint();

  void sahf();
943
  void setcc(Condition cc, Register reg);
944 945 946

  void cpuid();

947
  // SSE instructions
948 949 950 951 952 953 954 955
  // Scalar single-precision arithmetic.  For each instruction the
  // Operand-source overload is the primitive; the XMMRegister-source
  // overload wraps src in an Operand and defers to it.
  void addss(XMMRegister dst, const Operand& src);
  void addss(XMMRegister dst, XMMRegister src) {
    addss(dst, Operand(src));
  }
  void subss(XMMRegister dst, const Operand& src);
  void subss(XMMRegister dst, XMMRegister src) {
    subss(dst, Operand(src));
  }
  void mulss(XMMRegister dst, const Operand& src);
  void mulss(XMMRegister dst, XMMRegister src) {
    mulss(dst, Operand(src));
  }
  void divss(XMMRegister dst, const Operand& src);
  void divss(XMMRegister dst, XMMRegister src) {
    divss(dst, Operand(src));
  }
956 957
  // sqrtss: the register-source overload wraps src in an Operand and defers
  // to the Operand-source overload.
  void sqrtss(XMMRegister dst, const Operand& src);
  void sqrtss(XMMRegister dst, XMMRegister src) {
    sqrtss(dst, Operand(src));
  }
958 959 960

  // ucomiss: the register-source overload wraps src in an Operand and defers
  // to the Operand-source overload.
  void ucomiss(XMMRegister dst, const Operand& src);
  void ucomiss(XMMRegister dst, XMMRegister src) {
    ucomiss(dst, Operand(src));
  }
961
  void movaps(XMMRegister dst, XMMRegister src);
962 963 964
  void movups(XMMRegister dst, XMMRegister src);
  void movups(XMMRegister dst, const Operand& src);
  void movups(const Operand& dst, XMMRegister src);
965 966
  void shufps(XMMRegister dst, XMMRegister src, byte imm8);

967 968 969 970 971
  // maxss / minss: the register-source overloads wrap src in an Operand and
  // defer to the Operand-source overloads.
  void maxss(XMMRegister dst, const Operand& src);
  void maxss(XMMRegister dst, XMMRegister src) {
    maxss(dst, Operand(src));
  }
  void minss(XMMRegister dst, const Operand& src);
  void minss(XMMRegister dst, XMMRegister src) {
    minss(dst, Operand(src));
  }

972 973 974 975 976 977 978 979 980 981 982 983 984 985 986
  // Packed single-precision bitwise operations.  The Operand-source overload
  // is the primitive; the XMMRegister-source overload wraps src in an
  // Operand and defers to it.
  void andps(XMMRegister dst, const Operand& src);
  void andps(XMMRegister dst, XMMRegister src) {
    andps(dst, Operand(src));
  }
  void xorps(XMMRegister dst, const Operand& src);
  void xorps(XMMRegister dst, XMMRegister src) {
    xorps(dst, Operand(src));
  }
  void orps(XMMRegister dst, const Operand& src);
  void orps(XMMRegister dst, XMMRegister src) {
    orps(dst, Operand(src));
  }

  // Packed single-precision arithmetic, following the same overload scheme.
  void addps(XMMRegister dst, const Operand& src);
  void addps(XMMRegister dst, XMMRegister src) {
    addps(dst, Operand(src));
  }
  void subps(XMMRegister dst, const Operand& src);
  void subps(XMMRegister dst, XMMRegister src) {
    subps(dst, Operand(src));
  }
  void mulps(XMMRegister dst, const Operand& src);
  void mulps(XMMRegister dst, XMMRegister src) {
    mulps(dst, Operand(src));
  }
  void divps(XMMRegister dst, const Operand& src);
  void divps(XMMRegister dst, XMMRegister src) {
    divps(dst, Operand(src));
  }
987

988 989
  // SSE2 instructions
  void cvttss2si(Register dst, const Operand& src);
990 991 992
  // Register-source form: wraps src in an Operand and defers to
  // cvttss2si(Register, const Operand&).
  void cvttss2si(Register dst, XMMRegister src) {
    const Operand src_operand(src);
    cvttss2si(dst, src_operand);
  }
993
  void cvttsd2si(Register dst, const Operand& src);
994 995 996
  // Register-source form: wraps src in an Operand and defers to
  // cvttsd2si(Register, const Operand&).
  void cvttsd2si(Register dst, XMMRegister src) {
    const Operand src_operand(src);
    cvttsd2si(dst, src_operand);
  }
997
  void cvtsd2si(Register dst, XMMRegister src);
998

999 1000
  // cvtsi2ss: the register-source overload wraps src in an Operand and
  // defers to the Operand-source overload.
  void cvtsi2ss(XMMRegister dst, const Operand& src);
  void cvtsi2ss(XMMRegister dst, Register src) {
    cvtsi2ss(dst, Operand(src));
  }
1001
  // Register-source form: wraps src in an Operand and defers to
  // cvtsi2sd(XMMRegister, const Operand&).
  void cvtsi2sd(XMMRegister dst, Register src) {
    cvtsi2sd(dst, Operand(src));
  }
1002
  void cvtsi2sd(XMMRegister dst, const Operand& src);
1003 1004 1005 1006
  // cvtss2sd: the register-source overload wraps src in an Operand and
  // defers to the Operand-source overload.
  void cvtss2sd(XMMRegister dst, const Operand& src);
  void cvtss2sd(XMMRegister dst, XMMRegister src) {
    const Operand src_operand(src);
    cvtss2sd(dst, src_operand);
  }
1007 1008 1009 1010
  // cvtsd2ss: the register-source overload wraps src in an Operand and
  // defers to the Operand-source overload.
  void cvtsd2ss(XMMRegister dst, const Operand& src);
  void cvtsd2ss(XMMRegister dst, XMMRegister src) {
    const Operand src_operand(src);
    cvtsd2ss(dst, src_operand);
  }
1011
  // Register-source form: wraps src in an Operand and defers to
  // addsd(XMMRegister, const Operand&).
  void addsd(XMMRegister dst, XMMRegister src) {
    addsd(dst, Operand(src));
  }
1012
  void addsd(XMMRegister dst, const Operand& src);
1013
  // Register-source form: wraps src in an Operand and defers to
  // subsd(XMMRegister, const Operand&).
  void subsd(XMMRegister dst, XMMRegister src) {
    subsd(dst, Operand(src));
  }
1014
  void subsd(XMMRegister dst, const Operand& src);
1015
  // Register-source form: wraps src in an Operand and defers to
  // mulsd(XMMRegister, const Operand&).
  void mulsd(XMMRegister dst, XMMRegister src) {
    mulsd(dst, Operand(src));
  }
1016
  void mulsd(XMMRegister dst, const Operand& src);
1017 1018
  // divsd: the register-source overload wraps src in an Operand and defers
  // to the Operand-source overload.
  void divsd(XMMRegister dst, const Operand& src);
  void divsd(XMMRegister dst, XMMRegister src) {
    divsd(dst, Operand(src));
  }
1019
  void xorpd(XMMRegister dst, XMMRegister src);
1020
  // Register-source form: wraps src in an Operand and defers to
  // sqrtsd(XMMRegister, const Operand&).
  void sqrtsd(XMMRegister dst, XMMRegister src) {
    sqrtsd(dst, Operand(src));
  }
1021
  void sqrtsd(XMMRegister dst, const Operand& src);
1022

1023
  void andpd(XMMRegister dst, XMMRegister src);
1024
  void orpd(XMMRegister dst, XMMRegister src);
1025

1026
  // Register-source form: wraps src in an Operand and defers to
  // ucomisd(XMMRegister, const Operand&).
  void ucomisd(XMMRegister dst, XMMRegister src) {
    ucomisd(dst, Operand(src));
  }
1027
  void ucomisd(XMMRegister dst, const Operand& src);
1028

1029
  void roundss(XMMRegister dst, XMMRegister src, RoundingMode mode);
1030 1031
  void roundsd(XMMRegister dst, XMMRegister src, RoundingMode mode);

1032
  void movmskpd(Register dst, XMMRegister src);
1033
  void movmskps(Register dst, XMMRegister src);
1034

1035
  void cmpltsd(XMMRegister dst, XMMRegister src);
1036
  void pcmpeqd(XMMRegister dst, XMMRegister src);
1037

1038 1039 1040
  void punpckldq(XMMRegister dst, XMMRegister src);
  void punpckhdq(XMMRegister dst, XMMRegister src);

1041 1042 1043 1044 1045
  // maxsd / minsd: the register-source overloads wrap src in an Operand and
  // defer to the Operand-source overloads.
  void maxsd(XMMRegister dst, const Operand& src);
  void maxsd(XMMRegister dst, XMMRegister src) {
    maxsd(dst, Operand(src));
  }
  void minsd(XMMRegister dst, const Operand& src);
  void minsd(XMMRegister dst, XMMRegister src) {
    minsd(dst, Operand(src));
  }

1046 1047 1048 1049
  void movdqa(XMMRegister dst, const Operand& src);
  void movdqa(const Operand& dst, XMMRegister src);
  void movdqu(XMMRegister dst, const Operand& src);
  void movdqu(const Operand& dst, XMMRegister src);
1050 1051 1052 1053 1054 1055 1056
  // Loads src into dst, emitting movdqa when the caller states the access is
  // aligned and movdqu otherwise.
  void movdq(bool aligned, XMMRegister dst, const Operand& src) {
    if (!aligned) {
      movdqu(dst, src);
      return;
    }
    movdqa(dst, src);
  }
1057

1058
  // Register-source form: wraps src in an Operand and defers to
  // movd(XMMRegister, const Operand&).
  void movd(XMMRegister dst, Register src) {
    movd(dst, Operand(src));
  }
1059
  void movd(XMMRegister dst, const Operand& src);
1060 1061
  // movd out of an XMM register: the Register-destination overload wraps dst
  // in an Operand and defers to the Operand-destination overload.
  void movd(const Operand& dst, XMMRegister src);
  void movd(Register dst, XMMRegister src) {
    movd(Operand(dst), src);
  }
1062
  // Register-source form: wraps src in an Operand and defers to
  // movsd(XMMRegister, const Operand&).
  void movsd(XMMRegister dst, XMMRegister src) {
    movsd(dst, Operand(src));
  }
1063 1064 1065
  void movsd(XMMRegister dst, const Operand& src);
  void movsd(const Operand& dst, XMMRegister src);

1066

1067
  void movss(XMMRegister dst, const Operand& src);
1068
  void movss(const Operand& dst, XMMRegister src);
1069
  // Register-source form: wraps src in an Operand and defers to
  // movss(XMMRegister, const Operand&).
  void movss(XMMRegister dst, XMMRegister src) {
    movss(dst, Operand(src));
  }
1070
  void extractps(Register dst, XMMRegister src, byte imm8);
1071

1072
  void pand(XMMRegister dst, XMMRegister src);
1073
  void pxor(XMMRegister dst, XMMRegister src);
1074
  void por(XMMRegister dst, XMMRegister src);
1075 1076
  void ptest(XMMRegister dst, XMMRegister src);

1077 1078
  void pslld(XMMRegister reg, int8_t shift);
  void psrld(XMMRegister reg, int8_t shift);
1079
  void psllq(XMMRegister reg, int8_t shift);
1080 1081 1082
  void psllq(XMMRegister dst, XMMRegister src);
  void psrlq(XMMRegister reg, int8_t shift);
  void psrlq(XMMRegister dst, XMMRegister src);
1083
  void pshufd(XMMRegister dst, XMMRegister src, uint8_t shuffle);
1084 1085 1086
  // Register-destination form: wraps dst in an Operand and defers to
  // pextrd(const Operand&, XMMRegister, int8_t).
  void pextrd(Register dst, XMMRegister src, int8_t offset) {
    const Operand dst_operand(dst);
    pextrd(dst_operand, src, offset);
  }
1087
  void pextrd(const Operand& dst, XMMRegister src, int8_t offset);
1088 1089 1090
  // Register-source form: wraps src in an Operand and defers to
  // pinsrd(XMMRegister, const Operand&, int8_t).
  void pinsrd(XMMRegister dst, Register src, int8_t offset) {
    const Operand src_operand(src);
    pinsrd(dst, src_operand, offset);
  }
1091
  void pinsrd(XMMRegister dst, const Operand& src, int8_t offset);
1092

1093
  // AVX instructions
1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241
  // Scalar double-precision FMA instructions.  For every mnemonic in the list
  // below two overloads are generated: the XMMRegister-source form wraps src2
  // in an Operand and calls the Operand-source form, which passes the listed
  // opcode byte to vfmasd().
#define FMA_SD_INSTRUCTION_LIST(V) \
  V(vfmadd132sd, 0x99)             \
  V(vfmadd213sd, 0xa9)             \
  V(vfmadd231sd, 0xb9)             \
  V(vfmsub132sd, 0x9b)             \
  V(vfmsub213sd, 0xab)             \
  V(vfmsub231sd, 0xbb)             \
  V(vfnmadd132sd, 0x9d)            \
  V(vfnmadd213sd, 0xad)            \
  V(vfnmadd231sd, 0xbd)            \
  V(vfnmsub132sd, 0x9f)            \
  V(vfnmsub213sd, 0xaf)            \
  V(vfnmsub231sd, 0xbf)

#define DECLARE_FMA_SD_INSTRUCTION(name, opcode)                        \
  void name(XMMRegister dst, XMMRegister src1, XMMRegister src2) {      \
    name(dst, src1, Operand(src2));                                     \
  }                                                                     \
  void name(XMMRegister dst, XMMRegister src1, const Operand& src2) {   \
    vfmasd(opcode, dst, src1, src2);                                    \
  }

  FMA_SD_INSTRUCTION_LIST(DECLARE_FMA_SD_INSTRUCTION)
#undef DECLARE_FMA_SD_INSTRUCTION
#undef FMA_SD_INSTRUCTION_LIST

  // Emitter shared by all of the scalar double-precision FMA forms above.
  void vfmasd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);

  // Scalar single-precision FMA instructions.  For every mnemonic in the list
  // below two overloads are generated: the XMMRegister-source form wraps src2
  // in an Operand and calls the Operand-source form, which passes the listed
  // opcode byte to vfmass().
#define FMA_SS_INSTRUCTION_LIST(V) \
  V(vfmadd132ss, 0x99)             \
  V(vfmadd213ss, 0xa9)             \
  V(vfmadd231ss, 0xb9)             \
  V(vfmsub132ss, 0x9b)             \
  V(vfmsub213ss, 0xab)             \
  V(vfmsub231ss, 0xbb)             \
  V(vfnmadd132ss, 0x9d)            \
  V(vfnmadd213ss, 0xad)            \
  V(vfnmadd231ss, 0xbd)            \
  V(vfnmsub132ss, 0x9f)            \
  V(vfnmsub213ss, 0xaf)            \
  V(vfnmsub231ss, 0xbf)

#define DECLARE_FMA_SS_INSTRUCTION(name, opcode)                        \
  void name(XMMRegister dst, XMMRegister src1, XMMRegister src2) {      \
    name(dst, src1, Operand(src2));                                     \
  }                                                                     \
  void name(XMMRegister dst, XMMRegister src1, const Operand& src2) {   \
    vfmass(opcode, dst, src1, src2);                                    \
  }

  FMA_SS_INSTRUCTION_LIST(DECLARE_FMA_SS_INSTRUCTION)
#undef DECLARE_FMA_SS_INSTRUCTION
#undef FMA_SS_INSTRUCTION_LIST

  // Emitter shared by all of the scalar single-precision FMA forms above.
  void vfmass(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);

1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265
  // AVX scalar double-precision add/sub/mul/div.  For every mnemonic in the
  // list two overloads are generated: the XMMRegister-source form wraps src2
  // in an Operand and calls the Operand-source form, which passes the listed
  // opcode byte to vsd().
#define AVX_SD_ARITHMETIC_LIST(V) \
  V(vaddsd, 0x58)                 \
  V(vsubsd, 0x5c)                 \
  V(vmulsd, 0x59)                 \
  V(vdivsd, 0x5e)

#define DECLARE_AVX_SD_ARITHMETIC(name, opcode)                         \
  void name(XMMRegister dst, XMMRegister src1, XMMRegister src2) {      \
    name(dst, src1, Operand(src2));                                     \
  }                                                                     \
  void name(XMMRegister dst, XMMRegister src1, const Operand& src2) {   \
    vsd(opcode, dst, src1, src2);                                       \
  }

  AVX_SD_ARITHMETIC_LIST(DECLARE_AVX_SD_ARITHMETIC)
#undef DECLARE_AVX_SD_ARITHMETIC
#undef AVX_SD_ARITHMETIC_LIST
1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277
  // vmaxsd: the Operand-source form passes opcode 0x5f to vsd(); the
  // register-source form wraps src2 in an Operand and defers to it.
  void vmaxsd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vsd(0x5f, dst, src1, src2);
  }
  void vmaxsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    const Operand src2_operand(src2);
    vmaxsd(dst, src1, src2_operand);
  }
  // vminsd: same scheme with opcode 0x5d.
  void vminsd(XMMRegister dst, XMMRegister src1, const Operand& src2) {
    vsd(0x5d, dst, src1, src2);
  }
  void vminsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
    const Operand src2_operand(src2);
    vminsd(dst, src1, src2_operand);
  }
1278 1279
  void vsd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);

1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317
  // AVX scalar single-precision arithmetic.  For every mnemonic in the list
  // two overloads are generated: the XMMRegister-source form wraps src2 in an
  // Operand and calls the Operand-source form, which passes the listed opcode
  // byte to vss().
#define AVX_SS_ARITHMETIC_LIST(V) \
  V(vaddss, 0x58)                 \
  V(vsubss, 0x5c)                 \
  V(vmulss, 0x59)                 \
  V(vdivss, 0x5e)                 \
  V(vmaxss, 0x5f)                 \
  V(vminss, 0x5d)

#define DECLARE_AVX_SS_ARITHMETIC(name, opcode)                         \
  void name(XMMRegister dst, XMMRegister src1, XMMRegister src2) {      \
    name(dst, src1, Operand(src2));                                     \
  }                                                                     \
  void name(XMMRegister dst, XMMRegister src1, const Operand& src2) {   \
    vss(opcode, dst, src1, src2);                                       \
  }

  AVX_SS_ARITHMETIC_LIST(DECLARE_AVX_SS_ARITHMETIC)
#undef DECLARE_AVX_SS_ARITHMETIC
#undef AVX_SS_ARITHMETIC_LIST

  // Emitter shared by all of the scalar single-precision forms above.
  void vss(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);

1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401
  // BMI instructions emitted through bmi1(op, reg, vreg, rm).  Every
  // register-source overload wraps its source in an Operand and defers to
  // the matching Operand overload.
  void andn(Register dst, Register src1, Register src2) {
    const Operand src2_operand(src2);
    andn(dst, src1, src2_operand);
  }
  void andn(Register dst, Register src1, const Operand& src2) {
    bmi1(0xf2, dst, src1, src2);
  }

  // bextr: note the swapped order -- src2 is passed in bmi1()'s vreg position
  // and src1 in its rm position.
  void bextr(Register dst, Register src1, Register src2) {
    const Operand src1_operand(src1);
    bextr(dst, src1_operand, src2);
  }
  void bextr(Register dst, const Operand& src1, Register src2) {
    bmi1(0xf7, dst, src2, src1);
  }

  // blsi, blsmsk and blsr all use opcode 0xf3 and differ only in the fixed
  // register code (3, 2 and 1 respectively) passed in bmi1()'s reg position;
  // dst travels in the vreg position.
  // NOTE(review): the fixed code is presumably the ModR/M opcode extension
  // (/3, /2, /1) -- confirm against the Intel SDM.
  void blsi(Register dst, Register src) {
    blsi(dst, Operand(src));
  }
  void blsi(Register dst, const Operand& src) {
    Register opcode_ext = {3};
    bmi1(0xf3, opcode_ext, dst, src);
  }
  void blsmsk(Register dst, Register src) {
    blsmsk(dst, Operand(src));
  }
  void blsmsk(Register dst, const Operand& src) {
    Register opcode_ext = {2};
    bmi1(0xf3, opcode_ext, dst, src);
  }
  void blsr(Register dst, Register src) {
    blsr(dst, Operand(src));
  }
  void blsr(Register dst, const Operand& src) {
    Register opcode_ext = {1};
    bmi1(0xf3, opcode_ext, dst, src);
  }
  // tzcnt / lzcnt / popcnt: the Operand-source overload is the primitive; the
  // register-source overload wraps src in an Operand and defers to it.
  void tzcnt(Register dst, const Operand& src);
  void tzcnt(Register dst, Register src) {
    tzcnt(dst, Operand(src));
  }

  void lzcnt(Register dst, const Operand& src);
  void lzcnt(Register dst, Register src) {
    lzcnt(dst, Operand(src));
  }

  void popcnt(Register dst, const Operand& src);
  void popcnt(Register dst, Register src) {
    popcnt(dst, Operand(src));
  }

  void bzhi(Register dst, Register src1, Register src2) {
    bzhi(dst, Operand(src1), src2);
  }
  void bzhi(Register dst, const Operand& src1, Register src2) {
    bmi2(kNone, 0xf5, dst, src2, src1);
  }
  void mulx(Register dst1, Register dst2, Register src) {
    mulx(dst1, dst2, Operand(src));
  }
  void mulx(Register dst1, Register dst2, const Operand& src) {
    bmi2(kF2, 0xf6, dst1, dst2, src);
  }
  void pdep(Register dst, Register src1, Register src2) {
    pdep(dst, src1, Operand(src2));
  }
  void pdep(Register dst, Register src1, const Operand& src2) {
    bmi2(kF2, 0xf5, dst, src1, src2);
  }
  void pext(Register dst, Register src1, Register src2) {
    pext(dst, src1, Operand(src2));
  }
  void pext(Register dst, Register src1, const Operand& src2) {
    bmi2(kF3, 0xf5, dst, src1, src2);
  }
  void sarx(Register dst, Register src1, Register src2) {
    sarx(dst, Operand(src1), src2);
  }
  void sarx(Register dst, const Operand& src1, Register src2) {
    bmi2(kF3, 0xf7, dst, src2, src1);
  }
  void shlx(Register dst, Register src1, Register src2) {
    shlx(dst, Operand(src1), src2);
  }
  void shlx(Register dst, const Operand& src1, Register src2) {
    bmi2(k66, 0xf7, dst, src2, src1);
  }
  void shrx(Register dst, Register src1, Register src2) {
    shrx(dst, Operand(src1), src2);
  }
  void shrx(Register dst, const Operand& src1, Register src2) {
    bmi2(kF2, 0xf7, dst, src2, src1);
  }
  void rorx(Register dst, Register src, byte imm8) {
    rorx(dst, Operand(src), imm8);
  }
  void rorx(Register dst, const Operand& src, byte imm8);

1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417 1418 1419 1420 1421 1422 1423 1424 1425
// X-macro list of packed AVX operations, one V(name, opcode) entry each.
// `name` is the mnemonic stem that AVX_PACKED_OP_DECLARE pastes between "v"
// and the "ps"/"pd" suffix; `opcode` is the byte handed to vps()/vpd().
#define PACKED_OP_LIST(V) \
  V(and, 0x54)            \
  V(xor, 0x57)

// Expands one PACKED_OP_LIST entry into four inline members: v<name>ps and
// v<name>pd, each with an XMMRegister-source overload (which wraps src2 in an
// Operand) and an Operand-source overload.  The ps forms forward to vps(),
// the pd forms to vpd().
#define AVX_PACKED_OP_DECLARE(name, opcode)                                  \
  void v##name##ps(XMMRegister dst, XMMRegister src1, XMMRegister src2) {    \
    vps(opcode, dst, src1, Operand(src2));                                   \
  }                                                                          \
  void v##name##ps(XMMRegister dst, XMMRegister src1, const Operand& src2) { \
    vps(opcode, dst, src1, src2);                                            \
  }                                                                          \
  void v##name##pd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {    \
    vpd(opcode, dst, src1, Operand(src2));                                   \
  }                                                                          \
  void v##name##pd(XMMRegister dst, XMMRegister src1, const Operand& src2) { \
    vpd(opcode, dst, src1, src2);                                            \
  }

  // Generates vandps, vandpd, vxorps and vxorpd.
  PACKED_OP_LIST(AVX_PACKED_OP_DECLARE);
  // Emitters taking the opcode byte explicitly; the generated members above
  // call the Operand-source overloads.
  void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vps(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);
  void vpd(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
  void vpd(byte op, XMMRegister dst, XMMRegister src1, const Operand& src2);

1426 1427 1428 1429 1430 1431
  // Prefetch src position into cache level.
  // Level 1, 2 or 3 specifies CPU cache level. Level 0 specifies a
  // non-temporal prefetch.
  void prefetch(const Operand& src, int level);
  // TODO(lrn): Need SFENCE for movnt?

1432
  // Check the code size generated from label to here.
1433 1434 1435
  // Returns the distance from the position recorded in `label` to the
  // current pc offset.
  int SizeOfCodeGeneratedSince(Label* label) {
    const int label_offset = label->pos();
    return pc_offset() - label_offset;
  }
1436

1437
  // Mark address of a debug break slot.
1438
  void RecordDebugBreakSlot(RelocInfo::Mode mode);
1439

1440
  // Record a comment relocation entry that can be used by a disassembler.
1441 1442
  // Use --code-comments to enable.
  void RecordComment(const char* msg);
1443

1444 1445
  // Record a deoptimization reason that can be used by a log or cpu profiler.
  // Use --trace-deopt to enable.
1446 1447
  void RecordDeoptReason(DeoptimizeReason reason, SourcePosition position,
                         int id);
1448

1449 1450 1451 1452
  // Writes a single data item (byte, 32-bit word, 64-bit quadword or
  // pointer-sized value) in the code stream.  Used for inline tables, e.g.,
  // jump-tables.
  void db(uint8_t data);
  void dd(uint32_t data);
1453 1454
  void dq(uint64_t data);
  // Writes a pointer-sized value by forwarding it to dd().
  void dp(uintptr_t data) {
    dd(data);
  }
1455
  void dd(Label* label);
1456

1457 1458 1459
  // Check if there is less than kGap bytes available in the buffer.
  // If this is the case, we need to grow the buffer before emitting
  // an instruction or relocation information.
1460 1461 1462
  inline bool buffer_overflow() const {
    // Overflow is reported once pc_ is no longer strictly below the point
    // kGap bytes before the relocation writer's position.
    return !(pc_ < reloc_info_writer.pos() - kGap);
  }
1463 1464 1465 1466

  // Get the number of bytes available in the buffer.
  inline int available_space() const {
    // Distance from pc_ up to the relocation writer's position.
    return reloc_info_writer.pos() - pc_;
  }

1467
  static bool IsNop(Address addr);
1468

1469 1470 1471 1472
  int relocation_writer_size() {
    return (buffer_ + buffer_size_) - reloc_info_writer.pos();
  }

1473 1474 1475
  // Avoid overflows for displacements etc.
  static const int kMaximalBufferSize = 512*MB;

1476
  // Reads the byte stored at offset pos of the buffer (no bounds check).
  byte byte_at(int pos) {
    return *(buffer_ + pos);
  }
1477 1478
  // Overwrites the byte at offset pos of the buffer (no bounds check).
  void set_byte_at(int pos, byte value) {
    *(buffer_ + pos) = value;
  }

1479 1480 1481 1482 1483 1484
  // This assembler has no embedded constant pool support, so this must never
  // be called; all arguments are ignored.
  void PatchConstantPoolAccessInstruction(int pc_offset, int offset,
                                          ConstantPoolEntry::Access access,
                                          ConstantPoolEntry::Type type) {
    UNREACHABLE();
  }
1485

1486 1487 1488
 protected:
  void emit_sse_operand(XMMRegister reg, const Operand& adr);
  void emit_sse_operand(XMMRegister dst, XMMRegister src);
1489
  void emit_sse_operand(Register dst, XMMRegister src);
1490
  void emit_sse_operand(XMMRegister dst, Register src);
1491

1492 1493
  // Returns a pointer to offset pos within the buffer.
  byte* addr_at(int pos) {
    return buffer_ + pos;
  }

1494

1495
 private:
1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506
  // Reads the 32-bit value stored at offset pos of the buffer.
  uint32_t long_at(int pos) {
    uint32_t* slot = reinterpret_cast<uint32_t*>(addr_at(pos));
    return *slot;
  }
  // Stores the 32-bit value x at offset pos of the buffer.
  void long_at_put(int pos, uint32_t x) {
    uint32_t* slot = reinterpret_cast<uint32_t*>(addr_at(pos));
    *slot = x;
  }

  // code emission
  void GrowBuffer();
  inline void emit(uint32_t x);
  inline void emit(Handle<Object> handle);
1507 1508
  inline void emit(uint32_t x,
                   RelocInfo::Mode rmode,
1509
                   TypeFeedbackId id = TypeFeedbackId::None());
1510 1511 1512
  inline void emit(Handle<Code> code,
                   RelocInfo::Mode rmode,
                   TypeFeedbackId id = TypeFeedbackId::None());
1513
  inline void emit(const Immediate& x);
1514
  inline void emit_b(Immediate x);
1515
  inline void emit_w(const Immediate& x);
1516
  inline void emit_q(uint64_t x);
1517

1518 1519 1520
  // Emit the code-object-relative offset of the label's position
  inline void emit_code_relative_offset(Label* label);

1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531
  // instruction generation
  void emit_arith_b(int op1, int op2, Register dst, int imm8);

  // Emit a basic arithmetic instruction (i.e. first byte of the family is 0x81)
  // with a given destination expression and an immediate operand.  It attempts
  // to use the shortest encoding possible.
  // sel specifies the /n in the modrm byte (see the Intel PRM).
  void emit_arith(int sel, Operand dst, const Immediate& x);

  void emit_operand(Register reg, const Operand& adr);

1532 1533
  void emit_label(Label* label);

1534 1535
  void emit_farith(int b1, int b2, int i);

1536 1537
  // Emit vex prefix
  // Prefix selector; passed as the `pp` argument of emit_vex_prefix() and
  // bmi2().
  enum SIMDPrefix {
    kNone = 0x0,
    k66 = 0x1,
    kF3 = 0x2,
    kF2 = 0x3
  };
1538
  // Vector length selector; passed as the `l` argument of emit_vex_prefix().
  // kLIG and kLZ are aliases of kL128.
  enum VectorLength {
    kL128 = 0x0,
    kL256 = 0x4,
    kLIG = kL128,
    kLZ = kL128
  };
1539
  // W selector; passed as the `w` argument of emit_vex_prefix().  kWIG is an
  // alias of kW0.
  enum VexW {
    kW0 = 0x0,
    kW1 = 0x80,
    kWIG = kW0
  };
1540
  // Leading opcode selector; passed as the `m` argument of emit_vex_prefix().
  enum LeadingOpcode {
    k0F = 0x1,
    k0F38 = 0x2,
    k0F3A = 0x3
  };
1541 1542
  inline void emit_vex_prefix(XMMRegister v, VectorLength l, SIMDPrefix pp,
                              LeadingOpcode m, VexW w);
1543 1544
  inline void emit_vex_prefix(Register v, VectorLength l, SIMDPrefix pp,
                              LeadingOpcode m, VexW w);
1545

1546 1547 1548 1549
  // labels
  void print(Label* L);
  void bind_to(Label* L, int pos);

1550 1551 1552 1553
  // displacements
  inline Displacement disp_at(Label* L);
  inline void disp_at_put(Label* L, Displacement disp);
  inline void emit_disp(Label* L, Displacement::Type type);
1554
  inline void emit_near_disp(Label* L);
1555

1556 1557 1558 1559 1560
  // Most BMI instructions are similar.
  void bmi1(byte op, Register reg, Register vreg, const Operand& rm);
  void bmi2(SIMDPrefix pp, byte op, Register reg, Register vreg,
            const Operand& rm);

1561
  // record reloc info for current pc_
1562
  void RecordRelocInfo(RelocInfo::Mode rmode, intptr_t data = 0);
1563 1564 1565

  friend class CodePatcher;
  friend class EnsureSpace;
1566

1567 1568 1569 1570 1571
  // Internal reference positions, required for (potential) patching in
  // GrowBuffer(); contains only those internal references whose labels
  // are already bound.
  std::deque<int> internal_reference_positions_;

1572 1573
  // code generation
  RelocInfoWriter reloc_info_writer;
1574 1575 1576 1577 1578 1579 1580 1581 1582 1583
};


// Helper class that ensures that there is enough space for generating
// instructions and relocation information.  The constructor makes
// sure that there is enough space and (in debug mode) the destructor
// checks that we did not generate too much.
class EnsureSpace BASE_EMBEDDED {
 public:
  // Grows the assembler's buffer up front if the assembler reports
  // buffer_overflow().  In DEBUG builds it also records the space available
  // at this point so the destructor can check how much was consumed.
  explicit EnsureSpace(Assembler* assembler) : assembler_(assembler) {
    if (assembler_->buffer_overflow()) assembler_->GrowBuffer();
#ifdef DEBUG
    space_before_ = assembler_->available_space();
#endif
  }

#ifdef DEBUG
  // DEBUG only: checks that fewer than kGap bytes were generated while this
  // object was alive.
  ~EnsureSpace() {
    int bytes_generated = space_before_ - assembler_->available_space();
    DCHECK(bytes_generated < assembler_->kGap);
  }
#endif

 private:
  // The assembler whose buffer is being guarded (not owned).
  Assembler* assembler_;
#ifdef DEBUG
  // available_space() as sampled at the end of the constructor.
  int space_before_;
#endif
};

1604 1605
}  // namespace internal
}  // namespace v8
1606

1607
#endif  // V8_IA32_ASSEMBLER_IA32_H_