Commit 09ab8e6a authored by bbudge, committed by Commit bot

[Turbofan] Add concept of FP register aliasing on ARM 32.

- Modifies RegisterConfiguration to specify complex aliasing on ARM 32 (sketched below).
- Modifies RegisterAllocator to consider aliasing.
- Modifies ParallelMove::PrepareInsertAfter to handle aliasing.
- Modifies GapResolver to split wider register moves when interference
  with smaller moves is detected.
- Modifies MoveOptimizer to handle aliasing.
- Adds ARM 32 macro-assembler pseudo move instructions to handle cases where
  split moves don't correspond to actual s-registers.
- Modifies CodeGenerator::AssembleMove and AssembleSwap to handle moves of
  different widths, and moves involving pseudo-s-registers.
- Adds unit tests for FP operand interference checking and PrepareInsertAfter.
- Adds more tests of FP for the move optimizer and register allocator.
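
For reviewers unfamiliar with the ARM 32 FP register file, a minimal sketch (not part of this CL) of the aliasing the RegisterConfiguration now models: two s-registers overlay each d-register, and two d-registers overlay each q-register, so codes map between representations by simple scaling. The helper names below are illustrative only.

// Illustrative only: ARM 32 "combine" aliasing between FP representations.
//   s(2d), s(2d+1) overlay d(d);  d(2q), d(2q+1) overlay q(q).
bool FloatAliasesDouble(int s_code, int d_code) { return s_code / 2 == d_code; }
bool DoubleAliasesSimd128(int d_code, int q_code) { return d_code / 2 == q_code; }
// e.g. s6 and s7 overlay d3, which in turn overlays q1.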

LOG=N
BUG=v8:4124

Review-Url: https://codereview.chromium.org/2410673002
Cr-Commit-Position: refs/heads/master@{#40597}
parent f6c3fd0a
......@@ -1051,6 +1051,69 @@ void MacroAssembler::VmovLow(DwVfpRegister dst, Register src) {
}
}
void MacroAssembler::VmovExtended(Register dst, int src_code) {
DCHECK_LE(32, src_code);
DCHECK_GT(64, src_code);
if (src_code & 0x1) {
VmovHigh(dst, DwVfpRegister::from_code(src_code / 2));
} else {
VmovLow(dst, DwVfpRegister::from_code(src_code / 2));
}
}
void MacroAssembler::VmovExtended(int dst_code, Register src) {
DCHECK_LE(32, dst_code);
DCHECK_GT(64, dst_code);
if (dst_code & 0x1) {
VmovHigh(DwVfpRegister::from_code(dst_code / 2), src);
} else {
VmovLow(DwVfpRegister::from_code(dst_code / 2), src);
}
}
void MacroAssembler::VmovExtended(int dst_code, int src_code,
Register scratch) {
if (src_code < 32 && dst_code < 32) {
// src and dst are both s-registers.
vmov(SwVfpRegister::from_code(dst_code),
SwVfpRegister::from_code(src_code));
} else if (src_code < 32) {
// src is an s-register.
vmov(scratch, SwVfpRegister::from_code(src_code));
VmovExtended(dst_code, scratch);
} else if (dst_code < 32) {
// dst is an s-register.
VmovExtended(scratch, src_code);
vmov(SwVfpRegister::from_code(dst_code), scratch);
} else {
// Neither src nor dst is an s-register.
DCHECK_GT(64, src_code);
DCHECK_GT(64, dst_code);
VmovExtended(scratch, src_code);
VmovExtended(dst_code, scratch);
}
}
void MacroAssembler::VmovExtended(int dst_code, const MemOperand& src,
Register scratch) {
if (dst_code >= 32) {
ldr(scratch, src);
VmovExtended(dst_code, scratch);
} else {
vldr(SwVfpRegister::from_code(dst_code), src);
}
}
void MacroAssembler::VmovExtended(const MemOperand& dst, int src_code,
Register scratch) {
if (src_code >= 32) {
VmovExtended(scratch, src_code);
str(scratch, dst);
} else {
vstr(SwVfpRegister::from_code(src_code), dst);
}
}
void MacroAssembler::LslPair(Register dst_low, Register dst_high,
Register src_low, Register src_high,
Register scratch, Register shift) {
......
......@@ -549,6 +549,14 @@ class MacroAssembler: public Assembler {
void VmovLow(Register dst, DwVfpRegister src);
void VmovLow(DwVfpRegister dst, Register src);
// Simulate s-register moves for imaginary s32 - s63 registers.
void VmovExtended(Register dst, int src_code);
void VmovExtended(int dst_code, Register src);
// Move between s-registers and imaginary s-registers.
void VmovExtended(int dst_code, int src_code, Register scratch);
void VmovExtended(int dst_code, const MemOperand& src, Register scratch);
void VmovExtended(const MemOperand& dst, int src_code, Register scratch);
void LslPair(Register dst_low, Register dst_high, Register src_low,
Register src_high, Register scratch, Register shift);
void LslPair(Register dst_low, Register dst_high, Register src_low,
......
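A usage sketch for the new pseudo moves (hypothetical call site, not part of the CL; assumes a MacroAssembler* masm and that ip is free as a scratch register): copying the imaginary s35, i.e. the high half of d17, into the real s1.

// Hypothetical call site for the pseudo s-register moves declared above.
masm->VmovExtended(/*dst_code=*/1, /*src_code=*/35, /*scratch=*/ip);
// Per the implementation, this expands to VmovHigh(ip, d17); vmov(s1, ip).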
......@@ -26,18 +26,24 @@ class GapResolver final {
InstructionOperand* destination) = 0;
};
explicit GapResolver(Assembler* assembler) : assembler_(assembler) {}
explicit GapResolver(Assembler* assembler)
: assembler_(assembler), split_rep_(MachineRepresentation::kSimd128) {}
// Resolve a set of parallel moves, emitting assembler instructions.
void Resolve(ParallelMove* parallel_move) const;
void Resolve(ParallelMove* parallel_move);
private:
// Perform the given move, possibly requiring other moves to satisfy
// dependencies.
void PerformMove(ParallelMove* moves, MoveOperands* move) const;
// Performs the given move, possibly performing other moves to unblock the
// destination operand.
void PerformMove(ParallelMove* moves, MoveOperands* move);
// Assembler used to emit moves and save registers.
Assembler* const assembler_;
// While resolving moves, the largest FP representation that can be moved.
// Any larger moves must be split into an equivalent series of moves of this
// representation.
MachineRepresentation split_rep_;
};
} // namespace compiler
......
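A hypothetical helper (not part of the CL) sketching the splitting described in the comment above: with split_rep_ at kFloat32, a blocked kFloat64 register move is replaced by two kFloat32 moves over the aliasing s-register codes.

// Illustrative only: split a d-register move into its two s-register halves.
void SplitDoubleMove(int dst_d_code, int src_d_code,
                     std::vector<std::pair<int, int>>* s_moves) {
  for (int half = 0; half < 2; ++half) {
    // On ARM, s(2 * d + half) overlays the corresponding half of d(d).
    s_moves->push_back({dst_d_code * 2 + half, src_d_code * 2 + half});
  }
}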
......@@ -64,8 +64,35 @@ FlagsCondition CommuteFlagsCondition(FlagsCondition condition) {
return condition;
}
bool InstructionOperand::InterferesWith(const InstructionOperand& that) const {
return EqualsCanonicalized(that);
bool InstructionOperand::InterferesWith(const InstructionOperand& other) const {
if (kSimpleFPAliasing || !this->IsFPLocationOperand() ||
!other.IsFPLocationOperand())
return EqualsCanonicalized(other);
// Aliasing is complex and both operands are fp locations.
const LocationOperand& loc = *LocationOperand::cast(this);
const LocationOperand& other_loc = LocationOperand::cast(other);
LocationOperand::LocationKind kind = loc.location_kind();
LocationOperand::LocationKind other_kind = other_loc.location_kind();
if (kind != other_kind) return false;
MachineRepresentation rep = loc.representation();
MachineRepresentation other_rep = other_loc.representation();
if (rep == other_rep) return EqualsCanonicalized(other);
if (kind == LocationOperand::REGISTER) {
// FP register-register interference.
return GetRegConfig()->AreAliases(rep, loc.register_code(), other_rep,
other_loc.register_code());
} else {
// FP slot-slot interference. Slots of different FP reps can alias because
// the gap resolver may break a move into 2 or 4 equivalent smaller moves.
DCHECK_EQ(LocationOperand::STACK_SLOT, kind);
int index_hi = loc.index();
int index_lo = index_hi - (1 << ElementSizeLog2Of(rep)) / kPointerSize + 1;
int other_index_hi = other_loc.index();
int other_index_lo =
other_index_hi - (1 << ElementSizeLog2Of(other_rep)) / kPointerSize + 1;
return other_index_hi >= index_lo && index_hi >= other_index_lo;
}
return false;
}
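// A standalone sketch (not part of the CL) of the slot-overlap arithmetic
// above; assumes kPointerSize == 4 as on ARM 32.
bool FPSlotsOverlap(MachineRepresentation rep, int index_hi,
                    MachineRepresentation other_rep, int other_index_hi) {
  int index_lo = index_hi - (1 << ElementSizeLog2Of(rep)) / kPointerSize + 1;
  int other_index_lo =
      other_index_hi - (1 << ElementSizeLog2Of(other_rep)) / kPointerSize + 1;
  return other_index_hi >= index_lo && index_hi >= other_index_lo;
}
// e.g. a kFloat64 slot with index 3 spans slots [2, 3], so it overlaps a
// kFloat32 slot at index 2 but not one at index 4.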
void InstructionOperand::Print(const RegisterConfiguration* config) const {
......@@ -232,28 +259,31 @@ bool ParallelMove::IsRedundant() const {
return true;
}
MoveOperands* ParallelMove::PrepareInsertAfter(MoveOperands* move) const {
void ParallelMove::PrepareInsertAfter(
MoveOperands* move, ZoneVector<MoveOperands*>* to_eliminate) const {
bool no_aliasing =
kSimpleFPAliasing || !move->destination().IsFPLocationOperand();
MoveOperands* replacement = nullptr;
MoveOperands* to_eliminate = nullptr;
MoveOperands* eliminated = nullptr;
for (MoveOperands* curr : *this) {
if (curr->IsEliminated()) continue;
if (curr->destination().EqualsCanonicalized(move->source())) {
// We must replace move's source with curr's destination in order to
// insert it into this ParallelMove.
DCHECK(!replacement);
replacement = curr;
if (to_eliminate != nullptr) break;
} else if (curr->destination().EqualsCanonicalized(move->destination())) {
DCHECK(!to_eliminate);
to_eliminate = curr;
if (replacement != nullptr) break;
if (no_aliasing && eliminated != nullptr) break;
} else if (curr->destination().InterferesWith(move->destination())) {
// We can eliminate curr, since move overwrites at least a part of its
// destination, implying its value is no longer live.
eliminated = curr;
to_eliminate->push_back(curr);
if (no_aliasing && replacement != nullptr) break;
}
}
DCHECK_IMPLIES(replacement == to_eliminate, replacement == nullptr);
if (replacement != nullptr) move->set_source(replacement->source());
return to_eliminate;
}
ExplicitOperand::ExplicitOperand(LocationKind kind, MachineRepresentation rep,
int index)
: LocationOperand(EXPLICIT, kind, rep, index) {
......
......@@ -28,8 +28,7 @@ namespace compiler {
// Forward declarations.
class Schedule;
class InstructionOperand {
class V8_EXPORT_PRIVATE InstructionOperand {
public:
static const int kInvalidVirtualRegister = -1;
......@@ -119,7 +118,7 @@ class InstructionOperand {
return this->GetCanonicalizedValue() < that.GetCanonicalizedValue();
}
bool InterferesWith(const InstructionOperand& that) const;
bool InterferesWith(const InstructionOperand& other) const;
// APIs to aid debugging. For general-stream APIs, use operator<<
void Print(const RegisterConfiguration* config) const;
......@@ -641,8 +640,14 @@ uint64_t InstructionOperand::GetCanonicalizedValue() const {
if (IsAnyLocationOperand()) {
MachineRepresentation canonical = MachineRepresentation::kNone;
if (IsFPRegister()) {
// We treat all FP register operands the same for simple aliasing.
canonical = MachineRepresentation::kFloat64;
if (kSimpleFPAliasing) {
// We treat all FP register operands the same for simple aliasing.
canonical = MachineRepresentation::kFloat64;
} else {
// We need to distinguish FP register operands of different reps when
// aliasing is not simple (e.g. ARM).
canonical = LocationOperand::cast(this)->representation();
}
}
return InstructionOperand::KindField::update(
LocationOperand::RepresentationField::update(this->value_, canonical),
......@@ -659,8 +664,8 @@ struct CompareOperandModuloType {
}
};
class MoveOperands final : public ZoneObject {
class V8_EXPORT_PRIVATE MoveOperands final
: public NON_EXPORTED_BASE(ZoneObject) {
public:
MoveOperands(const InstructionOperand& source,
const InstructionOperand& destination)
......@@ -685,11 +690,6 @@ class MoveOperands final : public ZoneObject {
}
void SetPending() { destination_ = InstructionOperand(); }
// True if this move is a move into the given destination operand.
bool Blocks(const InstructionOperand& destination) const {
return !IsEliminated() && source().InterferesWith(destination);
}
// A move is redundant if it's been eliminated or if its source and
// destination are the same.
bool IsRedundant() const {
......@@ -724,8 +724,9 @@ struct PrintableMoveOperands {
std::ostream& operator<<(std::ostream& os, const PrintableMoveOperands& mo);
class ParallelMove final : public ZoneVector<MoveOperands*>, public ZoneObject {
class V8_EXPORT_PRIVATE ParallelMove final
: public NON_EXPORTED_BASE(ZoneVector<MoveOperands *>),
public NON_EXPORTED_BASE(ZoneObject) {
public:
explicit ParallelMove(Zone* zone) : ZoneVector<MoveOperands*>(zone) {
reserve(4);
......@@ -748,9 +749,10 @@ class ParallelMove final : public ZoneVector<MoveOperands*>, public ZoneObject {
bool IsRedundant() const;
// Prepare this ParallelMove to insert move as if it happened in a subsequent
// ParallelMove. move->source() may be changed. The MoveOperand returned
// must be Eliminated.
MoveOperands* PrepareInsertAfter(MoveOperands* move) const;
// ParallelMove. move->source() may be changed. Any MoveOperands added to
// to_eliminate must be Eliminated.
void PrepareInsertAfter(MoveOperands* move,
ZoneVector<MoveOperands*>* to_eliminate) const;
private:
DISALLOW_COPY_AND_ASSIGN(ParallelMove);
......
......@@ -25,11 +25,79 @@ struct MoveKeyCompare {
};
typedef ZoneMap<MoveKey, unsigned, MoveKeyCompare> MoveMap;
typedef ZoneSet<InstructionOperand, CompareOperandModuloType> OperandSet;
bool Blocks(const OperandSet& set, const InstructionOperand& operand) {
return set.find(operand) != set.end();
}
class OperandSet {
public:
explicit OperandSet(Zone* zone) : set_(zone), fp_reps_(0) {}
void InsertOp(const InstructionOperand& op) {
set_.insert(op);
if (!kSimpleFPAliasing && op.IsFPRegister())
fp_reps_ |= RepBit(LocationOperand::cast(op).representation());
}
bool ContainsOpOrAlias(const InstructionOperand& op) const {
if (set_.find(op) != set_.end()) return true;
if (!kSimpleFPAliasing && op.IsFPRegister()) {
// Platforms where FP registers have complex aliasing need extra checks.
const LocationOperand& loc = LocationOperand::cast(op);
MachineRepresentation rep = loc.representation();
// If we haven't encountered mixed-rep FP registers, skip the extra checks.
if (!HasMixedFPReps(fp_reps_ | RepBit(rep))) return false;
// Check register against aliasing registers of other FP representations.
MachineRepresentation other_rep1, other_rep2;
switch (rep) {
case MachineRepresentation::kFloat32:
other_rep1 = MachineRepresentation::kFloat64;
other_rep2 = MachineRepresentation::kSimd128;
break;
case MachineRepresentation::kFloat64:
other_rep1 = MachineRepresentation::kFloat32;
other_rep2 = MachineRepresentation::kSimd128;
break;
case MachineRepresentation::kSimd128:
other_rep1 = MachineRepresentation::kFloat32;
other_rep2 = MachineRepresentation::kFloat64;
break;
default:
UNREACHABLE();
break;
}
const RegisterConfiguration* config = RegisterConfiguration::Turbofan();
int base = -1;
int aliases =
config->GetAliases(rep, loc.register_code(), other_rep1, &base);
DCHECK(aliases > 0 || (aliases == 0 && base == -1));
while (aliases--) {
if (set_.find(AllocatedOperand(LocationOperand::REGISTER, other_rep1,
base + aliases)) != set_.end())
return true;
}
aliases = config->GetAliases(rep, loc.register_code(), other_rep2, &base);
DCHECK(aliases > 0 || (aliases == 0 && base == -1));
while (aliases--) {
if (set_.find(AllocatedOperand(LocationOperand::REGISTER, other_rep2,
base + aliases)) != set_.end())
return true;
}
}
return false;
}
private:
static int RepBit(MachineRepresentation rep) {
return 1 << static_cast<int>(rep);
}
static bool HasMixedFPReps(int reps) {
return reps && !base::bits::IsPowerOfTwo32(reps);
}
ZoneSet<InstructionOperand, CompareOperandModuloType> set_;
int fp_reps_;
};
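// Illustrative use of OperandSet (not part of the CL; assumes an ARM-style
// combine-aliasing configuration where d1 overlays s2 and s3).
void OperandSetAliasExample(Zone* zone) {
  OperandSet set(zone);
  set.InsertOp(AllocatedOperand(LocationOperand::REGISTER,
                                MachineRepresentation::kFloat64, 1));  // d1
  // s2 itself is not in the set, but its containing d1 is, so this holds.
  DCHECK(set.ContainsOpOrAlias(AllocatedOperand(
      LocationOperand::REGISTER, MachineRepresentation::kFloat32, 2)));  // s2
}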
int FindFirstNonEmptySlot(const Instruction* instr) {
int i = Instruction::FIRST_GAP_POSITION;
......@@ -98,21 +166,21 @@ void MoveOptimizer::RemoveClobberedDestinations(Instruction* instruction) {
// Outputs and temps are treated together as potentially clobbering a
// destination operand.
for (size_t i = 0; i < instruction->OutputCount(); ++i) {
outputs.insert(*instruction->OutputAt(i));
outputs.InsertOp(*instruction->OutputAt(i));
}
for (size_t i = 0; i < instruction->TempCount(); ++i) {
outputs.insert(*instruction->TempAt(i));
outputs.InsertOp(*instruction->TempAt(i));
}
// Input operands block elisions.
for (size_t i = 0; i < instruction->InputCount(); ++i) {
inputs.insert(*instruction->InputAt(i));
inputs.InsertOp(*instruction->InputAt(i));
}
// Elide moves made redundant by the instruction.
for (MoveOperands* move : *moves) {
if (outputs.find(move->destination()) != outputs.end() &&
inputs.find(move->destination()) == inputs.end()) {
if (outputs.ContainsOpOrAlias(move->destination()) &&
!inputs.ContainsOpOrAlias(move->destination())) {
move->Eliminate();
}
}
......@@ -121,7 +189,7 @@ void MoveOptimizer::RemoveClobberedDestinations(Instruction* instruction) {
// the one for its input.
if (instruction->IsRet() || instruction->IsTailCall()) {
for (MoveOperands* move : *moves) {
if (inputs.find(move->destination()) == inputs.end()) {
if (!inputs.ContainsOpOrAlias(move->destination())) {
move->Eliminate();
}
}
......@@ -140,7 +208,7 @@ void MoveOptimizer::MigrateMoves(Instruction* to, Instruction* from) {
// If an operand is an input to the instruction, we cannot move assignments
// where it appears on the LHS.
for (size_t i = 0; i < from->InputCount(); ++i) {
dst_cant_be.insert(*from->InputAt(i));
dst_cant_be.InsertOp(*from->InputAt(i));
}
// If an operand is output to the instruction, we cannot move assignments
// where it appears on the RHS, because we would lose its value before the
......@@ -149,10 +217,10 @@ void MoveOptimizer::MigrateMoves(Instruction* to, Instruction* from) {
// The output can't appear on the LHS because we performed
// RemoveClobberedDestinations for the "from" instruction.
for (size_t i = 0; i < from->OutputCount(); ++i) {
src_cant_be.insert(*from->OutputAt(i));
src_cant_be.InsertOp(*from->OutputAt(i));
}
for (size_t i = 0; i < from->TempCount(); ++i) {
src_cant_be.insert(*from->TempAt(i));
src_cant_be.InsertOp(*from->TempAt(i));
}
for (MoveOperands* move : *from_moves) {
if (move->IsRedundant()) continue;
......@@ -160,7 +228,7 @@ void MoveOptimizer::MigrateMoves(Instruction* to, Instruction* from) {
// move "z = dest", because z would become y rather than "V".
// We assume CompressMoves has happened before this, which means we don't
// have more than one assignment to dest.
src_cant_be.insert(move->destination());
src_cant_be.InsertOp(move->destination());
}
ZoneSet<MoveKey, MoveKeyCompare> move_candidates(local_zone());
......@@ -168,7 +236,7 @@ void MoveOptimizer::MigrateMoves(Instruction* to, Instruction* from) {
// destination operands are eligible for being moved down.
for (MoveOperands* move : *from_moves) {
if (move->IsRedundant()) continue;
if (!Blocks(dst_cant_be, move->destination())) {
if (!dst_cant_be.ContainsOpOrAlias(move->destination())) {
MoveKey key = {move->source(), move->destination()};
move_candidates.insert(key);
}
......@@ -183,8 +251,8 @@ void MoveOptimizer::MigrateMoves(Instruction* to, Instruction* from) {
auto current = iter;
++iter;
InstructionOperand src = current->source;
if (Blocks(src_cant_be, src)) {
src_cant_be.insert(current->destination);
if (src_cant_be.ContainsOpOrAlias(src)) {
src_cant_be.InsertOp(current->destination);
move_candidates.erase(current);
changed = true;
}
......@@ -223,8 +291,7 @@ void MoveOptimizer::CompressMoves(ParallelMove* left, MoveOpVector* right) {
// merging the two gaps.
for (MoveOperands* move : *right) {
if (move->IsRedundant()) continue;
MoveOperands* to_eliminate = left->PrepareInsertAfter(move);
if (to_eliminate != nullptr) eliminated.push_back(to_eliminate);
left->PrepareInsertAfter(move, &eliminated);
}
// Eliminate dead moves.
for (MoveOperands* to_eliminate : eliminated) {
......@@ -360,7 +427,7 @@ void MoveOptimizer::OptimizeMerge(InstructionBlock* block) {
// there are such moves, we could move them, but the destination of the
// moves staying behind can't appear as a source of a common move,
// because the move staying behind will clobber this destination.
conflicting_srcs.insert(dest);
conflicting_srcs.InsertOp(dest);
move_map.erase(current);
}
}
......@@ -374,9 +441,8 @@ void MoveOptimizer::OptimizeMerge(InstructionBlock* block) {
auto current = iter;
++iter;
DCHECK_EQ(block->PredecessorCount(), current->second);
if (conflicting_srcs.find(current->first.source) !=
conflicting_srcs.end()) {
conflicting_srcs.insert(current->first.destination);
if (conflicting_srcs.ContainsOpOrAlias(current->first.source)) {
conflicting_srcs.InsertOp(current->first.destination);
move_map.erase(current);
changed = true;
}
......
......@@ -357,6 +357,11 @@ class V8_EXPORT_PRIVATE LiveRange : public NON_EXPORTED_BASE(ZoneObject) {
UsePosition* NextUsePositionRegisterIsBeneficial(
LifetimePosition start) const;
// Returns lifetime position for which register is beneficial in this live
// range and which follows both start and last processed use position.
LifetimePosition NextLifetimePositionRegisterIsBeneficial(
const LifetimePosition& start) const;
// Returns use position for which register is beneficial in this live
// range and which precedes start.
UsePosition* PreviousUsePositionRegisterIsBeneficial(
......@@ -773,12 +778,24 @@ class RegisterAllocationData final : public ZoneObject {
ZoneVector<TopLevelLiveRange*>& fixed_live_ranges() {
return fixed_live_ranges_;
}
ZoneVector<TopLevelLiveRange*>& fixed_float_live_ranges() {
return fixed_float_live_ranges_;
}
const ZoneVector<TopLevelLiveRange*>& fixed_float_live_ranges() const {
return fixed_float_live_ranges_;
}
ZoneVector<TopLevelLiveRange*>& fixed_double_live_ranges() {
return fixed_double_live_ranges_;
}
const ZoneVector<TopLevelLiveRange*>& fixed_double_live_ranges() const {
return fixed_double_live_ranges_;
}
ZoneVector<TopLevelLiveRange*>& fixed_simd128_live_ranges() {
return fixed_simd128_live_ranges_;
}
const ZoneVector<TopLevelLiveRange*>& fixed_simd128_live_ranges() const {
return fixed_simd128_live_ranges_;
}
ZoneVector<BitVector*>& live_in_sets() { return live_in_sets_; }
ZoneVector<BitVector*>& live_out_sets() { return live_out_sets_; }
ZoneVector<SpillRange*>& spill_ranges() { return spill_ranges_; }
......@@ -840,7 +857,9 @@ class RegisterAllocationData final : public ZoneObject {
ZoneVector<BitVector*> live_out_sets_;
ZoneVector<TopLevelLiveRange*> live_ranges_;
ZoneVector<TopLevelLiveRange*> fixed_live_ranges_;
ZoneVector<TopLevelLiveRange*> fixed_float_live_ranges_;
ZoneVector<TopLevelLiveRange*> fixed_double_live_ranges_;
ZoneVector<TopLevelLiveRange*> fixed_simd128_live_ranges_;
ZoneVector<SpillRange*> spill_ranges_;
DelayedReferences delayed_references_;
BitVector* assigned_registers_;
......@@ -1058,6 +1077,8 @@ class LinearScanAllocator final : public RegisterAllocator {
const Vector<LifetimePosition>& free_until_pos);
bool TryAllocatePreferredReg(LiveRange* range,
const Vector<LifetimePosition>& free_until_pos);
void GetFPRegisterSet(MachineRepresentation rep, int* num_regs,
int* num_codes, const int** codes) const;
void FindFreeRegistersForRange(LiveRange* range,
Vector<LifetimePosition> free_until_pos);
void ProcessCurrentRange(LiveRange* current);
......
......@@ -178,6 +178,17 @@ struct Allocator {
// Allocate a floating point register/stack location.
if (fp_offset < fp_count) {
DoubleRegister reg = fp_regs[fp_offset++];
#if V8_TARGET_ARCH_ARM
// Allocate floats using a double register, but modify the code to
// reflect how ARM FP registers alias.
// TODO(bbudge) Modify wasm linkage to allow use of all float regs.
if (type == kAstF32) {
int float_reg_code = reg.code() * 2;
DCHECK(float_reg_code < RegisterConfiguration::kMaxFPRegisters);
return regloc(DoubleRegister::from_code(float_reg_code),
MachineTypeFor(type));
}
#endif
return regloc(reg, MachineTypeFor(type));
} else {
int offset = -1 - stack_offset;
......
......@@ -239,7 +239,7 @@ inline bool IsAnyTagged(MachineRepresentation rep) {
}
// Gets the log2 of the element size in bytes of the machine type.
inline int ElementSizeLog2Of(MachineRepresentation rep) {
V8_EXPORT_PRIVATE inline int ElementSizeLog2Of(MachineRepresentation rep) {
switch (rep) {
case MachineRepresentation::kBit:
case MachineRepresentation::kWord8:
......
......@@ -70,15 +70,12 @@ class ArchDefaultRegisterConfiguration : public RegisterConfiguration {
#if V8_TARGET_ARCH_IA32
kMaxAllocatableGeneralRegisterCount,
kMaxAllocatableDoubleRegisterCount,
kMaxAllocatableDoubleRegisterCount,
#elif V8_TARGET_ARCH_X87
kMaxAllocatableGeneralRegisterCount,
compiler == TURBOFAN ? 1 : kMaxAllocatableDoubleRegisterCount,
compiler == TURBOFAN ? 1 : kMaxAllocatableDoubleRegisterCount,
#elif V8_TARGET_ARCH_X64
kMaxAllocatableGeneralRegisterCount,
kMaxAllocatableDoubleRegisterCount,
kMaxAllocatableDoubleRegisterCount,
#elif V8_TARGET_ARCH_ARM
FLAG_enable_embedded_constant_pool
? (kMaxAllocatableGeneralRegisterCount - 1)
......@@ -86,27 +83,21 @@ class ArchDefaultRegisterConfiguration : public RegisterConfiguration {
CpuFeatures::IsSupported(VFP32DREGS)
? kMaxAllocatableDoubleRegisterCount
: (ALLOCATABLE_NO_VFP32_DOUBLE_REGISTERS(REGISTER_COUNT) 0),
ALLOCATABLE_NO_VFP32_DOUBLE_REGISTERS(REGISTER_COUNT) 0,
#elif V8_TARGET_ARCH_ARM64
kMaxAllocatableGeneralRegisterCount,
kMaxAllocatableDoubleRegisterCount,
kMaxAllocatableDoubleRegisterCount,
#elif V8_TARGET_ARCH_MIPS
kMaxAllocatableGeneralRegisterCount,
kMaxAllocatableDoubleRegisterCount,
kMaxAllocatableDoubleRegisterCount,
#elif V8_TARGET_ARCH_MIPS64
kMaxAllocatableGeneralRegisterCount,
kMaxAllocatableDoubleRegisterCount,
kMaxAllocatableDoubleRegisterCount,
#elif V8_TARGET_ARCH_PPC
kMaxAllocatableGeneralRegisterCount,
kMaxAllocatableDoubleRegisterCount,
kMaxAllocatableDoubleRegisterCount,
#elif V8_TARGET_ARCH_S390
kMaxAllocatableGeneralRegisterCount,
kMaxAllocatableDoubleRegisterCount,
kMaxAllocatableDoubleRegisterCount,
#else
#error Unsupported target architecture.
#endif
......@@ -145,7 +136,6 @@ const RegisterConfiguration* RegisterConfiguration::Turbofan() {
RegisterConfiguration::RegisterConfiguration(
int num_general_registers, int num_double_registers,
int num_allocatable_general_registers, int num_allocatable_double_registers,
int num_allocatable_aliased_double_registers,
const int* allocatable_general_codes, const int* allocatable_double_codes,
AliasingKind fp_aliasing_kind, const char* const* general_register_names,
const char* const* float_register_names,
......@@ -158,8 +148,6 @@ RegisterConfiguration::RegisterConfiguration(
num_allocatable_general_registers_(num_allocatable_general_registers),
num_allocatable_float_registers_(0),
num_allocatable_double_registers_(num_allocatable_double_registers),
num_allocatable_aliased_double_registers_(
num_allocatable_aliased_double_registers),
num_allocatable_simd128_registers_(0),
allocatable_general_codes_mask_(0),
allocatable_float_codes_mask_(0),
......
......@@ -36,7 +36,6 @@ class V8_EXPORT_PRIVATE RegisterConfiguration {
RegisterConfiguration(int num_general_registers, int num_double_registers,
int num_allocatable_general_registers,
int num_allocatable_double_registers,
int num_allocatable_aliased_double_registers,
const int* allocatable_general_codes,
const int* allocatable_double_codes,
AliasingKind fp_aliasing_kind,
......@@ -58,12 +57,6 @@ class V8_EXPORT_PRIVATE RegisterConfiguration {
int num_allocatable_double_registers() const {
return num_allocatable_double_registers_;
}
// TODO(bbudge): This is a temporary work-around required because our
// register allocator does not yet support the aliasing of single/double
// registers on ARM.
int num_allocatable_aliased_double_registers() const {
return num_allocatable_aliased_double_registers_;
}
int num_allocatable_simd128_registers() const {
return num_allocatable_simd128_registers_;
}
......@@ -143,7 +136,6 @@ class V8_EXPORT_PRIVATE RegisterConfiguration {
int num_allocatable_general_registers_;
int num_allocatable_float_registers_;
int num_allocatable_double_registers_;
int num_allocatable_aliased_double_registers_;
int num_allocatable_simd128_registers_;
int32_t allocatable_general_codes_mask_;
int32_t allocatable_float_codes_mask_;
......
......@@ -26,6 +26,8 @@ class zone_allocator {
typedef zone_allocator<O> other;
};
// TODO(bbudge) Remove when V8 updates to MSVS 2015. See crbug.com/603131.
zone_allocator() : zone_(nullptr) { UNREACHABLE(); }
explicit zone_allocator(Zone* zone) throw() : zone_(zone) {}
explicit zone_allocator(const zone_allocator& other) throw()
: zone_(other.zone_) {}
......@@ -62,7 +64,6 @@ class zone_allocator {
Zone* zone() { return zone_; }
private:
zone_allocator();
Zone* zone_;
};
......
......@@ -13,15 +13,32 @@ namespace compiler {
const auto GetRegConfig = RegisterConfiguration::Turbofan;
// Fragments the given operand into an equivalent set of operands to simplify
// ParallelMove equivalence testing.
// Fragments the given FP operand into an equivalent set of FP operands to
// simplify ParallelMove equivalence testing.
void GetCanonicalOperands(const InstructionOperand& op,
std::vector<InstructionOperand>* fragments) {
CHECK(!kSimpleFPAliasing);
CHECK(op.IsFPLocationOperand());
// TODO(bbudge) Split into float operands on platforms with non-simple FP
// register aliasing.
fragments->push_back(op);
const LocationOperand& loc = LocationOperand::cast(op);
MachineRepresentation rep = loc.representation();
int base = -1;
int aliases = GetRegConfig()->GetAliases(
rep, 0, MachineRepresentation::kFloat32, &base);
CHECK_LT(0, aliases);
CHECK_GE(4, aliases);
int index = -1;
int step = 1;
if (op.IsFPRegister()) {
index = loc.register_code() * aliases;
} else {
index = loc.index();
step = -1;
}
for (int i = 0; i < aliases; i++) {
fragments->push_back(AllocatedOperand(loc.location_kind(),
MachineRepresentation::kFloat32,
index + i * step));
}
}
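// Worked illustration (not part of the CL; assumes two kFloat32 aliases per
// kFloat64, as on ARM).
void GetCanonicalOperandsExample() {
  std::vector<InstructionOperand> fragments;
  // Register d2 fragments into its float aliases s4 and s5 (step +1).
  GetCanonicalOperands(AllocatedOperand(LocationOperand::REGISTER,
                                        MachineRepresentation::kFloat64, 2),
                       &fragments);
  CHECK_EQ(2u, fragments.size());
  fragments.clear();
  // A kFloat64 stack slot at index 7 fragments into slots 7 and 6 (step -1).
  GetCanonicalOperands(AllocatedOperand(LocationOperand::STACK_SLOT,
                                        MachineRepresentation::kFloat64, 7),
                       &fragments);
  CHECK_EQ(2u, fragments.size());
}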
// The state of our move interpreter is the mapping of operands to values. Note
......@@ -36,7 +53,9 @@ class InterpreterState {
const InstructionOperand& dst = m->destination();
if (!kSimpleFPAliasing && src.IsFPLocationOperand() &&
dst.IsFPLocationOperand()) {
// Canonicalize FP location-location moves.
// Canonicalize FP location-location moves by fragmenting them into
// an equivalent sequence of float32 moves, to simplify state
// equivalence testing.
std::vector<InstructionOperand> src_fragments;
GetCanonicalOperands(src, &src_fragments);
CHECK(!src_fragments.empty());
......@@ -115,9 +134,11 @@ class InterpreterState {
int index;
if (!is_constant) {
const LocationOperand& loc_op = LocationOperand::cast(op);
// Canonicalize FP location operand representations to kFloat64.
// Preserve FP representation when FP register aliasing is complex.
// Otherwise, canonicalize to kFloat64.
if (IsFloatingPoint(loc_op.representation())) {
rep = MachineRepresentation::kFloat64;
rep = kSimpleFPAliasing ? MachineRepresentation::kFloat64
: loc_op.representation();
}
if (loc_op.IsAnyRegister()) {
index = loc_op.register_code();
......@@ -321,9 +342,11 @@ class ParallelMoveCreator : public HandleAndZoneScope {
auto GetValidRegisterCode = [&conf](MachineRepresentation rep, int index) {
switch (rep) {
case MachineRepresentation::kFloat32:
return conf->RegisterConfiguration::GetAllocatableFloatCode(index);
case MachineRepresentation::kFloat64:
case MachineRepresentation::kSimd128:
return conf->RegisterConfiguration::GetAllocatableDoubleCode(index);
case MachineRepresentation::kSimd128:
return conf->RegisterConfiguration::GetAllocatableSimd128Code(index);
default:
return conf->RegisterConfiguration::GetAllocatableGeneralCode(index);
}
......@@ -368,6 +391,118 @@ void RunTest(ParallelMove* pm, Zone* zone) {
CHECK_EQ(mi1.state(), mi2.state());
}
TEST(Aliasing) {
// On platforms with simple aliasing, these parallel moves are ill-formed.
if (kSimpleFPAliasing) return;
ParallelMoveCreator pmc;
Zone* zone = pmc.main_zone();
auto s0 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat32, 0);
auto s1 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat32, 1);
auto s2 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat32, 2);
auto s3 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat32, 3);
auto s4 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat32, 4);
auto d0 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat64, 0);
auto d1 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat64, 1);
auto d16 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat64, 16);
// Double slots must be odd to match frame allocation.
auto dSlot = AllocatedOperand(LocationOperand::STACK_SLOT,
MachineRepresentation::kFloat64, 3);
// Cycles involving s- and d-registers.
{
std::vector<InstructionOperand> moves = {
s2, s0, // s2 <- s0
d0, d1 // d0 <- d1
};
RunTest(pmc.Create(moves), zone);
}
{
std::vector<InstructionOperand> moves = {
d0, d1, // d0 <- d1
s2, s0 // s2 <- s0
};
RunTest(pmc.Create(moves), zone);
}
{
std::vector<InstructionOperand> moves = {
s2, s1, // s2 <- s1
d0, d1 // d0 <- d1
};
RunTest(pmc.Create(moves), zone);
}
{
std::vector<InstructionOperand> moves = {
d0, d1, // d0 <- d1
s2, s1 // s2 <- s1
};
RunTest(pmc.Create(moves), zone);
}
// Two cycles involving a single d-register.
{
std::vector<InstructionOperand> moves = {
d0, d1, // d0 <- d1
s2, s1, // s2 <- s1
s3, s0 // s3 <- s0
};
RunTest(pmc.Create(moves), zone);
}
// Cycle with a float move that must be deferred until after swaps.
{
std::vector<InstructionOperand> moves = {
d0, d1, // d0 <- d1
s2, s0, // s2 <- s0
s3, s4 // s3 <- s4 must be deferred
};
RunTest(pmc.Create(moves), zone);
}
// Cycles involving s-registers and a non-aliased d-register.
{
std::vector<InstructionOperand> moves = {
d16, d0, // d16 <- d0
s1, s2, // s1 <- s2
d1, d16 // d1 <- d16
};
RunTest(pmc.Create(moves), zone);
}
{
std::vector<InstructionOperand> moves = {
s2, s1, // s2 <- s1
d0, d16, // d0 <- d16
d16, d1 // d16 <- d1
};
RunTest(pmc.Create(moves), zone);
}
{
std::vector<InstructionOperand> moves = {
d0, d16, // d0 <- d16
d16, d1, // d16 <- d1
s3, s0 // s3 <- s0
};
RunTest(pmc.Create(moves), zone);
}
// Cycle involving aliasing registers and a slot.
{
std::vector<InstructionOperand> moves = {
dSlot, d0, // dSlot <- d0
d1, dSlot, // d1 <- dSlot
s0, s3 // s0 <- s3
};
RunTest(pmc.Create(moves), zone);
}
}
TEST(FuzzResolver) {
ParallelMoveCreator pmc;
for (int size = 0; size < 80; ++size) {
......
......@@ -87,8 +87,16 @@ class RegisterPairs : public Pairs {
class Float32RegisterPairs : public Pairs {
public:
Float32RegisterPairs()
: Pairs(100, GetRegConfig()->num_allocatable_aliased_double_registers(),
GetRegConfig()->allocatable_double_codes()) {}
: Pairs(
100,
#if V8_TARGET_ARCH_ARM
// TODO(bbudge) Modify wasm linkage to allow use of all float regs.
GetRegConfig()->num_allocatable_double_registers() / 2 - 2,
#else
GetRegConfig()->num_allocatable_double_registers(),
#endif
GetRegConfig()->allocatable_double_codes()) {
}
};
......@@ -127,6 +135,10 @@ struct Allocator {
// Allocate a floating point register/stack location.
if (fp_offset < fp_count) {
int code = fp_regs[fp_offset++];
#if V8_TARGET_ARCH_ARM
// TODO(bbudge) Modify wasm linkage to allow use of all float regs.
if (type.representation() == MachineRepresentation::kFloat32) code *= 2;
#endif
return LinkageLocation::ForRegister(code, type);
} else {
int offset = -1 - stack_offset;
......
......@@ -50,6 +50,7 @@ v8_executable("unittests") {
"compiler/instruction-selector-unittest.h",
"compiler/instruction-sequence-unittest.cc",
"compiler/instruction-sequence-unittest.h",
"compiler/instruction-unittest.cc",
"compiler/int64-lowering-unittest.cc",
"compiler/js-builtin-reducer-unittest.cc",
"compiler/js-create-lowering-unittest.cc",
......
......@@ -22,11 +22,8 @@ static char register_names_[10 * (RegisterConfiguration::kMaxGeneralRegisters +
namespace {
static int allocatable_codes[InstructionSequenceTest::kDefaultNRegs] = {
0, 1, 2, 3, 4, 5, 6, 7};
static int allocatable_double_codes[InstructionSequenceTest::kDefaultNRegs] = {
0, 1, 2, 3, 4, 5, 6, 7};
}
static void InitializeRegisterNames() {
char* loc = register_names_;
for (int i = 0; i < RegisterConfiguration::kMaxGeneralRegisters; ++i) {
......@@ -92,8 +89,7 @@ RegisterConfiguration* InstructionSequenceTest::config() {
if (!config_) {
config_.reset(new RegisterConfiguration(
num_general_registers_, num_double_registers_, num_general_registers_,
num_double_registers_, num_double_registers_, allocatable_codes,
allocatable_double_codes,
num_double_registers_, allocatable_codes, allocatable_codes,
kSimpleFPAliasing ? RegisterConfiguration::OVERLAP
: RegisterConfiguration::COMBINE,
general_register_names_,
......
......@@ -20,7 +20,9 @@ class InstructionSequenceTest : public TestWithIsolateAndZone {
static const int kDefaultNRegs = 8;
static const int kNoValue = kMinInt;
static const MachineRepresentation kNoRep = MachineRepresentation::kNone;
static const MachineRepresentation kFloat32 = MachineRepresentation::kFloat32;
static const MachineRepresentation kFloat64 = MachineRepresentation::kFloat64;
static const MachineRepresentation kSimd128 = MachineRepresentation::kSimd128;
typedef RpoNumber Rpo;
......
// Copyright 2016 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/compiler/instruction.h"
#include "src/register-configuration.h"
#include "test/unittests/test-utils.h"
#include "testing/gtest-support.h"
namespace v8 {
namespace internal {
namespace compiler {
namespace {
const MachineRepresentation kWord = MachineRepresentation::kWord32;
const MachineRepresentation kFloat = MachineRepresentation::kFloat32;
const MachineRepresentation kDouble = MachineRepresentation::kFloat64;
bool Interfere(LocationOperand::LocationKind kind, MachineRepresentation rep1,
int index1, MachineRepresentation rep2, int index2) {
return AllocatedOperand(kind, rep1, index1)
.InterferesWith(AllocatedOperand(kind, rep2, index2));
}
bool Contains(const ZoneVector<MoveOperands*>* moves,
const InstructionOperand& to, const InstructionOperand& from) {
for (auto move : *moves) {
if (move->destination().Equals(to) && move->source().Equals(from)) {
return true;
}
}
return false;
}
} // namespace
class InstructionTest : public TestWithZone {
public:
InstructionTest() {}
virtual ~InstructionTest() {}
ParallelMove* CreateParallelMove(
const std::vector<InstructionOperand>& operand_pairs) {
ParallelMove* parallel_move = new (zone()) ParallelMove(zone());
for (size_t i = 0; i < operand_pairs.size(); i += 2)
parallel_move->AddMove(operand_pairs[i + 1], operand_pairs[i]);
return parallel_move;
}
};
TEST_F(InstructionTest, OperandInterference) {
// All general registers and slots interfere only with themselves.
for (int i = 0; i < RegisterConfiguration::kMaxGeneralRegisters; ++i) {
EXPECT_TRUE(Interfere(LocationOperand::REGISTER, kWord, i, kWord, i));
EXPECT_TRUE(Interfere(LocationOperand::STACK_SLOT, kWord, i, kWord, i));
for (int j = i + 1; j < RegisterConfiguration::kMaxGeneralRegisters; ++j) {
EXPECT_FALSE(Interfere(LocationOperand::REGISTER, kWord, i, kWord, j));
EXPECT_FALSE(Interfere(LocationOperand::STACK_SLOT, kWord, i, kWord, j));
}
}
// All FP registers interfere with themselves.
for (int i = 0; i < RegisterConfiguration::kMaxFPRegisters; ++i) {
EXPECT_TRUE(Interfere(LocationOperand::REGISTER, kFloat, i, kFloat, i));
EXPECT_TRUE(Interfere(LocationOperand::STACK_SLOT, kFloat, i, kFloat, i));
EXPECT_TRUE(Interfere(LocationOperand::REGISTER, kDouble, i, kDouble, i));
EXPECT_TRUE(Interfere(LocationOperand::STACK_SLOT, kDouble, i, kDouble, i));
}
if (kSimpleFPAliasing) {
// Simple FP aliasing: interfering registers of different reps have the same
// index.
for (int i = 0; i < RegisterConfiguration::kMaxFPRegisters; ++i) {
EXPECT_TRUE(Interfere(LocationOperand::REGISTER, kFloat, i, kDouble, i));
EXPECT_TRUE(Interfere(LocationOperand::REGISTER, kDouble, i, kFloat, i));
for (int j = i + 1; j < RegisterConfiguration::kMaxFPRegisters; ++j) {
EXPECT_FALSE(Interfere(LocationOperand::REGISTER, kWord, i, kWord, j));
EXPECT_FALSE(
Interfere(LocationOperand::STACK_SLOT, kWord, i, kWord, j));
}
}
} else {
// Complex FP aliasing: sub-registers interfere with containing registers.
// Test sub-register indices which may not exist on the platform. This is
// necessary since the GapResolver may split large moves into smaller ones.
for (int i = 0; i < RegisterConfiguration::kMaxFPRegisters; ++i) {
EXPECT_TRUE(
Interfere(LocationOperand::REGISTER, kFloat, i * 2, kDouble, i));
EXPECT_TRUE(
Interfere(LocationOperand::REGISTER, kFloat, i * 2 + 1, kDouble, i));
EXPECT_TRUE(
Interfere(LocationOperand::REGISTER, kDouble, i, kFloat, i * 2));
EXPECT_TRUE(
Interfere(LocationOperand::REGISTER, kDouble, i, kFloat, i * 2 + 1));
for (int j = i + 1; j < RegisterConfiguration::kMaxFPRegisters; ++j) {
EXPECT_FALSE(
Interfere(LocationOperand::REGISTER, kFloat, i * 2, kDouble, j));
EXPECT_FALSE(Interfere(LocationOperand::REGISTER, kFloat, i * 2 + 1,
kDouble, j));
EXPECT_FALSE(
Interfere(LocationOperand::REGISTER, kDouble, i, kFloat, j * 2));
EXPECT_FALSE(Interfere(LocationOperand::REGISTER, kDouble, i, kFloat,
j * 2 + 1));
}
}
}
}
TEST_F(InstructionTest, PrepareInsertAfter) {
InstructionOperand r0 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kWord32, 0);
InstructionOperand r1 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kWord32, 1);
InstructionOperand r2 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kWord32, 2);
InstructionOperand d0 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat64, 0);
InstructionOperand d1 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat64, 1);
InstructionOperand d2 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat64, 2);
{
// Moves inserted after should pick up assignments to their sources.
// Moves inserted after should cause interfering moves to be eliminated.
ZoneVector<MoveOperands*> to_eliminate(zone());
std::vector<InstructionOperand> moves = {
r1, r0, // r1 <- r0
r2, r0, // r2 <- r0
d1, d0, // d1 <- d0
d2, d0 // d2 <- d0
};
ParallelMove* pm = CreateParallelMove(moves);
MoveOperands m1(r1, r2); // r2 <- r1
pm->PrepareInsertAfter(&m1, &to_eliminate);
CHECK(m1.source().Equals(r0));
CHECK(Contains(&to_eliminate, r2, r0));
MoveOperands m2(d1, d2); // d2 <- d1
pm->PrepareInsertAfter(&m2, &to_eliminate);
CHECK(m2.source().Equals(d0));
CHECK(Contains(&to_eliminate, d2, d0));
}
if (!kSimpleFPAliasing) {
// Moves inserted after should cause all interfering moves to be eliminated.
auto s0 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat32, 0);
auto s1 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat32, 1);
auto s2 = AllocatedOperand(LocationOperand::REGISTER,
MachineRepresentation::kFloat32, 2);
{
ZoneVector<MoveOperands*> to_eliminate(zone());
std::vector<InstructionOperand> moves = {
s0, s2, // s0 <- s2
s1, s2 // s1 <- s2
};
ParallelMove* pm = CreateParallelMove(moves);
MoveOperands m1(d1, d0); // d0 <- d1
pm->PrepareInsertAfter(&m1, &to_eliminate);
CHECK(Contains(&to_eliminate, s0, s2));
CHECK(Contains(&to_eliminate, s1, s2));
}
}
}
} // namespace compiler
} // namespace internal
} // namespace v8
......@@ -12,6 +12,14 @@ namespace compiler {
class MoveOptimizerTest : public InstructionSequenceTest {
public:
// FP register indices which don't interfere under simple or complex aliasing.
static const int kF64_1 = 0;
static const int kF64_2 = 1;
static const int kF32_1 = 4;
static const int kF32_2 = 5;
static const int kS128_1 = 2;
static const int kS128_2 = 3;
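// Why these indices are disjoint under the ARM-style combine scheme (an
// illustration, not part of the original test): kF64 codes {0, 1} are d0/d1
// (s0-s3, q0); kF32 codes {4, 5} are s4/s5, which live in d2/q1; kS128 codes
// {2, 3} are q2/q3, which cover d4-d7 (s8-s15). No group overlaps another.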
Instruction* LastInstruction() { return sequence()->instructions().back(); }
void AddMove(Instruction* instr, TestOperand from, TestOperand to,
......@@ -99,8 +107,12 @@ TEST_F(MoveOptimizerTest, RemovesRedundant) {
AddMove(first_instr, Reg(0), Reg(1));
AddMove(last_instr, Reg(1), Reg(0));
AddMove(first_instr, FPReg(0), FPReg(1));
AddMove(last_instr, FPReg(1), FPReg(0));
AddMove(first_instr, FPReg(kS128_1, kSimd128), FPReg(kS128_2, kSimd128));
AddMove(last_instr, FPReg(kS128_2, kSimd128), FPReg(kS128_1, kSimd128));
AddMove(first_instr, FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64));
AddMove(last_instr, FPReg(kF64_2, kFloat64), FPReg(kF64_1, kFloat64));
AddMove(first_instr, FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32));
AddMove(last_instr, FPReg(kF32_2, kFloat32), FPReg(kF32_1, kFloat32));
EndBlock(Last());
......@@ -108,22 +120,38 @@ TEST_F(MoveOptimizerTest, RemovesRedundant) {
CHECK_EQ(0, NonRedundantSize(first_instr->parallel_moves()[0]));
auto move = last_instr->parallel_moves()[0];
CHECK_EQ(2, NonRedundantSize(move));
CHECK_EQ(4, NonRedundantSize(move));
CHECK(Contains(move, Reg(0), Reg(1)));
CHECK(Contains(move, FPReg(0), FPReg(1)));
CHECK(Contains(move, FPReg(kS128_1, kSimd128), FPReg(kS128_2, kSimd128)));
CHECK(Contains(move, FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64)));
CHECK(Contains(move, FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32)));
}
TEST_F(MoveOptimizerTest, RemovesRedundantExplicit) {
int first_reg_index = GetAllocatableCode(0);
int second_reg_index = GetAllocatableCode(1);
int index1 = GetAllocatableCode(0);
int index2 = GetAllocatableCode(1);
int s128_1 = GetAllocatableCode(kS128_1, kSimd128);
int s128_2 = GetAllocatableCode(kS128_2, kSimd128);
int f64_1 = GetAllocatableCode(kF64_1, kFloat64);
int f64_2 = GetAllocatableCode(kF64_2, kFloat64);
int f32_1 = GetAllocatableCode(kF32_1, kFloat32);
int f32_2 = GetAllocatableCode(kF32_2, kFloat32);
StartBlock();
auto first_instr = EmitNop();
auto last_instr = EmitNop();
AddMove(first_instr, Reg(first_reg_index), ExplicitReg(second_reg_index));
AddMove(last_instr, Reg(second_reg_index), Reg(first_reg_index));
AddMove(first_instr, Reg(index1), ExplicitReg(index2));
AddMove(last_instr, Reg(index2), Reg(index1));
AddMove(first_instr, FPReg(s128_1, kSimd128),
ExplicitFPReg(s128_2, kSimd128));
AddMove(last_instr, FPReg(s128_2, kSimd128), FPReg(s128_1, kSimd128));
AddMove(first_instr, FPReg(f64_1, kFloat64), ExplicitFPReg(f64_2, kFloat64));
AddMove(last_instr, FPReg(f64_2, kFloat64), FPReg(f64_1, kFloat64));
AddMove(first_instr, FPReg(f32_1, kFloat32), ExplicitFPReg(f32_2, kFloat32));
AddMove(last_instr, FPReg(f32_2, kFloat32), FPReg(f32_1, kFloat32));
EndBlock(Last());
......@@ -131,8 +159,12 @@ TEST_F(MoveOptimizerTest, RemovesRedundantExplicit) {
CHECK_EQ(0, NonRedundantSize(first_instr->parallel_moves()[0]));
auto move = last_instr->parallel_moves()[0];
CHECK_EQ(1, NonRedundantSize(move));
CHECK(Contains(move, Reg(first_reg_index), ExplicitReg(second_reg_index)));
CHECK_EQ(4, NonRedundantSize(move));
CHECK(Contains(move, Reg(index1), ExplicitReg(index2)));
CHECK(
Contains(move, FPReg(s128_1, kSimd128), ExplicitFPReg(s128_2, kSimd128)));
CHECK(Contains(move, FPReg(f64_1, kFloat64), ExplicitFPReg(f64_2, kFloat64)));
CHECK(Contains(move, FPReg(f32_1, kFloat32), ExplicitFPReg(f32_2, kFloat32)));
}
......@@ -167,10 +199,18 @@ TEST_F(MoveOptimizerTest, SimpleMerge) {
StartBlock();
EndBlock(Jump(2));
AddMove(LastInstruction(), Reg(0), Reg(1));
AddMove(LastInstruction(), FPReg(kS128_1, kSimd128),
FPReg(kS128_2, kSimd128));
AddMove(LastInstruction(), FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64));
AddMove(LastInstruction(), FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32));
StartBlock();
EndBlock(Jump(1));
AddMove(LastInstruction(), Reg(0), Reg(1));
AddMove(LastInstruction(), FPReg(kS128_1, kSimd128),
FPReg(kS128_2, kSimd128));
AddMove(LastInstruction(), FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64));
AddMove(LastInstruction(), FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32));
StartBlock();
EndBlock(Last());
......@@ -180,8 +220,11 @@ TEST_F(MoveOptimizerTest, SimpleMerge) {
Optimize();
auto move = last->parallel_moves()[0];
CHECK_EQ(1, NonRedundantSize(move));
CHECK_EQ(4, NonRedundantSize(move));
CHECK(Contains(move, Reg(0), Reg(1)));
CHECK(Contains(move, FPReg(kS128_1, kSimd128), FPReg(kS128_2, kSimd128)));
CHECK(Contains(move, FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64)));
CHECK(Contains(move, FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32)));
}
......@@ -195,16 +238,25 @@ TEST_F(MoveOptimizerTest, SimpleMergeCycle) {
AddMove(gap_0, Reg(0), Reg(1));
AddMove(LastInstruction(), Reg(1), Reg(0));
AddMove(gap_0, FPReg(0), FPReg(1));
AddMove(LastInstruction(), FPReg(1), FPReg(0));
AddMove(gap_0, FPReg(kS128_1, kSimd128), FPReg(kS128_2, kSimd128));
AddMove(LastInstruction(), FPReg(kS128_2, kSimd128),
FPReg(kS128_1, kSimd128));
AddMove(gap_0, FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64));
AddMove(LastInstruction(), FPReg(kF64_2, kFloat64), FPReg(kF64_1, kFloat64));
AddMove(gap_0, FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32));
AddMove(LastInstruction(), FPReg(kF32_2, kFloat32), FPReg(kF32_1, kFloat32));
StartBlock();
EndBlock(Jump(1));
auto gap_1 = LastInstruction();
AddMove(gap_1, Reg(0), Reg(1));
AddMove(gap_1, Reg(1), Reg(0));
AddMove(gap_1, FPReg(0), FPReg(1));
AddMove(gap_1, FPReg(1), FPReg(0));
AddMove(gap_1, FPReg(kS128_1, kSimd128), FPReg(kS128_2, kSimd128));
AddMove(gap_1, FPReg(kS128_2, kSimd128), FPReg(kS128_1, kSimd128));
AddMove(gap_1, FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64));
AddMove(gap_1, FPReg(kF64_2, kFloat64), FPReg(kF64_1, kFloat64));
AddMove(gap_1, FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32));
AddMove(gap_1, FPReg(kF32_2, kFloat32), FPReg(kF32_1, kFloat32));
StartBlock();
EndBlock(Last());
......@@ -216,11 +268,15 @@ TEST_F(MoveOptimizerTest, SimpleMergeCycle) {
CHECK(gap_0->AreMovesRedundant());
CHECK(gap_1->AreMovesRedundant());
auto move = last->parallel_moves()[0];
CHECK_EQ(4, NonRedundantSize(move));
CHECK_EQ(8, NonRedundantSize(move));
CHECK(Contains(move, Reg(0), Reg(1)));
CHECK(Contains(move, Reg(1), Reg(0)));
CHECK(Contains(move, FPReg(0), FPReg(1)));
CHECK(Contains(move, FPReg(1), FPReg(0)));
CHECK(Contains(move, FPReg(kS128_1, kSimd128), FPReg(kS128_2, kSimd128)));
CHECK(Contains(move, FPReg(kS128_2, kSimd128), FPReg(kS128_1, kSimd128)));
CHECK(Contains(move, FPReg(kF64_1, kFloat64), FPReg(kF64_2, kFloat64)));
CHECK(Contains(move, FPReg(kF64_2, kFloat64), FPReg(kF64_1, kFloat64)));
CHECK(Contains(move, FPReg(kF32_1, kFloat32), FPReg(kF32_2, kFloat32)));
CHECK(Contains(move, FPReg(kF32_2, kFloat32), FPReg(kF32_1, kFloat32)));
}
......@@ -342,8 +398,31 @@ TEST_F(MoveOptimizerTest, ClobberedDestinationsAreEliminated) {
EmitNop();
Instruction* first_instr = LastInstruction();
AddMove(first_instr, Reg(0), Reg(1));
AddMove(first_instr, FPReg(0), FPReg(1));
EmitOOI(Reg(1), FPReg(1), 0, nullptr);
EmitOI(Reg(1), 0, nullptr);
Instruction* last_instr = LastInstruction();
EndBlock();
Optimize();
ParallelMove* first_move = first_instr->parallel_moves()[0];
CHECK_EQ(0, NonRedundantSize(first_move));
ParallelMove* last_move = last_instr->parallel_moves()[0];
CHECK_EQ(0, NonRedundantSize(last_move));
}
TEST_F(MoveOptimizerTest, ClobberedFPDestinationsAreEliminated) {
StartBlock();
EmitNop();
Instruction* first_instr = LastInstruction();
AddMove(first_instr, FPReg(4, kFloat64), FPReg(1, kFloat64));
if (!kSimpleFPAliasing) {
// We clobber q0 below. This is aliased by d0, d1, s0, s1, s2, and s3.
// Add float moves whose destinations (s0 and s1) are also aliased by q0.
AddMove(first_instr, FPReg(10, kFloat32), FPReg(0, kFloat32));
AddMove(first_instr, FPReg(11, kFloat32), FPReg(1, kFloat32));
}
// Clobbers output register 0.
EmitOI(FPReg(0, kSimd128), 0, nullptr);
Instruction* last_instr = LastInstruction();
EndBlock();
Optimize();
......
......@@ -101,13 +101,14 @@ TEST_F(RegisterAllocatorTest, CanAllocateThreeRegisters) {
Allocate();
}
TEST_F(RegisterAllocatorTest, CanAllocateThreeFPRegisters) {
// return p0 + p1;
StartBlock();
VReg a_reg = FPParameter();
VReg b_reg = FPParameter();
VReg c_reg = EmitOI(FPReg(1), Reg(a_reg, 1), Reg(b_reg, 0));
Return(c_reg);
TEST_F(RegisterAllocatorTest, CanAllocateFPRegisters) {
StartBlock();
TestOperand inputs[] = {
Reg(FPParameter(kFloat64)), Reg(FPParameter(kFloat64)),
Reg(FPParameter(kFloat32)), Reg(FPParameter(kFloat32)),
Reg(FPParameter(kSimd128)), Reg(FPParameter(kSimd128))};
VReg out1 = EmitOI(FPReg(1, kFloat64), arraysize(inputs), inputs);
Return(out1);
EndBlock(Last());
Allocate();
......
......@@ -16,8 +16,6 @@ class RegisterConfigurationUnitTest : public ::testing::Test {
public:
RegisterConfigurationUnitTest() {}
virtual ~RegisterConfigurationUnitTest() {}
private:
};
TEST_F(RegisterConfigurationUnitTest, BasicProperties) {
......@@ -30,9 +28,8 @@ TEST_F(RegisterConfigurationUnitTest, BasicProperties) {
RegisterConfiguration test(
kNumGeneralRegs, kNumDoubleRegs, kNumAllocatableGeneralRegs,
kNumAllocatableDoubleRegs, kNumAllocatableDoubleRegs, general_codes,
double_codes, RegisterConfiguration::OVERLAP, nullptr, nullptr, nullptr,
nullptr);
kNumAllocatableDoubleRegs, general_codes, double_codes,
RegisterConfiguration::OVERLAP, nullptr, nullptr, nullptr, nullptr);
EXPECT_EQ(test.num_general_registers(), kNumGeneralRegs);
EXPECT_EQ(test.num_double_registers(), kNumDoubleRegs);
......@@ -67,9 +64,8 @@ TEST_F(RegisterConfigurationUnitTest, CombineAliasing) {
RegisterConfiguration test(
kNumGeneralRegs, kNumDoubleRegs, kNumAllocatableGeneralRegs,
kNumAllocatableDoubleRegs, kNumAllocatableDoubleRegs, general_codes,
double_codes, RegisterConfiguration::COMBINE, nullptr, nullptr, nullptr,
nullptr);
kNumAllocatableDoubleRegs, general_codes, double_codes,
RegisterConfiguration::COMBINE, nullptr, nullptr, nullptr, nullptr);
// There are 3 allocatable double regs, but only 2 can alias float regs.
EXPECT_EQ(test.num_allocatable_float_registers(), 4);
......@@ -157,9 +153,10 @@ TEST_F(RegisterConfigurationUnitTest, CombineAliasing) {
test.GetAliases(kFloat64, RegisterConfiguration::kMaxFPRegisters / 2 + 1,
kFloat32, &alias_base_index),
0);
EXPECT_EQ(test.GetAliases(kFloat64, RegisterConfiguration::kMaxFPRegisters,
kFloat32, &alias_base_index),
0);
EXPECT_EQ(
test.GetAliases(kFloat64, RegisterConfiguration::kMaxFPRegisters - 1,
kFloat32, &alias_base_index),
0);
}
} // namespace internal
......
......@@ -42,6 +42,7 @@
'compiler/graph-trimmer-unittest.cc',
'compiler/graph-unittest.cc',
'compiler/graph-unittest.h',
'compiler/instruction-unittest.cc',
'compiler/instruction-selector-unittest.cc',
'compiler/instruction-selector-unittest.h',
'compiler/instruction-sequence-unittest.cc',
......