// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/wasm/baseline/liftoff-assembler.h"

#include <sstream>

#include "src/base/optional.h"
#include "src/base/platform/wrappers.h"
#include "src/codegen/assembler-inl.h"
#include "src/codegen/macro-assembler-inl.h"
#include "src/compiler/linkage.h"
#include "src/compiler/wasm-compiler.h"
#include "src/utils/ostreams.h"
#include "src/wasm/baseline/liftoff-register.h"
#include "src/wasm/function-body-decoder-impl.h"
#include "src/wasm/object-access.h"
#include "src/wasm/wasm-linkage.h"
#include "src/wasm/wasm-opcodes.h"

namespace v8 {
namespace internal {
namespace wasm {

using VarState = LiftoffAssembler::VarState;
using ValueKindSig = LiftoffAssembler::ValueKindSig;

constexpr ValueKind LiftoffAssembler::kPointerKind;
constexpr ValueKind LiftoffAssembler::kTaggedKind;
constexpr ValueKind LiftoffAssembler::kSmiKind;

namespace {

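// Helper to record and then execute the register moves, constant loads and
// stack fills needed to turn one value-stack layout into another. Moves are
// deferred until {Execute} runs (at the latest in the destructor): plain
// register moves go first, cycles are broken by spilling one value to a fresh
// stack slot, and constants/stack values are loaded last.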
class StackTransferRecipe {
  struct RegisterMove {
    LiftoffRegister src;
    ValueKind kind;
    constexpr RegisterMove(LiftoffRegister src, ValueKind kind)
        : src(src), kind(kind) {}
  };

  struct RegisterLoad {
    enum LoadKind : uint8_t {
      kNop,           // no-op, used for high fp of a fp pair.
      kConstant,      // load a constant value into a register.
      kStack,         // fill a register from a stack slot.
      kLowHalfStack,  // fill a register from the low half of a stack slot.
      kHighHalfStack  // fill a register from the high half of a stack slot.
    };

    LoadKind load_kind;
    ValueKind kind;
    int32_t value;  // i32 constant value or stack offset, depending on kind.

    // Named constructors.
    static RegisterLoad Const(WasmValue constant) {
      if (constant.type().kind() == kI32) {
        return {kConstant, kI32, constant.to_i32()};
      }
      DCHECK_EQ(kI64, constant.type().kind());
      int32_t i32_const = static_cast<int32_t>(constant.to_i64());
      DCHECK_EQ(constant.to_i64(), i32_const);
      return {kConstant, kI64, i32_const};
    }
    static RegisterLoad Stack(int32_t offset, ValueKind kind) {
      return {kStack, kind, offset};
    }
    static RegisterLoad HalfStack(int32_t offset, RegPairHalf half) {
      return {half == kLowWord ? kLowHalfStack : kHighHalfStack, kI32, offset};
    }
    static RegisterLoad Nop() {
      // ValueKind does not matter.
      return {kNop, kI32, 0};
    }

   private:
    RegisterLoad(LoadKind load_kind, ValueKind kind, int32_t value)
        : load_kind(load_kind), kind(kind), value(value) {}
  };

 public:
  explicit StackTransferRecipe(LiftoffAssembler* wasm_asm) : asm_(wasm_asm) {}
  StackTransferRecipe(const StackTransferRecipe&) = delete;
  StackTransferRecipe& operator=(const StackTransferRecipe&) = delete;
  ~StackTransferRecipe() { Execute(); }

  void Execute() {
    // First, execute register moves. Then load constants and stack values into
    // registers.
    ExecuteMoves();
    DCHECK(move_dst_regs_.is_empty());
    ExecuteLoads();
    DCHECK(load_dst_regs_.is_empty());
  }

  V8_INLINE void TransferStackSlot(const VarState& dst, const VarState& src) {
    DCHECK(CheckCompatibleStackSlotTypes(dst.kind(), src.kind()));
    if (dst.is_reg()) {
      LoadIntoRegister(dst.reg(), src, src.offset());
      return;
    }
    if (dst.is_const()) {
      DCHECK_EQ(dst.i32_const(), src.i32_const());
      return;
    }
    DCHECK(dst.is_stack());
    switch (src.loc()) {
      case VarState::kStack:
        if (src.offset() != dst.offset()) {
          asm_->MoveStackValue(dst.offset(), src.offset(), src.kind());
        }
        break;
      case VarState::kRegister:
        asm_->Spill(dst.offset(), src.reg(), src.kind());
        break;
      case VarState::kIntConst:
        asm_->Spill(dst.offset(), src.constant());
        break;
    }
  }

  V8_INLINE void LoadIntoRegister(LiftoffRegister dst,
                                  const LiftoffAssembler::VarState& src,
                                  uint32_t src_offset) {
    switch (src.loc()) {
      case VarState::kStack:
        LoadStackSlot(dst, src_offset, src.kind());
        break;
      case VarState::kRegister:
        DCHECK_EQ(dst.reg_class(), src.reg_class());
        if (dst != src.reg()) MoveRegister(dst, src.reg(), src.kind());
        break;
      case VarState::kIntConst:
        LoadConstant(dst, src.constant());
        break;
    }
  }

  void LoadI64HalfIntoRegister(LiftoffRegister dst,
                               const LiftoffAssembler::VarState& src,
                               int offset, RegPairHalf half) {
    // Use CHECK such that the remaining code is statically dead if
    // {kNeedI64RegPair} is false.
    CHECK(kNeedI64RegPair);
    DCHECK_EQ(kI64, src.kind());
    switch (src.loc()) {
      case VarState::kStack:
        LoadI64HalfStackSlot(dst, offset, half);
        break;
      case VarState::kRegister: {
        LiftoffRegister src_half =
            half == kLowWord ? src.reg().low() : src.reg().high();
        if (dst != src_half) MoveRegister(dst, src_half, kI32);
        break;
      }
      case VarState::kIntConst:
        int32_t value = src.i32_const();
        // The high word is the sign extension of the low word.
        if (half == kHighWord) value = value >> 31;
        LoadConstant(dst, WasmValue(value));
        break;
    }
  }

  void MoveRegister(LiftoffRegister dst, LiftoffRegister src, ValueKind kind) {
    DCHECK_NE(dst, src);
    DCHECK_EQ(dst.reg_class(), src.reg_class());
    DCHECK_EQ(reg_class_for(kind), src.reg_class());
    if (src.is_gp_pair()) {
      DCHECK_EQ(kI64, kind);
      if (dst.low() != src.low()) MoveRegister(dst.low(), src.low(), kI32);
      if (dst.high() != src.high()) MoveRegister(dst.high(), src.high(), kI32);
      return;
    }
    if (src.is_fp_pair()) {
      DCHECK_EQ(kS128, kind);
      if (dst.low() != src.low()) {
        MoveRegister(dst.low(), src.low(), kF64);
        MoveRegister(dst.high(), src.high(), kF64);
      }
      return;
    }
    if (move_dst_regs_.has(dst)) {
      DCHECK_EQ(register_move(dst)->src, src);
      // Non-fp registers can only occur with the exact same type.
      DCHECK_IMPLIES(!dst.is_fp(), register_move(dst)->kind == kind);
      // It can happen that one fp register holds both the f32 zero and the f64
      // zero, as the initial value for local variables. Move the value as f64
      // in that case.
      if (kind == kF64) register_move(dst)->kind = kF64;
      return;
    }
    move_dst_regs_.set(dst);
    ++*src_reg_use_count(src);
    *register_move(dst) = {src, kind};
  }

  void LoadConstant(LiftoffRegister dst, WasmValue value) {
    DCHECK(!load_dst_regs_.has(dst));
    load_dst_regs_.set(dst);
    if (dst.is_gp_pair()) {
      DCHECK_EQ(kI64, value.type().kind());
      int64_t i64 = value.to_i64();
      *register_load(dst.low()) =
          RegisterLoad::Const(WasmValue(static_cast<int32_t>(i64)));
      *register_load(dst.high()) =
          RegisterLoad::Const(WasmValue(static_cast<int32_t>(i64 >> 32)));
    } else {
      *register_load(dst) = RegisterLoad::Const(value);
    }
  }

  void LoadStackSlot(LiftoffRegister dst, uint32_t stack_offset,
                     ValueKind kind) {
    if (load_dst_regs_.has(dst)) {
      // It can happen that we spilled the same register to different stack
      // slots, and then we reload them later into the same dst register.
      // In that case, it is enough to load one of the stack slots.
      return;
    }
    load_dst_regs_.set(dst);
    if (dst.is_gp_pair()) {
      DCHECK_EQ(kI64, kind);
      *register_load(dst.low()) =
          RegisterLoad::HalfStack(stack_offset, kLowWord);
      *register_load(dst.high()) =
          RegisterLoad::HalfStack(stack_offset, kHighWord);
    } else if (dst.is_fp_pair()) {
      DCHECK_EQ(kS128, kind);
      // Only need register_load for low_gp since we load 128 bits at one go.
      // Both low and high need to be set in load_dst_regs_ but when iterating
      // over it, both low and high will be cleared, so we won't load twice.
      *register_load(dst.low()) = RegisterLoad::Stack(stack_offset, kind);
      *register_load(dst.high()) = RegisterLoad::Nop();
    } else {
      *register_load(dst) = RegisterLoad::Stack(stack_offset, kind);
    }
  }

  void LoadI64HalfStackSlot(LiftoffRegister dst, int offset, RegPairHalf half) {
    if (load_dst_regs_.has(dst)) {
      // It can happen that we spilled the same register to different stack
      // slots, and then we reload them later into the same dst register.
      // In that case, it is enough to load one of the stack slots.
      return;
    }
    load_dst_regs_.set(dst);
    *register_load(dst) = RegisterLoad::HalfStack(offset, half);
  }

 private:
  using MovesStorage =
      std::aligned_storage<kAfterMaxLiftoffRegCode * sizeof(RegisterMove),
                           alignof(RegisterMove)>::type;
  using LoadsStorage =
      std::aligned_storage<kAfterMaxLiftoffRegCode * sizeof(RegisterLoad),
                           alignof(RegisterLoad)>::type;

  ASSERT_TRIVIALLY_COPYABLE(RegisterMove);
  ASSERT_TRIVIALLY_COPYABLE(RegisterLoad);

  MovesStorage register_moves_;  // uninitialized
  LoadsStorage register_loads_;  // uninitialized
  int src_reg_use_count_[kAfterMaxLiftoffRegCode] = {0};
  LiftoffRegList move_dst_regs_;
  LiftoffRegList load_dst_regs_;
  LiftoffAssembler* const asm_;

  RegisterMove* register_move(LiftoffRegister reg) {
    return reinterpret_cast<RegisterMove*>(&register_moves_) +
           reg.liftoff_code();
  }
  RegisterLoad* register_load(LiftoffRegister reg) {
    return reinterpret_cast<RegisterLoad*>(&register_loads_) +
           reg.liftoff_code();
  }
  int* src_reg_use_count(LiftoffRegister reg) {
    return src_reg_use_count_ + reg.liftoff_code();
  }

  void ExecuteMove(LiftoffRegister dst) {
    RegisterMove* move = register_move(dst);
    DCHECK_EQ(0, *src_reg_use_count(dst));
    asm_->Move(dst, move->src, move->kind);
    ClearExecutedMove(dst);
  }

  void ClearExecutedMove(LiftoffRegister dst) {
    DCHECK(move_dst_regs_.has(dst));
    move_dst_regs_.clear(dst);
    RegisterMove* move = register_move(dst);
    DCHECK_LT(0, *src_reg_use_count(move->src));
    if (--*src_reg_use_count(move->src)) return;
    // src count dropped to zero. If this is a destination register, execute
    // that move now.
    if (!move_dst_regs_.has(move->src)) return;
    ExecuteMove(move->src);
  }

  void ExecuteMoves() {
    // Execute all moves whose {dst} is not being used as src in another move.
    // If any src count drops to zero, also (transitively) execute the
    // corresponding move to that register.
    for (LiftoffRegister dst : move_dst_regs_) {
      // Check if already handled via transitivity in {ClearExecutedMove}.
      if (!move_dst_regs_.has(dst)) continue;
      if (*src_reg_use_count(dst)) continue;
      ExecuteMove(dst);
    }

    // All remaining moves are parts of a cycle. Just spill the first one, then
    // process all remaining moves in that cycle. Repeat for all cycles.
    int last_spill_offset = asm_->TopSpillOffset();
    while (!move_dst_regs_.is_empty()) {
      // TODO(clemensb): Use an unused register if available.
      LiftoffRegister dst = move_dst_regs_.GetFirstRegSet();
      RegisterMove* move = register_move(dst);
      last_spill_offset += LiftoffAssembler::SlotSizeForType(move->kind);
      LiftoffRegister spill_reg = move->src;
      asm_->Spill(last_spill_offset, spill_reg, move->kind);
      // Remember to reload into the destination register later.
      LoadStackSlot(dst, last_spill_offset, move->kind);
      ClearExecutedMove(dst);
    }
  }

  void ExecuteLoads() {
    for (LiftoffRegister dst : load_dst_regs_) {
      RegisterLoad* load = register_load(dst);
      switch (load->load_kind) {
        case RegisterLoad::kNop:
          break;
        case RegisterLoad::kConstant:
          asm_->LoadConstant(dst, load->kind == kI64
                                      ? WasmValue(int64_t{load->value})
                                      : WasmValue(int32_t{load->value}));
          break;
        case RegisterLoad::kStack:
          if (kNeedS128RegPair && load->kind == kS128) {
            asm_->Fill(LiftoffRegister::ForFpPair(dst.fp()), load->value,
                       load->kind);
          } else {
            asm_->Fill(dst, load->value, load->kind);
          }
          break;
        case RegisterLoad::kLowHalfStack:
          // Half of a register pair, {dst} must be a gp register.
          asm_->FillI64Half(dst.gp(), load->value, kLowWord);
          break;
        case RegisterLoad::kHighHalfStack:
          // Half of a register pair, {dst} must be a gp register.
          asm_->FillI64Half(dst.gp(), load->value, kHighWord);
          break;
      }
    }
    load_dst_regs_ = {};
  }
};

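// Remembers which target register was picked for a given source register, so
// that a source register occurring multiple times in a merge is mapped to the
// same target register each time.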
class RegisterReuseMap {
 public:
  void Add(LiftoffRegister src, LiftoffRegister dst) {
    if (auto previous = Lookup(src)) {
      DCHECK_EQ(previous, dst);
      return;
    }
    map_.emplace_back(src);
    map_.emplace_back(dst);
  }

  base::Optional<LiftoffRegister> Lookup(LiftoffRegister src) {
    for (auto it = map_.begin(), end = map_.end(); it != end; it += 2) {
      if (it->is_gp_pair() == src.is_gp_pair() &&
          it->is_fp_pair() == src.is_fp_pair() && *it == src)
        return *(it + 1);
    }
    return {};
  }

 private:
  // {map_} holds pairs of <src, dst>.
  base::SmallVector<LiftoffRegister, 8> map_;
};

enum MergeKeepStackSlots : bool {
  kKeepStackSlots = true,
  kTurnStackSlotsIntoRegisters = false
};
enum MergeAllowConstants : bool {
  kConstantsAllowed = true,
  kConstantsNotAllowed = false
};
enum ReuseRegisters : bool {
  kReuseRegisters = true,
  kNoReuseRegisters = false
};
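// Assigns a target location (register, stack slot, or constant) to each slot
// in one region of the merge state, following the given policy flags and
// avoiding the registers listed in {used_regs}.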
void InitMergeRegion(LiftoffAssembler::CacheState* state,
                     const VarState* source, VarState* target, uint32_t count,
                     MergeKeepStackSlots keep_stack_slots,
                     MergeAllowConstants allow_constants,
                     ReuseRegisters reuse_registers, LiftoffRegList used_regs) {
  RegisterReuseMap register_reuse_map;
  for (const VarState* source_end = source + count; source < source_end;
       ++source, ++target) {
    if ((source->is_stack() && keep_stack_slots) ||
        (source->is_const() && allow_constants)) {
      *target = *source;
      continue;
    }
    base::Optional<LiftoffRegister> reg;
    // First try: Keep the same register, if it's free.
    if (source->is_reg() && state->is_free(source->reg())) {
      reg = source->reg();
    }
    // Second try: Use the same register we used before (if we reuse registers).
    if (!reg && reuse_registers) {
      reg = register_reuse_map.Lookup(source->reg());
    }
    // Third try: Use any free register.
    RegClass rc = reg_class_for(source->kind());
    if (!reg && state->has_unused_register(rc, used_regs)) {
      reg = state->unused_register(rc, used_regs);
    }
    if (!reg) {
      // No free register; make this a stack slot.
      *target = VarState(source->kind(), source->offset());
      continue;
    }
    if (reuse_registers) register_reuse_map.Add(source->reg(), *reg);
    state->inc_used(*reg);
    *target = VarState(source->kind(), *reg, source->offset());
  }
}

}  // namespace

// TODO(clemensb): Don't copy the full parent state (this makes us N^2).
void LiftoffAssembler::CacheState::InitMerge(const CacheState& source,
                                             uint32_t num_locals,
                                             uint32_t arity,
                                             uint32_t stack_depth) {
  // |------locals------|---(in between)----|--(discarded)--|----merge----|
  //  <-- num_locals --> <-- stack_depth -->^stack_base      <-- arity -->

  if (source.cached_instance != no_reg) {
    SetInstanceCacheRegister(source.cached_instance);
  }

  if (source.cached_mem_start != no_reg) {
    SetMemStartCacheRegister(source.cached_mem_start);
  }

  uint32_t stack_base = stack_depth + num_locals;
  uint32_t target_height = stack_base + arity;
  uint32_t discarded = source.stack_height() - target_height;
  DCHECK(stack_state.empty());

  DCHECK_GE(source.stack_height(), stack_base);
  stack_state.resize_no_init(target_height);

  const VarState* source_begin = source.stack_state.data();
  VarState* target_begin = stack_state.data();

  // Try to keep locals and the merge region in their registers. Registers used
  // multiple times need to be copied to another free register. Compute the list
  // of used registers.
  LiftoffRegList used_regs;
  for (auto& src : base::VectorOf(source_begin, num_locals)) {
    if (src.is_reg()) used_regs.set(src.reg());
  }
  for (auto& src :
       base::VectorOf(source_begin + stack_base + discarded, arity)) {
    if (src.is_reg()) used_regs.set(src.reg());
  }

  // Initialize the merge region. If this region moves, try to turn stack slots
  // into registers since we need to load the value anyways.
  MergeKeepStackSlots keep_merge_stack_slots =
      discarded == 0 ? kKeepStackSlots : kTurnStackSlotsIntoRegisters;
  InitMergeRegion(this, source_begin + stack_base + discarded,
                  target_begin + stack_base, arity, keep_merge_stack_slots,
                  kConstantsNotAllowed, kNoReuseRegisters, used_regs);
  // Shift spill offsets down to keep slots contiguous.
  int offset = stack_base == 0 ? StaticStackFrameSize()
                               : source.stack_state[stack_base - 1].offset();
  auto merge_region = base::VectorOf(target_begin + stack_base, arity);
  for (VarState& var : merge_region) {
    offset = LiftoffAssembler::NextSpillOffset(var.kind(), offset);
    var.set_offset(offset);
  }

  // Initialize the locals region. Here, stack slots stay stack slots (because
  // they do not move). Try to keep values in their registers, but avoid duplicates.
  InitMergeRegion(this, source_begin, target_begin, num_locals, kKeepStackSlots,
                  kConstantsNotAllowed, kNoReuseRegisters, used_regs);
  // Consistency check: All the {used_regs} are really in use now.
  DCHECK_EQ(used_regs, used_registers & used_regs);

  // Last, initialize the section in between. Here, constants are allowed, but
  // registers which are already used for the merge region or locals must be
  // moved to other registers or spilled. If a register appears twice in the
  // source region, ensure to use the same register twice in the target region.
  InitMergeRegion(this, source_begin + num_locals, target_begin + num_locals,
                  stack_depth, kKeepStackSlots, kConstantsAllowed,
                  kReuseRegisters, used_regs);
}

void LiftoffAssembler::CacheState::Steal(const CacheState& source) {
  // Just use the move assignment operator.
  *this = std::move(source);
}

void LiftoffAssembler::CacheState::Split(const CacheState& source) {
  // Call the private copy assignment operator.
  *this = source;
}

namespace {
int GetSafepointIndexForStackSlot(const VarState& slot) {
  // index = 0 is for the stack slot at 'fp + kFixedFrameSizeAboveFp -
  // kSystemPointerSize', the location of the current stack slot is 'fp -
  // slot.offset()'. The index we need is therefore '(fp +
  // kFixedFrameSizeAboveFp - kSystemPointerSize) - (fp - slot.offset())' =
  // 'slot.offset() + kFixedFrameSizeAboveFp - kSystemPointerSize'.
  // Concretely, the index of the first stack slot is '4'.
  return (slot.offset() + StandardFrameConstants::kFixedFrameSizeAboveFp -
          kSystemPointerSize) /
         kSystemPointerSize;
}
}  // namespace

void LiftoffAssembler::CacheState::GetTaggedSlotsForOOLCode(
    ZoneVector<int>* slots, LiftoffRegList* spills,
    SpillLocation spill_location) {
  for (const auto& slot : stack_state) {
    if (!is_reference(slot.kind())) continue;

    if (spill_location == SpillLocation::kTopOfStack && slot.is_reg()) {
      // Registers get spilled just before the call to the runtime. In {spills}
      // we store which of the spilled registers contain references, so that we
      // can add the spill slots to the safepoint.
      spills->set(slot.reg());
      continue;
    }
    DCHECK_IMPLIES(slot.is_reg(), spill_location == SpillLocation::kStackSlots);

    slots->push_back(GetSafepointIndexForStackSlot(slot));
  }
}

void LiftoffAssembler::CacheState::DefineSafepoint(Safepoint& safepoint) {
  for (const auto& slot : stack_state) {
    if (is_reference(slot.kind())) {
      DCHECK(slot.is_stack());
      safepoint.DefinePointerSlot(GetSafepointIndexForStackSlot(slot));
    }
  }
}

void LiftoffAssembler::CacheState::DefineSafepointWithCalleeSavedRegisters(
    Safepoint& safepoint) {
  for (const auto& slot : stack_state) {
    if (!is_reference(slot.kind())) continue;
    if (slot.is_stack()) {
      safepoint.DefinePointerSlot(GetSafepointIndexForStackSlot(slot));
    } else {
      DCHECK(slot.is_reg());
      safepoint.DefineRegister(slot.reg().gp().code());
    }
  }
  if (cached_instance != no_reg) {
    safepoint.DefineRegister(cached_instance.code());
  }
}

int LiftoffAssembler::GetTotalFrameSlotCountForGC() const {
  // The GC does not care about the actual number of spill slots, just about
  // the number of references that could be there in the spilling area. Note
  // that the offset of the first spill slot is kSystemPointerSize and not
  // '0'. Therefore we don't have to add '+1' here.
  return (max_used_spill_offset_ +
          StandardFrameConstants::kFixedFrameSizeAboveFp +
          ool_spill_space_size_) /
         kSystemPointerSize;
}

namespace {

AssemblerOptions DefaultLiftoffOptions() { return AssemblerOptions{}; }

}  // namespace

LiftoffAssembler::LiftoffAssembler(std::unique_ptr<AssemblerBuffer> buffer)
    : TurboAssembler(nullptr, DefaultLiftoffOptions(), CodeObjectRequired::kNo,
                     std::move(buffer)) {
  set_abort_hard(true);  // Avoid calls to Abort.
}

LiftoffAssembler::~LiftoffAssembler() {
  if (num_locals_ > kInlineLocalKinds) {
    base::Free(more_local_kinds_);
  }
}

LiftoffRegister LiftoffAssembler::LoadToRegister(VarState slot,
                                                 LiftoffRegList pinned) {
  if (slot.is_reg()) return slot.reg();
  LiftoffRegister reg = GetUnusedRegister(reg_class_for(slot.kind()), pinned);
  if (slot.is_const()) {
    LoadConstant(reg, slot.constant());
  } else {
    DCHECK(slot.is_stack());
    Fill(reg, slot.offset(), slot.kind());
  }
  return reg;
}

LiftoffRegister LiftoffAssembler::LoadI64HalfIntoRegister(VarState slot,
                                                          RegPairHalf half) {
  if (slot.is_reg()) {
    return half == kLowWord ? slot.reg().low() : slot.reg().high();
  }
  LiftoffRegister dst = GetUnusedRegister(kGpReg, {});
  if (slot.is_stack()) {
    FillI64Half(dst.gp(), slot.offset(), half);
    return dst;
  }
  DCHECK(slot.is_const());
  int32_t half_word =
      static_cast<int32_t>(half == kLowWord ? slot.constant().to_i64()
                                            : slot.constant().to_i64() >> 32);
  LoadConstant(dst, WasmValue(half_word));
  return dst;
}

LiftoffRegister LiftoffAssembler::PeekToRegister(int index,
                                                 LiftoffRegList pinned) {
  DCHECK_LT(index, cache_state_.stack_state.size());
  VarState& slot = cache_state_.stack_state.end()[-1 - index];
  if (slot.is_reg()) {
    return slot.reg();
  }
  LiftoffRegister reg = LoadToRegister(slot, pinned);
  cache_state_.inc_used(reg);
  slot.MakeRegister(reg);
  return reg;
}

void LiftoffAssembler::DropValues(int count) {
  for (int i = 0; i < count; ++i) {
    DCHECK(!cache_state_.stack_state.empty());
    VarState slot = cache_state_.stack_state.back();
    cache_state_.stack_state.pop_back();
    if (slot.is_reg()) {
      cache_state_.dec_used(slot.reg());
    }
  }
}

void LiftoffAssembler::DropValue(int depth) {
  auto* dropped = cache_state_.stack_state.begin() + depth;
  if (dropped->is_reg()) {
    cache_state_.dec_used(dropped->reg());
  }
  std::copy(dropped + 1, cache_state_.stack_state.end(), dropped);
  cache_state_.stack_state.pop_back();
}

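// Makes the top {num} stack values usable as a loop-header merge state:
// constants are materialized into registers, and a register that is used more
// than once gets its own unused copy.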
void LiftoffAssembler::PrepareLoopArgs(int num) {
  for (int i = 0; i < num; ++i) {
    VarState& slot = cache_state_.stack_state.end()[-1 - i];
    if (slot.is_stack()) continue;
    RegClass rc = reg_class_for(slot.kind());
    if (slot.is_reg()) {
      if (cache_state_.get_use_count(slot.reg()) > 1) {
        // If the register is used more than once, we cannot use it for the
        // merge. Move it to an unused register instead.
        LiftoffRegList pinned;
        pinned.set(slot.reg());
        LiftoffRegister dst_reg = GetUnusedRegister(rc, pinned);
        Move(dst_reg, slot.reg(), slot.kind());
        cache_state_.dec_used(slot.reg());
        cache_state_.inc_used(dst_reg);
        slot.MakeRegister(dst_reg);
      }
      continue;
    }
    LiftoffRegister reg = GetUnusedRegister(rc, {});
    LoadConstant(reg, slot.constant());
    slot.MakeRegister(reg);
    cache_state_.inc_used(reg);
  }
}

void LiftoffAssembler::MaterializeMergedConstants(uint32_t arity) {
  // Materialize constants on top of the stack ({arity} many), and locals.
  VarState* stack_base = cache_state_.stack_state.data();
  for (auto slots :
       {base::VectorOf(stack_base + cache_state_.stack_state.size() - arity,
                       arity),
        base::VectorOf(stack_base, num_locals())}) {
    for (VarState& slot : slots) {
      if (!slot.is_const()) continue;
      RegClass rc = reg_class_for(slot.kind());
      if (cache_state_.has_unused_register(rc)) {
        LiftoffRegister reg = cache_state_.unused_register(rc);
        LoadConstant(reg, slot.constant());
        cache_state_.inc_used(reg);
        slot.MakeRegister(reg);
      } else {
        Spill(slot.offset(), slot.constant());
        slot.MakeStack();
      }
    }
  }
}

#ifdef DEBUG
namespace {
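// Two stack slots interfere if their byte ranges overlap. Used by the DCHECKs
// below to verify that a merge never reads a source slot that an earlier
// transfer has already overwritten.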
bool SlotInterference(const VarState& a, const VarState& b) {
  return a.is_stack() && b.is_stack() &&
         b.offset() > a.offset() - element_size_bytes(a.kind()) &&
         b.offset() - element_size_bytes(b.kind()) < a.offset();
}

bool SlotInterference(const VarState& a, base::Vector<const VarState> v) {
  return std::any_of(v.begin(), v.end(), [&a](const VarState& b) {
    return SlotInterference(a, b);
  });
}
}  // namespace
#endif

void LiftoffAssembler::MergeFullStackWith(CacheState& target,
                                          const CacheState& source) {
  DCHECK_EQ(source.stack_height(), target.stack_height());
  // TODO(clemensb): Reuse the same StackTransferRecipe object to save some
  // allocations.
  StackTransferRecipe transfers(this);
  for (uint32_t i = 0, e = source.stack_height(); i < e; ++i) {
    transfers.TransferStackSlot(target.stack_state[i], source.stack_state[i]);
    DCHECK(!SlotInterference(target.stack_state[i],
                             base::VectorOf(source.stack_state.data() + i + 1,
                                            source.stack_height() - i - 1)));
  }

  // Full stack merging is only done for forward jumps, so we can just clear the
  // cache registers at the target in case of mismatch.
  if (source.cached_instance != target.cached_instance) {
    target.ClearCachedInstanceRegister();
  }
  if (source.cached_mem_start != target.cached_mem_start) {
    target.ClearCachedMemStartRegister();
  }
}

void LiftoffAssembler::MergeStackWith(CacheState& target, uint32_t arity,
                                      JumpDirection jump_direction) {
  // Before: ----------------|----- (discarded) ----|--- arity ---|
  //                         ^target_stack_height   ^stack_base   ^stack_height
  // After:  ----|-- arity --|
  //             ^           ^target_stack_height
  //             ^target_stack_base
  uint32_t stack_height = cache_state_.stack_height();
  uint32_t target_stack_height = target.stack_height();
  DCHECK_LE(target_stack_height, stack_height);
  DCHECK_LE(arity, target_stack_height);
  uint32_t stack_base = stack_height - arity;
  uint32_t target_stack_base = target_stack_height - arity;
  StackTransferRecipe transfers(this);
  for (uint32_t i = 0; i < target_stack_base; ++i) {
    transfers.TransferStackSlot(target.stack_state[i],
                                cache_state_.stack_state[i]);
    DCHECK(!SlotInterference(
        target.stack_state[i],
        base::VectorOf(cache_state_.stack_state.data() + i + 1,
                       target_stack_base - i - 1)));
    DCHECK(!SlotInterference(
        target.stack_state[i],
        base::VectorOf(cache_state_.stack_state.data() + stack_base, arity)));
  }
  for (uint32_t i = 0; i < arity; ++i) {
    transfers.TransferStackSlot(target.stack_state[target_stack_base + i],
                                cache_state_.stack_state[stack_base + i]);
    DCHECK(!SlotInterference(
        target.stack_state[i],
        base::VectorOf(cache_state_.stack_state.data() + stack_base + i + 1,
                       arity - i - 1)));
  }

  // Check whether the cached instance and/or memory start need to be moved to
  // another register. Register moves are executed as part of the
  // {StackTransferRecipe}. Remember whether the register content has to be
  // reloaded after executing the stack transfers.
  bool reload_instance = false;
  bool reload_mem_start = false;
  for (auto tuple :
       {std::make_tuple(&reload_instance, cache_state_.cached_instance,
                        &target.cached_instance),
        std::make_tuple(&reload_mem_start, cache_state_.cached_mem_start,
                        &target.cached_mem_start)}) {
    bool* reload = std::get<0>(tuple);
    Register src_reg = std::get<1>(tuple);
    Register* dst_reg = std::get<2>(tuple);
    // If the registers match, or the destination has no cache register, nothing
    // needs to be done.
    if (src_reg == *dst_reg || *dst_reg == no_reg) continue;
    // On forward jumps, just reset the cached register in the target state.
810
    if (jump_direction == kForwardJump) {
811 812 813 814 815 816
      target.ClearCacheRegister(dst_reg);
    } else if (src_reg != no_reg) {
      // If the source has the content but in the wrong register, execute a
      // register move as part of the stack transfer.
      transfers.MoveRegister(LiftoffRegister{*dst_reg},
                             LiftoffRegister{src_reg}, kPointerKind);
817
    } else {
818 819
      // Otherwise (the source state has no cached content), we reload later.
      *reload = true;
820 821
    }
  }
822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837

  // Now execute stack transfers and register moves/loads.
  transfers.Execute();

  if (reload_instance) {
    LoadInstanceFromFrame(target.cached_instance);
  }
  if (reload_mem_start) {
    // {target.cached_instance} already got restored above, so we can use it
    // if it exists.
    Register instance = target.cached_instance;
    if (instance == no_reg) {
      // We don't have the instance available yet. Store it into the target
      // mem_start, so that we can load the mem_start from there.
      instance = target.cached_mem_start;
      LoadInstanceFromFrame(instance);
838
    }
839 840 841 842
    LoadFromInstance(
        target.cached_mem_start, instance,
        ObjectAccess::ToTagged(WasmInstanceObject::kMemoryStartOffset),
        sizeof(size_t));
843 844 845
#ifdef V8_SANDBOXED_POINTERS
    DecodeSandboxedPointer(target.cached_mem_start);
#endif
846
  }
847 848
}

849 850
void LiftoffAssembler::Spill(VarState* slot) {
  switch (slot->loc()) {
851 852 853
    case VarState::kStack:
      return;
    case VarState::kRegister:
854
      Spill(slot->offset(), slot->reg(), slot->kind());
855
      cache_state_.dec_used(slot->reg());
856
      break;
857
    case VarState::kIntConst:
858
      Spill(slot->offset(), slot->constant());
859 860
      break;
  }
861
  slot->MakeStack();
862 863 864 865
}

void LiftoffAssembler::SpillLocals() {
  for (uint32_t i = 0; i < num_locals_; ++i) {
866
    Spill(&cache_state_.stack_state[i]);
867 868 869
  }
}

void LiftoffAssembler::SpillAllRegisters() {
  for (uint32_t i = 0, e = cache_state_.stack_height(); i < e; ++i) {
    auto& slot = cache_state_.stack_state[i];
    if (!slot.is_reg()) continue;
    Spill(slot.offset(), slot.reg(), slot.kind());
    slot.MakeStack();
  }
  cache_state_.ClearAllCacheRegisters();
  cache_state_.reset_used_registers();
}

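// Makes sure {reg} no longer holds cached state: cache registers are dropped,
// other uses are spilled, and occurrences of {reg} in {possible_uses} are
// redirected to a freshly allocated replacement register.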
void LiftoffAssembler::ClearRegister(
    Register reg, std::initializer_list<Register*> possible_uses,
    LiftoffRegList pinned) {
  if (reg == cache_state()->cached_instance) {
    cache_state()->ClearCachedInstanceRegister();
    // We can return immediately. The instance is only used to load information
    // at the beginning of an instruction when values don't have to be in
    // specific registers yet. Therefore the instance should never be one of the
    // {possible_uses}.
    for (Register* use : possible_uses) {
      USE(use);
      DCHECK_NE(reg, *use);
    }
    return;
  } else if (reg == cache_state()->cached_mem_start) {
    cache_state()->ClearCachedMemStartRegister();
    // The memory start may be among the {possible_uses}, e.g. for an atomic
    // compare exchange. Therefore it is necessary to iterate over the
    // {possible_uses} below, and we cannot return early.
  } else if (cache_state()->is_used(LiftoffRegister(reg))) {
    SpillRegister(LiftoffRegister(reg));
  }
  Register replacement = no_reg;
  for (Register* use : possible_uses) {
    if (reg != *use) continue;
    if (replacement == no_reg) {
      replacement = GetUnusedRegister(kGpReg, pinned).gp();
      Move(replacement, reg, kPointerKind);
    }
    // We cannot leave this loop early. There may be multiple uses of {reg}.
    *use = replacement;
  }
}

namespace {
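// Distributes call parameters into the registers and stack slots prescribed by
// {call_descriptor}, recording register fills in {stack_transfers} and
// outgoing stack arguments in {stack_slots}.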
void PrepareStackTransfers(const ValueKindSig* sig,
                           compiler::CallDescriptor* call_descriptor,
                           const VarState* slots,
                           LiftoffStackSlots* stack_slots,
                           StackTransferRecipe* stack_transfers,
                           LiftoffRegList* param_regs) {
  // Process parameters backwards, to reduce the amount of Slot sorting for
  // the most common case - a normal Wasm Call. Slots will be mostly unsorted
  // in the Builtin call case.
  uint32_t call_desc_input_idx =
      static_cast<uint32_t>(call_descriptor->InputCount());
  uint32_t num_params = static_cast<uint32_t>(sig->parameter_count());
  for (uint32_t i = num_params; i > 0; --i) {
    const uint32_t param = i - 1;
    ValueKind kind = sig->GetParam(param);
    const bool is_gp_pair = kNeedI64RegPair && kind == kI64;
    const int num_lowered_params = is_gp_pair ? 2 : 1;
    const VarState& slot = slots[param];
    const uint32_t stack_offset = slot.offset();
    // Process both halves of a register pair separately, because they are passed
    // as separate parameters. One or both of them could end up on the stack.
    for (int lowered_idx = 0; lowered_idx < num_lowered_params; ++lowered_idx) {
      const RegPairHalf half =
          is_gp_pair && lowered_idx == 0 ? kHighWord : kLowWord;
      --call_desc_input_idx;
      compiler::LinkageLocation loc =
          call_descriptor->GetInputLocation(call_desc_input_idx);
      if (loc.IsRegister()) {
        DCHECK(!loc.IsAnyRegister());
        RegClass rc = is_gp_pair ? kGpReg : reg_class_for(kind);
        int reg_code = loc.AsRegister();
        LiftoffRegister reg =
            LiftoffRegister::from_external_code(rc, kind, reg_code);
        param_regs->set(reg);
        if (is_gp_pair) {
          stack_transfers->LoadI64HalfIntoRegister(reg, slot, stack_offset,
                                                   half);
        } else {
          stack_transfers->LoadIntoRegister(reg, slot, stack_offset);
        }
      } else {
        DCHECK(loc.IsCallerFrameSlot());
        int param_offset = -loc.GetLocation() - 1;
        stack_slots->Add(slot, stack_offset, half, param_offset);
      }
    }
  }
}

}  // namespace

void LiftoffAssembler::PrepareBuiltinCall(
    const ValueKindSig* sig, compiler::CallDescriptor* call_descriptor,
    std::initializer_list<VarState> params) {
  LiftoffStackSlots stack_slots(this);
  StackTransferRecipe stack_transfers(this);
  LiftoffRegList param_regs;
  PrepareStackTransfers(sig, call_descriptor, params.begin(), &stack_slots,
                        &stack_transfers, &param_regs);
  SpillAllRegisters();
  int param_slots = static_cast<int>(call_descriptor->ParameterSlotCount());
  if (param_slots > 0) {
    stack_slots.Construct(param_slots);
  }
  // Execute the stack transfers before filling the instance register.
  stack_transfers.Execute();

  // Reset register use counters.
  cache_state_.reset_used_registers();
}

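// Prepares a call: spills all values that are not passed as parameters, moves
// the target instance and the parameters into their designated registers and
// stack slots, and pops the parameters off the value stack.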
void LiftoffAssembler::PrepareCall(const ValueKindSig* sig,
                                   compiler::CallDescriptor* call_descriptor,
                                   Register* target,
                                   Register* target_instance) {
  uint32_t num_params = static_cast<uint32_t>(sig->parameter_count());
  // Input 0 is the call target.
  constexpr size_t kInputShift = 1;

  // Spill all cache slots which are not being used as parameters.
  cache_state_.ClearAllCacheRegisters();
  for (VarState* it = cache_state_.stack_state.end() - 1 - num_params;
       it >= cache_state_.stack_state.begin() &&
       !cache_state_.used_registers.is_empty();
       --it) {
    if (!it->is_reg()) continue;
    Spill(it->offset(), it->reg(), it->kind());
    cache_state_.dec_used(it->reg());
    it->MakeStack();
  }

  LiftoffStackSlots stack_slots(this);
  StackTransferRecipe stack_transfers(this);
  LiftoffRegList param_regs;

  // Move the target instance (if supplied) into the correct instance register.
  compiler::LinkageLocation instance_loc =
      call_descriptor->GetInputLocation(kInputShift);
  DCHECK(instance_loc.IsRegister() && !instance_loc.IsAnyRegister());
  Register instance_reg = Register::from_code(instance_loc.AsRegister());
  param_regs.set(instance_reg);
  if (target_instance && *target_instance != instance_reg) {
    stack_transfers.MoveRegister(LiftoffRegister(instance_reg),
                                 LiftoffRegister(*target_instance),
                                 kPointerKind);
  }

  int param_slots = static_cast<int>(call_descriptor->ParameterSlotCount());
  if (num_params) {
    uint32_t param_base = cache_state_.stack_height() - num_params;
    PrepareStackTransfers(sig, call_descriptor,
                          &cache_state_.stack_state[param_base], &stack_slots,
                          &stack_transfers, &param_regs);
  }

  // If the target register overlaps with a parameter register, then move the
  // target to another free register, or spill to the stack.
  if (target && param_regs.has(LiftoffRegister(*target))) {
    // Try to find another free register.
    LiftoffRegList free_regs = kGpCacheRegList.MaskOut(param_regs);
    if (!free_regs.is_empty()) {
      LiftoffRegister new_target = free_regs.GetFirstRegSet();
      stack_transfers.MoveRegister(new_target, LiftoffRegister(*target),
                                   kPointerKind);
      *target = new_target.gp();
    } else {
      stack_slots.Add(VarState(kPointerKind, LiftoffRegister(*target), 0),
                      param_slots);
      param_slots++;
      *target = no_reg;
    }
  }

  if (param_slots > 0) {
    stack_slots.Construct(param_slots);
  }
  // Execute the stack transfers before filling the instance register.
  stack_transfers.Execute();
  // Pop parameters from the value stack.
  cache_state_.stack_state.pop_back(num_params);

  // Reset register use counters.
  cache_state_.reset_used_registers();

  // Reload the instance from the stack.
  if (!target_instance) {
    FillInstanceInto(instance_reg);
  }
}

void LiftoffAssembler::FinishCall(const ValueKindSig* sig,
                                  compiler::CallDescriptor* call_descriptor) {
  int call_desc_return_idx = 0;
  for (ValueKind return_kind : sig->returns()) {
    DCHECK_LT(call_desc_return_idx, call_descriptor->ReturnCount());
    const bool needs_gp_pair = needs_gp_reg_pair(return_kind);
    const int num_lowered_params = 1 + needs_gp_pair;
    const ValueKind lowered_kind = needs_gp_pair ? kI32 : return_kind;
    const RegClass rc = reg_class_for(lowered_kind);
    // Initialize to anything, will be set in the loop and used afterwards.
    LiftoffRegister reg_pair[2] = {kGpCacheRegList.GetFirstRegSet(),
                                   kGpCacheRegList.GetFirstRegSet()};
    LiftoffRegList pinned;
    for (int pair_idx = 0; pair_idx < num_lowered_params; ++pair_idx) {
      compiler::LinkageLocation loc =
          call_descriptor->GetReturnLocation(call_desc_return_idx++);
      if (loc.IsRegister()) {
        DCHECK(!loc.IsAnyRegister());
        reg_pair[pair_idx] = LiftoffRegister::from_external_code(
            rc, lowered_kind, loc.AsRegister());
      } else {
        DCHECK(loc.IsCallerFrameSlot());
        reg_pair[pair_idx] = GetUnusedRegister(rc, pinned);
        // Get slot offset relative to the stack pointer.
        int offset = call_descriptor->GetOffsetToReturns();
        int return_slot = -loc.GetLocation() - offset - 1;
        LoadReturnStackSlot(reg_pair[pair_idx],
                            return_slot * kSystemPointerSize, lowered_kind);
      }
      if (pair_idx == 0) {
        pinned.set(reg_pair[0]);
      }
    }
    if (num_lowered_params == 1) {
      PushRegister(return_kind, reg_pair[0]);
    } else {
      PushRegister(return_kind, LiftoffRegister::ForPair(reg_pair[0].gp(),
                                                         reg_pair[1].gp()));
    }
  }
  int return_slots = static_cast<int>(call_descriptor->ReturnSlotCount());
  RecordUsedSpillOffset(TopSpillOffset() + return_slots * kSystemPointerSize);
}

void LiftoffAssembler::Move(LiftoffRegister dst, LiftoffRegister src,
                            ValueKind kind) {
  DCHECK_EQ(dst.reg_class(), src.reg_class());
  DCHECK_NE(dst, src);
  if (kNeedI64RegPair && dst.is_gp_pair()) {
    // Use the {StackTransferRecipe} to move pairs, as the registers in the
    // pairs might overlap.
    StackTransferRecipe(this).MoveRegister(dst, src, kind);
  } else if (kNeedS128RegPair && dst.is_fp_pair()) {
    // Calling low_fp is fine, Move will automatically check the kind and
    // convert this FP to its SIMD register, and use a SIMD move.
    Move(dst.low_fp(), src.low_fp(), kind);
  } else if (dst.is_gp()) {
    Move(dst.gp(), src.gp(), kind);
  } else {
    Move(dst.fp(), src.fp(), kind);
  }
}

void LiftoffAssembler::ParallelRegisterMove(
    base::Vector<const ParallelRegisterMoveTuple> tuples) {
  StackTransferRecipe stack_transfers(this);
  for (auto tuple : tuples) {
    if (tuple.dst == tuple.src) continue;
    stack_transfers.MoveRegister(tuple.dst, tuple.src, tuple.kind);
  }
}

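// Moves the function's return values from the value stack into the locations
// (registers or caller frame slots) required by the calling convention.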
void LiftoffAssembler::MoveToReturnLocations(
    const FunctionSig* sig, compiler::CallDescriptor* descriptor) {
  StackTransferRecipe stack_transfers(this);
  if (sig->return_count() == 1) {
    ValueKind return_kind = sig->GetReturn(0).kind();
    // Defaults to a gp reg, will be set below if return kind is not gp.
    LiftoffRegister return_reg = LiftoffRegister(kGpReturnRegisters[0]);

    if (needs_gp_reg_pair(return_kind)) {
      return_reg = LiftoffRegister::ForPair(kGpReturnRegisters[0],
                                            kGpReturnRegisters[1]);
    } else if (needs_fp_reg_pair(return_kind)) {
      return_reg = LiftoffRegister::ForFpPair(kFpReturnRegisters[0]);
    } else if (reg_class_for(return_kind) == kFpReg) {
      return_reg = LiftoffRegister(kFpReturnRegisters[0]);
    } else {
      DCHECK_EQ(kGpReg, reg_class_for(return_kind));
    }
    stack_transfers.LoadIntoRegister(return_reg,
                                     cache_state_.stack_state.back(),
                                     cache_state_.stack_state.back().offset());
    return;
  }

  // Slow path for multi-return.
  int call_desc_return_idx = 0;
  DCHECK_LE(sig->return_count(), cache_state_.stack_height());
  VarState* slots = cache_state_.stack_state.end() - sig->return_count();
  // Fill return frame slots first to ensure that all potential spills happen
  // before we prepare the stack transfers.
  for (size_t i = 0; i < sig->return_count(); ++i) {
    ValueKind return_kind = sig->GetReturn(i).kind();
    bool needs_gp_pair = needs_gp_reg_pair(return_kind);
    int num_lowered_params = 1 + needs_gp_pair;
    for (int pair_idx = 0; pair_idx < num_lowered_params; ++pair_idx) {
      compiler::LinkageLocation loc =
          descriptor->GetReturnLocation(call_desc_return_idx++);
      if (loc.IsCallerFrameSlot()) {
        RegPairHalf half = pair_idx == 0 ? kLowWord : kHighWord;
        VarState& slot = slots[i];
        LiftoffRegister reg = needs_gp_pair
                                  ? LoadI64HalfIntoRegister(slot, half)
                                  : LoadToRegister(slot, {});
        ValueKind lowered_kind = needs_gp_pair ? kI32 : return_kind;
        StoreCallerFrameSlot(reg, -loc.AsCallerFrameSlot(), lowered_kind);
      }
    }
  }
  // Prepare and execute stack transfers.
  call_desc_return_idx = 0;
  for (size_t i = 0; i < sig->return_count(); ++i) {
    ValueKind return_kind = sig->GetReturn(i).kind();
    bool needs_gp_pair = needs_gp_reg_pair(return_kind);
    int num_lowered_params = 1 + needs_gp_pair;
    for (int pair_idx = 0; pair_idx < num_lowered_params; ++pair_idx) {
      RegPairHalf half = pair_idx == 0 ? kLowWord : kHighWord;
      compiler::LinkageLocation loc =
          descriptor->GetReturnLocation(call_desc_return_idx++);
      if (loc.IsRegister()) {
        DCHECK(!loc.IsAnyRegister());
        int reg_code = loc.AsRegister();
        ValueKind lowered_kind = needs_gp_pair ? kI32 : return_kind;
        RegClass rc = reg_class_for(lowered_kind);
        LiftoffRegister reg =
            LiftoffRegister::from_external_code(rc, return_kind, reg_code);
        VarState& slot = slots[i];
        if (needs_gp_pair) {
          stack_transfers.LoadI64HalfIntoRegister(reg, slot, slot.offset(),
                                                  half);
        } else {
          stack_transfers.LoadIntoRegister(reg, slot, slot.offset());
        }
      }
    }
  }
}

#ifdef ENABLE_SLOW_DCHECKS
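// Debug check: recomputes register use counts and the used-register set from
// the value stack and compares them against the cached bookkeeping.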
bool LiftoffAssembler::ValidateCacheState() const {
  uint32_t register_use_count[kAfterMaxLiftoffRegCode] = {0};
  LiftoffRegList used_regs;
  for (const VarState& var : cache_state_.stack_state) {
    if (!var.is_reg()) continue;
    LiftoffRegister reg = var.reg();
    if ((kNeedI64RegPair || kNeedS128RegPair) && reg.is_pair()) {
      ++register_use_count[reg.low().liftoff_code()];
      ++register_use_count[reg.high().liftoff_code()];
    } else {
      ++register_use_count[reg.liftoff_code()];
    }
    used_regs.set(reg);
  }
  for (Register cache_reg :
       {cache_state_.cached_instance, cache_state_.cached_mem_start}) {
    if (cache_reg != no_reg) {
      DCHECK(!used_regs.has(cache_reg));
      int liftoff_code = LiftoffRegister{cache_reg}.liftoff_code();
      used_regs.set(cache_reg);
      DCHECK_EQ(0, register_use_count[liftoff_code]);
      register_use_count[liftoff_code] = 1;
    }
  }
  bool valid = memcmp(register_use_count, cache_state_.register_use_count,
                      sizeof(register_use_count)) == 0 &&
               used_regs == cache_state_.used_registers;
  if (valid) return true;
  std::ostringstream os;
  os << "Error in LiftoffAssembler::ValidateCacheState().\n";
  os << "expected: used_regs " << used_regs << ", counts "
     << PrintCollection(register_use_count) << "\n";
  os << "found:    used_regs " << cache_state_.used_registers << ", counts "
     << PrintCollection(cache_state_.register_use_count) << "\n";
  os << "Use --trace-wasm-decoder and --trace-liftoff to debug.";
  FATAL("%s", os.str().c_str());
}
#endif

LiftoffRegister LiftoffAssembler::SpillOneRegister(LiftoffRegList candidates) {
  // Spill one cached value to free a register.
  LiftoffRegister spill_reg = cache_state_.GetNextSpillReg(candidates);
  SpillRegister(spill_reg);
  return spill_reg;
}

LiftoffRegister LiftoffAssembler::SpillAdjacentFpRegisters(
    LiftoffRegList pinned) {
  // We end up in this call only when:
  // [1] kNeedS128RegPair, and
  // [2] there are no pair of adjacent FP registers that are free
  CHECK(kNeedS128RegPair);
  DCHECK(!kFpCacheRegList.MaskOut(pinned)
              .MaskOut(cache_state_.used_registers)
              .HasAdjacentFpRegsSet());

  // Special logic, if the top fp register is even, we might hit a case of an
  // invalid register in case 2.
  LiftoffRegister last_fp = kFpCacheRegList.GetLastRegSet();
  if (last_fp.fp().code() % 2 == 0) {
    pinned.set(last_fp);
  }

  // We can try to optimize the spilling here:
  // 1. Try to get a free fp register, either:
  //  a. This register is already free, or
  //  b. it had to be spilled.
  // 2. If 1a, the adjacent register is used (invariant [2]), spill it.
  // 3. If 1b, check the adjacent register:
  //  a. If free, done!
  //  b. If used, spill it.
  // We spill one register in 2 and 3a, and two registers in 3b.

  LiftoffRegister first_reg = GetUnusedRegister(kFpReg, pinned);
  LiftoffRegister second_reg = first_reg, low_reg = first_reg;

  if (first_reg.fp().code() % 2 == 0) {
    second_reg =
        LiftoffRegister::from_liftoff_code(first_reg.liftoff_code() + 1);
  } else {
    second_reg =
        LiftoffRegister::from_liftoff_code(first_reg.liftoff_code() - 1);
    low_reg = second_reg;
  }

  if (cache_state_.is_used(second_reg)) {
    SpillRegister(second_reg);
  }

  return low_reg;
}

void LiftoffAssembler::SpillRegister(LiftoffRegister reg) {
  int remaining_uses = cache_state_.get_use_count(reg);
  DCHECK_LT(0, remaining_uses);
  for (uint32_t idx = cache_state_.stack_height() - 1;; --idx) {
    DCHECK_GT(cache_state_.stack_height(), idx);
    auto* slot = &cache_state_.stack_state[idx];
    if (!slot->is_reg() || !slot->reg().overlaps(reg)) continue;
    if (slot->reg().is_pair()) {
      // Make sure to decrement *both* registers in a pair, because the
      // {clear_used} call below only clears one of them.
      cache_state_.dec_used(slot->reg().low());
      cache_state_.dec_used(slot->reg().high());
      cache_state_.last_spilled_regs.set(slot->reg().low());
      cache_state_.last_spilled_regs.set(slot->reg().high());
    }
    Spill(slot->offset(), slot->reg(), slot->kind());
    slot->MakeStack();
    if (--remaining_uses == 0) break;
  }
  cache_state_.clear_used(reg);
  cache_state_.last_spilled_regs.set(reg);
}

void LiftoffAssembler::set_num_locals(uint32_t num_locals) {
  DCHECK_EQ(0, num_locals_);  // only call this once.
  num_locals_ = num_locals;
1334 1335
  if (num_locals > kInlineLocalKinds) {
    more_local_kinds_ = reinterpret_cast<ValueKind*>(
1336
        base::Malloc(num_locals * sizeof(ValueKind)));
1337
    DCHECK_NOT_NULL(more_local_kinds_);
1338 1339 1340
  }
}

std::ostream& operator<<(std::ostream& os, VarState slot) {
  os << name(slot.kind()) << ":";
  switch (slot.loc()) {
    case VarState::kStack:
      return os << "s0x" << std::hex << slot.offset() << std::dec;
    case VarState::kRegister:
      return os << slot.reg();
    case VarState::kIntConst:
      return os << "c" << slot.i32_const();
  }
  UNREACHABLE();
}

#if DEBUG
bool CheckCompatibleStackSlotTypes(ValueKind a, ValueKind b) {
  if (is_object_reference(a)) {
    // Since Liftoff doesn't do accurate type tracking (e.g. on loop back
    // edges), we only care that pointer types stay amongst pointer types.
    // It's fine if ref/optref overwrite each other.
    DCHECK(is_object_reference(b));
  } else if (is_rtt(a)) {
    // Same for rtt/rtt_with_depth.
    DCHECK(is_rtt(b));
  } else {
    // All other types (primitive numbers, bottom/stmt) must be equal.
    DCHECK_EQ(a, b);
  }
  return true;  // Dummy so this can be called via DCHECK.
}
#endif

}  // namespace wasm
}  // namespace internal
}  // namespace v8