Commit 83091c45 authored by Ng Zhi An, committed by Commit Bot

[liftoff][wasm-simd] Add allocation and spill logic for fp reg pair

kFpRegPair requires a bit of special case logic in allocation and spill
logic, since it requires adjacent fp registers, with the low register
being an even-numbered one.

We make use of a shift+and trick with the reg list to check this. Given
a bit mask of registers available, AND this with a right-shift-by-one
version of itself, and then AND it again with a bit mask of even
numbered fp registers, we can get a bit mask where all available even
registers are set.

For example, given 00011011, where 1 means register is available:

  00011011
&  0001101 (right shift by 1)
----------
  00001001
& 01010101 (mask of even registers)
----------
  00000001

We see that only the first fp register is available, based on the
adjacency requirement.

The mask of even registers is defined to be uint64_t, to work around shift
overflow issues when non-unsigned values are used, and also to make this
work on all platforms and compilers without implicit conversion warnings.
It is a bit wasteful, since we really only need storage_t, but since it
is a constexpr it should be okay.

The spilling case tries to be a bit smart, since there are three possible
cases (described in the code), two of which we can spill one register,
and in the last case we have to spill two registers. We try to
distinguish between these cases in order to minimize spills. The dumb
thing here is in the last case, we arbitrarily pick the first two
registers (d0, d1) to spill.

Bug: v8:9909
Change-Id: I408f9f79b1f3c505f2fd73bad15923188ae7aaf4
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1975033
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#65546}
parent f2cc948f
......@@ -807,6 +807,52 @@ LiftoffRegister LiftoffAssembler::SpillOneRegister(LiftoffRegList candidates,
return spill_reg;
}
LiftoffRegister LiftoffAssembler::SpillAdjacentFpRegisters(
    LiftoffRegList pinned) {
  // Frees an adjacent pair of fp registers by spilling one or two registers,
  // and returns the low (even-numbered) register of that pair.
  //
  // This function is only reached when:
  //   [1] kNeedS128RegPair holds, and
  //   [2] there is no pair of adjacent fp cache registers that is both
  //       unpinned and unused.
  CHECK(kNeedS128RegPair);
  DCHECK(!kFpCacheRegList.MaskOut(pinned)
              .MaskOut(cache_state_.used_registers)
              .HasAdjacentFpRegsSet());

  // If the highest fp cache register has an even code, pairing it with
  // "code + 1" could name an invalid register. Pin it (in our local copy of
  // {pinned}) so that it is never picked as the starting register below.
  const LiftoffRegister top_fp = kFpCacheRegList.GetLastRegSet();
  const bool top_is_even = top_fp.fp().code() % 2 == 0;
  if (top_is_even) pinned.set(top_fp);

  // Spill strategy, aiming for as few spills as possible:
  //   - Ask for one fp register. Either it was already free, or
  //     GetUnusedRegister had to spill something to provide it.
  //   - Already free: per [2] its neighbour is expected to be in use, so the
  //     neighbour gets spilled (one spill in total).
  //   - Obtained by spilling: the neighbour is either free (one spill in
  //     total) or in use, in which case it is spilled too (two spills).
  // NOTE(review): the neighbour is spilled without consulting {pinned};
  // confirm that callers never pin just one half of a pair.
  const LiftoffRegister candidate = GetUnusedRegister(kFpCacheRegList, pinned);
  const bool candidate_is_low = candidate.fp().code() % 2 == 0;

  // An even register pairs with the next code up, an odd register with the
  // next code down.
  const LiftoffRegister neighbour = LiftoffRegister::from_liftoff_code(
      candidate.liftoff_code() + (candidate_is_low ? 1 : -1));
  if (cache_state_.is_used(neighbour)) SpillRegister(neighbour);

  // Always hand back the even-numbered half of the pair.
  return candidate_is_low ? candidate : neighbour;
}
void LiftoffAssembler::SpillRegister(LiftoffRegister reg) {
int remaining_uses = cache_state_.get_use_count(reg);
DCHECK_LT(0, remaining_uses);
......@@ -814,7 +860,7 @@ void LiftoffAssembler::SpillRegister(LiftoffRegister reg) {
DCHECK_GT(cache_state_.stack_height(), idx);
auto* slot = &cache_state_.stack_state[idx];
if (!slot->is_reg() || !slot->reg().overlaps(reg)) continue;
if (slot->reg().is_gp_pair()) {
if (slot->reg().is_pair()) {
// Make sure to decrement *both* registers in a pair, because the
// {clear_used} call below only clears one of them.
cache_state_.dec_used(slot->reg().low());
......
......@@ -146,6 +146,10 @@ class LiftoffAssembler : public TurboAssembler {
LiftoffRegList available_regs =
kGpCacheRegList.MaskOut(used_registers).MaskOut(pinned);
return available_regs.GetNumRegsSet() >= 2;
} else if (kNeedS128RegPair && rc == kFpRegPair) {
LiftoffRegList available_regs =
kFpCacheRegList.MaskOut(used_registers).MaskOut(pinned);
return available_regs.HasAdjacentFpRegsSet();
}
DCHECK(rc == kGpReg || rc == kFpReg);
LiftoffRegList candidates = GetCacheRegList(rc);
......@@ -165,6 +169,13 @@ class LiftoffAssembler : public TurboAssembler {
Register low = pinned.set(unused_register(kGpReg, pinned)).gp();
Register high = unused_register(kGpReg, pinned).gp();
return LiftoffRegister::ForPair(low, high);
} else if (kNeedS128RegPair && rc == kFpRegPair) {
LiftoffRegList available_regs =
kFpCacheRegList.MaskOut(used_registers).MaskOut(pinned);
DoubleRegister low =
available_regs.GetAdjacentFpRegsSet().GetFirstRegSet().fp();
DCHECK(is_free(LiftoffRegister::ForFpPair(low)));
return LiftoffRegister::ForFpPair(low);
}
DCHECK(rc == kGpReg || rc == kFpReg);
LiftoffRegList candidates = GetCacheRegList(rc);
......@@ -323,6 +334,14 @@ class LiftoffAssembler : public TurboAssembler {
Register low = pinned.set(GetUnusedRegister(candidates, pinned)).gp();
Register high = GetUnusedRegister(candidates, pinned).gp();
return LiftoffRegister::ForPair(low, high);
} else if (kNeedS128RegPair && rc == kFpRegPair) {
// kFpRegPair specific logic here because we need adjacent registers, not
// just any two registers (like kGpRegPair).
if (cache_state_.has_unused_register(rc, pinned)) {
return cache_state_.unused_register(rc, pinned);
}
DoubleRegister low_fp = SpillAdjacentFpRegisters(pinned).fp();
return LiftoffRegister::ForFpPair(low_fp);
}
DCHECK(rc == kGpReg || rc == kFpReg);
LiftoffRegList candidates = GetCacheRegList(rc);
......@@ -717,6 +736,8 @@ class LiftoffAssembler : public TurboAssembler {
LiftoffRegister SpillOneRegister(LiftoffRegList candidates,
LiftoffRegList pinned);
// Spill one or two fp registers to get a pair of adjacent fp registers.
// Returns the low (even-numbered) register of that pair. {pinned} is passed
// on to GetUnusedRegister when choosing the first register of the pair.
// Must only be called when kNeedS128RegPair is true (enforced with a CHECK).
LiftoffRegister SpillAdjacentFpRegisters(LiftoffRegList pinned);
};
std::ostream& operator<<(std::ostream& os, LiftoffAssembler::VarState);
......
......@@ -306,6 +306,9 @@ class LiftoffRegList {
static constexpr storage_t kGpMask = storage_t{kLiftoffAssemblerGpCacheRegs};
static constexpr storage_t kFpMask = storage_t{kLiftoffAssemblerFpCacheRegs}
<< kAfterMaxLiftoffGpRegCode;
// Sets all even numbered fp registers. The alternating bit pattern is shifted
// by kAfterMaxLiftoffGpRegCode, the same shift that kFpMask uses, so that its
// lowest set bit lines up with the start of the fp part of the list.
// Declared as uint64_t rather than storage_t so the shift is performed on a
// 64-bit unsigned value.
// NOTE(review): the pattern is not limited to the bits in kFpMask; it is meant
// to be AND-ed with a register list rather than used as a list on its own.
static constexpr uint64_t kEvenFpSetMask = uint64_t{0x5555555555555555}
<< kAfterMaxLiftoffGpRegCode;
constexpr LiftoffRegList() = default;
......@@ -358,6 +361,17 @@ class LiftoffRegList {
return LiftoffRegList(regs_ | other.regs_);
}
constexpr LiftoffRegList GetAdjacentFpRegsSet() const {
  // Returns a list with one bit per adjacent register pair contained in this
  // list, placed at the position of the pair's low register.
  // Bit i of {regs_ & (regs_ >> 1)} is set iff bits i and i+1 of {regs_} are
  // both set; masking with kEvenFpSetMask then keeps only the positions of
  // even-numbered fp registers. The mask is uint64_t, so the result is
  // narrowed back to storage_t.
  return LiftoffRegList(
      static_cast<storage_t>(regs_ & (regs_ >> 1) & kEvenFpSetMask));
}
constexpr bool HasAdjacentFpRegsSet() const {
return !GetAdjacentFpRegsSet().is_empty();
}
constexpr bool operator==(const LiftoffRegList other) const {
  // Two lists are equal iff their underlying bit sets are identical.
  return other.regs_ == regs_;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment