Commit 75fa5d42 authored by Ng Zhi An, committed by Commit Bot

[liftoff][wasm-simd] Encode SIMD registers in LiftoffRegister

This introduces a new constant, kNeedS128RegPair, which is set for
architectures where a pair of FP registers (DoubleRegister) is mapped to a
single Simd128Register (ARM).

In this case, a new RegClass, kFpRegPair, is defined to represent
LiftoffRegisters of this type. kFpRegPair is equal to kNoReg on all other
architectures.

We add one more bit to the encoding on ARM: the top bit is now set for
kFpRegPair, and the second-highest bit is set for kGpRegPair. When the top
bit is set, we encode the FP register code into the bottom bits of code_.
Note that this is encoded directly, i.e. not offset by kMaxGpRegCode, which
saves an add/subtract when converting to/from DoubleRegister. We only need
to store one FP register, the low one, since the high register is implicitly
the next register. Note that the stored register is *always* an
even-numbered register.

Bug: v8:9909
Change-Id: I78d603c9938c3d0add9bd3ca77ddebbfa7abbc05
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1973276
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#65508}
parent 565c4fee
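
The following is a minimal standalone sketch (an editor's illustration, not
part of this commit) of the encoding described above. It models the register
codes as plain integers and assumes ARM's sizes: 4 bits per GP register code
and 5 bits per FP register code, so a pair needs 2 + 2 * 4 = 10 bits.

#include <cassert>
#include <cstdint>

namespace {

// Assumed widths for ARM: 9 GP cache registers and 13 FP cache registers.
constexpr int kBitsPerGpRegCode = 4;
constexpr int kBitsPerFpRegCode = 5;

// Bit 9 (the top bit of the 10-bit encoding) tags an FP pair; bit 8 tags a
// GP pair.
constexpr uint16_t kFpPairTag = 2u << (2 * kBitsPerGpRegCode);
constexpr uint16_t kGpPairTag = 1u << (2 * kBitsPerGpRegCode);

// Encode an FP pair. The low (always even) FP register code goes directly
// into the bottom bits, so decoding needs no add/subtract of kMaxGpRegCode.
uint16_t ForFpPair(int low_fp_code) {
  assert(low_fp_code % 2 == 0);
  return static_cast<uint16_t>(low_fp_code) | kFpPairTag;
}

bool is_fp_pair(uint16_t code) { return (code & kFpPairTag) != 0; }
bool is_gp_pair(uint16_t code) { return (code & kGpPairTag) != 0; }

int low_fp(uint16_t code) { return code & ((1 << kBitsPerFpRegCode) - 1); }

// The high register is implicitly the next FP register.
int high_fp(uint16_t code) { return low_fp(code) + 1; }

}  // namespace

int main() {
  // d2/d3 form one S128 pair: code = 2 | (2 << 8) = 0x202.
  uint16_t pair = ForFpPair(2);
  assert(is_fp_pair(pair));
  assert(!is_gp_pair(pair));
  assert(low_fp(pair) == 2);
  assert(high_fp(pair) == 3);
  return 0;
}
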
......@@ -353,7 +353,7 @@ void LiftoffAssembler::Load(LiftoffRegister dst, Register src_addr,
Register offset_reg, uint32_t offset_imm,
LoadType type, LiftoffRegList pinned,
uint32_t* protected_load_pc, bool is_load_mem) {
DCHECK_IMPLIES(type.value_type() == kWasmI64, dst.is_pair());
DCHECK_IMPLIES(type.value_type() == kWasmI64, dst.is_gp_pair());
// If offset_imm cannot be converted to int32 safely, we abort as a separate
// check should cause this code to never be executed.
// TODO(7881): Support when >2GB is required.
......
......@@ -257,7 +257,7 @@ void LiftoffAssembler::Load(LiftoffRegister dst, Register src_addr,
Register offset_reg, uint32_t offset_imm,
LoadType type, LiftoffRegList pinned,
uint32_t* protected_load_pc, bool is_load_mem) {
DCHECK_EQ(type.value_type() == kWasmI64, dst.is_pair());
DCHECK_EQ(type.value_type() == kWasmI64, dst.is_gp_pair());
// Wasm memory is limited to a size <2GB, so all offsets can be encoded as
// immediate value (in 31 bits, interpreted as signed value).
// If the offset is bigger, we always trap and this code is not reached.
......@@ -335,7 +335,7 @@ void LiftoffAssembler::Store(Register dst_addr, Register offset_reg,
uint32_t offset_imm, LiftoffRegister src,
StoreType type, LiftoffRegList pinned,
uint32_t* protected_store_pc, bool is_store_mem) {
DCHECK_EQ(type.value_type() == kWasmI64, src.is_pair());
DCHECK_EQ(type.value_type() == kWasmI64, src.is_gp_pair());
// Wasm memory is limited to a size <2GB, so all offsets can be encoded as
// immediate value (in 31 bits, interpreted as signed value).
// If the offset is bigger, we always trap and this code is not reached.
......
......@@ -152,7 +152,7 @@ class StackTransferRecipe {
DCHECK_NE(dst, src);
DCHECK_EQ(dst.reg_class(), src.reg_class());
DCHECK_EQ(reg_class_for(type), src.reg_class());
if (src.is_pair()) {
if (src.is_gp_pair()) {
DCHECK_EQ(kWasmI64, type);
if (dst.low() != src.low()) MoveRegister(dst.low(), src.low(), kWasmI32);
if (dst.high() != src.high())
......@@ -177,7 +177,7 @@ class StackTransferRecipe {
void LoadConstant(LiftoffRegister dst, WasmValue value) {
DCHECK(!load_dst_regs_.has(dst));
load_dst_regs_.set(dst);
if (dst.is_pair()) {
if (dst.is_gp_pair()) {
DCHECK_EQ(kWasmI64, value.type());
int64_t i64 = value.to_i64();
*register_load(dst.low()) =
......@@ -198,7 +198,7 @@ class StackTransferRecipe {
return;
}
load_dst_regs_.set(dst);
if (dst.is_pair()) {
if (dst.is_gp_pair()) {
DCHECK_EQ(kWasmI64, type);
*register_load(dst.low()) =
RegisterLoad::HalfStack(stack_offset, kLowWord);
......@@ -341,7 +341,7 @@ class RegisterReuseMap {
base::Optional<LiftoffRegister> Lookup(LiftoffRegister src) {
for (auto it = map_.begin(), end = map_.end(); it != end; it += 2) {
if (it->is_pair() == src.is_pair() && *it == src) return *(it + 1);
if (it->is_gp_pair() == src.is_gp_pair() && *it == src) return *(it + 1);
}
return {};
}
......@@ -623,8 +623,8 @@ void LiftoffAssembler::PrepareCall(FunctionSig* sig,
for (uint32_t i = num_params; i > 0; --i) {
const uint32_t param = i - 1;
ValueType type = sig->GetParam(param);
const bool is_pair = kNeedI64RegPair && type == kWasmI64;
const int num_lowered_params = is_pair ? 2 : 1;
const bool is_gp_pair = kNeedI64RegPair && type == kWasmI64;
const int num_lowered_params = is_gp_pair ? 2 : 1;
const uint32_t stack_idx = param_base + param;
const VarState& slot = cache_state_.stack_state[stack_idx];
const uint32_t stack_offset = slot.offset();
......@@ -632,13 +632,13 @@ void LiftoffAssembler::PrepareCall(FunctionSig* sig,
// as separate parameters. One or both of them could end up on the stack.
for (int lowered_idx = 0; lowered_idx < num_lowered_params; ++lowered_idx) {
const RegPairHalf half =
is_pair && lowered_idx == 0 ? kHighWord : kLowWord;
is_gp_pair && lowered_idx == 0 ? kHighWord : kLowWord;
--call_desc_input_idx;
compiler::LinkageLocation loc =
call_descriptor->GetInputLocation(call_desc_input_idx);
if (loc.IsRegister()) {
DCHECK(!loc.IsAnyRegister());
RegClass rc = is_pair ? kGpReg : reg_class_for(type);
RegClass rc = is_gp_pair ? kGpReg : reg_class_for(type);
int reg_code = loc.AsRegister();
#if V8_TARGET_ARCH_ARM
// Liftoff assumes a one-to-one mapping between float registers and
......@@ -652,7 +652,7 @@ void LiftoffAssembler::PrepareCall(FunctionSig* sig,
LiftoffRegister reg = LiftoffRegister::from_code(rc, reg_code);
#endif
param_regs.set(reg);
if (is_pair) {
if (is_gp_pair) {
stack_transfers.LoadI64HalfIntoRegister(reg, slot, stack_offset,
half);
} else {
......@@ -733,7 +733,7 @@ void LiftoffAssembler::Move(LiftoffRegister dst, LiftoffRegister src,
ValueType type) {
DCHECK_EQ(dst.reg_class(), src.reg_class());
DCHECK_NE(dst, src);
if (kNeedI64RegPair && dst.is_pair()) {
if (kNeedI64RegPair && dst.is_gp_pair()) {
// Use the {StackTransferRecipe} to move pairs, as the registers in the
// pairs might overlap.
StackTransferRecipe(this).MoveRegister(dst, src, type);
......@@ -776,7 +776,7 @@ bool LiftoffAssembler::ValidateCacheState() const {
for (const VarState& var : cache_state_.stack_state) {
if (!var.is_reg()) continue;
LiftoffRegister reg = var.reg();
if (kNeedI64RegPair && reg.is_pair()) {
if (kNeedI64RegPair && reg.is_gp_pair()) {
++register_use_count[reg.low().liftoff_code()];
++register_use_count[reg.high().liftoff_code()];
} else {
......@@ -814,7 +814,7 @@ void LiftoffAssembler::SpillRegister(LiftoffRegister reg) {
DCHECK_GT(cache_state_.stack_height(), idx);
auto* slot = &cache_state_.stack_state[idx];
if (!slot->is_reg() || !slot->reg().overlaps(reg)) continue;
if (slot->reg().is_pair()) {
if (slot->reg().is_gp_pair()) {
// Make sure to decrement *both* registers in a pair, because the
// {clear_used} call below only clears one of them.
cache_state_.dec_used(slot->reg().low());
......
......@@ -179,7 +179,7 @@ class LiftoffAssembler : public TurboAssembler {
}
void inc_used(LiftoffRegister reg) {
if (reg.is_pair()) {
if (reg.is_gp_pair()) {
inc_used(reg.low());
inc_used(reg.high());
return;
......@@ -192,7 +192,7 @@ class LiftoffAssembler : public TurboAssembler {
// Returns whether this was the last use.
void dec_used(LiftoffRegister reg) {
DCHECK(is_used(reg));
if (reg.is_pair()) {
if (reg.is_gp_pair()) {
dec_used(reg.low());
dec_used(reg.high());
return;
......@@ -203,14 +203,14 @@ class LiftoffAssembler : public TurboAssembler {
}
bool is_used(LiftoffRegister reg) const {
if (reg.is_pair()) return is_used(reg.low()) || is_used(reg.high());
if (reg.is_gp_pair()) return is_used(reg.low()) || is_used(reg.high());
bool used = used_registers.has(reg);
DCHECK_EQ(used, register_use_count[reg.liftoff_code()] != 0);
return used;
}
uint32_t get_use_count(LiftoffRegister reg) const {
if (reg.is_pair()) {
if (reg.is_gp_pair()) {
DCHECK_EQ(register_use_count[reg.low().liftoff_code()],
register_use_count[reg.high().liftoff_code()]);
reg = reg.low();
......
......@@ -1076,16 +1076,16 @@ class LiftoffCompiler {
[=](LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { \
__ emit_f64_set_cond(cond, dst.gp(), lhs.fp(), rhs.fp()); \
});
#define CASE_I64_SHIFTOP(opcode, fn) \
case kExpr##opcode: \
return EmitBinOpImm<kWasmI64, kWasmI64>( \
[=](LiftoffRegister dst, LiftoffRegister src, \
LiftoffRegister amount) { \
__ emit_##fn(dst, src, \
amount.is_pair() ? amount.low_gp() : amount.gp()); \
}, \
[=](LiftoffRegister dst, LiftoffRegister src, int32_t amount) { \
__ emit_##fn(dst, src, amount); \
#define CASE_I64_SHIFTOP(opcode, fn) \
case kExpr##opcode: \
return EmitBinOpImm<kWasmI64, kWasmI64>( \
[=](LiftoffRegister dst, LiftoffRegister src, \
LiftoffRegister amount) { \
__ emit_##fn(dst, src, \
amount.is_gp_pair() ? amount.low_gp() : amount.gp()); \
}, \
[=](LiftoffRegister dst, LiftoffRegister src, int32_t amount) { \
__ emit_##fn(dst, src, amount); \
});
#define CASE_CCALL_BINOP(opcode, type, ext_ref_fn) \
case kExpr##opcode: \
......
......@@ -17,13 +17,25 @@ namespace internal {
namespace wasm {
static constexpr bool kNeedI64RegPair = kSystemPointerSize == 4;
static constexpr bool kNeedS128RegPair = !kSimpleFPAliasing;
enum RegClass : uint8_t {
kGpReg,
kFpReg,
// {kGpRegPair} equals {kNoReg} if {kNeedI64RegPair} is false.
kGpRegPair,
kNoReg = kGpRegPair + kNeedI64RegPair
kFpRegPair = kGpRegPair + kNeedI64RegPair,
kNoReg = kFpRegPair + kNeedS128RegPair,
// +------------------+-------------------------------+
// | | kNeedI64RegPair |
// +------------------+---------------+---------------+
// | kNeedS128RegPair | true | false |
// +------------------+---------------+---------------+
// | true | 0,1,2,3,4 (a) | 0,1,2,2,3 |
// | false | 0,1,2,3,3 (b) | 0,1,2,2,2 (c) |
// +------------------+---------------+---------------+
// (a) arm
// (b) ia32
// (c) x64, arm64
};
enum RegPairHalf : uint8_t { kLowWord = 0, kHighWord = 1 };
......@@ -41,11 +53,47 @@ static inline constexpr RegClass reg_class_for(ValueType type) {
return kGpReg;
case kWasmI64:
return kNeedI64RegPair ? kGpRegPair : kGpReg;
case kWasmS128:
return kNeedS128RegPair ? kFpRegPair : kFpReg;
default:
return kNoReg; // unsupported type
}
}
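// For example (illustrative, not part of this commit): on arm,
// reg_class_for(kWasmI64) is kGpRegPair and reg_class_for(kWasmS128) is
// kFpRegPair; on x64 and arm64 neither pair is needed, so they map to
// kGpReg and kFpReg respectively.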
// Description of LiftoffRegister code encoding.
// This example uses the ARM architecture, which as of writing has:
// - 9 GP registers, requiring 4 bits
// - 13 FP registers, requiring 5 bits
// - kNeedI64RegPair is true
// - kNeedS128RegPair is true
// - thus, kBitsPerRegPair is 2 + 2 * 4 = 10
// - storage_t is uint16_t
// The table below illustrates how each RegClass is encoded, with brackets
// surrounding the bits which encode the register number.
//
// +----------------+------------------+
// | RegClass | Example |
// +----------------+------------------+
// | kGpReg (1) | [00 0000 0000] |
// | kFpReg (2) | [00 0000 1001] |
// | kGpRegPair (3) | 01 [0000] [0001] |
// | kFpRegPair (4) | 10 000[0 0010] |
// +----------------+------------------+
//
// gp and fp registers are encoded in the same index space, which means that
// code has to check for kGpRegPair and kFpRegPair before it can treat the code
// as a register code.
// (1) [0 .. kMaxGpRegCode] encodes gp registers
// (2) [kMaxGpRegCode + 1 .. kMaxGpRegCode + kMaxFpRegCode] encodes fp
// registers, so in this example, 1001 is really fp register 0.
// (3) The second-highest bit is set for kGpRegPair, and the two gp registers
// are stuffed side by side in the code. Note that this is not the
// second-highest bit of storage_t, since storage_t is larger than the number
// of meaningful bits we need for the encoding.
// (4) The top bit is set for kFpRegPair, and the fp register is stuffed into
// the bottom part of the code. Unlike (2), this is the fp register code itself
// (not sharing index space with gp), so in this example, it is fp register 2.
// Maximum code of a gp cache register.
static constexpr int kMaxGpRegCode =
8 * sizeof(kLiftoffAssemblerGpCacheRegs) -
......@@ -54,11 +102,6 @@ static constexpr int kMaxGpRegCode =
static constexpr int kMaxFpRegCode =
8 * sizeof(kLiftoffAssemblerFpCacheRegs) -
base::bits::CountLeadingZeros(kLiftoffAssemblerFpCacheRegs) - 1;
// LiftoffRegister encodes both gp and fp in a unified index space.
// [0 .. kMaxGpRegCode] encodes gp registers,
// [kMaxGpRegCode+1 .. kMaxGpRegCode + kMaxFpRegCode] encodes fp registers.
// I64 values on 32 bit platforms are stored in two registers, both encoded in
// the same LiftoffRegister value.
static constexpr int kAfterMaxLiftoffGpRegCode = kMaxGpRegCode + 1;
static constexpr int kAfterMaxLiftoffFpRegCode =
kAfterMaxLiftoffGpRegCode + kMaxFpRegCode + 1;
......@@ -67,11 +110,19 @@ static constexpr int kBitsPerLiftoffRegCode =
32 - base::bits::CountLeadingZeros<uint32_t>(kAfterMaxLiftoffRegCode - 1);
static constexpr int kBitsPerGpRegCode =
32 - base::bits::CountLeadingZeros<uint32_t>(kMaxGpRegCode);
static constexpr int kBitsPerGpRegPair = 1 + 2 * kBitsPerGpRegCode;
static constexpr int kBitsPerFpRegCode =
32 - base::bits::CountLeadingZeros<uint32_t>(kMaxFpRegCode);
// GpRegPair requires 1 extra bit, S128RegPair also needs an extra bit.
static constexpr int kBitsPerRegPair =
(kNeedS128RegPair ? 2 : 1) + 2 * kBitsPerGpRegCode;
static_assert(2 * kBitsPerGpRegCode >= kBitsPerFpRegCode,
"encoding for gp pair and fp pair collides");
class LiftoffRegister {
static constexpr int needed_bits =
Max(kNeedI64RegPair ? kBitsPerGpRegPair : 0, kBitsPerLiftoffRegCode);
Max(kNeedI64RegPair || kNeedS128RegPair ? kBitsPerRegPair : 0,
kBitsPerLiftoffRegCode);
using storage_t = std::conditional<
needed_bits <= 8, uint8_t,
std::conditional<needed_bits <= 16, uint16_t, uint32_t>::type>::type;
......@@ -121,31 +172,67 @@ class LiftoffRegister {
return LiftoffRegister(combined_code);
}
static LiftoffRegister ForFpPair(DoubleRegister low) {
DCHECK(kNeedS128RegPair);
DCHECK_EQ(0, low.code() % 2);
storage_t combined_code = low.code() | 2 << (2 * kBitsPerGpRegCode);
return LiftoffRegister(combined_code);
}
constexpr bool is_pair() const {
return (kNeedI64RegPair || kNeedS128RegPair) &&
(code_ & (3 << (2 * kBitsPerGpRegCode)));
}
constexpr bool is_gp_pair() const {
return kNeedI64RegPair && (code_ & (1 << (2 * kBitsPerGpRegCode))) != 0;
}
constexpr bool is_fp_pair() const {
return kNeedS128RegPair && (code_ & (2 << (2 * kBitsPerGpRegCode))) != 0;
}
constexpr bool is_gp() const { return code_ < kAfterMaxLiftoffGpRegCode; }
constexpr bool is_fp() const {
return code_ >= kAfterMaxLiftoffGpRegCode &&
code_ < kAfterMaxLiftoffFpRegCode;
}
LiftoffRegister low() const { return LiftoffRegister(low_gp()); }
LiftoffRegister low() const {
// Common case for most archs where only gp pair supported.
if (!kNeedS128RegPair) return LiftoffRegister(low_gp());
return is_gp_pair() ? LiftoffRegister(low_gp()) : LiftoffRegister(low_fp());
}
LiftoffRegister high() const { return LiftoffRegister(high_gp()); }
LiftoffRegister high() const {
// Common case for most archs where only gp pair supported.
if (!kNeedS128RegPair) return LiftoffRegister(high_gp());
return is_gp_pair() ? LiftoffRegister(high_gp())
: LiftoffRegister(high_fp());
}
Register low_gp() const {
DCHECK(is_pair());
DCHECK(is_gp_pair());
static constexpr storage_t kCodeMask = (1 << kBitsPerGpRegCode) - 1;
return Register::from_code(code_ & kCodeMask);
}
Register high_gp() const {
DCHECK(is_pair());
DCHECK(is_gp_pair());
static constexpr storage_t kCodeMask = (1 << kBitsPerGpRegCode) - 1;
return Register::from_code((code_ >> kBitsPerGpRegCode) & kCodeMask);
}
DoubleRegister low_fp() const {
DCHECK(is_fp_pair());
static constexpr storage_t kCodeMask = (1 << kBitsPerFpRegCode) - 1;
return DoubleRegister::from_code(code_ & kCodeMask);
}
DoubleRegister high_fp() const {
DCHECK(is_fp_pair());
static constexpr storage_t kCodeMask = (1 << kBitsPerFpRegCode) - 1;
return DoubleRegister::from_code((code_ & kCodeMask) + 1);
}
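// Worked example for the fp-pair accessors above (illustrative, assuming
// arm's 4-bit gp codes): for the pair d2+d3, ForFpPair(d2) stores
// code_ = 2 | (2 << 8). low_fp() masks the bottom kBitsPerFpRegCode bits and
// returns d2; high_fp() adds one and returns d3, the implicit high half.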
Register gp() const {
DCHECK(is_gp());
return Register::from_code(code_);
......@@ -162,15 +249,18 @@ class LiftoffRegister {
}
RegClass reg_class() const {
return is_pair() ? kGpRegPair : is_gp() ? kGpReg : kFpReg;
return is_fp_pair() ? kFpRegPair
: is_gp_pair() ? kGpRegPair : is_gp() ? kGpReg : kFpReg;
}
bool operator==(const LiftoffRegister other) const {
DCHECK_EQ(is_pair(), other.is_pair());
DCHECK_EQ(is_gp_pair(), other.is_gp_pair());
DCHECK_EQ(is_fp_pair(), other.is_fp_pair());
return code_ == other.code_;
}
bool operator!=(const LiftoffRegister other) const {
DCHECK_EQ(is_pair(), other.is_pair());
DCHECK_EQ(is_gp_pair(), other.is_gp_pair());
DCHECK_EQ(is_fp_pair(), other.is_fp_pair());
return code_ != other.code_;
}
bool overlaps(const LiftoffRegister other) const {
......@@ -187,8 +277,10 @@ class LiftoffRegister {
ASSERT_TRIVIALLY_COPYABLE(LiftoffRegister);
inline std::ostream& operator<<(std::ostream& os, LiftoffRegister reg) {
if (reg.is_pair()) {
if (reg.is_gp_pair()) {
return os << "<" << reg.low_gp() << "+" << reg.high_gp() << ">";
} else if (reg.is_fp_pair()) {
return os << "<" << reg.low_fp() << "+" << reg.high_fp() << ">";
} else if (reg.is_gp()) {
return os << reg.gp();
} else {
......
......@@ -807,7 +807,7 @@ void LiftoffAssembler::emit_i64_sub(LiftoffRegister dst, LiftoffRegister lhs,
namespace liftoff {
inline bool IsRegInRegPair(LiftoffRegister pair, Register reg) {
DCHECK(pair.is_pair());
DCHECK(pair.is_gp_pair());
return pair.low_gp() == reg || pair.high_gp() == reg;
}
......