Commit 42e8c231, authored by Clemens Backes, committed by Commit Bot

[Liftoff] Implement i64 shift with immediate

Especially on ia32 and x64, shifts with immediate generate much shorter
and more efficient code.

R=jkummerow@chromium.org

Bug: v8:9919
Change-Id: I33acf287d5eb6fc5c4d39a295f410557348a4f19
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1899770
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Commit-Queue: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#64780}
parent 5ca7840b
...@@ -217,6 +217,16 @@ inline void EmitFloatMinOrMax(LiftoffAssembler* assm, RegisterType dst, ...@@ -217,6 +217,16 @@ inline void EmitFloatMinOrMax(LiftoffAssembler* assm, RegisterType dst,
assm->bind(&done); assm->bind(&done);
} }
// Returns a register that holds the value of {reg} and is different from
// {must_not_alias}. If {reg} already differs from {must_not_alias} it is
// returned unchanged; otherwise its value is copied into a scratch register
// acquired from {temps}, and that scratch register is returned.
inline Register EnsureNoAlias(Assembler* assm, Register reg,
                              Register must_not_alias,
                              UseScratchRegisterScope* temps) {
  if (reg == must_not_alias) {
    Register scratch = temps->Acquire();
    DCHECK_NE(reg, scratch);
    assm->mov(scratch, reg);
    return scratch;
  }
  return reg;
}
} // namespace liftoff } // namespace liftoff
int LiftoffAssembler::PrepareStackFrame() { int LiftoffAssembler::PrepareStackFrame() {
...@@ -944,28 +954,44 @@ void LiftoffAssembler::emit_i64_shl(LiftoffRegister dst, LiftoffRegister src, ...@@ -944,28 +954,44 @@ void LiftoffAssembler::emit_i64_shl(LiftoffRegister dst, LiftoffRegister src,
liftoff::I64Shiftop<&TurboAssembler::LslPair, true>(this, dst, src, amount); liftoff::I64Shiftop<&TurboAssembler::LslPair, true>(this, dst, src, amount);
} }
// i64 left shift by a constant: passes the register pair of {src} and the
// shift amount (masked to the range [0, 63]) to {LslPair}, which writes the
// register pair of {dst}.
void LiftoffAssembler::emit_i64_shl(LiftoffRegister dst, LiftoffRegister src,
                                    int32_t amount) {
  UseScratchRegisterScope temps(this);
  const Register dst_high = dst.high_gp();
  // The low source word is still read after the high destination word has
  // been written, so the two must not share a register.
  const Register safe_src_low =
      liftoff::EnsureNoAlias(this, src.low_gp(), dst_high, &temps);
  const int32_t shift = amount & 63;
  LslPair(dst.low_gp(), dst_high, safe_src_low, src.high_gp(), shift);
}
void LiftoffAssembler::emit_i64_sar(LiftoffRegister dst, LiftoffRegister src, void LiftoffAssembler::emit_i64_sar(LiftoffRegister dst, LiftoffRegister src,
Register amount) { Register amount) {
liftoff::I64Shiftop<&TurboAssembler::AsrPair, false>(this, dst, src, amount); liftoff::I64Shiftop<&TurboAssembler::AsrPair, false>(this, dst, src, amount);
} }
// i64 right shift by a constant via {AsrPair}: passes the register pair of
// {src} and the shift amount (masked to the range [0, 63]), and the result is
// written to the register pair of {dst}.
void LiftoffAssembler::emit_i64_sar(LiftoffRegister dst, LiftoffRegister src,
                                    int32_t amount) {
  UseScratchRegisterScope temps(this);
  const Register dst_low = dst.low_gp();
  // The high source word is still read after the low destination word has
  // been written, so the two must not share a register.
  const Register safe_src_high =
      liftoff::EnsureNoAlias(this, src.high_gp(), dst_low, &temps);
  const int32_t shift = amount & 63;
  AsrPair(dst_low, dst.high_gp(), src.low_gp(), safe_src_high, shift);
}
void LiftoffAssembler::emit_i64_shr(LiftoffRegister dst, LiftoffRegister src, void LiftoffAssembler::emit_i64_shr(LiftoffRegister dst, LiftoffRegister src,
Register amount) { Register amount) {
liftoff::I64Shiftop<&TurboAssembler::LsrPair, false>(this, dst, src, amount); liftoff::I64Shiftop<&TurboAssembler::LsrPair, false>(this, dst, src, amount);
} }
void LiftoffAssembler::emit_i64_shr(LiftoffRegister dst, LiftoffRegister src, void LiftoffAssembler::emit_i64_shr(LiftoffRegister dst, LiftoffRegister src,
int amount) { int32_t amount) {
DCHECK(is_uint6(amount));
UseScratchRegisterScope temps(this); UseScratchRegisterScope temps(this);
Register src_high = src.high_gp();
// {src.high_gp()} will still be needed after writing {dst.low_gp()}. // {src.high_gp()} will still be needed after writing {dst.low_gp()}.
if (src_high == dst.low_gp()) { Register src_high =
src_high = GetUnusedRegister(kGpReg).gp(); liftoff::EnsureNoAlias(this, src.high_gp(), dst.low_gp(), &temps);
TurboAssembler::Move(src_high, dst.low_gp());
}
LsrPair(dst.low_gp(), dst.high_gp(), src.low_gp(), src_high, amount); LsrPair(dst.low_gp(), dst.high_gp(), src.low_gp(), src_high, amount & 63);
} }
void LiftoffAssembler::emit_i64_clz(LiftoffRegister dst, LiftoffRegister src) { void LiftoffAssembler::emit_i64_clz(LiftoffRegister dst, LiftoffRegister src) {
......
...@@ -495,13 +495,10 @@ void LiftoffAssembler::FillStackSlotsWithZero(uint32_t index, uint32_t count) { ...@@ -495,13 +495,10 @@ void LiftoffAssembler::FillStackSlotsWithZero(uint32_t index, uint32_t count) {
void LiftoffAssembler::emit_##name(LiftoffRegister dst, LiftoffRegister src, \ void LiftoffAssembler::emit_##name(LiftoffRegister dst, LiftoffRegister src, \
Register amount) { \ Register amount) { \
instruction(dst.gp().X(), src.gp().X(), amount.X()); \ instruction(dst.gp().X(), src.gp().X(), amount.X()); \
} } \
#define I64_SHIFTOP_I(name, instruction) \
I64_SHIFTOP(name, instruction) \
void LiftoffAssembler::emit_##name(LiftoffRegister dst, LiftoffRegister src, \ void LiftoffAssembler::emit_##name(LiftoffRegister dst, LiftoffRegister src, \
int amount) { \ int32_t amount) { \
DCHECK(is_uint6(amount)); \ instruction(dst.gp().X(), src.gp().X(), amount & 63); \
instruction(dst.gp().X(), src.gp().X(), amount); \
} }
I32_BINOP_I(i32_add, Add) I32_BINOP_I(i32_add, Add)
...@@ -521,7 +518,7 @@ I64_BINOP_I(i64_or, Orr) ...@@ -521,7 +518,7 @@ I64_BINOP_I(i64_or, Orr)
I64_BINOP_I(i64_xor, Eor) I64_BINOP_I(i64_xor, Eor)
I64_SHIFTOP(i64_shl, Lsl) I64_SHIFTOP(i64_shl, Lsl)
I64_SHIFTOP(i64_sar, Asr) I64_SHIFTOP(i64_sar, Asr)
I64_SHIFTOP_I(i64_shr, Lsr) I64_SHIFTOP(i64_shr, Lsr)
FP32_BINOP(f32_add, Fadd) FP32_BINOP(f32_add, Fadd)
FP32_BINOP(f32_sub, Fsub) FP32_BINOP(f32_sub, Fsub)
FP32_BINOP(f32_mul, Fmul) FP32_BINOP(f32_mul, Fmul)
...@@ -558,7 +555,6 @@ FP64_UNOP(f64_sqrt, Fsqrt) ...@@ -558,7 +555,6 @@ FP64_UNOP(f64_sqrt, Fsqrt)
#undef FP64_UNOP_RETURN_TRUE #undef FP64_UNOP_RETURN_TRUE
#undef I32_SHIFTOP #undef I32_SHIFTOP
#undef I64_SHIFTOP #undef I64_SHIFTOP
#undef I64_SHIFTOP_I
void LiftoffAssembler::emit_i32_clz(Register dst, Register src) { void LiftoffAssembler::emit_i32_clz(Register dst, Register src) {
Clz(dst.W(), src.W()); Clz(dst.W(), src.W());
......
...@@ -1001,12 +1001,38 @@ void LiftoffAssembler::emit_i64_shl(LiftoffRegister dst, LiftoffRegister src, ...@@ -1001,12 +1001,38 @@ void LiftoffAssembler::emit_i64_shl(LiftoffRegister dst, LiftoffRegister src,
&TurboAssembler::ShlPair_cl); &TurboAssembler::ShlPair_cl);
} }
// i64 left shift by a constant on a register pair. The shift amount is
// masked to the range [0, 63] before any code is emitted.
void LiftoffAssembler::emit_i64_shl(LiftoffRegister dst, LiftoffRegister src,
                                    int32_t amount) {
  const int32_t shift = amount & 63;
  if (shift < 32) {
    // Copy the whole pair into {dst} (if needed) and shift it in place.
    if (dst != src) Move(dst, src, kWasmI64);
    ShlPair(dst.high_gp(), dst.low_gp(), shift);
    return;
  }
  // Shift by 32 or more: the high result word is the low source word shifted
  // by {shift - 32}, and the low result word is zero.
  const Register dst_high = dst.high_gp();
  if (dst_high != src.low_gp()) mov(dst_high, src.low_gp());
  if (shift > 32) shl(dst_high, shift - 32);
  xor_(dst.low_gp(), dst.low_gp());
}
void LiftoffAssembler::emit_i64_sar(LiftoffRegister dst, LiftoffRegister src, void LiftoffAssembler::emit_i64_sar(LiftoffRegister dst, LiftoffRegister src,
Register amount) { Register amount) {
liftoff::Emit64BitShiftOperation(this, dst, src, amount, liftoff::Emit64BitShiftOperation(this, dst, src, amount,
&TurboAssembler::SarPair_cl); &TurboAssembler::SarPair_cl);
} }
// i64 sign-propagating right shift by a constant on a register pair. The
// shift amount is masked to the range [0, 63] before any code is emitted.
void LiftoffAssembler::emit_i64_sar(LiftoffRegister dst, LiftoffRegister src,
                                    int32_t amount) {
  const int32_t shift = amount & 63;
  if (shift < 32) {
    // Copy the whole pair into {dst} (if needed) and shift it in place.
    if (dst != src) Move(dst, src, kWasmI64);
    SarPair(dst.high_gp(), dst.low_gp(), shift);
    return;
  }
  // Shift by 32 or more: both result words start out as the high source word.
  // The low word is then shifted by {shift - 32}; the high word is shifted by
  // 31 so that it holds only copies of the sign bit.
  const Register src_high = src.high_gp();
  if (dst.low_gp() != src_high) mov(dst.low_gp(), src_high);
  if (dst.high_gp() != src_high) mov(dst.high_gp(), src_high);
  if (shift > 32) sar(dst.low_gp(), shift - 32);
  sar(dst.high_gp(), 31);
}
void LiftoffAssembler::emit_i64_shr(LiftoffRegister dst, LiftoffRegister src, void LiftoffAssembler::emit_i64_shr(LiftoffRegister dst, LiftoffRegister src,
Register amount) { Register amount) {
liftoff::Emit64BitShiftOperation(this, dst, src, amount, liftoff::Emit64BitShiftOperation(this, dst, src, amount,
...@@ -1014,10 +1040,16 @@ void LiftoffAssembler::emit_i64_shr(LiftoffRegister dst, LiftoffRegister src, ...@@ -1014,10 +1040,16 @@ void LiftoffAssembler::emit_i64_shr(LiftoffRegister dst, LiftoffRegister src,
} }
void LiftoffAssembler::emit_i64_shr(LiftoffRegister dst, LiftoffRegister src, void LiftoffAssembler::emit_i64_shr(LiftoffRegister dst, LiftoffRegister src,
int amount) { int32_t amount) {
if (dst != src) Move(dst, src, kWasmI64); amount &= 63;
DCHECK(is_uint6(amount)); if (amount >= 32) {
ShrPair(dst.high_gp(), dst.low_gp(), amount); if (dst.low_gp() != src.high_gp()) mov(dst.low_gp(), src.high_gp());
if (amount != 32) shr(dst.low_gp(), amount - 32);
xor_(dst.high_gp(), dst.high_gp());
} else {
if (dst != src) Move(dst, src, kWasmI64);
ShrPair(dst.high_gp(), dst.low_gp(), amount);
}
} }
void LiftoffAssembler::emit_i64_clz(LiftoffRegister dst, LiftoffRegister src) { void LiftoffAssembler::emit_i64_clz(LiftoffRegister dst, LiftoffRegister src) {
......
...@@ -452,12 +452,16 @@ class LiftoffAssembler : public TurboAssembler { ...@@ -452,12 +452,16 @@ class LiftoffAssembler : public TurboAssembler {
int32_t imm); int32_t imm);
inline void emit_i64_shl(LiftoffRegister dst, LiftoffRegister src, inline void emit_i64_shl(LiftoffRegister dst, LiftoffRegister src,
Register amount); Register amount);
// Immediate overload: emits an i64 left shift of {src} by the constant
// {amount}, writing the result to {dst}. The platform implementations shown
// in this change mask {amount} with 63.
inline void emit_i64_shl(LiftoffRegister dst, LiftoffRegister src,
int32_t amount);
inline void emit_i64_sar(LiftoffRegister dst, LiftoffRegister src, inline void emit_i64_sar(LiftoffRegister dst, LiftoffRegister src,
Register amount); Register amount);
// Immediate overload: emits an i64 sign-propagating right shift of {src} by
// the constant {amount}, writing the result to {dst}. The platform
// implementations shown in this change mask {amount} with 63.
inline void emit_i64_sar(LiftoffRegister dst, LiftoffRegister src,
int32_t amount);
inline void emit_i64_shr(LiftoffRegister dst, LiftoffRegister src, inline void emit_i64_shr(LiftoffRegister dst, LiftoffRegister src,
Register amount); Register amount);
inline void emit_i64_shr(LiftoffRegister dst, LiftoffRegister src, inline void emit_i64_shr(LiftoffRegister dst, LiftoffRegister src,
int amount); int32_t amount);
// i64 unops. // i64 unops.
inline void emit_i64_clz(LiftoffRegister dst, LiftoffRegister src); inline void emit_i64_clz(LiftoffRegister dst, LiftoffRegister src);
......
...@@ -881,7 +881,7 @@ class LiftoffCompiler { ...@@ -881,7 +881,7 @@ class LiftoffCompiler {
LiftoffAssembler::VarState rhs_slot = __ cache_state()->stack_state.back(); LiftoffAssembler::VarState rhs_slot = __ cache_state()->stack_state.back();
// Check if the RHS is an immediate. // Check if the RHS is an immediate.
if (rhs_slot.loc() == LiftoffAssembler::VarState::kIntConst) { if (rhs_slot.is_const()) {
__ cache_state()->stack_state.pop_back(); __ cache_state()->stack_state.pop_back();
int32_t imm = rhs_slot.i32_const(); int32_t imm = rhs_slot.i32_const();
...@@ -1010,14 +1010,17 @@ class LiftoffCompiler { ...@@ -1010,14 +1010,17 @@ class LiftoffCompiler {
[=](LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { \ [=](LiftoffRegister dst, LiftoffRegister lhs, LiftoffRegister rhs) { \
__ emit_f64_set_cond(cond, dst.gp(), lhs.fp(), rhs.fp()); \ __ emit_f64_set_cond(cond, dst.gp(), lhs.fp(), rhs.fp()); \
}); });
#define CASE_I64_SHIFTOP(opcode, fn) \ #define CASE_I64_SHIFTOP(opcode, fn) \
case kExpr##opcode: \ case kExpr##opcode: \
return EmitBinOp<kWasmI64, kWasmI64>([=](LiftoffRegister dst, \ return EmitBinOpImm<kWasmI64, kWasmI64>( \
LiftoffRegister src, \ [=](LiftoffRegister dst, LiftoffRegister src, \
LiftoffRegister amount) { \ LiftoffRegister amount) { \
__ emit_##fn(dst, src, \ __ emit_##fn(dst, src, \
amount.is_pair() ? amount.low_gp() : amount.gp()); \ amount.is_pair() ? amount.low_gp() : amount.gp()); \
}); }, \
[=](LiftoffRegister dst, LiftoffRegister src, int32_t amount) { \
__ emit_##fn(dst, src, amount); \
});
#define CASE_CCALL_BINOP(opcode, type, ext_ref_fn) \ #define CASE_CCALL_BINOP(opcode, type, ext_ref_fn) \
case kExpr##opcode: \ case kExpr##opcode: \
return EmitBinOp<kWasmI32, kWasmI32>( \ return EmitBinOp<kWasmI32, kWasmI32>( \
......
...@@ -876,12 +876,24 @@ void LiftoffAssembler::emit_i64_shl(LiftoffRegister dst, LiftoffRegister src, ...@@ -876,12 +876,24 @@ void LiftoffAssembler::emit_i64_shl(LiftoffRegister dst, LiftoffRegister src,
&Assembler::shlq_cl); &Assembler::shlq_cl);
} }
// i64 left shift by a constant: moves {src} into {dst} when they are
// different registers, then shifts {dst} in place by {amount} masked to the
// range [0, 63].
void LiftoffAssembler::emit_i64_shl(LiftoffRegister dst, LiftoffRegister src,
                                    int32_t amount) {
  const Register dst_reg = dst.gp();
  const Register src_reg = src.gp();
  if (dst_reg != src_reg) movq(dst_reg, src_reg);
  shlq(dst_reg, Immediate(amount & 63));
}
void LiftoffAssembler::emit_i64_sar(LiftoffRegister dst, LiftoffRegister src, void LiftoffAssembler::emit_i64_sar(LiftoffRegister dst, LiftoffRegister src,
Register amount) { Register amount) {
liftoff::EmitShiftOperation<kWasmI64>(this, dst.gp(), src.gp(), amount, liftoff::EmitShiftOperation<kWasmI64>(this, dst.gp(), src.gp(), amount,
&Assembler::sarq_cl); &Assembler::sarq_cl);
} }
// i64 right shift by a constant using {sarq}: moves {src} into {dst} when
// they are different registers, then shifts {dst} in place by {amount} masked
// to the range [0, 63].
void LiftoffAssembler::emit_i64_sar(LiftoffRegister dst, LiftoffRegister src,
                                    int32_t amount) {
  const Register dst_reg = dst.gp();
  const Register src_reg = src.gp();
  if (dst_reg != src_reg) movq(dst_reg, src_reg);
  sarq(dst_reg, Immediate(amount & 63));
}
void LiftoffAssembler::emit_i64_shr(LiftoffRegister dst, LiftoffRegister src, void LiftoffAssembler::emit_i64_shr(LiftoffRegister dst, LiftoffRegister src,
Register amount) { Register amount) {
liftoff::EmitShiftOperation<kWasmI64>(this, dst.gp(), src.gp(), amount, liftoff::EmitShiftOperation<kWasmI64>(this, dst.gp(), src.gp(), amount,
...@@ -889,10 +901,9 @@ void LiftoffAssembler::emit_i64_shr(LiftoffRegister dst, LiftoffRegister src, ...@@ -889,10 +901,9 @@ void LiftoffAssembler::emit_i64_shr(LiftoffRegister dst, LiftoffRegister src,
} }
void LiftoffAssembler::emit_i64_shr(LiftoffRegister dst, LiftoffRegister src, void LiftoffAssembler::emit_i64_shr(LiftoffRegister dst, LiftoffRegister src,
int amount) { int32_t amount) {
if (dst.gp() != src.gp()) movl(dst.gp(), src.gp()); if (dst != src) movq(dst.gp(), src.gp());
DCHECK(is_uint6(amount)); shrq(dst.gp(), Immediate(amount & 63));
shrq(dst.gp(), Immediate(amount));
} }
void LiftoffAssembler::emit_i64_clz(LiftoffRegister dst, LiftoffRegister src) { void LiftoffAssembler::emit_i64_clz(LiftoffRegister dst, LiftoffRegister src) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment