Commit 4dc894b9 authored by Milad Fa, committed by V8 LUCI CQ

S390[simd]: avoid using r0 as scratch during vector load/store

Passing `ip/r1` as scratch to LoadV128 and StoreV128.

Change-Id: Ie86d3bd241065de985f98025e7bb60aba4cd42d3
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3576132
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Farazmand <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/main@{#79864}
parent 72a11f20
......@@ -2910,7 +2910,7 @@ void Builtins::Generate_WasmCompileLazy(MacroAssembler* masm) {
fp_regs.Count());
__ MultiPush(gp_regs);
__ MultiPushF64OrV128(fp_regs);
__ MultiPushF64OrV128(fp_regs, ip);
// Pass instance and function index as explicit arguments to the runtime
// function.
......@@ -2923,7 +2923,7 @@ void Builtins::Generate_WasmCompileLazy(MacroAssembler* masm) {
__ mov(ip, r2);
// Restore registers.
__ MultiPopF64OrV128(fp_regs);
__ MultiPopF64OrV128(fp_regs, ip);
__ MultiPop(gp_regs);
}
// Finally, jump to the entrypoint.
......@@ -2938,7 +2938,7 @@ void Builtins::Generate_WasmDebugBreak(MacroAssembler* masm) {
// Save all parameter registers. They might hold live values, we restore
// them after the runtime call.
__ MultiPush(WasmDebugBreakFrameConstants::kPushedGpRegs);
__ MultiPushF64OrV128(WasmDebugBreakFrameConstants::kPushedFpRegs);
__ MultiPushF64OrV128(WasmDebugBreakFrameConstants::kPushedFpRegs, ip);
// Initialize the JavaScript context with 0. CEntry will use it to
// set the current context on the isolate.
......@@ -2946,7 +2946,7 @@ void Builtins::Generate_WasmDebugBreak(MacroAssembler* masm) {
__ CallRuntime(Runtime::kWasmDebugBreak, 0);
// Restore registers.
__ MultiPopF64OrV128(WasmDebugBreakFrameConstants::kPushedFpRegs);
__ MultiPopF64OrV128(WasmDebugBreakFrameConstants::kPushedFpRegs, ip);
__ MultiPop(WasmDebugBreakFrameConstants::kPushedGpRegs);
}
__ Ret();
......
......@@ -296,8 +296,9 @@ int TurboAssembler::RequiredStackSizeForCallerSaved(SaveFPRegsMode fp_mode,
return bytes;
}
int TurboAssembler::PushCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1,
Register exclusion2, Register exclusion3) {
int TurboAssembler::PushCallerSaved(SaveFPRegsMode fp_mode, Register scratch,
Register exclusion1, Register exclusion2,
Register exclusion3) {
int bytes = 0;
RegList exclusions = {exclusion1, exclusion2, exclusion3};
......@@ -306,18 +307,19 @@ int TurboAssembler::PushCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1,
bytes += list.Count() * kSystemPointerSize;
if (fp_mode == SaveFPRegsMode::kSave) {
MultiPushF64OrV128(kCallerSavedDoubles);
MultiPushF64OrV128(kCallerSavedDoubles, scratch);
bytes += kStackSavedSavedFPSizeInBytes;
}
return bytes;
}
int TurboAssembler::PopCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1,
Register exclusion2, Register exclusion3) {
int TurboAssembler::PopCallerSaved(SaveFPRegsMode fp_mode, Register scratch,
Register exclusion1, Register exclusion2,
Register exclusion3) {
int bytes = 0;
if (fp_mode == SaveFPRegsMode::kSave) {
MultiPopF64OrV128(kCallerSavedDoubles);
MultiPopF64OrV128(kCallerSavedDoubles, scratch);
bytes += kStackSavedSavedFPSizeInBytes;
}
......@@ -667,7 +669,8 @@ void TurboAssembler::MultiPushDoubles(DoubleRegList dregs, Register location) {
}
}
void TurboAssembler::MultiPushV128(DoubleRegList dregs, Register location) {
void TurboAssembler::MultiPushV128(DoubleRegList dregs, Register scratch,
Register location) {
int16_t num_to_push = dregs.Count();
int16_t stack_offset = num_to_push * kSimd128Size;
......@@ -676,7 +679,7 @@ void TurboAssembler::MultiPushV128(DoubleRegList dregs, Register location) {
if ((dregs.bits() & (1 << i)) != 0) {
Simd128Register dreg = Simd128Register::from_code(i);
stack_offset -= kSimd128Size;
StoreV128(dreg, MemOperand(location, stack_offset), r0);
StoreV128(dreg, MemOperand(location, stack_offset), scratch);
}
}
}
......@@ -694,20 +697,21 @@ void TurboAssembler::MultiPopDoubles(DoubleRegList dregs, Register location) {
AddS64(location, location, Operand(stack_offset));
}
void TurboAssembler::MultiPopV128(DoubleRegList dregs, Register location) {
void TurboAssembler::MultiPopV128(DoubleRegList dregs, Register scratch,
Register location) {
int16_t stack_offset = 0;
for (int16_t i = 0; i < Simd128Register::kNumRegisters; i++) {
if ((dregs.bits() & (1 << i)) != 0) {
Simd128Register dreg = Simd128Register::from_code(i);
LoadV128(dreg, MemOperand(location, stack_offset), r0);
LoadV128(dreg, MemOperand(location, stack_offset), scratch);
stack_offset += kSimd128Size;
}
}
AddS64(location, location, Operand(stack_offset));
}
void TurboAssembler::MultiPushF64OrV128(DoubleRegList dregs,
void TurboAssembler::MultiPushF64OrV128(DoubleRegList dregs, Register scratch,
Register location) {
#if V8_ENABLE_WEBASSEMBLY
bool generating_bultins =
......@@ -719,7 +723,7 @@ void TurboAssembler::MultiPushF64OrV128(DoubleRegList dregs,
LoadAndTestP(r1, r1); // If > 0 then simd is available.
ble(&push_doubles, Label::kNear);
// Save vector registers, don't save double registers anymore.
MultiPushV128(dregs);
MultiPushV128(dregs, scratch);
b(&simd_pushed);
bind(&push_doubles);
// Simd not supported, only save double registers.
......@@ -730,7 +734,7 @@ void TurboAssembler::MultiPushF64OrV128(DoubleRegList dregs,
bind(&simd_pushed);
} else {
if (CpuFeatures::SupportsWasmSimd128()) {
MultiPushV128(dregs);
MultiPushV128(dregs, scratch);
} else {
MultiPushDoubles(dregs);
lay(sp, MemOperand(sp, -(dregs.Count() * kDoubleSize)));
......@@ -741,7 +745,8 @@ void TurboAssembler::MultiPushF64OrV128(DoubleRegList dregs,
#endif
}
void TurboAssembler::MultiPopF64OrV128(DoubleRegList dregs, Register location) {
void TurboAssembler::MultiPopF64OrV128(DoubleRegList dregs, Register scratch,
Register location) {
#if V8_ENABLE_WEBASSEMBLY
bool generating_bultins =
isolate() && isolate()->IsGeneratingEmbeddedBuiltins();
......@@ -752,7 +757,7 @@ void TurboAssembler::MultiPopF64OrV128(DoubleRegList dregs, Register location) {
LoadAndTestP(r1, r1); // If > 0 then simd is available.
ble(&pop_doubles, Label::kNear);
// Pop vector registers, don't pop double registers anymore.
MultiPopV128(dregs);
MultiPopV128(dregs, scratch);
b(&simd_popped);
bind(&pop_doubles);
// Simd not supported, only pop double registers.
......@@ -761,7 +766,7 @@ void TurboAssembler::MultiPopF64OrV128(DoubleRegList dregs, Register location) {
bind(&simd_popped);
} else {
if (CpuFeatures::SupportsWasmSimd128()) {
MultiPopV128(dregs);
MultiPopV128(dregs, scratch);
} else {
lay(sp, MemOperand(sp, dregs.Count() * kDoubleSize));
MultiPopDoubles(dregs);
......@@ -4073,6 +4078,7 @@ void TurboAssembler::LoadF32(DoubleRegister dst, const MemOperand& mem) {
void TurboAssembler::LoadV128(Simd128Register dst, const MemOperand& mem,
Register scratch) {
DCHECK(scratch != r0);
if (is_uint12(mem.offset())) {
vl(dst, mem, Condition(0));
} else {
......@@ -4102,6 +4108,7 @@ void TurboAssembler::StoreF32(DoubleRegister src, const MemOperand& mem) {
void TurboAssembler::StoreV128(Simd128Register src, const MemOperand& mem,
Register scratch) {
DCHECK(scratch != r0);
if (is_uint12(mem.offset())) {
vst(src, mem, Condition(0));
} else {
......
......@@ -178,11 +178,15 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void MultiPushDoubles(DoubleRegList dregs, Register location = sp);
void MultiPopDoubles(DoubleRegList dregs, Register location = sp);
void MultiPushV128(DoubleRegList dregs, Register location = sp);
void MultiPopV128(DoubleRegList dregs, Register location = sp);
void MultiPushV128(DoubleRegList dregs, Register scratch,
Register location = sp);
void MultiPopV128(DoubleRegList dregs, Register scratch,
Register location = sp);
void MultiPushF64OrV128(DoubleRegList dregs, Register location = sp);
void MultiPopF64OrV128(DoubleRegList dregs, Register location = sp);
void MultiPushF64OrV128(DoubleRegList dregs, Register scratch,
Register location = sp);
void MultiPopF64OrV128(DoubleRegList dregs, Register scratch,
Register location = sp);
// Calculate how much stack space (in bytes) are required to store caller
// registers excluding those specified in the arguments.
......@@ -193,13 +197,14 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
// Push caller saved registers on the stack, and return the number of bytes
// stack pointer is adjusted.
int PushCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1 = no_reg,
int PushCallerSaved(SaveFPRegsMode fp_mode, Register scratch,
Register exclusion1 = no_reg,
Register exclusion2 = no_reg,
Register exclusion3 = no_reg);
// Restore caller saved registers from the stack, and return the number of
// bytes stack pointer is adjusted.
int PopCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1 = no_reg,
Register exclusion2 = no_reg,
int PopCallerSaved(SaveFPRegsMode fp_mode, Register scratch,
Register exclusion1 = no_reg, Register exclusion2 = no_reg,
Register exclusion3 = no_reg);
// Load an object from the root table.
......
......@@ -1277,7 +1277,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
DCHECK(fp_mode_ == SaveFPRegsMode::kIgnore ||
fp_mode_ == SaveFPRegsMode::kSave);
// kReturnRegister0 should have been saved before entering the stub.
int bytes = __ PushCallerSaved(fp_mode_, kReturnRegister0);
int bytes = __ PushCallerSaved(fp_mode_, ip, kReturnRegister0);
DCHECK(IsAligned(bytes, kSystemPointerSize));
DCHECK_EQ(0, frame_access_state()->sp_delta());
frame_access_state()->IncreaseSPDelta(bytes / kSystemPointerSize);
......@@ -1291,7 +1291,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
DCHECK(fp_mode_ == SaveFPRegsMode::kIgnore ||
fp_mode_ == SaveFPRegsMode::kSave);
// Don't overwrite the returned value.
int bytes = __ PopCallerSaved(fp_mode_, kReturnRegister0);
int bytes = __ PopCallerSaved(fp_mode_, ip, kReturnRegister0);
frame_access_state()->IncreaseSPDelta(-(bytes / kSystemPointerSize));
DCHECK_EQ(0, frame_access_state()->sp_delta());
DCHECK(caller_registers_saved_);
......
......@@ -409,9 +409,9 @@ void LiftoffAssembler::Load(LiftoffRegister dst, Register src_addr,
break;
case LoadType::kS128Load:
if (is_load_mem) {
LoadV128LE(dst.fp(), src_op, r0, r1);
LoadV128LE(dst.fp(), src_op, r1, r0);
} else {
LoadV128(dst.fp(), src_op, r0);
LoadV128(dst.fp(), src_op, r1);
}
break;
default:
......@@ -478,7 +478,7 @@ void LiftoffAssembler::Store(Register dst_addr, Register offset_reg,
break;
case StoreType::kS128Store: {
if (is_store_mem) {
StoreV128LE(src.fp(), dst_op, r0, r1);
StoreV128LE(src.fp(), dst_op, r1, r0);
} else {
StoreV128(src.fp(), dst_op, r1);
}
......@@ -2912,11 +2912,11 @@ void LiftoffAssembler::AssertUnreachable(AbortReason reason) {
void LiftoffAssembler::PushRegisters(LiftoffRegList regs) {
MultiPush(regs.GetGpList());
MultiPushF64OrV128(regs.GetFpList());
MultiPushF64OrV128(regs.GetFpList(), ip);
}
void LiftoffAssembler::PopRegisters(LiftoffRegList regs) {
MultiPopF64OrV128(regs.GetFpList());
MultiPopF64OrV128(regs.GetFpList(), ip);
MultiPop(regs.GetGpList());
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment