Commit b0eb59fb authored by Santiago Aboy Solanes, committed by V8 LUCI CQ

[codegen] Save the full 128bit FP register for Push/PopCallerSaved

Note that Arm32 is already saving the full register in
https://source.chromium.org/chromium/chromium/src/+/main:v8/src/codegen/arm/macro-assembler-arm.cc;l=2250;drc=ec4fd32cf7f945923fa6bb332c061ecbdaaaa405

Change-Id: I1f5fe60ca350583fb4cb877ccad74f5e260c3665
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2912778
Commit-Queue: Santiago Aboy Solanes <solanes@chromium.org>
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#74778}
parent 1179c6b0
......@@ -45,17 +45,30 @@ CPURegList TurboAssembler::DefaultFPTmpList() {
return CPURegList(fp_scratch1, fp_scratch2);
}
namespace {
// Width, in bits, of each FP register saved/restored by the caller-saved
// helpers (RequiredStackSizeForCallerSaved, PushCallerSaved, PopCallerSaved).
// This is a build-time choice: when V8 is compiled with WebAssembly support
// the full Q-register width is preserved; otherwise only the D-register width
// (half of each register) is saved.
#if V8_ENABLE_WEBASSEMBLY
constexpr int kStackSavedSavedFPSizeInBits = kQRegSizeInBits;
#else
constexpr int kStackSavedSavedFPSizeInBits = kDRegSizeInBits;
#endif // V8_ENABLE_WEBASSEMBLY
} // namespace
// Returns the number of stack bytes occupied by the caller-saved register list
// (kCallerSaved with `exclusion` removed, then aligned), plus the caller-saved
// FP register list when fp_mode is SaveFPRegsMode::kSave.
// NOTE(review): this listing is a diff hunk whose +/- markers were stripped, so
// the removed and the added version of each changed line appear back to back
// (e.g. the two `int bytes = ...` declarations) — confirm against the real file.
int TurboAssembler::RequiredStackSizeForCallerSaved(SaveFPRegsMode fp_mode,
Register exclusion) const {
auto list = kCallerSaved;
list.Remove(exclusion);
list.Align();
int bytes = list.Count() * kXRegSizeInBits / 8;
int bytes = list.TotalSizeInBytes();
if (fp_mode == SaveFPRegsMode::kSave) {
DCHECK_EQ(kCallerSavedV.Count() % 2, 0);
bytes += kCallerSavedV.Count() * kDRegSizeInBits / 8;
// FP registers are counted at kStackSavedSavedFPSizeInBits each; the list is
// expected to hold an even number of registers.
auto fp_list = CPURegList::GetCallerSavedV(kStackSavedSavedFPSizeInBits);
DCHECK_EQ(fp_list.Count() % 2, 0);
bytes += fp_list.TotalSizeInBytes();
}
return bytes;
}
......@@ -68,12 +81,13 @@ int TurboAssembler::PushCallerSaved(SaveFPRegsMode fp_mode,
PushCPURegList<kDontStoreLR>(list);
int bytes = list.Count() * kXRegSizeInBits / 8;
int bytes = list.TotalSizeInBytes();
if (fp_mode == SaveFPRegsMode::kSave) {
DCHECK_EQ(kCallerSavedV.Count() % 2, 0);
PushCPURegList(kCallerSavedV);
bytes += kCallerSavedV.Count() * kDRegSizeInBits / 8;
auto fp_list = CPURegList::GetCallerSavedV(kStackSavedSavedFPSizeInBits);
DCHECK_EQ(fp_list.Count() % 2, 0);
PushCPURegList(fp_list);
bytes += fp_list.TotalSizeInBytes();
}
return bytes;
}
......@@ -81,9 +95,10 @@ int TurboAssembler::PushCallerSaved(SaveFPRegsMode fp_mode,
int TurboAssembler::PopCallerSaved(SaveFPRegsMode fp_mode, Register exclusion) {
int bytes = 0;
if (fp_mode == SaveFPRegsMode::kSave) {
DCHECK_EQ(kCallerSavedV.Count() % 2, 0);
PopCPURegList(kCallerSavedV);
bytes += kCallerSavedV.Count() * kDRegSizeInBits / 8;
auto fp_list = CPURegList::GetCallerSavedV(kStackSavedSavedFPSizeInBits);
DCHECK_EQ(fp_list.Count() % 2, 0);
PopCPURegList(fp_list);
bytes += fp_list.TotalSizeInBytes();
}
auto list = kCallerSaved;
......@@ -91,7 +106,7 @@ int TurboAssembler::PopCallerSaved(SaveFPRegsMode fp_mode, Register exclusion) {
list.Align();
PopCPURegList<kDontLoadLR>(list);
bytes += list.Count() * kXRegSizeInBits / 8;
bytes += list.TotalSizeInBytes();
return bytes;
}
......
......@@ -296,7 +296,7 @@ int TurboAssembler::RequiredStackSizeForCallerSaved(SaveFPRegsMode fp_mode,
if (fp_mode == SaveFPRegsMode::kSave) {
// Count all XMM registers except XMM0.
bytes += kDoubleSize * (XMMRegister::kNumRegisters - 1);
bytes += kStackSavedSavedFPSize * (XMMRegister::kNumRegisters - 1);
}
return bytes;
......@@ -318,11 +318,15 @@ int TurboAssembler::PushCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1,
if (fp_mode == SaveFPRegsMode::kSave) {
// Save all XMM registers except XMM0.
int delta = kDoubleSize * (XMMRegister::kNumRegisters - 1);
const int delta = kStackSavedSavedFPSize * (XMMRegister::kNumRegisters - 1);
AllocateStackSpace(delta);
for (int i = XMMRegister::kNumRegisters - 1; i > 0; i--) {
XMMRegister reg = XMMRegister::from_code(i);
movsd(Operand(esp, (i - 1) * kDoubleSize), reg);
#if V8_ENABLE_WEBASSEMBLY
movdqu(Operand(esp, (i - 1) * kStackSavedSavedFPSize), reg);
#else
movsd(Operand(esp, (i - 1) * kStackSavedSavedFPSize), reg);
#endif // V8_ENABLE_WEBASSEMBLY
}
bytes += delta;
}
......@@ -335,10 +339,14 @@ int TurboAssembler::PopCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1,
int bytes = 0;
if (fp_mode == SaveFPRegsMode::kSave) {
// Restore all XMM registers except XMM0.
int delta = kDoubleSize * (XMMRegister::kNumRegisters - 1);
const int delta = kStackSavedSavedFPSize * (XMMRegister::kNumRegisters - 1);
for (int i = XMMRegister::kNumRegisters - 1; i > 0; i--) {
XMMRegister reg = XMMRegister::from_code(i);
movsd(reg, Operand(esp, (i - 1) * kDoubleSize));
#if V8_ENABLE_WEBASSEMBLY
movdqu(reg, Operand(esp, (i - 1) * kStackSavedSavedFPSize));
#else
movsd(reg, Operand(esp, (i - 1) * kStackSavedSavedFPSize));
#endif // V8_ENABLE_WEBASSEMBLY
}
add(esp, Immediate(delta));
bytes += delta;
......
......@@ -21,6 +21,14 @@ namespace v8 {
namespace internal {
class Assembler;
// Size, in bytes, of the stack slot used for each XMM register by the
// caller-saved push/pop helpers. This is a build-time choice: when V8 is
// compiled with WebAssembly support the full register is saved
// (2 * kDoubleSize, stored with movdqu); otherwise only one double
// (kDoubleSize, stored with movsd), i.e. half of the register, is saved.
#if V8_ENABLE_WEBASSEMBLY
constexpr int kStackSavedSavedFPSize = 2 * kDoubleSize;
#else
constexpr int kStackSavedSavedFPSize = kDoubleSize;
#endif // V8_ENABLE_WEBASSEMBLY
class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
public:
using TurboAssemblerBase::TurboAssemblerBase;
......
......@@ -690,7 +690,7 @@ int TurboAssembler::RequiredStackSizeForCallerSaved(SaveFPRegsMode fp_mode,
// R12 to r15 are callee save on all platforms.
if (fp_mode == SaveFPRegsMode::kSave) {
bytes += kDoubleSize * XMMRegister::kNumRegisters;
bytes += kStackSavedSavedFPSize * XMMRegister::kNumRegisters;
}
return bytes;
......@@ -712,11 +712,15 @@ int TurboAssembler::PushCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1,
// R12 to r15 are callee save on all platforms.
if (fp_mode == SaveFPRegsMode::kSave) {
int delta = kDoubleSize * XMMRegister::kNumRegisters;
const int delta = kStackSavedSavedFPSize * XMMRegister::kNumRegisters;
AllocateStackSpace(delta);
for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
XMMRegister reg = XMMRegister::from_code(i);
Movsd(Operand(rsp, i * kDoubleSize), reg);
#if V8_ENABLE_WEBASSEMBLY
Movdqu(Operand(rsp, i * kStackSavedSavedFPSize), reg);
#else
Movsd(Operand(rsp, i * kStackSavedSavedFPSize), reg);
#endif // V8_ENABLE_WEBASSEMBLY
}
bytes += delta;
}
......@@ -730,10 +734,14 @@ int TurboAssembler::PopCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1,
if (fp_mode == SaveFPRegsMode::kSave) {
for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
XMMRegister reg = XMMRegister::from_code(i);
Movsd(reg, Operand(rsp, i * kDoubleSize));
#if V8_ENABLE_WEBASSEMBLY
Movdqu(reg, Operand(rsp, i * kStackSavedSavedFPSize));
#else
Movsd(reg, Operand(rsp, i * kStackSavedSavedFPSize));
#endif // V8_ENABLE_WEBASSEMBLY
}
int delta = kDoubleSize * XMMRegister::kNumRegisters;
addq(rsp, Immediate(kDoubleSize * XMMRegister::kNumRegisters));
const int delta = kStackSavedSavedFPSize * XMMRegister::kNumRegisters;
addq(rsp, Immediate(delta));
bytes += delta;
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment