Commit b0eb59fb authored by Santiago Aboy Solanes, committed by V8 LUCI CQ

[codegen] Save the full 128bit FP register for Push/PopCallerSaved

Note that Arm32 is already saving the full register in
https://source.chromium.org/chromium/chromium/src/+/main:v8/src/codegen/arm/macro-assembler-arm.cc;l=2250;drc=ec4fd32cf7f945923fa6bb332c061ecbdaaaa405

Change-Id: I1f5fe60ca350583fb4cb877ccad74f5e260c3665
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2912778
Commit-Queue: Santiago Aboy Solanes <solanes@chromium.org>
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#74778}
parent 1179c6b0
...@@ -45,17 +45,30 @@ CPURegList TurboAssembler::DefaultFPTmpList() { ...@@ -45,17 +45,30 @@ CPURegList TurboAssembler::DefaultFPTmpList() {
return CPURegList(fp_scratch1, fp_scratch2); return CPURegList(fp_scratch1, fp_scratch2);
} }
namespace {
// Width, in bits, at which each caller-saved FP/SIMD register is spilled by
// RequiredStackSizeForCallerSaved / PushCallerSaved / PopCallerSaved (it is
// the size passed to CPURegList::GetCallerSavedV in those functions).
//
// The choice is made at compile time: when V8 is built with WebAssembly
// support the whole Q-sized register is saved, because Wasm code cares about
// the full floating point register. In builds without WebAssembly only the
// D-sized half of each register is saved, which is enough and halves the stack
// space used.
#if V8_ENABLE_WEBASSEMBLY
constexpr int kStackSavedSavedFPSizeInBits = kQRegSizeInBits;
#else
constexpr int kStackSavedSavedFPSizeInBits = kDRegSizeInBits;
#endif // V8_ENABLE_WEBASSEMBLY
} // namespace
int TurboAssembler::RequiredStackSizeForCallerSaved(SaveFPRegsMode fp_mode, int TurboAssembler::RequiredStackSizeForCallerSaved(SaveFPRegsMode fp_mode,
Register exclusion) const { Register exclusion) const {
auto list = kCallerSaved; auto list = kCallerSaved;
list.Remove(exclusion); list.Remove(exclusion);
list.Align(); list.Align();
int bytes = list.Count() * kXRegSizeInBits / 8; int bytes = list.TotalSizeInBytes();
if (fp_mode == SaveFPRegsMode::kSave) { if (fp_mode == SaveFPRegsMode::kSave) {
DCHECK_EQ(kCallerSavedV.Count() % 2, 0); auto fp_list = CPURegList::GetCallerSavedV(kStackSavedSavedFPSizeInBits);
bytes += kCallerSavedV.Count() * kDRegSizeInBits / 8; DCHECK_EQ(fp_list.Count() % 2, 0);
bytes += fp_list.TotalSizeInBytes();
} }
return bytes; return bytes;
} }
...@@ -68,12 +81,13 @@ int TurboAssembler::PushCallerSaved(SaveFPRegsMode fp_mode, ...@@ -68,12 +81,13 @@ int TurboAssembler::PushCallerSaved(SaveFPRegsMode fp_mode,
PushCPURegList<kDontStoreLR>(list); PushCPURegList<kDontStoreLR>(list);
int bytes = list.Count() * kXRegSizeInBits / 8; int bytes = list.TotalSizeInBytes();
if (fp_mode == SaveFPRegsMode::kSave) { if (fp_mode == SaveFPRegsMode::kSave) {
DCHECK_EQ(kCallerSavedV.Count() % 2, 0); auto fp_list = CPURegList::GetCallerSavedV(kStackSavedSavedFPSizeInBits);
PushCPURegList(kCallerSavedV); DCHECK_EQ(fp_list.Count() % 2, 0);
bytes += kCallerSavedV.Count() * kDRegSizeInBits / 8; PushCPURegList(fp_list);
bytes += fp_list.TotalSizeInBytes();
} }
return bytes; return bytes;
} }
...@@ -81,9 +95,10 @@ int TurboAssembler::PushCallerSaved(SaveFPRegsMode fp_mode, ...@@ -81,9 +95,10 @@ int TurboAssembler::PushCallerSaved(SaveFPRegsMode fp_mode,
int TurboAssembler::PopCallerSaved(SaveFPRegsMode fp_mode, Register exclusion) { int TurboAssembler::PopCallerSaved(SaveFPRegsMode fp_mode, Register exclusion) {
int bytes = 0; int bytes = 0;
if (fp_mode == SaveFPRegsMode::kSave) { if (fp_mode == SaveFPRegsMode::kSave) {
DCHECK_EQ(kCallerSavedV.Count() % 2, 0); auto fp_list = CPURegList::GetCallerSavedV(kStackSavedSavedFPSizeInBits);
PopCPURegList(kCallerSavedV); DCHECK_EQ(fp_list.Count() % 2, 0);
bytes += kCallerSavedV.Count() * kDRegSizeInBits / 8; PopCPURegList(fp_list);
bytes += fp_list.TotalSizeInBytes();
} }
auto list = kCallerSaved; auto list = kCallerSaved;
...@@ -91,7 +106,7 @@ int TurboAssembler::PopCallerSaved(SaveFPRegsMode fp_mode, Register exclusion) { ...@@ -91,7 +106,7 @@ int TurboAssembler::PopCallerSaved(SaveFPRegsMode fp_mode, Register exclusion) {
list.Align(); list.Align();
PopCPURegList<kDontLoadLR>(list); PopCPURegList<kDontLoadLR>(list);
bytes += list.Count() * kXRegSizeInBits / 8; bytes += list.TotalSizeInBytes();
return bytes; return bytes;
} }
......
...@@ -296,7 +296,7 @@ int TurboAssembler::RequiredStackSizeForCallerSaved(SaveFPRegsMode fp_mode, ...@@ -296,7 +296,7 @@ int TurboAssembler::RequiredStackSizeForCallerSaved(SaveFPRegsMode fp_mode,
if (fp_mode == SaveFPRegsMode::kSave) { if (fp_mode == SaveFPRegsMode::kSave) {
// Count all XMM registers except XMM0. // Count all XMM registers except XMM0.
bytes += kDoubleSize * (XMMRegister::kNumRegisters - 1); bytes += kStackSavedSavedFPSize * (XMMRegister::kNumRegisters - 1);
} }
return bytes; return bytes;
...@@ -318,11 +318,15 @@ int TurboAssembler::PushCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1, ...@@ -318,11 +318,15 @@ int TurboAssembler::PushCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1,
if (fp_mode == SaveFPRegsMode::kSave) { if (fp_mode == SaveFPRegsMode::kSave) {
// Save all XMM registers except XMM0. // Save all XMM registers except XMM0.
int delta = kDoubleSize * (XMMRegister::kNumRegisters - 1); const int delta = kStackSavedSavedFPSize * (XMMRegister::kNumRegisters - 1);
AllocateStackSpace(delta); AllocateStackSpace(delta);
for (int i = XMMRegister::kNumRegisters - 1; i > 0; i--) { for (int i = XMMRegister::kNumRegisters - 1; i > 0; i--) {
XMMRegister reg = XMMRegister::from_code(i); XMMRegister reg = XMMRegister::from_code(i);
movsd(Operand(esp, (i - 1) * kDoubleSize), reg); #if V8_ENABLE_WEBASSEMBLY
movdqu(Operand(esp, (i - 1) * kStackSavedSavedFPSize), reg);
#else
movsd(Operand(esp, (i - 1) * kStackSavedSavedFPSize), reg);
#endif // V8_ENABLE_WEBASSEMBLY
} }
bytes += delta; bytes += delta;
} }
...@@ -335,10 +339,14 @@ int TurboAssembler::PopCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1, ...@@ -335,10 +339,14 @@ int TurboAssembler::PopCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1,
int bytes = 0; int bytes = 0;
if (fp_mode == SaveFPRegsMode::kSave) { if (fp_mode == SaveFPRegsMode::kSave) {
// Restore all XMM registers except XMM0. // Restore all XMM registers except XMM0.
int delta = kDoubleSize * (XMMRegister::kNumRegisters - 1); const int delta = kStackSavedSavedFPSize * (XMMRegister::kNumRegisters - 1);
for (int i = XMMRegister::kNumRegisters - 1; i > 0; i--) { for (int i = XMMRegister::kNumRegisters - 1; i > 0; i--) {
XMMRegister reg = XMMRegister::from_code(i); XMMRegister reg = XMMRegister::from_code(i);
movsd(reg, Operand(esp, (i - 1) * kDoubleSize)); #if V8_ENABLE_WEBASSEMBLY
movdqu(reg, Operand(esp, (i - 1) * kStackSavedSavedFPSize));
#else
movsd(reg, Operand(esp, (i - 1) * kStackSavedSavedFPSize));
#endif // V8_ENABLE_WEBASSEMBLY
} }
add(esp, Immediate(delta)); add(esp, Immediate(delta));
bytes += delta; bytes += delta;
......
...@@ -21,6 +21,14 @@ namespace v8 { ...@@ -21,6 +21,14 @@ namespace v8 {
namespace internal { namespace internal {
class Assembler; class Assembler;
// Size, in bytes, of the stack slot used per XMM register when the
// caller-saved FP registers are pushed/popped (PushCallerSaved,
// PopCallerSaved, RequiredStackSizeForCallerSaved).
//
// The choice is made at compile time: when V8 is built with WebAssembly
// support the full register is saved (two doubles wide, stored with
// movdqu/Movdqu), because Wasm code cares about the full floating point
// register. In builds without WebAssembly only the low double of each register
// is saved (stored with movsd/Movsd), halving the stack space used.
#if V8_ENABLE_WEBASSEMBLY
constexpr int kStackSavedSavedFPSize = 2 * kDoubleSize;
#else
constexpr int kStackSavedSavedFPSize = kDoubleSize;
#endif // V8_ENABLE_WEBASSEMBLY
class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase { class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
public: public:
using TurboAssemblerBase::TurboAssemblerBase; using TurboAssemblerBase::TurboAssemblerBase;
......
...@@ -690,7 +690,7 @@ int TurboAssembler::RequiredStackSizeForCallerSaved(SaveFPRegsMode fp_mode, ...@@ -690,7 +690,7 @@ int TurboAssembler::RequiredStackSizeForCallerSaved(SaveFPRegsMode fp_mode,
// R12 to r15 are callee save on all platforms. // R12 to r15 are callee save on all platforms.
if (fp_mode == SaveFPRegsMode::kSave) { if (fp_mode == SaveFPRegsMode::kSave) {
bytes += kDoubleSize * XMMRegister::kNumRegisters; bytes += kStackSavedSavedFPSize * XMMRegister::kNumRegisters;
} }
return bytes; return bytes;
...@@ -712,11 +712,15 @@ int TurboAssembler::PushCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1, ...@@ -712,11 +712,15 @@ int TurboAssembler::PushCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1,
// R12 to r15 are callee save on all platforms. // R12 to r15 are callee save on all platforms.
if (fp_mode == SaveFPRegsMode::kSave) { if (fp_mode == SaveFPRegsMode::kSave) {
int delta = kDoubleSize * XMMRegister::kNumRegisters; const int delta = kStackSavedSavedFPSize * XMMRegister::kNumRegisters;
AllocateStackSpace(delta); AllocateStackSpace(delta);
for (int i = 0; i < XMMRegister::kNumRegisters; i++) { for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
XMMRegister reg = XMMRegister::from_code(i); XMMRegister reg = XMMRegister::from_code(i);
Movsd(Operand(rsp, i * kDoubleSize), reg); #if V8_ENABLE_WEBASSEMBLY
Movdqu(Operand(rsp, i * kStackSavedSavedFPSize), reg);
#else
Movsd(Operand(rsp, i * kStackSavedSavedFPSize), reg);
#endif // V8_ENABLE_WEBASSEMBLY
} }
bytes += delta; bytes += delta;
} }
...@@ -730,10 +734,14 @@ int TurboAssembler::PopCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1, ...@@ -730,10 +734,14 @@ int TurboAssembler::PopCallerSaved(SaveFPRegsMode fp_mode, Register exclusion1,
if (fp_mode == SaveFPRegsMode::kSave) { if (fp_mode == SaveFPRegsMode::kSave) {
for (int i = 0; i < XMMRegister::kNumRegisters; i++) { for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
XMMRegister reg = XMMRegister::from_code(i); XMMRegister reg = XMMRegister::from_code(i);
Movsd(reg, Operand(rsp, i * kDoubleSize)); #if V8_ENABLE_WEBASSEMBLY
Movdqu(reg, Operand(rsp, i * kStackSavedSavedFPSize));
#else
Movsd(reg, Operand(rsp, i * kStackSavedSavedFPSize));
#endif // V8_ENABLE_WEBASSEMBLY
} }
int delta = kDoubleSize * XMMRegister::kNumRegisters; const int delta = kStackSavedSavedFPSize * XMMRegister::kNumRegisters;
addq(rsp, Immediate(kDoubleSize * XMMRegister::kNumRegisters)); addq(rsp, Immediate(delta));
bytes += delta; bytes += delta;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment