Commit 59a629b5 authored by Jakob Gruber, committed by Commit Bot

[arm] Correctly push and pop double registers on deopt entry

The number of available double registers depends on supported CPU
features on arm. Any code that applies to all double regs must be
extra-careful to correctly handle either 16 or 32 registers.

This was not the case for deopt entries, which were recently moved
from a runtime-generated code stub to a mksnapshot-time-generated
builtin.

This CL fixes the issue by inspecting the runtime value of cpu
features and acting on it.

Bug: v8:8661,chromium:1142158
Change-Id: I6f4d2e6ee6a80217b9110194b8e1edbe8670d8d0
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2498686
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: Ross McIlroy <rmcilroy@chromium.org>
Cr-Commit-Position: refs/heads/master@{#70796}
parent 965916c7
...@@ -3151,27 +3151,18 @@ void Generate_DeoptimizationEntry(MacroAssembler* masm, ...@@ -3151,27 +3151,18 @@ void Generate_DeoptimizationEntry(MacroAssembler* masm,
DeoptimizeKind deopt_kind) { DeoptimizeKind deopt_kind) {
Isolate* isolate = masm->isolate(); Isolate* isolate = masm->isolate();
// Note: This is an overapproximation; we always reserve space for 32 double
// registers, even though the actual CPU may only support 16. In the latter
// case, SaveFPRegs and RestoreFPRegs still use 32 stack slots, but only fill
// 16.
static constexpr int kDoubleRegsSize = static constexpr int kDoubleRegsSize =
kDoubleSize * DwVfpRegister::kNumRegisters; kDoubleSize * DwVfpRegister::kNumRegisters;
// Save all allocatable VFP registers before messing with them. // Save all allocatable VFP registers before messing with them.
{ {
// We use a run-time check for VFP32DREGS.
CpuFeatureScope scope(masm, VFP32DREGS,
CpuFeatureScope::kDontCheckSupported);
UseScratchRegisterScope temps(masm); UseScratchRegisterScope temps(masm);
Register scratch = temps.Acquire(); Register scratch = temps.Acquire();
__ SaveFPRegs(sp, scratch);
// Check CPU flags for number of registers, setting the Z condition flag.
__ CheckFor32DRegs(scratch);
// Push registers d0-d15, and possibly d16-d31, on the stack.
// If d16-d31 are not pushed, decrease the stack pointer instead.
__ vstm(db_w, sp, d16, d31, ne);
// Okay to not call AllocateStackSpace here because the size is a known
// small number and we need to use condition codes.
__ sub(sp, sp, Operand(16 * kDoubleSize), LeaveCC, eq);
__ vstm(db_w, sp, d0, d15);
} }
// Save all general purpose registers before messing with them. // Save all general purpose registers before messing with them.
...@@ -3230,7 +3221,7 @@ void Generate_DeoptimizationEntry(MacroAssembler* masm, ...@@ -3230,7 +3221,7 @@ void Generate_DeoptimizationEntry(MacroAssembler* masm,
// frame descriptor pointer to r1 (deoptimizer->input_); // frame descriptor pointer to r1 (deoptimizer->input_);
__ ldr(r1, MemOperand(r0, Deoptimizer::input_offset())); __ ldr(r1, MemOperand(r0, Deoptimizer::input_offset()));
// Copy core registers into FrameDescription::registers_[kNumRegisters]. // Copy core registers into FrameDescription::registers_.
DCHECK_EQ(Register::kNumRegisters, kNumberOfRegisters); DCHECK_EQ(Register::kNumRegisters, kNumberOfRegisters);
for (int i = 0; i < kNumberOfRegisters; i++) { for (int i = 0; i < kNumberOfRegisters; i++) {
int offset = (i * kPointerSize) + FrameDescription::registers_offset(); int offset = (i * kPointerSize) + FrameDescription::registers_offset();
...@@ -3238,16 +3229,19 @@ void Generate_DeoptimizationEntry(MacroAssembler* masm, ...@@ -3238,16 +3229,19 @@ void Generate_DeoptimizationEntry(MacroAssembler* masm,
__ str(r2, MemOperand(r1, offset)); __ str(r2, MemOperand(r1, offset));
} }
// Copy VFP registers to // Copy double registers to double_registers_.
// double_registers_[DoubleRegister::kNumAllocatableRegisters] static constexpr int kDoubleRegsOffset =
int double_regs_offset = FrameDescription::double_registers_offset(); FrameDescription::double_registers_offset();
const RegisterConfiguration* config = RegisterConfiguration::Default(); {
for (int i = 0; i < config->num_allocatable_double_registers(); ++i) { UseScratchRegisterScope temps(masm);
int code = config->GetAllocatableDoubleCode(i); Register scratch = temps.Acquire();
int dst_offset = code * kDoubleSize + double_regs_offset; Register src_location = r4;
int src_offset = code * kDoubleSize + kNumberOfRegisters * kPointerSize; __ add(src_location, sp, Operand(kNumberOfRegisters * kPointerSize));
__ vldr(d0, sp, src_offset); __ RestoreFPRegs(src_location, scratch);
__ vstr(d0, r1, dst_offset);
Register dst_location = r4;
__ add(dst_location, r1, Operand(kDoubleRegsOffset));
__ SaveFPRegsToHeap(dst_location, scratch);
} }
// Mark the stack as not iterable for the CPU profiler which won't be able to // Mark the stack as not iterable for the CPU profiler which won't be able to
...@@ -3324,11 +3318,18 @@ void Generate_DeoptimizationEntry(MacroAssembler* masm, ...@@ -3324,11 +3318,18 @@ void Generate_DeoptimizationEntry(MacroAssembler* masm,
__ b(lt, &outer_push_loop); __ b(lt, &outer_push_loop);
__ ldr(r1, MemOperand(r0, Deoptimizer::input_offset())); __ ldr(r1, MemOperand(r0, Deoptimizer::input_offset()));
for (int i = 0; i < config->num_allocatable_double_registers(); ++i) {
int code = config->GetAllocatableDoubleCode(i); // State:
DwVfpRegister reg = DwVfpRegister::from_code(code); // r1: Deoptimizer::input_ (FrameDescription*).
int src_offset = code * kDoubleSize + double_regs_offset; // r2: The last output FrameDescription pointer (FrameDescription*).
__ vldr(reg, r1, src_offset);
// Restore double registers from the input frame description.
{
UseScratchRegisterScope temps(masm);
Register scratch = temps.Acquire();
Register src_location = r6;
__ add(src_location, r1, Operand(kDoubleRegsOffset));
__ RestoreFPRegsFromHeap(src_location, scratch);
} }
// Push pc and continuation from the last output frame. // Push pc and continuation from the last output frame.
......
...@@ -2134,6 +2134,23 @@ void TurboAssembler::RestoreFPRegs(Register location, Register scratch) { ...@@ -2134,6 +2134,23 @@ void TurboAssembler::RestoreFPRegs(Register location, Register scratch) {
add(location, location, Operand(16 * kDoubleSize), LeaveCC, eq); add(location, location, Operand(16 * kDoubleSize), LeaveCC, eq);
} }
// Stores the double registers d0-d15, and d16-d31 when the CPU has them, to
// memory starting at |location| and growing towards higher addresses.
// |scratch| is passed to CheckFor32DRegs, which sets the condition flags that
// the conditional instructions below depend on. |location| is modified.
void TurboAssembler::SaveFPRegsToHeap(Register location, Register scratch) {
// Whether VFP32DREGS is available is decided by the run-time check below, so
// the scope must not assert that the feature is supported.
CpuFeatureScope scope(this, VFP32DREGS, CpuFeatureScope::kDontCheckSupported);
// Sets the condition flags: ne selects the 32-register path, eq the
// 16-register path.
CheckFor32DRegs(scratch);
vstm(ia_w, location, d0, d15);
// Only executed when d16-d31 exist.
vstm(ia_w, location, d16, d31, ne);
// Otherwise step over the 16 unused slots instead; presumably this makes
// |location| advance by the same amount on both paths (TODO confirm against
// the ia_w writeback behaviour).
add(location, location, Operand(16 * kDoubleSize), LeaveCC, eq);
}
// Loads the double registers d0-d15, and d16-d31 when the CPU has them, from
// memory starting at |location| and growing towards higher addresses.
// |scratch| is passed to CheckFor32DRegs, which sets the condition flags that
// the conditional instructions below depend on. |location| is modified.
void TurboAssembler::RestoreFPRegsFromHeap(Register location,
Register scratch) {
// Whether VFP32DREGS is available is decided by the run-time check below, so
// the scope must not assert that the feature is supported.
CpuFeatureScope scope(this, VFP32DREGS, CpuFeatureScope::kDontCheckSupported);
// Sets the condition flags: ne selects the 32-register path, eq the
// 16-register path.
CheckFor32DRegs(scratch);
vldm(ia_w, location, d0, d15);
// Only executed when d16-d31 exist.
vldm(ia_w, location, d16, d31, ne);
// Otherwise step over the 16 unused slots instead; presumably this makes
// |location| advance by the same amount on both paths (TODO confirm against
// the ia_w writeback behaviour).
add(location, location, Operand(16 * kDoubleSize), LeaveCC, eq);
}
template <typename T> template <typename T>
void TurboAssembler::FloatMaxHelper(T result, T left, T right, void TurboAssembler::FloatMaxHelper(T result, T left, T right,
Label* out_of_line) { Label* out_of_line) {
......
...@@ -394,6 +394,12 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { ...@@ -394,6 +394,12 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
// values to location, restoring [d0..(d15|d31)]. // values to location, restoring [d0..(d15|d31)].
void RestoreFPRegs(Register location, Register scratch); void RestoreFPRegs(Register location, Register scratch);
// As above, but with heap semantics instead of stack semantics, i.e.: the
// location starts at the lowest address and grows towards higher addresses,
// for both saves and restores.
void SaveFPRegsToHeap(Register location, Register scratch);
void RestoreFPRegsFromHeap(Register location, Register scratch);
// Calculate how much stack space (in bytes) are required to store caller // Calculate how much stack space (in bytes) are required to store caller
// registers excluding those specified in the arguments. // registers excluding those specified in the arguments.
int RequiredStackSizeForCallerSaved(SaveFPRegsMode fp_mode, int RequiredStackSizeForCallerSaved(SaveFPRegsMode fp_mode,
......
...@@ -229,6 +229,8 @@ class V8_EXPORT_PRIVATE AssemblerBase : public Malloced { ...@@ -229,6 +229,8 @@ class V8_EXPORT_PRIVATE AssemblerBase : public Malloced {
} }
// Features are usually enabled by CpuFeatureScope, which also asserts that // Features are usually enabled by CpuFeatureScope, which also asserts that
// the features are supported before they are enabled. // the features are supported before they are enabled.
// IMPORTANT: IsEnabled() should only be used by DCHECKs. For real feature
// detection, use IsSupported().
bool IsEnabled(CpuFeature f) { bool IsEnabled(CpuFeature f) {
return (enabled_cpu_features_ & (static_cast<uint64_t>(1) << f)) != 0; return (enabled_cpu_features_ & (static_cast<uint64_t>(1) << f)) != 0;
} }
......
...@@ -42,6 +42,8 @@ STATIC_ASSERT(RegisterConfiguration::kMaxFPRegisters >= ...@@ -42,6 +42,8 @@ STATIC_ASSERT(RegisterConfiguration::kMaxFPRegisters >=
STATIC_ASSERT(RegisterConfiguration::kMaxFPRegisters >= STATIC_ASSERT(RegisterConfiguration::kMaxFPRegisters >=
Simd128Register::kNumRegisters); Simd128Register::kNumRegisters);
// Callers on architectures other than Arm expect this to be constant
// between build and runtime. Avoid adding variability on other platforms.
static int get_num_allocatable_double_registers() { static int get_num_allocatable_double_registers() {
return return
#if V8_TARGET_ARCH_IA32 #if V8_TARGET_ARCH_IA32
...@@ -71,6 +73,8 @@ static int get_num_allocatable_double_registers() { ...@@ -71,6 +73,8 @@ static int get_num_allocatable_double_registers() {
#undef REGISTER_COUNT #undef REGISTER_COUNT
// Callers on architectures other than Arm expect this to be constant
// between build and runtime. Avoid adding variability on other platforms.
static const int* get_allocatable_double_codes() { static const int* get_allocatable_double_codes() {
return return
#if V8_TARGET_ARCH_ARM #if V8_TARGET_ARCH_ARM
......
...@@ -57,6 +57,9 @@ class V8_EXPORT_PRIVATE RegisterConfiguration { ...@@ -57,6 +57,9 @@ class V8_EXPORT_PRIVATE RegisterConfiguration {
int num_allocatable_float_registers() const { int num_allocatable_float_registers() const {
return num_allocatable_float_registers_; return num_allocatable_float_registers_;
} }
// Caution: this value depends on the current cpu and may change between
// build and runtime. At the time of writing, the only architecture with a
// variable allocatable double register set is Arm.
int num_allocatable_double_registers() const { int num_allocatable_double_registers() const {
return num_allocatable_double_registers_; return num_allocatable_double_registers_;
} }
......
...@@ -774,7 +774,7 @@ class FrameDescription { ...@@ -774,7 +774,7 @@ class FrameDescription {
return offsetof(FrameDescription, register_values_.registers_); return offsetof(FrameDescription, register_values_.registers_);
} }
static int double_registers_offset() { static constexpr int double_registers_offset() {
return offsetof(FrameDescription, register_values_.double_registers_); return offsetof(FrameDescription, register_values_.double_registers_);
} }
......
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
//
// Flags: --allow-natives-syntax
// Globals used by __f_6 below. __v_0 is overwritten with 1 inside the
// function; __v_13 .. __v_18 stay plain objects and are operands of the
// function's final `+` chain.
var __v_0 = {};
var __v_13 = {};
var __v_14 = {};
var __v_15 = {};
var __v_16 = {};
var __v_17 = {};
var __v_18 = {};
// Test body: computes fourteen multiples of `x`, stores 1 into the global
// __v_0, evaluates `deopt + -2147483648` for its effect only, and returns the
// `+` chain of the locals and the globals __v_13 .. __v_18.
// NOTE(review): presumably the many products keep a large number of double
// values live across the `deopt + ...` expression, which is expected to
// trigger a deoptimization when `deopt` is not a number — confirm.
function __f_6(x, deopt) {
var __v_1 = x;
var __v_2 = 2 * x;
var __v_3 = 3 * x;
var __v_4 = 4 * x;
var __v_5 = 5 * x;
var __v_6 = 6 * x;
var __v_7 = 7 * x;
var __v_9 = 9 * x;
var __v_10 = 10 * x;
var __v_11 = 11 * x;
var __v_12 = 12 * x;
var __v_20 = 18 * x;
var __v_19 = 19 * x;
var __v_8 = 20 * x;
__v_0 = 1;
// Result is discarded; only the evaluation itself matters.
deopt + -2147483648;
// The numeric locals __v_1 .. __v_12 are summed first; adding the object
// globals then turns the running value into a string, so __v_19 and __v_20
// are appended as text.
return __v_1 + __v_2 + __v_3 + __v_4 + __v_5 + __v_6 + __v_7 + __v_8 + __v_9 + __v_10 + __v_11 + __v_12 + __v_13 +
__v_14 + __v_15 + __v_16 + __v_17 + __v_18 + __v_19 + __v_20;
};
// Driver. The %-prefixed calls are V8 natives, enabled by the
// --allow-natives-syntax flag declared at the top of this test.
%PrepareFunctionForOptimization(__f_6);
// Run once with no arguments before requesting optimization.
__f_6();
%OptimizeFunctionOnNextCall(__f_6);
// With x = 0.5 the numeric locals sum to 45; the six objects each contribute
// "[object Object]", followed by __v_19 (9.5) and __v_20 (9) as text.
assertEquals("45[object Object][object Object][object Object][object Object][object Object][object Object]9.59", __f_6(0.5, ""));
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment