// Copyright 2012 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/codegen.h"
#include "src/deoptimizer.h"
#include "src/full-codegen/full-codegen.h"
#include "src/register-configuration.h"
#include "src/safepoint-table.h"

namespace v8 {
namespace internal {

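// Each deoptimization table entry is two ARM instructions: a load of the
// entry index into ip followed by a branch to the common entry code, i.e.
// 2 * Assembler::kInstrSize == 8 bytes (see TableEntryGenerator below).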
const int Deoptimizer::table_entry_size_ = 8;


int Deoptimizer::patch_size() {
  const int kCallInstructionSizeInWords = 3;
  return kCallInstructionSizeInWords * Assembler::kInstrSize;
}


void Deoptimizer::EnsureRelocSpaceForLazyDeoptimization(Handle<Code> code) {
  // Empty because there is no need for relocation information for the code
  // patching in Deoptimizer::PatchCodeForDeoptimization below.
}


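// Rewrites the optimized code object in place: every lazy bailout point
// gets a call to the matching lazy deoptimization entry, so any return
// into this code deoptimizes immediately.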
void Deoptimizer::PatchCodeForDeoptimization(Isolate* isolate, Code* code) {
  Address code_start_address = code->instruction_start();
  // Invalidate the relocation information, as it will become invalid by the
  // code patching below, and is not needed any more.
  code->InvalidateRelocation();

  if (FLAG_zap_code_space) {
    // Fail hard and early if we enter this code object again.
    byte* pointer = code->FindCodeAgeSequence();
    if (pointer != NULL) {
      pointer += kNoCodeAgeSequenceLength;
    } else {
      pointer = code->instruction_start();
    }
    CodePatcher patcher(isolate, pointer, 1);
    patcher.masm()->bkpt(0);

    DeoptimizationInputData* data =
        DeoptimizationInputData::cast(code->deoptimization_data());
    int osr_offset = data->OsrPcOffset()->value();
    if (osr_offset > 0) {
      CodePatcher osr_patcher(isolate, code->instruction_start() + osr_offset,
                              1);
      osr_patcher.masm()->bkpt(0);
    }
  }

  DeoptimizationInputData* deopt_data =
      DeoptimizationInputData::cast(code->deoptimization_data());
#ifdef DEBUG
  Address prev_call_address = NULL;
#endif
  // For each LLazyBailout instruction insert a call to the corresponding
  // deoptimization entry.
  for (int i = 0; i < deopt_data->DeoptCount(); i++) {
    if (deopt_data->Pc(i)->value() == -1) continue;
    Address call_address = code_start_address + deopt_data->Pc(i)->value();
    Address deopt_entry = GetDeoptimizationEntry(isolate, i, LAZY);
    // We need calls to have a predictable size in the unoptimized code, but
    // this is optimized code, so we don't have to have a predictable size.
    int call_size_in_bytes = MacroAssembler::CallDeoptimizerSize();
    int call_size_in_words = call_size_in_bytes / Assembler::kInstrSize;
    DCHECK(call_size_in_bytes % Assembler::kInstrSize == 0);
    DCHECK(call_size_in_bytes <= patch_size());
    CodePatcher patcher(isolate, call_address, call_size_in_words);
    patcher.masm()->CallDeoptimizer(deopt_entry);
    DCHECK(prev_call_address == NULL ||
           call_address >= prev_call_address + patch_size());
    DCHECK(call_address + patch_size() <= code->instruction_end());
#ifdef DEBUG
    prev_call_address = call_address;
#endif
  }
}


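// Record in the output frame the register values the stub failure handler
// expects: the handler parameter count in r0 and the address of the C++
// deoptimization handler in r1.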
void Deoptimizer::SetPlatformCompiledStubRegisters(
    FrameDescription* output_frame, CodeStubDescriptor* descriptor) {
  ApiFunction function(descriptor->deoptimization_handler());
  ExternalReference xref(&function, ExternalReference::BUILTIN_CALL, isolate_);
  intptr_t handler = reinterpret_cast<intptr_t>(xref.address());
  int params = descriptor->GetHandlerParameterCount();
  output_frame->SetRegister(r0.code(), params);
  output_frame->SetRegister(r1.code(), handler);
}


void Deoptimizer::CopyDoubleRegisters(FrameDescription* output_frame) {
  for (int i = 0; i < DwVfpRegister::kMaxNumRegisters; ++i) {
    double double_value = input_->GetDoubleRegister(i);
    output_frame->SetDoubleRegister(i, double_value);
  }
}

#define __ masm()->

// This code tries to be close to ia32 code so that any changes can be
// easily ported.
void Deoptimizer::TableEntryGenerator::Generate() {
  GeneratePrologue();

  // Save all general purpose registers before messing with them.
  const int kNumberOfRegisters = Register::kNumRegisters;

  // Everything but sp, lr and pc, which will be saved but not restored.
  RegList restored_regs = kJSCallerSaved | kCalleeSaved | ip.bit();

  const int kDoubleRegsSize = kDoubleSize * DwVfpRegister::kMaxNumRegisters;

  // Save all allocatable VFP registers before messing with them.
  DCHECK(kDoubleRegZero.code() == 14);
  DCHECK(kScratchDoubleReg.code() == 15);

  {
    // We use a run-time check for VFP32DREGS.
    CpuFeatureScope scope(masm(), VFP32DREGS,
                          CpuFeatureScope::kDontCheckSupported);

    // Check CPU flags for number of registers, setting the Z condition flag.
    __ CheckFor32DRegs(ip);

    // Push registers d0-d15, and possibly d16-d31, on the stack.
    // If d16-d31 are not pushed, decrease the stack pointer instead.
    __ vstm(db_w, sp, d16, d31, ne);
    __ sub(sp, sp, Operand(16 * kDoubleSize), LeaveCC, eq);
    __ vstm(db_w, sp, d0, d15);
  }

  // Push all 16 registers (needed to populate FrameDescription::registers_).
  // TODO(1588) Note that using pc with stm is deprecated, so we should perhaps
  // handle this a bit differently.
  __ stm(db_w, sp, restored_regs | sp.bit() | lr.bit() | pc.bit());

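  // Save fp in the isolate's c_entry_fp slot so that the stack below
  // remains walkable while we call into C++ without an exit frame.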
  __ mov(ip, Operand(ExternalReference(Isolate::kCEntryFPAddress, isolate())));
  __ str(fp, MemOperand(ip));

  const int kSavedRegistersAreaSize =
      (kNumberOfRegisters * kPointerSize) + kDoubleRegsSize;

  // Get the bailout id from the stack.
  __ ldr(r2, MemOperand(sp, kSavedRegistersAreaSize));

  // Get the address of the location in the code object (r3) (return
  // address for lazy deoptimization) and compute the fp-to-sp delta in
  // register r4.
  __ mov(r3, lr);
  // Correct one word for bailout id.
  __ add(r4, sp, Operand(kSavedRegistersAreaSize + (1 * kPointerSize)));
  __ sub(r4, fp, r4);

  // Allocate a new deoptimizer object.
  // Pass four arguments in r0 to r3; the fifth and sixth arguments (the
  // fp-to-sp delta and the isolate) are passed on the stack.
  __ PrepareCallCFunction(6, r5);
  __ mov(r0, Operand(0));
  Label context_check;
  __ ldr(r1, MemOperand(fp, CommonFrameConstants::kContextOrFrameTypeOffset));
  __ JumpIfSmi(r1, &context_check);
  __ ldr(r0, MemOperand(fp, JavaScriptFrameConstants::kFunctionOffset));
  __ bind(&context_check);
  __ mov(r1, Operand(type()));  // bailout type,
  // r2: bailout id already loaded.
  // r3: code address or 0 already loaded.
  __ str(r4, MemOperand(sp, 0 * kPointerSize));  // Fp-to-sp delta.
  __ mov(r5, Operand(ExternalReference::isolate_address(isolate())));
  __ str(r5, MemOperand(sp, 1 * kPointerSize));  // Isolate.
  // Call Deoptimizer::New().
  {
    AllowExternalCallThatCantCauseGC scope(masm());
    __ CallCFunction(ExternalReference::new_deoptimizer_function(isolate()), 6);
  }

  // Preserve "deoptimizer" object in register r0 and load the input
  // frame descriptor pointer into r1 (deoptimizer->input_).
  __ ldr(r1, MemOperand(r0, Deoptimizer::input_offset()));

  // Copy core registers into FrameDescription::registers_[kNumRegisters].
  DCHECK(Register::kNumRegisters == kNumberOfRegisters);
  for (int i = 0; i < kNumberOfRegisters; i++) {
    int offset = (i * kPointerSize) + FrameDescription::registers_offset();
    __ ldr(r2, MemOperand(sp, i * kPointerSize));
    __ str(r2, MemOperand(r1, offset));
  }

  // Copy VFP registers to
  // double_registers_[DoubleRegister::kMaxNumAllocatableRegisters]
  int double_regs_offset = FrameDescription::double_registers_offset();
  const RegisterConfiguration* config = RegisterConfiguration::Crankshaft();
  for (int i = 0; i < config->num_allocatable_double_registers(); ++i) {
    int code = config->GetAllocatableDoubleCode(i);
    int dst_offset = code * kDoubleSize + double_regs_offset;
    int src_offset = code * kDoubleSize + kNumberOfRegisters * kPointerSize;
    __ vldr(d0, sp, src_offset);
    __ vstr(d0, r1, dst_offset);
  }

  // Remove the bailout id and the saved registers from the stack.
  __ add(sp, sp, Operand(kSavedRegistersAreaSize + (1 * kPointerSize)));

  // Compute a pointer to the unwinding limit in register r2; that is
  // the first stack slot not part of the input frame.
  __ ldr(r2, MemOperand(r1, FrameDescription::frame_size_offset()));
  __ add(r2, r2, sp);

  // Unwind the stack down to - but not including - the unwinding
  // limit and copy the contents of the activation frame to the input
  // frame description.
  __ add(r3, r1, Operand(FrameDescription::frame_content_offset()));
  Label pop_loop;
  Label pop_loop_header;
  __ b(&pop_loop_header);
  __ bind(&pop_loop);
  __ pop(r4);
  __ str(r4, MemOperand(r3, 0));
  __ add(r3, r3, Operand(sizeof(uint32_t)));
  __ bind(&pop_loop_header);
  __ cmp(r2, sp);
  __ b(ne, &pop_loop);

  // Compute the output frame in the deoptimizer.
  __ push(r0);  // Preserve deoptimizer object across call.
  // r0: deoptimizer object; r1: scratch.
  __ PrepareCallCFunction(1, r1);
  // Call Deoptimizer::ComputeOutputFrames().
  {
    AllowExternalCallThatCantCauseGC scope(masm());
    __ CallCFunction(
        ExternalReference::compute_output_frames_function(isolate()), 1);
  }
  __ pop(r0);  // Restore deoptimizer object (class Deoptimizer).

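  // Switch the stack pointer to the top of the caller's frame; the output
  // frames are pushed below this point, replacing the input frame.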
  __ ldr(sp, MemOperand(r0, Deoptimizer::caller_frame_top_offset()));

  // Replace the current (input) frame with the output frames.
  Label outer_push_loop, inner_push_loop,
      outer_loop_header, inner_loop_header;
  // Outer loop state: r4 = current "FrameDescription** output_",
  // r1 = one past the last FrameDescription**.
  __ ldr(r1, MemOperand(r0, Deoptimizer::output_count_offset()));
  __ ldr(r4, MemOperand(r0, Deoptimizer::output_offset()));  // r4 is output_.
  __ add(r1, r4, Operand(r1, LSL, 2));
  __ jmp(&outer_loop_header);
  __ bind(&outer_push_loop);
  // Inner loop state: r2 = current FrameDescription*, r3 = loop index.
  __ ldr(r2, MemOperand(r4, 0));  // output_[ix]
  __ ldr(r3, MemOperand(r2, FrameDescription::frame_size_offset()));
  __ jmp(&inner_loop_header);
  __ bind(&inner_push_loop);
  __ sub(r3, r3, Operand(sizeof(uint32_t)));
  __ add(r6, r2, Operand(r3));
  __ ldr(r6, MemOperand(r6, FrameDescription::frame_content_offset()));
  __ push(r6);
  __ bind(&inner_loop_header);
  __ cmp(r3, Operand::Zero());
  // r3 counts down in word-sized steps to exactly zero, so ne suffices.
  __ b(ne, &inner_push_loop);
  __ add(r4, r4, Operand(kPointerSize));
  __ bind(&outer_loop_header);
  __ cmp(r4, r1);
  __ b(lt, &outer_push_loop);

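  // Reload the input frame description and restore the allocatable VFP
  // registers from it; the C++ calls above may have clobbered them.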
  __ ldr(r1, MemOperand(r0, Deoptimizer::input_offset()));
  for (int i = 0; i < config->num_allocatable_double_registers(); ++i) {
    int code = config->GetAllocatableDoubleCode(i);
    DwVfpRegister reg = DwVfpRegister::from_code(code);
    int src_offset = code * kDoubleSize + double_regs_offset;
    __ vldr(reg, r1, src_offset);
  }

  // Push state, pc, and continuation from the last output frame.
  __ ldr(r6, MemOperand(r2, FrameDescription::state_offset()));
  __ push(r6);
  __ ldr(r6, MemOperand(r2, FrameDescription::pc_offset()));
  __ push(r6);
  __ ldr(r6, MemOperand(r2, FrameDescription::continuation_offset()));
  __ push(r6);

  // Push the registers from the last output frame.
  for (int i = kNumberOfRegisters - 1; i >= 0; i--) {
    int offset = (i * kPointerSize) + FrameDescription::registers_offset();
    __ ldr(r6, MemOperand(r2, offset));
    __ push(r6);
  }

  // Restore the registers from the stack.
  __ ldm(ia_w, sp, restored_regs);  // All but the sp, lr and pc slots.
  __ pop(ip);  // remove sp
  __ pop(ip);  // remove lr

  __ InitializeRootRegister();

  // Unwind the fields pushed from the last output frame: discard the pc
  // register slot, pop the continuation into ip and the frame's pc into
  // lr, then jump to the continuation. The bailout state word is left on
  // the stack for the continuation.
  __ pop(ip);  // remove pc
  __ pop(ip);  // get continuation, leave pc on stack
  __ pop(lr);
  __ Jump(ip);
  __ stop("Unreachable.");
}


void Deoptimizer::TableEntryGenerator::GeneratePrologue() {
  // Create a sequence of deoptimization entries.
  // Note that registers are still live when jumping to an entry.
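  // Each entry occupies table_entry_size_ (8) bytes: it loads its own
  // index into ip and branches to the common tail below, which pushes ip
  // as the bailout id.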

  // We need to be able to generate immediates up to kMaxNumberOfEntries. On
  // ARMv7, we can use movw (with a maximum immediate of 0xffff). On ARMv6, we
  // need two instructions.
  STATIC_ASSERT((kMaxNumberOfEntries - 1) <= 0xffff);
  if (CpuFeatures::IsSupported(ARMv7)) {
    CpuFeatureScope scope(masm(), ARMv7);
    Label done;
    for (int i = 0; i < count(); i++) {
      int start = masm()->pc_offset();
      USE(start);
      __ movw(ip, i);
      __ b(&done);
      DCHECK_EQ(table_entry_size_, masm()->pc_offset() - start);
    }
    __ bind(&done);
  } else {
    // We want to keep table_entry_size_ == 8 (since this is the common case),
    // but we need two instructions to load most immediates over 0xff. To handle
    // this, we set the low byte in the main table, and then set the high byte
    // in a separate table if necessary.
    Label high_fixes[256];
    int high_fix_max = (count() - 1) >> 8;
    DCHECK_GT(arraysize(high_fixes), high_fix_max);
    for (int i = 0; i < count(); i++) {
      int start = masm()->pc_offset();
      USE(start);
      __ mov(ip, Operand(i & 0xff));  // Set the low byte.
      __ b(&high_fixes[i >> 8]);      // Jump to the secondary table.
      DCHECK_EQ(table_entry_size_, masm()->pc_offset() - start);
    }
    // Generate the secondary table, to set the high byte.
    for (int high = 1; high <= high_fix_max; high++) {
      __ bind(&high_fixes[high]);
      __ orr(ip, ip, Operand(high << 8));
      // If this isn't the last entry, emit a branch to the end of the table.
      // The last entry can just fall through.
      if (high < high_fix_max) __ b(&high_fixes[0]);
    }
    // Bind high_fixes[0] last, for indices like 0x00**. This case requires no
    // fix-up, so for (common) small tables we can jump here, then just fall
    // through with no additional branch.
    __ bind(&high_fixes[0]);
  }
  __ push(ip);
}


void FrameDescription::SetCallerPc(unsigned offset, intptr_t value) {
  SetFrameSlot(offset, value);
}


void FrameDescription::SetCallerFp(unsigned offset, intptr_t value) {
  SetFrameSlot(offset, value);
}


void FrameDescription::SetCallerConstantPool(unsigned offset, intptr_t value) {
  DCHECK(FLAG_enable_embedded_constant_pool);
  SetFrameSlot(offset, value);
}


#undef __

}  // namespace internal
}  // namespace v8