// Copyright 2012 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.


// Declares a Simulator for ARM instructions if we are not generating a native
// ARM binary. This Simulator allows us to run and debug ARM code generation on
// regular desktop machines.
// V8 calls into generated code by "calling" the CALL_GENERATED_CODE macro,
// which will start execution in the Simulator or forward to the real entry
// on an ARM HW platform.

13 14
#ifndef V8_ARM_SIMULATOR_ARM_H_
#define V8_ARM_SIMULATOR_ARM_H_
15

16
#include "src/allocation.h"
17 18
#include "src/base/lazy-instance.h"
#include "src/base/platform/mutex.h"
19

20 21 22 23 24
#if !defined(USE_SIMULATOR)
// Running without a simulator on a native arm platform.

namespace v8 {
namespace internal {
25 26

// When running without a simulator we call the entry directly.
// |isolate| is accepted for signature parity with the simulator build and is
// not used by this expansion; |entry| is invoked with the five arguments.
#define CALL_GENERATED_CODE(isolate, entry, p0, p1, p2, p3, p4) \
  (entry(p0, p1, p2, p3, p4))
29

30 31
typedef int (*arm_regexp_matcher)(String*, int, const byte*, const byte*, int*,
                                  int, Address, int, Isolate*);
32 33 34

// Call the generated regexp code directly. The code at the entry address
// should act as a function matching the type arm_regexp_matcher.
// |isolate| is not used by this expansion; |entry| is cast with
// FUNCTION_CAST and invoked with the nine arguments p0..p8.
#define CALL_GENERATED_REGEXP_CODE(isolate, entry, p0, p1, p2, p3, p4, p5, p6, \
                                   p7, p8)                                     \
  (FUNCTION_CAST<arm_regexp_matcher>(entry)(p0, p1, p2, p3, p4, p5, p6, p7, p8))
38

39 40 41 42 43
// The stack limit beyond which we will throw stack overflow errors in
// generated code. Because generated code on arm uses the C stack, we
// just use the C stack limit.
class SimulatorStack : public v8::internal::AllStatic {
 public:
44 45 46
  static inline uintptr_t JsLimitFromCLimit(v8::internal::Isolate* isolate,
                                            uintptr_t c_limit) {
    USE(isolate);
47 48
    return c_limit;
  }
49

50 51 52
  static inline uintptr_t RegisterCTryCatch(v8::internal::Isolate* isolate,
                                            uintptr_t try_catch_address) {
    USE(isolate);
53 54 55
    return try_catch_address;
  }

56 57 58
  static inline void UnregisterCTryCatch(v8::internal::Isolate* isolate) {
    USE(isolate);
  }
59
};
60

61 62
}  // namespace internal
}  // namespace v8
63

64 65
#else  // !defined(USE_SIMULATOR)
// Running with a simulator.
66

67 68
#include "src/arm/constants-arm.h"
#include "src/assembler.h"
lpy's avatar
lpy committed
69
#include "src/base/hashmap.h"
70

71 72
namespace v8 {
namespace internal {
73

74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104
// One page of cached data together with a per-line validity map.
// Used by the Simulator's ICache helpers (see Simulator::GetCachePage).
class CachePage {
 public:
  // Values stored in the validity map, one per cache line.
  static const int LINE_VALID = 0;
  static const int LINE_INVALID = 1;

  // Page geometry.
  static const int kPageShift = 12;
  static const int kPageSize = 1 << kPageShift;
  static const int kPageMask = kPageSize - 1;

  // Line geometry. The cache line is only 4 bytes right now.
  static const int kLineShift = 2;
  static const int kLineLength = 1 << kLineShift;
  static const int kLineMask = kLineLength - 1;

  // Marks every line as invalid. The data bytes are left uninitialized.
  CachePage() { memset(validity_map_, LINE_INVALID, sizeof(validity_map_)); }

  // Returns the validity byte of the line containing byte |offset| of the
  // page. |offset| is not range-checked.
  char* ValidityByte(int offset) {
    const int line_index = offset >> kLineShift;
    return validity_map_ + line_index;
  }

  // Returns the address of the cached byte at |offset| within the page.
  // |offset| is not range-checked.
  char* CachedData(int offset) { return data_ + offset; }

 private:
  // The cached data.
  char data_[kPageSize];
  // One validity byte per cache line.
  static const int kValidityMapSize = kPageSize >> kLineShift;
  char validity_map_[kValidityMapSize];
};


105 106
class Simulator {
 public:
107
  friend class ArmDebugger;
108 109 110 111 112 113 114
  enum Register {
    no_reg = -1,
    r0 = 0, r1, r2, r3, r4, r5, r6, r7,
    r8, r9, r10, r11, r12, r13, r14, r15,
    num_registers,
    sp = 13,
    lr = 14,
115 116 117 118 119 120 121 122
    pc = 15,
    s0 = 0, s1, s2, s3, s4, s5, s6, s7,
    s8, s9, s10, s11, s12, s13, s14, s15,
    s16, s17, s18, s19, s20, s21, s22, s23,
    s24, s25, s26, s27, s28, s29, s30, s31,
    num_s_registers = 32,
    d0 = 0, d1, d2, d3, d4, d5, d6, d7,
    d8, d9, d10, d11, d12, d13, d14, d15,
123 124
    d16, d17, d18, d19, d20, d21, d22, d23,
    d24, d25, d26, d27, d28, d29, d30, d31,
125 126 127 128
    num_d_registers = 32,
    q0 = 0, q1, q2, q3, q4, q5, q6, q7,
    q8, q9, q10, q11, q12, q13, q14, q15,
    num_q_registers = 16
129 130
  };

131
  explicit Simulator(Isolate* isolate);
132 133 134 135
  ~Simulator();

  // The currently executing Simulator instance. Potentially there can be one
  // for each native thread.
136
  static Simulator* current(v8::internal::Isolate* isolate);
137 138 139 140 141 142

  // Accessors for register state. Reading the pc value adheres to the ARM
  // architecture specification and is off by a 8 from the currently executing
  // instruction.
  void set_register(int reg, int32_t value);
  int32_t get_register(int reg) const;
143
  double get_double_from_register_pair(int reg);
144
  void set_register_pair_from_double(int reg, double* value);
145
  void set_dw_register(int dreg, const int* dbl);
146

147
  // Support for VFP.
148 149 150 151
  void get_d_register(int dreg, uint64_t* value);
  void set_d_register(int dreg, const uint64_t* value);
  void get_d_register(int dreg, uint32_t* value);
  void set_d_register(int dreg, const uint32_t* value);
152
  // Support for NEON.
153 154 155 156
  template <typename T, int SIZE = kSimd128Size>
  void get_neon_register(int reg, T (&value)[SIZE / sizeof(T)]);
  template <typename T, int SIZE = kSimd128Size>
  void set_neon_register(int reg, const T (&value)[SIZE / sizeof(T)]);
157

158 159
  void set_s_register(int reg, unsigned int value);
  unsigned int get_s_register(int reg) const;
160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183

  void set_d_register_from_double(int dreg, const double& dbl) {
    SetVFPRegister<double, 2>(dreg, dbl);
  }

  double get_double_from_d_register(int dreg) {
    return GetFromVFPRegister<double, 2>(dreg);
  }

  void set_s_register_from_float(int sreg, const float flt) {
    SetVFPRegister<float, 1>(sreg, flt);
  }

  float get_float_from_s_register(int sreg) {
    return GetFromVFPRegister<float, 1>(sreg);
  }

  void set_s_register_from_sinteger(int sreg, const int sint) {
    SetVFPRegister<int, 1>(sreg, sint);
  }

  int get_sinteger_from_s_register(int sreg) {
    return GetFromVFPRegister<int, 1>(sreg);
  }
184

185 186 187 188
  // Special case of set_register and get_register to access the raw PC value.
  void set_pc(int32_t value);
  int32_t get_pc() const;

189
  Address get_sp() const {
190 191 192
    return reinterpret_cast<Address>(static_cast<intptr_t>(get_register(sp)));
  }

193
  // Accessor to the internal simulator stack area.
194
  uintptr_t StackLimit(uintptr_t c_limit) const;
195 196

  // Executes ARM instructions until the PC reaches end_sim_pc.
197
  void Execute();
198

199
  // Call on program start.
200
  static void Initialize(Isolate* isolate);
201

202
  static void TearDown(base::CustomMatcherHashMap* i_cache, Redirection* first);
203

lrn@chromium.org's avatar
lrn@chromium.org committed
204 205 206 207
  // V8 generally calls into generated JS code with 5 parameters and into
  // generated RegExp code with 7 parameters. This is a convenience function,
  // which sets up the simulator state and grabs the result on return.
  int32_t Call(byte* entry, int argument_count, ...);
208
  // Alternative: call a 2-argument double function.
209 210 211
  void CallFP(byte* entry, double d0, double d1);
  int32_t CallFPReturnsInt(byte* entry, double d0, double d1);
  double CallFPReturnsDouble(byte* entry, double d0, double d1);
212

213 214 215 216 217 218
  // Push an address onto the JS stack.
  uintptr_t PushAddress(uintptr_t address);

  // Pop an address from the JS stack.
  uintptr_t PopAddress();

219 220 221 222
  // Debugger input.
  void set_last_debugger_input(char* input);
  char* last_debugger_input() { return last_debugger_input_; }

223
  // ICache checking.
224 225
  static void FlushICache(base::CustomMatcherHashMap* i_cache, void* start,
                          size_t size);
226

227 228 229 230
  // Returns true if pc register contains one of the 'special_values' defined
  // below (bad_lr, end_sim_pc).
  bool has_bad_pc() const;

231 232 233 234 235 236 237 238 239
  // EABI variant for double arguments in use.
  bool use_eabi_hardfloat() {
#if USE_EABI_HARDFLOAT
    return true;
#else
    return false;
#endif
  }

240 241 242 243 244 245 246 247 248 249 250 251 252
 private:
  enum special_values {
    // Known bad pc value to ensure that the simulator does not execute
    // without being properly setup.
    bad_lr = -1,
    // A pc value used to signal the simulator to stop execution.  Generally
    // the lr is set to this value on transition from native C code to
    // simulated execution, so that the simulator can "return" to the native
    // C code.
    end_sim_pc = -2
  };

  // Unsupported instructions use Format to print an error and stop execution.
253
  void Format(Instruction* instr, const char* format);
254 255 256

  // Checks if the current instruction should be executed based on its
  // condition bits.
257
  inline bool ConditionallyExecute(Instruction* instr);
258 259 260 261 262

  // Helper functions to set the conditional flags in the architecture state.
  void SetNZFlags(int32_t val);
  void SetCFlag(bool val);
  void SetVFlag(bool val);
263
  bool CarryFrom(int32_t left, int32_t right, int32_t carry = 0);
264
  bool BorrowFrom(int32_t left, int32_t right, int32_t carry = 1);
265 266 267 268 269
  bool OverflowFrom(int32_t alu_out,
                    int32_t left,
                    int32_t right,
                    bool addition);

270 271
  inline int GetCarry() {
    return c_flag_ ? 1 : 0;
272
  }
273

274
  // Support for VFP.
275
  void Compute_FPSCR_Flags(float val1, float val2);
276 277
  void Compute_FPSCR_Flags(double val1, double val2);
  void Copy_FPSCR_to_APSR();
278
  inline float canonicalizeNaN(float value);
279
  inline double canonicalizeNaN(double value);
280

281
  // Helper functions to decode common "addressing" modes
282 283
  int32_t GetShiftRm(Instruction* instr, bool* carry_out);
  int32_t GetImm(Instruction* instr, bool* carry_out);
284 285 286 287 288
  int32_t ProcessPU(Instruction* instr,
                    int num_regs,
                    int operand_size,
                    intptr_t* start_address,
                    intptr_t* end_address);
289
  void HandleRList(Instruction* instr, bool load);
290
  void HandleVList(Instruction* inst);
291
  void SoftwareInterrupt(Instruction* instr);
292

293
  // Stop helper functions.
294
  inline bool isStopInstruction(Instruction* instr);
295 296 297 298 299 300 301
  inline bool isWatchedStop(uint32_t bkpt_code);
  inline bool isEnabledStop(uint32_t bkpt_code);
  inline void EnableStop(uint32_t bkpt_code);
  inline void DisableStop(uint32_t bkpt_code);
  inline void IncreaseStopCounter(uint32_t bkpt_code);
  void PrintStopInfo(uint32_t code);

302
  // Read and write memory.
303 304
  // The *Ex functions are exclusive access. The writes return the strex status:
  // 0 if the write succeeds, and 1 if the write fails.
305 306
  inline uint8_t ReadBU(int32_t addr);
  inline int8_t ReadB(int32_t addr);
307
  uint8_t ReadExBU(int32_t addr);
308 309
  inline void WriteB(int32_t addr, uint8_t value);
  inline void WriteB(int32_t addr, int8_t value);
310
  int WriteExB(int32_t addr, uint8_t value);
311

312 313
  inline uint16_t ReadHU(int32_t addr, Instruction* instr);
  inline int16_t ReadH(int32_t addr, Instruction* instr);
314
  uint16_t ReadExHU(int32_t addr, Instruction* instr);
315
  // Note: Overloaded on the sign of the value.
316 317
  inline void WriteH(int32_t addr, uint16_t value, Instruction* instr);
  inline void WriteH(int32_t addr, int16_t value, Instruction* instr);
318
  int WriteExH(int32_t addr, uint16_t value, Instruction* instr);
319

320
  inline int ReadW(int32_t addr, Instruction* instr);
321
  int ReadExW(int32_t addr, Instruction* instr);
322
  inline void WriteW(int32_t addr, int value, Instruction* instr);
323
  int WriteExW(int32_t addr, int value, Instruction* instr);
324

325 326 327
  int32_t* ReadDW(int32_t addr);
  void WriteDW(int32_t addr, int32_t value1, int32_t value2);

328
  // Executing is handled based on the instruction type.
329 330 331 332 333 334 335 336
  // Both type 0 and type 1 rolled into one.
  void DecodeType01(Instruction* instr);
  void DecodeType2(Instruction* instr);
  void DecodeType3(Instruction* instr);
  void DecodeType4(Instruction* instr);
  void DecodeType5(Instruction* instr);
  void DecodeType6(Instruction* instr);
  void DecodeType7(Instruction* instr);
337

338 339 340
  // CP15 coprocessor instructions.
  void DecodeTypeCP15(Instruction* instr);

341
  // Support for VFP.
342 343
  void DecodeTypeVFP(Instruction* instr);
  void DecodeType6CoprocessorIns(Instruction* instr);
344
  void DecodeSpecialCondition(Instruction* instr);
345

346 347 348
  void DecodeVMOVBetweenCoreAndSinglePrecisionRegisters(Instruction* instr);
  void DecodeVCMP(Instruction* instr);
  void DecodeVCVTBetweenDoubleAndSingle(Instruction* instr);
349 350
  int32_t ConvertDoubleToInt(double val, bool unsigned_integer,
                             VFPRoundingMode mode);
351
  void DecodeVCVTBetweenFloatingPointAndInteger(Instruction* instr);
352

353
  // Executes one instruction.
354
  void InstructionDecode(Instruction* instr);
355

356
  // ICache.
357 358 359 360 361 362
  static void CheckICache(base::CustomMatcherHashMap* i_cache,
                          Instruction* instr);
  static void FlushOnePage(base::CustomMatcherHashMap* i_cache, intptr_t start,
                           int size);
  static CachePage* GetCachePage(base::CustomMatcherHashMap* i_cache,
                                 void* page);
363

364
  // Runtime call support. Uses the isolate in a thread-safe way.
365
  static void* RedirectExternalReference(
366
      Isolate* isolate, void* external_function,
367
      v8::internal::ExternalReference::Type type);
368

369 370
  // Handle arguments and return value for runtime FP functions.
  void GetFpArgs(double* x, double* y, int32_t* z);
371 372 373
  void SetFpResult(const double& result);
  void TrashCallerSaveRegisters();

374 375 376 377 378 379
  template<class ReturnType, int register_size>
      ReturnType GetFromVFPRegister(int reg_index);

  template<class InputType, int register_size>
      void SetVFPRegister(int reg_index, const InputType& value);

380 381 382
  void SetSpecialRegister(SRegisterFieldMask reg_and_mask, uint32_t value);
  uint32_t GetFromSpecialRegister(SRegister reg);

383 384
  void CallInternal(byte* entry);

385
  // Architecture state.
386 387 388
  // Saturating instructions require a Q flag to indicate saturation.
  // There is currently no way to read the CPSR directly, and thus read the Q
  // flag, so this is left unimplemented.
389 390 391 392 393 394
  int32_t registers_[16];
  bool n_flag_;
  bool z_flag_;
  bool c_flag_;
  bool v_flag_;

395
  // VFP architecture state.
396
  unsigned int vfp_registers_[num_d_registers * 2];
397 398 399 400 401
  bool n_flag_FPSCR_;
  bool z_flag_FPSCR_;
  bool c_flag_FPSCR_;
  bool v_flag_FPSCR_;

402
  // VFP rounding mode. See ARM DDI 0406B Page A2-29.
403
  VFPRoundingMode FPSCR_rounding_mode_;
404
  bool FPSCR_default_NaN_mode_;
405

406 407 408 409 410 411 412
  // VFP FP exception flags architecture state.
  bool inv_op_vfp_flag_;
  bool div_zero_vfp_flag_;
  bool overflow_vfp_flag_;
  bool underflow_vfp_flag_;
  bool inexact_vfp_flag_;

413
  // Simulator support.
414 415 416 417
  char* stack_;
  bool pc_modified_;
  int icount_;

418 419 420
  // Debugger input.
  char* last_debugger_input_;

421
  // Icache simulation
422
  base::CustomMatcherHashMap* i_cache_;
423

424
  // Registered breakpoints.
425 426
  Instruction* break_pc_;
  Instr break_instr_;
427

428 429
  v8::internal::Isolate* isolate_;

430 431 432 433 434 435 436 437
  // A stop is watched if its code is less than kNumOfWatchedStops.
  // Only watched stops support enabling/disabling and the counter feature.
  static const uint32_t kNumOfWatchedStops = 256;

  // Breakpoint is disabled if bit 31 is set.
  static const uint32_t kStopDisabledBit = 1 << 31;

  // A stop is enabled, meaning the simulator will stop when meeting the
438 439
  // instruction, if bit 31 of watched_stops_[code].count is unset.
  // The value watched_stops_[code].count & ~(1 << 31) indicates how many times
440
  // the breakpoint was hit or gone through.
441
  struct StopCountAndDesc {
442 443 444
    uint32_t count;
    char* desc;
  };
445
  StopCountAndDesc watched_stops_[kNumOfWatchedStops];
446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533

  // Syncronization primitives. See ARM DDI 0406C.b, A2.9.
  enum class MonitorAccess {
    Open,
    Exclusive,
  };

  enum class TransactionSize {
    None = 0,
    Byte = 1,
    HalfWord = 2,
    Word = 4,
  };

  // The least-significant bits of the address are ignored. The number of bits
  // is implementation-defined, between 3 and 11. See ARM DDI 0406C.b, A3.4.3.
  static const int32_t kExclusiveTaggedAddrMask = ~((1 << 11) - 1);

  class LocalMonitor {
   public:
    LocalMonitor();

    // These functions manage the state machine for the local monitor, but do
    // not actually perform loads and stores. NotifyStoreExcl only returns
    // true if the exclusive store is allowed; the global monitor will still
    // have to be checked to see whether the memory should be updated.
    void NotifyLoad(int32_t addr);
    void NotifyLoadExcl(int32_t addr, TransactionSize size);
    void NotifyStore(int32_t addr);
    bool NotifyStoreExcl(int32_t addr, TransactionSize size);

   private:
    void Clear();

    MonitorAccess access_state_;
    int32_t tagged_addr_;
    TransactionSize size_;
  };

  class GlobalMonitor {
   public:
    GlobalMonitor();

    class Processor {
     public:
      Processor();

     private:
      friend class GlobalMonitor;
      // These functions manage the state machine for the global monitor, but do
      // not actually perform loads and stores.
      void Clear_Locked();
      void NotifyLoadExcl_Locked(int32_t addr);
      void NotifyStore_Locked(int32_t addr, bool is_requesting_processor);
      bool NotifyStoreExcl_Locked(int32_t addr, bool is_requesting_processor);

      MonitorAccess access_state_;
      int32_t tagged_addr_;
      Processor* next_;
      Processor* prev_;
      // A strex can fail due to background cache evictions. Rather than
      // simulating this, we'll just occasionally introduce cases where an
      // exclusive store fails. This will happen once after every
      // kMaxFailureCounter exclusive stores.
      static const int kMaxFailureCounter = 5;
      int failure_counter_;
    };

    // Exposed so it can be accessed by Simulator::{Read,Write}Ex*.
    base::Mutex mutex;

    void NotifyLoadExcl_Locked(int32_t addr, Processor* processor);
    void NotifyStore_Locked(int32_t addr, Processor* processor);
    bool NotifyStoreExcl_Locked(int32_t addr, Processor* processor);

    // Called when the simulator is destroyed.
    void RemoveProcessor(Processor* processor);

   private:
    bool IsProcessorInLinkedList_Locked(Processor* processor) const;
    void PrependProcessor_Locked(Processor* processor);

    Processor* head_;
  };

  LocalMonitor local_monitor_;
  GlobalMonitor::Processor global_monitor_processor_;
  static base::LazyInstance<GlobalMonitor>::type global_monitor_;
534 535
};

536 537 538

// When running with the simulator transition into simulated execution at this
// point. The five arguments are passed to Simulator::Call on the isolate's
// current simulator, and its int32_t result is reinterpreted as an Object*.
#define CALL_GENERATED_CODE(isolate, entry, p0, p1, p2, p3, p4) \
  reinterpret_cast<Object*>(Simulator::current(isolate)->Call(  \
      FUNCTION_ADDR(entry), 5, p0, p1, p2, p3, p4))

543 544
// Runs the generated code at |entry| on the isolate's current simulator via
// Simulator::CallFPReturnsInt, passing p0 and p1, and yields its result.
#define CALL_GENERATED_FP_INT(isolate, entry, p0, p1) \
  Simulator::current(isolate)->CallFPReturnsInt(FUNCTION_ADDR(entry), p0, p1)
545

546 547
// Runs the generated regexp code at |entry| on the isolate's current
// simulator, passing the nine arguments p0..p8 through Simulator::Call.
#define CALL_GENERATED_REGEXP_CODE(isolate, entry, p0, p1, p2, p3, p4, p5, p6, \
                                   p7, p8)                                     \
  Simulator::current(isolate)->Call(entry, 9, p0, p1, p2, p3, p4, p5, p6, p7,  \
                                    p8)
550

551
// The simulator has its own stack. Thus it has a different stack limit from
552 553 554
// the C-based native code.  The JS-based limit normally points near the end of
// the simulator stack.  When the C-based limit is exhausted we reflect that by
// lowering the JS-based limit as well, to make stack checks trigger.
555 556
class SimulatorStack : public v8::internal::AllStatic {
 public:
557 558
  static inline uintptr_t JsLimitFromCLimit(v8::internal::Isolate* isolate,
                                            uintptr_t c_limit) {
559
    return Simulator::current(isolate)->StackLimit(c_limit);
560
  }
561

562 563 564
  static inline uintptr_t RegisterCTryCatch(v8::internal::Isolate* isolate,
                                            uintptr_t try_catch_address) {
    Simulator* sim = Simulator::current(isolate);
565 566 567
    return sim->PushAddress(try_catch_address);
  }

568 569
  static inline void UnregisterCTryCatch(v8::internal::Isolate* isolate) {
    Simulator::current(isolate)->PopAddress();
570
  }
571 572
};

573 574
}  // namespace internal
}  // namespace v8
575

576
#endif  // !defined(USE_SIMULATOR)
577
#endif  // V8_ARM_SIMULATOR_ARM_H_