// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/wasm/jump-table-assembler.h"

#include "src/codegen/assembler-inl.h"
#include "src/codegen/macro-assembler-inl.h"

namespace v8 {
namespace internal {
namespace wasm {

// The implementation is compact enough to implement it inline here. If it gets
// much bigger, we might want to split it in a separate file per architecture.
#if V8_TARGET_ARCH_X64
void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
                                                 Address lazy_compile_target) {
  // Use a push, because mov to an extended register takes 6 bytes.
  pushq_imm32(func_index);            // 5 bytes
  EmitJumpSlot(lazy_compile_target);  // 5 bytes
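  // Byte-level sketch of the resulting 10-byte slot (standard x64 encodings,
  // shown for illustration only):
  //   68 <func_index: imm32>   push imm32
  //   E9 <rel32>               jmp near, relative to the end of the jmp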
}

bool JumpTableAssembler::EmitJumpSlot(Address target) {
  intptr_t displacement = static_cast<intptr_t>(
      reinterpret_cast<byte*>(target) - pc_ - kNearJmpInstrSize);
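  // The rel32 operand of a near jmp is relative to the end of the 5-byte
  // instruction, hence kNearJmpInstrSize is subtracted above.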
  if (!is_int32(displacement)) return false;
  near_jmp(displacement, RelocInfo::NONE);  // 5 bytes
  return true;
}

void JumpTableAssembler::EmitFarJumpSlot(Address target) {
  Label data;
  int start_offset = pc_offset();
  jmp(Operand(&data));  // 6 bytes
  Nop(2);               // 2 bytes
  // The data must be properly aligned, so it can be patched atomically (see
  // {PatchFarJumpSlot}).
  DCHECK_EQ(start_offset + kSystemPointerSize, pc_offset());
  USE(start_offset);
  bind(&data);
  dq(target);  // 8 bytes
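  // Sketch of one possible encoding of the far jump slot (illustrative only):
  //   FF 25 02 00 00 00   jmp [rip + 2]
  //   66 90               2-byte nop, pads the data to pointer alignment
  //   <target: 8 bytes>   patched atomically by {PatchFarJumpSlot}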
}

// static
void JumpTableAssembler::PatchFarJumpSlot(Address slot, Address target) {
  // The slot needs to be pointer-size aligned so we can atomically update it.
  DCHECK(IsAligned(slot, kSystemPointerSize));
  // The target is stored at offset 8 bytes into the slot; see {EmitFarJumpSlot}.
  reinterpret_cast<std::atomic<Address>*>(slot + kSystemPointerSize)
      ->store(target, std::memory_order_relaxed);
  // The update is atomic because the address is properly aligned.
  // Because of cache coherence, the data update will eventually be seen by all
  // cores. It's ok if they temporarily jump to the old target.
}

void JumpTableAssembler::NopBytes(int bytes) {
  DCHECK_LE(0, bytes);
  Nop(bytes);
}

#elif V8_TARGET_ARCH_IA32
void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
                                                 Address lazy_compile_target) {
  mov(kWasmCompileLazyFuncIndexRegister, func_index);  // 5 bytes
  jmp(lazy_compile_target, RelocInfo::NONE);           // 5 bytes
}

bool JumpTableAssembler::EmitJumpSlot(Address target) {
  jmp(target, RelocInfo::NONE);
  return true;
}

void JumpTableAssembler::EmitFarJumpSlot(Address target) {
  jmp(target, RelocInfo::NONE);
}

// static
void JumpTableAssembler::PatchFarJumpSlot(Address slot, Address target) {
  UNREACHABLE();
}

void JumpTableAssembler::NopBytes(int bytes) {
  DCHECK_LE(0, bytes);
  Nop(bytes);
}

#elif V8_TARGET_ARCH_ARM
void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
                                                 Address lazy_compile_target) {
  // Load function index to a register.
  // This generates [movw, movt] on ARMv7 and later, [ldr, constant pool marker,
  // constant] on ARMv6.
  Move32BitImmediate(kWasmCompileLazyFuncIndexRegister, Operand(func_index));
  // EmitJumpSlot emits either [b], [movw, movt, mov] (ARMv7+), or [ldr,
  // constant]. In total, this is <=5 instructions in all cases.
  // TODO(arm): Optimize this for code size; lazy compile is not performance
  // critical, as it's only executed once per function.
  EmitJumpSlot(lazy_compile_target);
}

bool JumpTableAssembler::EmitJumpSlot(Address target) {
  // Note that {Move32BitImmediate} emits [ldr, constant] for the relocation
  // mode used below; we need this to allow concurrent patching of this slot.
  Move32BitImmediate(pc, Operand(target, RelocInfo::WASM_CALL));
  CheckConstPool(true, false);  // force emit of const pool
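  // The slot thus consists of a single "ldr pc, [pc, #offset]" plus the
  // literal word holding {target}; retargeting the slot can then be done by
  // overwriting just that one aligned word.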
  return true;
}

void JumpTableAssembler::EmitFarJumpSlot(Address target) {
  // Load from [pc + kInstrSize] to pc. Note that {pc} points two instructions
  // after the currently executing one.
  ldr_pcrel(pc, -kInstrSize);  // 1 instruction
  dd(target);                  // 4 bytes (== 1 instruction)
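  // Worked example: the {ldr} sits at slot offset 0 and reads {pc} as offset 8
  // (two instructions ahead), so an offset of -kInstrSize loads from slot
  // offset 4, which is exactly the {dd(target)} word emitted above.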
  STATIC_ASSERT(kInstrSize == kInt32Size);
  STATIC_ASSERT(kFarJumpTableSlotSize == 2 * kInstrSize);
}

// static
void JumpTableAssembler::PatchFarJumpSlot(Address slot, Address target) {
  UNREACHABLE();
}

void JumpTableAssembler::NopBytes(int bytes) {
  DCHECK_LE(0, bytes);
  DCHECK_EQ(0, bytes % kInstrSize);
  for (; bytes > 0; bytes -= kInstrSize) {
    nop();
  }
}

#elif V8_TARGET_ARCH_ARM64
void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
                                                 Address lazy_compile_target) {
  int start = pc_offset();
  CodeEntry();                                             // 0-1 instr
  Mov(kWasmCompileLazyFuncIndexRegister.W(), func_index);  // 1-2 instr
  Jump(lazy_compile_target, RelocInfo::NONE);              // 1 instr
  int nop_bytes = start + kLazyCompileTableSlotSize - pc_offset();
  DCHECK(nop_bytes == 0 || nop_bytes == kInstrSize);
  if (nop_bytes) nop();
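  // The padding keeps every lazy-compile slot at exactly
  // kLazyCompileTableSlotSize bytes, so slots can be addressed purely by
  // function index.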
}

bool JumpTableAssembler::EmitJumpSlot(Address target) {
  if (!TurboAssembler::IsNearCallOffset(
          (reinterpret_cast<byte*>(target) - pc_) / kInstrSize)) {
    return false;
  }
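  // Out-of-range targets cannot be reached with a single direct branch
  // (roughly +-128 MB on arm64); callers are expected to route such jumps
  // through the far jump table instead.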

  CodeEntry();

  Jump(target, RelocInfo::NONE);
  return true;
}

void JumpTableAssembler::EmitFarJumpSlot(Address target) {
  // This code uses hard-coded registers and instructions (and avoids
  // {UseScratchRegisterScope} or {InstructionAccurateScope}) because this code
  // will only be called for the very specific runtime slot table, and we want
  // to have maximum control over the generated code.
  // Do not reuse this code without validating that the same assumptions hold.
  CodeEntry();  // 0-1 instructions
  constexpr Register kTmpReg = x16;
  DCHECK(TmpList()->IncludesAliasOf(kTmpReg));
  int kOffset = ENABLE_CONTROL_FLOW_INTEGRITY_BOOL ? 3 : 2;
  // Load from [pc + kOffset * kInstrSize] to {kTmpReg}, then branch there.
  ldr_pcrel(kTmpReg, kOffset);  // 1 instruction
  br(kTmpReg);                  // 1 instruction
#ifdef V8_ENABLE_CONTROL_FLOW_INTEGRITY
  nop();       // To keep the target below aligned to kSystemPointerSize.
#endif
  dq(target);  // 8 bytes (== 2 instructions)
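  // Slot layout sketch (illustrative; {CodeEntry} is assumed to emit a BTI
  // landing pad when CFI is enabled, each cell below is one 4-byte instruction
  // slot):
  //   without CFI: [ldr x16, <lit>] [br x16] [target lo] [target hi]
  //   with CFI:    [bti c] [ldr x16, <lit>] [br x16] [nop] [target lo] [target hi]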
  STATIC_ASSERT(2 * kInstrSize == kSystemPointerSize);
  const int kSlotCount = ENABLE_CONTROL_FLOW_INTEGRITY_BOOL ? 6 : 4;
  STATIC_ASSERT(kFarJumpTableSlotSize == kSlotCount * kInstrSize);
}

// static
void JumpTableAssembler::PatchFarJumpSlot(Address slot, Address target) {
  // See {EmitFarJumpSlot} for the offset of the target (16 bytes with
  // CFI enabled, 8 bytes otherwise).
  int kTargetOffset =
      ENABLE_CONTROL_FLOW_INTEGRITY_BOOL ? 4 * kInstrSize : 2 * kInstrSize;
  // The slot needs to be pointer-size aligned so we can atomically update it.
  DCHECK(IsAligned(slot + kTargetOffset, kSystemPointerSize));
  reinterpret_cast<std::atomic<Address>*>(slot + kTargetOffset)
      ->store(target, std::memory_order_relaxed);
  // The data update is guaranteed to be atomic since it is properly aligned
  // and stores a single machine word. This update will eventually be observed
  // by any concurrent [ldr] on the same address because of data cache
  // coherence. It's ok if other cores temporarily jump to the old target.
}

void JumpTableAssembler::NopBytes(int bytes) {
  DCHECK_LE(0, bytes);
  DCHECK_EQ(0, bytes % kInstrSize);
  for (; bytes > 0; bytes -= kInstrSize) {
    nop();
  }
}

#elif V8_TARGET_ARCH_S390X
void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
                                                 Address lazy_compile_target) {
  // Load function index to r7. 6 bytes
  lgfi(kWasmCompileLazyFuncIndexRegister, Operand(func_index));
  // Jump to {lazy_compile_target}. 6 bytes or 12 bytes
  mov(r1, Operand(lazy_compile_target));
  b(r1);  // 2 bytes
}

bool JumpTableAssembler::EmitJumpSlot(Address target) {
  mov(r1, Operand(target));
  b(r1);
  return true;
}

void JumpTableAssembler::EmitFarJumpSlot(Address target) {
  JumpToInstructionStream(target);
}

// static
void JumpTableAssembler::PatchFarJumpSlot(Address slot, Address target) {
  UNREACHABLE();
}

void JumpTableAssembler::NopBytes(int bytes) {
  DCHECK_LE(0, bytes);
  DCHECK_EQ(0, bytes % 2);
  for (; bytes > 0; bytes -= 2) {
    nop(0);
  }
}

#elif V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64
void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
                                                 Address lazy_compile_target) {
  int start = pc_offset();
  li(kWasmCompileLazyFuncIndexRegister, func_index);  // max. 2 instr
  // Jump produces max. 4 instructions on 32-bit platforms
  // and max. 6 instructions on 64-bit platforms.
  Jump(lazy_compile_target, RelocInfo::NONE);
  int nop_bytes = start + kLazyCompileTableSlotSize - pc_offset();
  DCHECK_EQ(nop_bytes % kInstrSize, 0);
  for (int i = 0; i < nop_bytes; i += kInstrSize) nop();
}

bool JumpTableAssembler::EmitJumpSlot(Address target) {
  PatchAndJump(target);
  return true;
}

void JumpTableAssembler::EmitFarJumpSlot(Address target) {
  JumpToInstructionStream(target);
}

// static
void JumpTableAssembler::PatchFarJumpSlot(Address slot, Address target) {
  UNREACHABLE();
}

void JumpTableAssembler::NopBytes(int bytes) {
  DCHECK_LE(0, bytes);
  DCHECK_EQ(0, bytes % kInstrSize);
  for (; bytes > 0; bytes -= kInstrSize) {
    nop();
  }
}

#elif V8_TARGET_ARCH_PPC64
void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
                                                 Address lazy_compile_target) {
  int start = pc_offset();
  // Load function index to register. max 5 instrs
  mov(kWasmCompileLazyFuncIndexRegister, Operand(func_index));
  // Jump to {lazy_compile_target}. max 5 instrs
  mov(r0, Operand(lazy_compile_target));
  mtctr(r0);
  bctr();
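  // mtctr moves the branch target into the count register (CTR); bctr then
  // performs an indirect branch through it.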
  int nop_bytes = start + kLazyCompileTableSlotSize - pc_offset();
  DCHECK_EQ(nop_bytes % kInstrSize, 0);
  for (int i = 0; i < nop_bytes; i += kInstrSize) nop();
}

bool JumpTableAssembler::EmitJumpSlot(Address target) {
  mov(r0, Operand(target));
  mtctr(r0);
  bctr();
  return true;
}

void JumpTableAssembler::EmitFarJumpSlot(Address target) {
  JumpToInstructionStream(target);
}

// static
void JumpTableAssembler::PatchFarJumpSlot(Address slot, Address target) {
  UNREACHABLE();
}

void JumpTableAssembler::NopBytes(int bytes) {
  DCHECK_LE(0, bytes);
  DCHECK_EQ(0, bytes % 4);
  for (; bytes > 0; bytes -= 4) {
    nop(0);
  }
}

#else
#error Unknown architecture.
#endif

}  // namespace wasm
}  // namespace internal
}  // namespace v8