Commit 3cd6705f authored by Clemens Hammacher, committed by Commit Bot

[wasm][x64] Use near jump in jump table

On x64, we allocate one big code region such that we can use near jumps
and near calls. The jump table did not make use of that design yet.
This CL changes that by emitting jump table slots as near jumps. This
also speeds up patching jump table slots significantly, since far jumps
populate the inline constant pool, which is unneeded overhead in this
case.
As a drive-by, this CL cleans up the API of near_call and near_jmp.
Their current semantics are broken, and only work because they are used
solely for WebAssembly calls, which are patched anyway after code
generation.
Also, x64 now uses the same path in test-jump-table-assembler.cc as
arm64 to ensure that all targets are within near-call distance.
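For illustration, a minimal sketch (not V8 code) of why near-jump slots are
cheap to patch: re-targeting only rewrites the four displacement bytes after
the 0xE9 opcode, and no constant pool entry is involved. The names `slot` and
`new_target` are hypothetical.

```cpp
// Minimal sketch, not V8 code: re-target a 5-byte near jmp (0xE9 + rel32).
#include <cstdint>
#include <cstring>

void PatchNearJmpSketch(uint8_t* slot, uint8_t* new_target) {
  constexpr int kNearJmpInstrSize = 5;
  // rel32 is relative to the instruction *after* the jmp.
  int32_t rel32 =
      static_cast<int32_t>(new_target - (slot + kNearJmpInstrSize));
  std::memcpy(slot + 1, &rel32, sizeof rel32);  // skip the 0xE9 opcode byte
}
```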

R=mstarzinger@chromium.org

Bug: v8:8916
Change-Id: Iffc34e248b72167307ffdab62dd2212c4ae86a32
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1561313
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Commit-Queue: Clemens Hammacher <clemensh@chromium.org>
Cr-Commit-Position: refs/heads/master@{#60777}
parent cdc7bd61
@@ -45,8 +45,12 @@ void JumpTableAssembler::EmitRuntimeStubSlot(Address builtin_target) {
 }
 
 void JumpTableAssembler::EmitJumpSlot(Address target) {
-  movq(kScratchRegister, static_cast<uint64_t>(target));
-  jmp(kScratchRegister);
+  // On x64, all code is allocated within a single code section, so we can use
+  // relative jumps.
+  static_assert(kMaxWasmCodeMemory <= size_t{2} * GB, "can use relative jump");
+  intptr_t displacement = static_cast<intptr_t>(
+      reinterpret_cast<byte*>(target) - pc_ - kNearJmpInstrSize);
+  near_jmp(displacement, RelocInfo::NONE);
 }
 
 void JumpTableAssembler::NopBytes(int bytes) {
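To make the displacement computation above concrete, here is a self-contained
worked example (with made-up addresses, not taken from the CL) showing that
{target - pc_ - kNearJmpInstrSize} is exactly the rel32 the CPU adds to the
address of the next instruction:

```cpp
// Worked example with hypothetical addresses: the rel32 emitted by near_jmp
// is measured from the end of the 5-byte jmp instruction.
#include <cassert>
#include <cstdint>

int main() {
  constexpr int kNearJmpInstrSize = 5;
  uintptr_t pc = 0x10000000;      // start of the jump-table slot (made up)
  uintptr_t target = 0x10400000;  // jump target in the same code region
  intptr_t displacement =
      static_cast<intptr_t>(target - pc) - kNearJmpInstrSize;
  // The CPU resolves: next instruction + rel32 == target.
  assert(pc + kNearJmpInstrSize + displacement == target);
  return 0;
}
```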
@@ -125,6 +129,9 @@ void JumpTableAssembler::EmitJumpSlot(Address target) {
   // TODO(wasm): Currently this is guaranteed to be a {near_call} and hence is
   // patchable concurrently. Once {kMaxWasmCodeMemory} is raised on ARM64, make
   // sure concurrent patching is still supported.
+  DCHECK(TurboAssembler::IsNearCallOffset(
+      (reinterpret_cast<byte*>(target) - pc_) / kInstrSize));
+
   Jump(target, RelocInfo::NONE);
 }
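The DCHECK relies on arm64's branch range: B/BL encode a signed 26-bit
immediate counted in 4-byte instructions, i.e. roughly ±128 MB. A rough
stand-in for that range check (an illustration, not V8's actual
TurboAssembler::IsNearCallOffset):

```cpp
// Sketch of a signed-26-bit range check for an offset given in instructions
// (assumption: arm64 B/BL immediates are int26).
#include <cstdint>

bool IsNearCallOffsetSketch(int64_t offset_in_instructions) {
  return offset_in_instructions >= -(int64_t{1} << 25) &&
         offset_in_instructions < (int64_t{1} << 25);
}
```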
@@ -1123,22 +1123,20 @@ void Assembler::call(Handle<Code> target, RelocInfo::Mode rmode) {
   emitl(code_target_index);
 }
 
-void Assembler::near_call(Address addr, RelocInfo::Mode rmode) {
+void Assembler::near_call(intptr_t disp, RelocInfo::Mode rmode) {
   EnsureSpace ensure_space(this);
   emit(0xE8);
-  intptr_t value = static_cast<intptr_t>(addr);
-  DCHECK(is_int32(value));
+  DCHECK(is_int32(disp));
   RecordRelocInfo(rmode);
-  emitl(static_cast<int32_t>(value));
+  emitl(static_cast<int32_t>(disp));
 }
 
-void Assembler::near_jmp(Address addr, RelocInfo::Mode rmode) {
+void Assembler::near_jmp(intptr_t disp, RelocInfo::Mode rmode) {
   EnsureSpace ensure_space(this);
   emit(0xE9);
-  intptr_t value = static_cast<intptr_t>(addr);
-  DCHECK(is_int32(value));
-  RecordRelocInfo(rmode);
-  emitl(static_cast<int32_t>(value));
+  DCHECK(is_int32(disp));
+  if (!RelocInfo::IsNone(rmode)) RecordRelocInfo(rmode);
+  emitl(static_cast<int32_t>(disp));
 }
 
 void Assembler::call(Register adr) {
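The emitted byte layout is the same for both helpers: a one-byte opcode (0xE8
for call, 0xE9 for jmp) followed by a little-endian 32-bit displacement. A toy
emitter illustrating that layout (a sketch, not V8's Assembler):

```cpp
// Toy emitter, not V8 code: append a near call/jmp to a byte buffer.
#include <cstdint>
#include <vector>

void EmitNearBranch(std::vector<uint8_t>* code, bool is_call, int32_t disp) {
  code->push_back(is_call ? 0xE8 : 0xE9);  // one-byte opcode
  for (int i = 0; i < 4; ++i) {
    code->push_back(static_cast<uint8_t>(disp >> (8 * i)));  // LE rel32
  }
}
```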
@@ -733,8 +733,13 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
   // Call near relative 32-bit displacement, relative to next instruction.
   void call(Label* L);
   void call(Address entry, RelocInfo::Mode rmode);
-  void near_call(Address entry, RelocInfo::Mode rmode);
-  void near_jmp(Address entry, RelocInfo::Mode rmode);
+
+  // Explicitly emit a near call / near jump. The displacement is relative to
+  // the next instruction (which starts at {pc_offset() + kNearJmpInstrSize}).
+  static constexpr int kNearJmpInstrSize = 5;
+  void near_call(intptr_t disp, RelocInfo::Mode rmode);
+  void near_jmp(intptr_t disp, RelocInfo::Mode rmode);
+
   void call(Handle<Code> target,
             RelocInfo::Mode rmode = RelocInfo::CODE_TARGET);
@@ -32,7 +32,7 @@ constexpr int kJumpTableSlotCount = 128;
 constexpr uint32_t kJumpTableSize =
     JumpTableAssembler::SizeForNumberOfSlots(kJumpTableSlotCount);
 
-#if V8_TARGET_ARCH_ARM64
+#if V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_X64
 constexpr uint32_t kAvailableBufferSlots =
     (kMaxWasmCodeMemory - kJumpTableSize) / AssemblerBase::kMinimalBufferSize;
 constexpr uint32_t kBufferSlotStartOffset =
@@ -45,7 +45,7 @@ Address GenerateJumpTableThunk(
     Address jump_target, byte* thunk_slot_buffer,
     std::bitset<kAvailableBufferSlots>* used_slots,
     std::vector<std::unique_ptr<TestingAssemblerBuffer>>* thunk_buffers) {
-#if V8_TARGET_ARCH_ARM64
+#if V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_X64
   // To guarantee that the branch range lies within the near-call range,
   // generate the thunk in the same (kMaxWasmCodeMemory-sized) buffer as the
   // jump_target itself.
@@ -65,9 +65,6 @@ Address GenerateJumpTableThunk(
   byte* buffer =
       thunk_slot_buffer + buffer_index * AssemblerBase::kMinimalBufferSize;
 
-  DCHECK(TurboAssembler::IsNearCallOffset(
-      (reinterpret_cast<byte*>(jump_target) - buffer) / kInstrSize));
-
 #else
   USE(thunk_slot_buffer);
   USE(used_slots);
@@ -202,7 +199,7 @@ class JumpTablePatcher : public v8::base::Thread {
 TEST(JumpTablePatchingStress) {
   constexpr int kNumberOfRunnerThreads = 5;
 
-#if V8_TARGET_ARCH_ARM64
+#if V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_X64
   // We need the branches (from GenerateJumpTableThunk) to be within near-call
   // range of the jump table slots. The address hint to AllocateAssemblerBuffer
   // is not reliable enough to guarantee that we can always achieve this with
@@ -218,6 +215,7 @@ TEST(JumpTablePatchingStress) {
   auto buffer = AllocateAssemblerBuffer(kJumpTableSize);
   byte* thunk_slot_buffer = nullptr;
 #endif
+  std::bitset<kAvailableBufferSlots> used_thunk_slots;
   buffer->MakeWritableAndExecutable();
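The test now carves thunks out of fixed-size slots inside the same
kMaxWasmCodeMemory-sized buffer, tracking occupancy in the bitset above. A
simplified version of that slot-picking idea (a hypothetical helper, not the
test's exact code, which draws random indices from V8's RNG):

```cpp
// Simplified slot allocator: pick a random slot, linear-probe on collision.
// `N` mirrors kAvailableBufferSlots; all names here are hypothetical.
#include <bitset>
#include <cstdlib>

template <size_t N>
size_t AllocateThunkSlot(std::bitset<N>* used_slots) {
  size_t slot = static_cast<size_t>(std::rand()) % N;
  while (used_slots->test(slot)) slot = (slot + 1) % N;  // find a free slot
  used_slots->set(slot);
  return slot;
}
```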