Commit c4179d7c authored by Georgia Kouveli, committed by Commit Bot

[arm64] Use direct calls for WebAssembly

This requires changing the way stubs and builtins are encoded in tags, as for
arm64 we only have 26 bits to encode a PC-relative offset. With the previous
encoding scheme the builtin ids were shifted by 16 bits and ended up exceeding
this range.

Change-Id: I0f396390a622ea67b890d2dd47ca12e00092e204
Reviewed-on: https://chromium-review.googlesource.com/1059209
Commit-Queue: Georgia Kouveli <georgia.kouveli@arm.com>
Reviewed-by: Ben Titzer <titzer@chromium.org>
Reviewed-by: Clemens Hammacher <clemensh@chromium.org>
Cr-Commit-Position: refs/heads/master@{#53262}
parent 5330111d
......@@ -697,10 +697,6 @@ class Assembler : public AssemblerBase {
Address pc, Address target,
RelocInfo::Mode mode = RelocInfo::INTERNAL_REFERENCE);
// TODO(arm64): This is only needed until direct calls are supported in
// WebAssembly for ARM64.
void set_code_in_js_code_space(bool) {}
// Here we are patching the address in the constant pool, not the actual call
// instruction. The address in the constant pool is the same size as a
// pointer.
......
......@@ -554,7 +554,6 @@ Assembler::Assembler(IsolateData isolate_data, void* buffer, int buffer_size)
const_pool_blocked_nesting_ = 0;
veneer_pool_blocked_nesting_ = 0;
code_target_sharing_blocked_nesting_ = 0;
near_branches_allowed_ = true;
Reset();
}
......@@ -598,20 +597,13 @@ void Assembler::AllocateAndInstallRequestedHeapObjects(Isolate* isolate) {
case HeapObjectRequest::kCodeStub: {
request.code_stub()->set_isolate(isolate);
Instruction* instr = reinterpret_cast<Instruction*>(pc);
// TODO(arm64): Only keep the else part when direct calls are supported
// for WebAssembly.
if (instr->IsLdrLiteralX()) {
Memory::Address_at(target_pointer_address_at(pc)) =
request.code_stub()->GetCode().address();
} else {
DCHECK(instr->IsBranchAndLink() || instr->IsUnconditionalBranch());
DCHECK_GE(instr->ImmPCOffset(), 0);
DCHECK_EQ(instr->ImmPCOffset() % kInstructionSize, 0);
DCHECK_LT(instr->ImmPCOffset() >> kInstructionSizeLog2,
code_targets_.size());
code_targets_[instr->ImmPCOffset() >> kInstructionSizeLog2] =
request.code_stub()->GetCode();
}
DCHECK(instr->IsBranchAndLink() || instr->IsUnconditionalBranch());
DCHECK_GE(instr->ImmPCOffset(), 0);
DCHECK_EQ(instr->ImmPCOffset() % kInstructionSize, 0);
DCHECK_LT(instr->ImmPCOffset() >> kInstructionSizeLog2,
code_targets_.size());
code_targets_[instr->ImmPCOffset() >> kInstructionSizeLog2] =
request.code_stub()->GetCode();
break;
}
}
......
......@@ -961,16 +961,6 @@ class Assembler : public AssemblerBase {
// for the input HeapObjectRequest.
void near_call(HeapObjectRequest request);
// TODO(arm64): This is only needed until direct calls are supported in
// WebAssembly for ARM64.
// Only allow near calls and jumps when the code is in the JavaScript code
// space.
void set_code_in_js_code_space(bool value) {
set_near_branches_allowed(value);
}
void set_near_branches_allowed(bool value) { near_branches_allowed_ = value; }
bool near_branches_allowed() const { return near_branches_allowed_; }
// Return the address in the constant pool of the code target address used by
// the branch/call instruction at pc.
inline static Address target_pointer_address_at(Address pc);
......@@ -3189,22 +3179,6 @@ class Assembler : public AssemblerBase {
DISALLOW_IMPLICIT_CONSTRUCTORS(BlockConstPoolScope);
};
// Class for disabling near calls and jumps. This scope does not deal with
// nesting.
class FarBranchesOnlyScope {
public:
explicit FarBranchesOnlyScope(Assembler* assem) : assem_(assem) {
DCHECK(assem_->near_branches_allowed());
assem_->set_near_branches_allowed(false);
}
~FarBranchesOnlyScope() { assem_->set_near_branches_allowed(true); }
private:
Assembler* assem_;
DISALLOW_IMPLICIT_CONSTRUCTORS(FarBranchesOnlyScope);
};
// Check if is time to emit a constant pool.
void CheckConstPool(bool force_emit, bool require_jump);
......@@ -3562,9 +3536,6 @@ class Assembler : public AssemblerBase {
return code_target_sharing_blocked_nesting_ == 0;
}
// Allow generation of near calls and jumps.
bool near_branches_allowed_;
// Relocation info generation
// Each relocation is encoded as a variable size value
static constexpr int kMaxRelocSize = RelocInfoWriter::kMaxSize;
......
......@@ -1749,18 +1749,10 @@ void TurboAssembler::CallStubDelayed(CodeStub* stub) {
Label start_call;
Bind(&start_call);
#endif
if (near_branches_allowed()) {
Operand operand = Operand::EmbeddedCode(stub);
near_call(operand.heap_object_request());
} else {
UseScratchRegisterScope temps(this);
Register temp = temps.AcquireX();
Ldr(temp, Operand::EmbeddedCode(stub));
Blr(temp);
}
Operand operand = Operand::EmbeddedCode(stub);
near_call(operand.heap_object_request());
#ifdef DEBUG
AssertSizeOfCodeGeneratedSince(
&start_call, near_branches_allowed() ? kNearCallSize : kFarCallSize);
AssertSizeOfCodeGeneratedSince(&start_call, kNearCallSize);
#endif
}
......@@ -1966,13 +1958,29 @@ void TurboAssembler::JumpHelper(int64_t offset, RelocInfo::Mode rmode,
Bind(&done);
}
namespace {

// Returns the value to encode in the branch instruction's immediate:
// * the 'target' input unmodified if this is a WASM call (the target is
//   still a function index at this point, patched to a real address later), or
// * the offset of the target from the current PC, in instructions, for any
//   other type of call.
// Note: 'static' is redundant for a function in an anonymous namespace
// (internal linkage either way), so it is omitted.
int64_t CalculateTargetOffset(Address target, RelocInfo::Mode rmode,
                              byte* pc) {
  int64_t offset = static_cast<int64_t>(target);
  if (rmode != RelocInfo::WASM_CALL) {
    offset -= reinterpret_cast<int64_t>(pc);
    // Branch targets must be instruction-aligned.
    DCHECK_EQ(offset % kInstructionSize, 0);
    offset = offset / static_cast<int>(kInstructionSize);
  }
  return offset;
}

}  // namespace
void TurboAssembler::Jump(Address target, RelocInfo::Mode rmode,
Condition cond) {
int64_t offset =
static_cast<int64_t>(target) - reinterpret_cast<int64_t>(pc_);
DCHECK_EQ(offset % kInstructionSize, 0);
offset = offset / static_cast<int>(kInstructionSize);
JumpHelper(offset, rmode, cond);
JumpHelper(CalculateTargetOffset(target, rmode, pc_), rmode, cond);
}
void TurboAssembler::Jump(Handle<Code> code, RelocInfo::Mode rmode,
......@@ -2019,9 +2027,7 @@ void TurboAssembler::Call(Address target, RelocInfo::Mode rmode) {
#endif
if (CanUseNearCallOrJump(rmode)) {
int64_t offset =
static_cast<int64_t>(target) - reinterpret_cast<int64_t>(pc_);
offset = offset / static_cast<int>(kInstructionSize);
int64_t offset = CalculateTargetOffset(target, rmode, pc_);
DCHECK(IsNearCallOffset(offset));
near_call(static_cast<int>(offset), rmode);
} else {
......
......@@ -220,10 +220,12 @@ class TurboAssembler : public Assembler {
bool allow_macro_instructions() const { return allow_macro_instructions_; }
#endif
// We should avoid using near calls or jumps when generating WebAssembly code
// or calls to WebAssembly from JavaScript.
// We should not use near calls or jumps for JS->WASM calls and calls to
// external references, since the code spaces are not guaranteed to be close
// to each other.
bool CanUseNearCallOrJump(RelocInfo::Mode rmode) {
return near_branches_allowed() && rmode != RelocInfo::JS_TO_WASM_CALL;
return rmode != RelocInfo::JS_TO_WASM_CALL &&
rmode != RelocInfo::EXTERNAL_REFERENCE;
}
// Activation support.
......
......@@ -3033,8 +3033,6 @@ void Builtins::Generate_ArgumentsAdaptorTrampoline(MacroAssembler* masm) {
}
void Builtins::Generate_WasmCompileLazy(MacroAssembler* masm) {
// TODO(arm64): Remove when direct calls are supported for WebAssembly.
Assembler::FarBranchesOnlyScope br_scope(masm);
{
FrameScope scope(masm, StackFrame::INTERNAL);
......
......@@ -90,12 +90,6 @@ CodeGenerator::CodeGenerator(Zone* codegen_zone, Frame* frame, Linkage* linkage,
code_kind == Code::WASM_FUNCTION) {
tasm_.enable_serializer();
}
// TODO(arm64): Remove when direct calls are supported in WebAssembly for
// ARM64.
tasm_.set_code_in_js_code_space(code_kind != Code::WASM_FUNCTION &&
code_kind != Code::WASM_TO_JS_FUNCTION &&
code_kind != Code::WASM_INTERPRETER_ENTRY &&
code_kind != Code::C_WASM_ENTRY);
}
bool CodeGenerator::wasm_runtime_exception_support() const {
......
......@@ -179,7 +179,12 @@ constexpr int kElidedFrameSlots = 0;
#endif
constexpr int kDoubleSizeLog2 = 3;
#if V8_TARGET_ARCH_ARM64
// ARM64 only supports direct calls within a 128 MB range.
constexpr size_t kMaxWasmCodeMemory = 128 * MB;
#else
constexpr size_t kMaxWasmCodeMemory = 256 * MB;
#endif
#if V8_HOST_ARCH_64_BIT
constexpr int kPointerSizeLog2 = 3;
......
......@@ -535,10 +535,6 @@ class Assembler : public AssemblerBase {
Address pc, Address target,
RelocInfo::Mode mode = RelocInfo::INTERNAL_REFERENCE);
// TODO(arm64): This is only needed until direct calls are supported in
// WebAssembly for ARM64.
void set_code_in_js_code_space(bool) {}
static constexpr int kSpecialTargetSize = kPointerSize;
// Distance between the address of the code target in the call instruction
......
......@@ -606,10 +606,6 @@ class Assembler : public AssemblerBase {
Address pc, Address target,
RelocInfo::Mode mode = RelocInfo::INTERNAL_REFERENCE);
// TODO(arm64): This is only needed until direct calls are supported in
// WebAssembly for ARM64.
void set_code_in_js_code_space(bool) {}
// Size of an instruction.
static constexpr int kInstrSize = sizeof(Instr);
......
......@@ -616,10 +616,6 @@ class Assembler : public AssemblerBase {
Address pc, Address target,
RelocInfo::Mode mode = RelocInfo::INTERNAL_REFERENCE);
// TODO(arm64): This is only needed until direct calls are supported in
// WebAssembly for ARM64.
void set_code_in_js_code_space(bool) {}
// Size of an instruction.
static constexpr int kInstrSize = sizeof(Instr);
......
......@@ -599,10 +599,6 @@ class Assembler : public AssemblerBase {
Address pc, Address target,
RelocInfo::Mode mode = RelocInfo::INTERNAL_REFERENCE);
// TODO(arm64): This is only needed until direct calls are supported in
// WebAssembly for ARM64.
void set_code_in_js_code_space(bool) {}
// Size of an instruction.
static constexpr int kInstrSize = sizeof(Instr);
......
......@@ -575,10 +575,6 @@ class Assembler : public AssemblerBase {
Address pc, Address target,
RelocInfo::Mode mode = RelocInfo::INTERNAL_REFERENCE);
// TODO(arm64): This is only needed until direct calls are supported in
// WebAssembly for ARM64.
void set_code_in_js_code_space(bool) {}
// Here we are patching the address in the IIHF/IILF instruction pair.
// These values are used in the serialization process and must be zero for
// S390 platform, as Code, Embedded Object or External-reference pointers
......
......@@ -337,11 +337,7 @@ void LiftoffAssembler::CacheState::Split(const CacheState& source) {
// TODO(clemensh): Provide a reasonably sized buffer, based on wasm function
// size.
LiftoffAssembler::LiftoffAssembler(Isolate* isolate)
: TurboAssembler(isolate, nullptr, 0, CodeObjectRequired::kNo) {
// TODO(arm64): Remove when direct calls are supported in WebAssembly for
// ARM64.
set_code_in_js_code_space(false);
}
: TurboAssembler(isolate, nullptr, 0, CodeObjectRequired::kNo) {}
LiftoffAssembler::~LiftoffAssembler() {
if (num_locals_ > kInlineLocalTypes) {
......
......@@ -14,6 +14,7 @@
#include "src/codegen.h"
#include "src/disassembler.h"
#include "src/globals.h"
#include "src/macro-assembler-inl.h"
#include "src/macro-assembler.h"
#include "src/objects-inl.h"
#include "src/wasm/function-compiler.h"
......@@ -60,6 +61,17 @@ void GenerateJumpTrampoline(MacroAssembler* masm, Address target) {
__ b(ip);
}
#undef __
#elif V8_TARGET_ARCH_ARM64
#define __ masm->
constexpr bool kModuleCanAllocateMoreMemory = false;
void GenerateJumpTrampoline(MacroAssembler* masm, Address target) {
  // Materialize the absolute target address in a scratch register and do an
  // indirect branch, since the target may be out of direct-branch range.
  UseScratchRegisterScope scope(masm);
  Register tmp = scope.AcquireX();
  __ Mov(tmp, reinterpret_cast<uint64_t>(target));
  __ Br(tmp);
}
#undef __
#else
const bool kModuleCanAllocateMoreMemory = true;
#endif
......@@ -575,7 +587,7 @@ WasmCode* NativeModule::AddCode(
return ret;
}
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_S390X
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_S390X || V8_TARGET_ARCH_ARM64
Address NativeModule::CreateTrampolineTo(Handle<Code> code) {
MacroAssembler masm(code->GetIsolate(), nullptr, 0, CodeObjectRequired::kNo);
Address dest = code->raw_instruction_start();
......
......@@ -120,9 +120,6 @@ class Reader {
constexpr size_t kVersionSize = 4 * sizeof(uint32_t);
// Start from 1 so an encoded stub id is not confused with an encoded builtin.
constexpr int kFirstStubId = 1;
void WriteVersion(Isolate* isolate, Writer* writer) {
writer->Write(SerializedData::ComputeMagicNumber(
isolate->heap()->external_reference_table()));
......@@ -141,11 +138,22 @@ bool IsSupportedVersion(Isolate* isolate, const Vector<const byte> version) {
// On Intel, call sites are encoded as a displacement. For linking and for
// serialization/deserialization, we want to store/retrieve a tag (the function
// index). On Intel, that means accessing the raw displacement. Everywhere else,
// that simply means accessing the target address.
// index). On Intel, that means accessing the raw displacement.
// On ARM64, call sites are encoded as either a literal load or a direct branch.
// Other platforms simply require accessing the target address.
void SetWasmCalleeTag(RelocInfo* rinfo, uint32_t tag) {
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
*(reinterpret_cast<uint32_t*>(rinfo->target_address_address())) = tag;
#elif V8_TARGET_ARCH_ARM64
Instruction* instr = reinterpret_cast<Instruction*>(rinfo->pc());
if (instr->IsLdrLiteralX()) {
Memory::Address_at(rinfo->constant_pool_entry_address()) =
static_cast<Address>(tag);
} else {
DCHECK(instr->IsBranchAndLink() || instr->IsUnconditionalBranch());
instr->SetBranchImmTarget(
reinterpret_cast<Instruction*>(rinfo->pc() + tag * kInstructionSize));
}
#else
Address addr = static_cast<Address>(tag);
if (rinfo->rmode() == RelocInfo::EXTERNAL_REFERENCE) {
......@@ -159,6 +167,15 @@ void SetWasmCalleeTag(RelocInfo* rinfo, uint32_t tag) {
uint32_t GetWasmCalleeTag(RelocInfo* rinfo) {
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_IA32
return *(reinterpret_cast<uint32_t*>(rinfo->target_address_address()));
#elif V8_TARGET_ARCH_ARM64
Instruction* instr = reinterpret_cast<Instruction*>(rinfo->pc());
if (instr->IsLdrLiteralX()) {
return static_cast<uint32_t>(
Memory::Address_at(rinfo->constant_pool_entry_address()));
} else {
DCHECK(instr->IsBranchAndLink() || instr->IsUnconditionalBranch());
return static_cast<uint32_t>(instr->ImmPCOffset() / kInstructionSize);
}
#else
Address addr = rinfo->rmode() == RelocInfo::EXTERNAL_REFERENCE
? rinfo->target_external_reference()
......@@ -183,6 +200,14 @@ constexpr size_t kCodeHeaderSize =
sizeof(size_t) + // protected instructions size
sizeof(WasmCode::Tier); // tier
// Bitfields used for encoding stub and builtin ids in a tag. We only use 26
// bits total as ARM64 can only encode 26 bits in branch immediate instructions.
// Bit 0 of the tag: true when the tag encodes a copied-stub id, false when it
// encodes a builtin id.
class IsStubIdField : public BitField<bool, 0, 1> {};
// Bits 1-25 of the tag: the stub or builtin id itself.
class StubOrBuiltinIdField
    : public BitField<uint32_t, IsStubIdField::kNext, 25> {};
// The whole tag must fit in the 26-bit immediate of an ARM64 branch
// instruction (see the comment above these fields).
static_assert(StubOrBuiltinIdField::kNext == 26,
              "ARM64 only supports 26 bits for this field");
} // namespace
class V8_EXPORT_PRIVATE NativeModuleSerializer {
......@@ -280,7 +305,7 @@ void NativeModuleSerializer::WriteCopiedStubs(Writer* writer) {
// Get the stub count from the number of keys.
size_t num_stubs = (stubs_size - sizeof(uint32_t)) / sizeof(uint32_t);
writer->Write(static_cast<uint32_t>(num_stubs));
uint32_t stub_id = kFirstStubId;
uint32_t stub_id = 0;
for (auto pair : native_module_->trampolines_) {
v8::internal::Code* code = Code::GetCodeFromTargetAddress(pair.first);
......@@ -383,13 +408,12 @@ uint32_t NativeModuleSerializer::EncodeBuiltinOrStub(Address address) {
if (builtin_iter != builtin_lookup_.end()) {
uint32_t id = builtin_iter->second;
DCHECK_LT(id, std::numeric_limits<uint16_t>::max());
tag = id << 16;
tag = IsStubIdField::encode(false) | StubOrBuiltinIdField::encode(id);
} else {
auto stub_iter = stub_lookup_.find(address);
DCHECK(stub_iter != stub_lookup_.end());
uint32_t id = stub_iter->second;
DCHECK_LT(id, std::numeric_limits<uint16_t>::max());
tag = id & 0x0000FFFF;
tag = IsStubIdField::encode(true) | StubOrBuiltinIdField::encode(id);
}
return tag;
}
......@@ -594,13 +618,12 @@ bool NativeModuleDeserializer::ReadCode(uint32_t fn_index, Reader* reader) {
}
Address NativeModuleDeserializer::GetTrampolineOrStubFromTag(uint32_t tag) {
if ((tag & 0x0000FFFF) == 0) {
int builtin_id = static_cast<int>(tag >> 16);
v8::internal::Code* builtin = isolate_->builtins()->builtin(builtin_id);
return native_module_->GetLocalAddressFor(handle(builtin));
uint32_t id = StubOrBuiltinIdField::decode(tag);
if (IsStubIdField::decode(tag)) {
return stubs_[id];
} else {
DCHECK_EQ(tag & 0xFFFF0000, 0);
return stubs_[tag - kFirstStubId];
v8::internal::Code* builtin = isolate_->builtins()->builtin(id);
return native_module_->GetLocalAddressFor(handle(builtin));
}
}
......
......@@ -493,10 +493,6 @@ class Assembler : public AssemblerBase {
inline Handle<Code> code_target_object_handle_at(Address pc);
inline Address runtime_entry_at(Address pc);
// TODO(arm64): This is only needed until direct calls are supported in
// WebAssembly for ARM64.
void set_code_in_js_code_space(bool) {}
// Number of bytes taken up by the branch target in the code.
static constexpr int kSpecialTargetSize = 4; // 32-bit displacement.
// Distance between the address of the code target in the call instruction
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment