Commit dc88bdf3 authored by Junliang Yan, committed by V8 LUCI CQ

ppc/s390: [isolate-data] Split builtin tables into tiers

Port 06af754c

Original Message:
  .. for more efficient access to builtins from generated code.

  Root-relative accesses tend to be faster and produce more compact
  code when the root-relative offset is small. IsolateData contains
  a few large tables (roots, external references, builtins), resulting
  in very large offsets in general.

  This CL starts by splitting the builtin table into tiers: tier 0
  is a minimal set of perf-critical builtins that should be cheap to
  access. The offset to tier 0 builtins is guaranteed to be small.

  The full builtin table also remains in IsolateData for occasions in
  which we need to lookup builtins by index.

  In future work, we can also split external references and roots into
  tiers.

  On x64, this reduces deopt exit sizes from 7 to 4 bytes and from 12
  to 9 bytes (dynamic map checks / EagerWithResume deopts).

Change-Id: I021d60b20b783da170987ffcf0327b93206f7e5d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3172917
Reviewed-by: Milad Fa <mfarazma@redhat.com>
Commit-Queue: Junliang Yan <junyan@redhat.com>
Cr-Commit-Position: refs/heads/main@{#76967}
parent d7dde472
......@@ -187,15 +187,14 @@ void TurboAssembler::Jump(Handle<Code> code, RelocInfo::Mode rmode,
DCHECK_IMPLIES(options().isolate_independent_code,
Builtins::IsIsolateIndependentBuiltin(*code));
Builtin builtin_index = Builtin::kNoBuiltinId;
Builtin builtin = Builtin::kNoBuiltinId;
bool target_is_builtin =
isolate()->builtins()->IsBuiltinHandle(code, &builtin_index);
isolate()->builtins()->IsBuiltinHandle(code, &builtin);
if (root_array_available_ && options().isolate_independent_code) {
Label skip;
Register scratch = ip;
int offset = static_cast<int>(code->builtin_id()) * kSystemPointerSize +
IsolateData::builtin_entry_table_offset();
int offset = IsolateData::BuiltinEntrySlotOffset(code->builtin_id());
LoadU64(scratch, MemOperand(kRootRegister, offset), r0);
if (cond != al) b(NegateCondition(cond), &skip, cr);
Jump(scratch);
......@@ -204,10 +203,10 @@ void TurboAssembler::Jump(Handle<Code> code, RelocInfo::Mode rmode,
} else if (options().inline_offheap_trampolines && target_is_builtin) {
// Inline the trampoline.
Label skip;
RecordCommentForOffHeapTrampoline(builtin_index);
RecordCommentForOffHeapTrampoline(builtin);
// Use ip directly instead of using UseScratchRegisterScope, as we do
// not preserve scratch registers across calls.
mov(ip, Operand(BuiltinEntry(builtin_index), RelocInfo::OFF_HEAP_TARGET));
mov(ip, Operand(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET));
if (cond != al) b(NegateCondition(cond), &skip, cr);
Jump(ip);
bind(&skip);
......@@ -274,14 +273,13 @@ void TurboAssembler::Call(Handle<Code> code, RelocInfo::Mode rmode,
DCHECK_IMPLIES(options().use_pc_relative_calls_and_jumps,
Builtins::IsIsolateIndependentBuiltin(*code));
Builtin builtin_index = Builtin::kNoBuiltinId;
Builtin builtin = Builtin::kNoBuiltinId;
bool target_is_builtin =
isolate()->builtins()->IsBuiltinHandle(code, &builtin_index);
isolate()->builtins()->IsBuiltinHandle(code, &builtin);
if (root_array_available_ && options().isolate_independent_code) {
Label skip;
int offset = static_cast<int>(code->builtin_id()) * kSystemPointerSize +
IsolateData::builtin_entry_table_offset();
int offset = IsolateData::BuiltinEntrySlotOffset(code->builtin_id());
LoadU64(ip, MemOperand(kRootRegister, offset));
if (cond != al) b(NegateCondition(cond), &skip);
Call(ip);
......@@ -289,14 +287,7 @@ void TurboAssembler::Call(Handle<Code> code, RelocInfo::Mode rmode,
return;
} else if (options().inline_offheap_trampolines && target_is_builtin) {
// Inline the trampoline.
RecordCommentForOffHeapTrampoline(builtin_index);
// Use ip directly instead of using UseScratchRegisterScope, as we do
// not preserve scratch registers across calls.
mov(ip, Operand(BuiltinEntry(builtin_index), RelocInfo::OFF_HEAP_TARGET));
Label skip;
if (cond != al) b(NegateCondition(cond), &skip);
Call(ip);
bind(&skip);
CallBuiltin(builtin, cond);
return;
}
DCHECK(code->IsExecutable());
......@@ -304,6 +295,18 @@ void TurboAssembler::Call(Handle<Code> code, RelocInfo::Mode rmode,
Call(static_cast<Address>(target_index), rmode, cond);
}
// Emits a (possibly conditional) call to the entry of |builtin|.
// The entry address is loaded into ip as an OFF_HEAP_TARGET operand and the
// call goes through ip. When |cond| is not `al`, a branch on the negated
// condition is emitted first so that the call is skipped if |cond| does not
// hold; when |cond| is `al`, no branch is emitted.
void TurboAssembler::CallBuiltin(Builtin builtin, Condition cond) {
ASM_CODE_COMMENT_STRING(this, CommentForOffHeapTrampoline("call", builtin));
DCHECK(Builtins::IsBuiltinId(builtin));
// Use ip directly instead of using UseScratchRegisterScope, as we do not
// preserve scratch registers across calls.
mov(ip, Operand(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET));
Label skip;
// Conditional call: jump over the call below when |cond| is not satisfied.
if (cond != al) b(NegateCondition(cond), &skip);
Call(ip);
// Landing point for the skip branch (bound even for unconditional calls).
bind(&skip);
}
void TurboAssembler::Drop(int count) {
if (count > 0) {
AddS64(sp, sp, Operand(count * kSystemPointerSize), r0);
......@@ -3646,8 +3649,9 @@ void TurboAssembler::CallForDeoptimization(Builtin target, int, Label* exit,
DeoptimizeKind kind, Label* ret,
Label*) {
BlockTrampolinePoolScope block_trampoline_pool(this);
CHECK_LE(target, Builtins::kLastTier0);
LoadU64(ip, MemOperand(kRootRegister,
IsolateData::builtin_entry_slot_offset(target)));
IsolateData::BuiltinEntrySlotOffset(target)));
Call(ip);
DCHECK_EQ(SizeOfCodeGeneratedSince(exit),
(kind == DeoptimizeKind::kLazy)
......
......@@ -49,6 +49,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
public:
using TurboAssemblerBase::TurboAssemblerBase;
void CallBuiltin(Builtin builtin, Condition cond);
void Popcnt32(Register dst, Register src);
void Popcnt64(Register dst, Register src);
// Converts the integer (untagged smi) in |src| to a double, storing
......
......@@ -416,14 +416,14 @@ void TurboAssembler::Jump(Handle<Code> code, RelocInfo::Mode rmode,
DCHECK_IMPLIES(options().isolate_independent_code,
Builtins::IsIsolateIndependentBuiltin(*code));
Builtin builtin_index = Builtin::kNoBuiltinId;
Builtin builtin = Builtin::kNoBuiltinId;
bool target_is_builtin =
isolate()->builtins()->IsBuiltinHandle(code, &builtin_index);
isolate()->builtins()->IsBuiltinHandle(code, &builtin);
if (options().inline_offheap_trampolines && target_is_builtin) {
// Inline the trampoline.
RecordCommentForOffHeapTrampoline(builtin_index);
mov(ip, Operand(BuiltinEntry(builtin_index), RelocInfo::OFF_HEAP_TARGET));
RecordCommentForOffHeapTrampoline(builtin);
mov(ip, Operand(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET));
b(cond, ip);
return;
}
......@@ -474,21 +474,28 @@ void TurboAssembler::Call(Handle<Code> code, RelocInfo::Mode rmode,
DCHECK_IMPLIES(options().isolate_independent_code,
Builtins::IsIsolateIndependentBuiltin(*code));
Builtin builtin_index = Builtin::kNoBuiltinId;
Builtin builtin = Builtin::kNoBuiltinId;
bool target_is_builtin =
isolate()->builtins()->IsBuiltinHandle(code, &builtin_index);
isolate()->builtins()->IsBuiltinHandle(code, &builtin);
if (target_is_builtin && options().inline_offheap_trampolines) {
// Inline the trampoline.
RecordCommentForOffHeapTrampoline(builtin_index);
mov(ip, Operand(BuiltinEntry(builtin_index), RelocInfo::OFF_HEAP_TARGET));
Call(ip);
CallBuiltin(builtin);
return;
}
DCHECK(code->IsExecutable());
call(code, rmode);
}
// Emits an unconditional call to the entry of |builtin|: the entry address is
// loaded into ip as an OFF_HEAP_TARGET operand, then called through ip.
// |builtin| must be a valid builtin id (checked with DCHECK).
void TurboAssembler::CallBuiltin(Builtin builtin) {
ASM_CODE_COMMENT_STRING(this, CommentForOffHeapTrampoline("call", builtin));
DCHECK(Builtins::IsBuiltinId(builtin));
// Use ip directly instead of using UseScratchRegisterScope, as we do not
// preserve scratch registers across calls.
mov(ip, Operand(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET));
Call(ip);
}
void TurboAssembler::Drop(int count) {
if (count > 0) {
int total = count * kSystemPointerSize;
......@@ -4779,8 +4786,9 @@ void TurboAssembler::StoreReturnAddressAndCall(Register target) {
void TurboAssembler::CallForDeoptimization(Builtin target, int, Label* exit,
DeoptimizeKind kind, Label* ret,
Label*) {
ASM_CODE_COMMENT(this);
LoadU64(ip, MemOperand(kRootRegister,
IsolateData::builtin_entry_slot_offset(target)));
IsolateData::BuiltinEntrySlotOffset(target)));
Call(ip);
DCHECK_EQ(SizeOfCodeGeneratedSince(exit),
(kind == DeoptimizeKind::kLazy)
......
......@@ -44,6 +44,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
public:
using TurboAssemblerBase::TurboAssemblerBase;
void CallBuiltin(Builtin builtin);
void AtomicCmpExchangeHelper(Register addr, Register output,
Register old_value, Register new_value,
int start, int end, int shift_amount, int offset,
......
......@@ -2,15 +2,24 @@
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "src/codegen/assembler-inl.h"
#include "src/codegen/macro-assembler.h"
#include "src/codegen/register-configuration.h"
#include "src/codegen/safepoint-table.h"
#include "src/deoptimizer/deoptimizer.h"
#include "src/execution/isolate-data.h"
namespace v8 {
namespace internal {
// The deopt exit sizes below depend on the following IsolateData layout
// guarantees:
//
// ASSERT_OFFSET(B) checks at compile time that the start offset of the tier-0
// builtin entry table plus B's integer index times kSystemPointerSize is at
// most 0x1000. It is applied to the four deoptimization entry builtins.
// NOTE(review): 0x1000 is presumably the largest root-relative offset for
// which the deopt exit's load keeps a fixed size -- TODO confirm.
#define ASSERT_OFFSET(BuiltinName) \
STATIC_ASSERT(IsolateData::builtin_tier0_entry_table_offset() + \
Builtins::ToInt(BuiltinName) * kSystemPointerSize <= \
0x1000)
ASSERT_OFFSET(Builtin::kDeoptimizationEntry_Eager);
ASSERT_OFFSET(Builtin::kDeoptimizationEntry_Lazy);
ASSERT_OFFSET(Builtin::kDeoptimizationEntry_Soft);
ASSERT_OFFSET(Builtin::kDeoptimizationEntry_Bailout);
// The macro is only needed for the checks above.
#undef ASSERT_OFFSET
const bool Deoptimizer::kSupportsFixedDeoptExitSizes = true;
const int Deoptimizer::kNonLazyDeoptExitSize = 3 * kInstrSize;
const int Deoptimizer::kLazyDeoptExitSize = 3 * kInstrSize;
......
......@@ -3,10 +3,23 @@
// found in the LICENSE file.
#include "src/deoptimizer/deoptimizer.h"
#include "src/execution/isolate-data.h"
namespace v8 {
namespace internal {
// The deopt exit sizes below depend on the following IsolateData layout
// guarantees:
//
// ASSERT_OFFSET(B) checks at compile time that the start offset of the tier-0
// builtin entry table plus B's integer index times kSystemPointerSize is at
// most 0x1000. It is applied to the four deoptimization entry builtins.
// NOTE(review): 0x1000 is presumably the largest root-relative offset for
// which the deopt exit's load keeps a fixed size -- TODO confirm.
#define ASSERT_OFFSET(BuiltinName) \
STATIC_ASSERT(IsolateData::builtin_tier0_entry_table_offset() + \
Builtins::ToInt(BuiltinName) * kSystemPointerSize <= \
0x1000)
ASSERT_OFFSET(Builtin::kDeoptimizationEntry_Eager);
ASSERT_OFFSET(Builtin::kDeoptimizationEntry_Lazy);
ASSERT_OFFSET(Builtin::kDeoptimizationEntry_Soft);
ASSERT_OFFSET(Builtin::kDeoptimizationEntry_Bailout);
// The macro is only needed for the checks above.
#undef ASSERT_OFFSET
const bool Deoptimizer::kSupportsFixedDeoptExitSizes = true;
const int Deoptimizer::kNonLazyDeoptExitSize = 6 + 2;
const int Deoptimizer::kLazyDeoptExitSize = 6 + 2;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment