Commit 33e1a6e9 authored by Jakob Gruber, committed by Commit Bot

[compiler] Widen optimization for external reference loads

Turbofan applies the following optimization to external reference
loads on arm64 and x64: if the root-relative offset to an external
reference's address is known to be constant (and the root register has
been initialized), calculate the external reference as |kRootRegister
+ <offset>| instead of loading it from the external reference table.

There are two main cases to consider:

1. External references to arbitrary addresses in the native address
space, e.g. libc_memcpy. These kinds of external references have a
fixed address within the same running process, but may (and likely
will) change between processes (e.g.: mksnapshot and later chromium),
and the root-relative offset is different for each Isolate within the
same process.

These kinds of external references can be optimized as above when
*not* generating code which will later be serialized, and *not*
generating isolate-independent code.

2. External references to addresses within the fixed-size region of
the Isolate (essentially: within IsolateData). Since these move with
the Isolate, their root-relative offset is guaranteed to be constant
at all times.

The optimization can always be applied to these cases as long as the
root register has been initialized.

Prior to this CL, we only recognized and optimized for case 1. This CL
adds support for case 2 as well.

An example of improved code generated due to this CL:

Before:
// r13 is the kRootRegister on x64.
// 0x3010 is the root-relative offset to Isolate::context_address.
leaq rdx, [r13+0x3010]
movq r8, [rdx]

After:
movq rdx, [r13+0x3010]

Bug: v8:9534
Change-Id: Idfcca751e98a56c0e5ead2c701c12a677df75399
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1748727
Commit-Queue: Jakob Gruber <jgruber@chromium.org>
Auto-Submit: Jakob Gruber <jgruber@chromium.org>
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Cr-Commit-Position: refs/heads/master@{#63158}
parent 54eca658
...@@ -564,23 +564,21 @@ void EmitLoad(InstructionSelector* selector, Node* node, InstructionCode opcode, ...@@ -564,23 +564,21 @@ void EmitLoad(InstructionSelector* selector, Node* node, InstructionCode opcode,
// is used when we merge a conversion into the load. // is used when we merge a conversion into the load.
outputs[0] = g.DefineAsRegister(output == nullptr ? node : output); outputs[0] = g.DefineAsRegister(output == nullptr ? node : output);
if (selector->CanAddressRelativeToRootsRegister()) { ExternalReferenceMatcher m(base);
ExternalReferenceMatcher m(base); if (m.HasValue() && g.IsIntegerConstant(index) &&
if (m.HasValue() && g.IsIntegerConstant(index)) { selector->CanAddressRelativeToRootsRegister(m.Value())) {
ptrdiff_t const delta = ptrdiff_t const delta =
g.GetIntegerConstantValue(index) + g.GetIntegerConstantValue(index) +
TurboAssemblerBase::RootRegisterOffsetForExternalReference( TurboAssemblerBase::RootRegisterOffsetForExternalReference(
selector->isolate(), m.Value()); selector->isolate(), m.Value());
input_count = 1; input_count = 1;
// Check that the delta is a 32-bit integer due to the limitations of // Check that the delta is a 32-bit integer due to the limitations of
// immediate operands. // immediate operands.
if (is_int32(delta)) { if (is_int32(delta)) {
inputs[0] = g.UseImmediate(static_cast<int32_t>(delta)); inputs[0] = g.UseImmediate(static_cast<int32_t>(delta));
opcode |= AddressingModeField::encode(kMode_Root); opcode |= AddressingModeField::encode(kMode_Root);
selector->Emit(opcode, arraysize(outputs), outputs, input_count, selector->Emit(opcode, arraysize(outputs), outputs, input_count, inputs);
inputs); return;
return;
}
} }
} }
......
...@@ -421,9 +421,27 @@ void InstructionSelector::SetEffectLevel(Node* node, int effect_level) { ...@@ -421,9 +421,27 @@ void InstructionSelector::SetEffectLevel(Node* node, int effect_level) {
effect_level_[id] = effect_level; effect_level_[id] = effect_level;
} }
bool InstructionSelector::CanAddressRelativeToRootsRegister() const { bool InstructionSelector::CanAddressRelativeToRootsRegister(
return enable_roots_relative_addressing_ == kEnableRootsRelativeAddressing && const ExternalReference& reference) const {
CanUseRootsRegister(); // There are three things to consider here:
// 1. CanUseRootsRegister: Is kRootRegister initialized?
const bool root_register_is_available_and_initialized = CanUseRootsRegister();
if (!root_register_is_available_and_initialized) return false;
// 2. enable_roots_relative_addressing_: Can we address everything on the heap
// through the root register, i.e. are root-relative addresses to arbitrary
// addresses guaranteed not to change between code generation and
// execution?
const bool all_root_relative_offsets_are_constant =
(enable_roots_relative_addressing_ == kEnableRootsRelativeAddressing);
if (all_root_relative_offsets_are_constant) return true;
// 3. IsAddressableThroughRootRegister: Is the target address guaranteed to
// have a fixed root-relative offset? If so, we can ignore 2.
const bool this_root_relative_offset_is_constant =
TurboAssemblerBase::IsAddressableThroughRootRegister(isolate(),
reference);
return this_root_relative_offset_is_constant;
} }
bool InstructionSelector::CanUseRootsRegister() const { bool InstructionSelector::CanUseRootsRegister() const {
......
...@@ -446,7 +446,8 @@ class V8_EXPORT_PRIVATE InstructionSelector final { ...@@ -446,7 +446,8 @@ class V8_EXPORT_PRIVATE InstructionSelector final {
// Check if we can generate loads and stores of ExternalConstants relative // Check if we can generate loads and stores of ExternalConstants relative
// to the roots register. // to the roots register.
bool CanAddressRelativeToRootsRegister() const; bool CanAddressRelativeToRootsRegister(
const ExternalReference& reference) const;
// Check if we can use the roots register to access GC roots. // Check if we can use the roots register to access GC roots.
bool CanUseRootsRegister() const; bool CanUseRootsRegister() const;
......
...@@ -170,9 +170,10 @@ class X64OperandGenerator final : public OperandGenerator { ...@@ -170,9 +170,10 @@ class X64OperandGenerator final : public OperandGenerator {
AddressingMode GetEffectiveAddressMemoryOperand(Node* operand, AddressingMode GetEffectiveAddressMemoryOperand(Node* operand,
InstructionOperand inputs[], InstructionOperand inputs[],
size_t* input_count) { size_t* input_count) {
if (selector()->CanAddressRelativeToRootsRegister()) { {
LoadMatcher<ExternalReferenceMatcher> m(operand); LoadMatcher<ExternalReferenceMatcher> m(operand);
if (m.index().HasValue() && m.object().HasValue()) { if (m.index().HasValue() && m.object().HasValue() &&
selector()->CanAddressRelativeToRootsRegister(m.object().Value())) {
ptrdiff_t const delta = ptrdiff_t const delta =
m.index().Value() + m.index().Value() +
TurboAssemblerBase::RootRegisterOffsetForExternalReference( TurboAssemblerBase::RootRegisterOffsetForExternalReference(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment