Commit f11a938a authored by Andreas Haas, committed by Commit Bot

[wasm][liftoff] Order registers in a register pair

With this CL, the registers in a register pair get ordered such that the
low word register always has a lower register code than the high word
register. This should make it easier to reason about register
allocation, and prevent some register allocation bugs.

Background: for many operations in Liftoff, input registers are reused
as output registers. With register pairs, input register pairs are
reused as output register pairs. Additional reasoning, and sometimes
even additional code, is needed when the registers of the output register
pair are swapped, i.e. when the high word register of the input becomes
the low word register of the output. With this CL, that additional
reasoning is no longer necessary: any out-of-order pair is swapped into
order during register allocation, so a reused pair can never come back
with its low and high words exchanged.
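
To make the invariant concrete, here is a minimal, self-contained C++
sketch. Register, RegisterPair, and MakeOrderedPair are stand-in names
for illustration, not V8's actual classes; the real change lives in the
LiftoffAssembler diff below.

// A minimal sketch of the pair-ordering invariant, with stand-in types
// (not V8's actual Register/LiftoffRegister classes).
#include <cassert>
#include <utility>

struct Register {
  int code_;
  int code() const { return code_; }
};

struct RegisterPair {
  Register low;   // holds the low 32 bits of an i64 value
  Register high;  // holds the high 32 bits
};

// Order the pair so the low word always lives in the register with the
// lower code. A reused input pair then always maps low to low and high
// to high, never swapped.
RegisterPair MakeOrderedPair(Register a, Register b) {
  if (a.code() > b.code()) std::swap(a, b);
  return {a, b};
}

int main() {
  RegisterPair p = MakeOrderedPair(Register{5}, Register{3});
  assert(p.low.code() == 3 && p.high.code() == 5);
  return 0;
}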

Additionally, this CL fixes the logic of the last_spilled_regs list.
This list stores the most recently spilled registers, but it used to
record only one of the two registers of a register pair. With this CL,
both registers get recorded.
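
Sketched with std::bitset standing in for V8's LiftoffRegList (the names
below are illustrative, not V8's API), the fix amounts to marking both
halves of the pair:

// A sketch of the last_spilled_regs fix, using std::bitset as a
// stand-in for V8's LiftoffRegList.
#include <bitset>
#include <cassert>

struct CacheState {
  std::bitset<16> last_spilled_regs;  // one bit per register code

  // Previously only one register of a spilled pair was recorded here;
  // recording both halves keeps the list consistent with what was
  // actually spilled.
  void RecordSpilledPair(int low_code, int high_code) {
    last_spilled_regs.set(low_code);
    last_spilled_regs.set(high_code);
  }
};

int main() {
  CacheState state;
  state.RecordSpilledPair(3, 5);
  assert(state.last_spilled_regs.test(3) && state.last_spilled_regs.test(5));
  return 0;
}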

This CL does not come with a regression test. The regression test was
more than 9000 lines long and quite slow, and I was not able to minimize
it substantially. The test would also be fragile by nature: it has to
create a special register configuration, or it does not test anything
meaningful. All in all, I think it is better not to add the test.

R=clemensb@chromium.org

Bug: chromium:1074586
Change-Id: I4b2475b0c6537c7ce2e51fee281388cdd85f2953
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2168875
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Commit-Queue: Andreas Haas <ahaas@chromium.org>
Cr-Commit-Position: refs/heads/master@{#67473}
parent 45b065b2
@@ -581,13 +581,7 @@ inline void AtomicOp32(
     void (Assembler::*load)(Register, Register, Condition),
     void (Assembler::*store)(Register, Register, Register, Condition),
     void (*op)(LiftoffAssembler*, Register, Register, Register)) {
-  UseScratchRegisterScope temps(lasm);
-  Register actual_addr = liftoff::CalculateActualAddress(
-      lasm, &temps, dst_addr, offset_reg, offset_imm);
-  Register store_result =
-      temps.CanAcquire()
-          ? temps.Acquire()
-          : pinned.set(__ GetUnusedRegister(kGpReg, pinned)).gp();
+  Register store_result = pinned.set(__ GetUnusedRegister(kGpReg, pinned)).gp();
 
   // Allocate an additional {temp} register to hold the result that should be
   // stored to memory. Note that {temp} and {store_result} are not allowed to be
@@ -596,9 +590,13 @@
   // Make sure that {result} is unique.
   Register result_reg = result.gp();
-  if (result_reg == value.gp() || result_reg == actual_addr) {
+  if (result_reg == value.gp() || result_reg == dst_addr ||
+      result_reg == offset_reg) {
     result_reg = __ GetUnusedRegister(kGpReg, pinned).gp();
   }
+  UseScratchRegisterScope temps(lasm);
+  Register actual_addr = liftoff::CalculateActualAddress(
+      lasm, &temps, dst_addr, offset_reg, offset_imm);
   __ dmb(ISH);
   Label retry;
   __ bind(&retry);
@@ -731,10 +729,10 @@ inline void AtomicOp64(LiftoffAssembler* lasm, Register dst_addr,
     pinned.set(result_high);
   }
+  Register store_result = __ GetUnusedRegister(kGpReg, pinned).gp();
   UseScratchRegisterScope temps(lasm);
   Register actual_addr = liftoff::CalculateActualAddress(
       lasm, &temps, dst_addr, offset_reg, offset_imm);
-  Register store_result = __ GetUnusedRegister(kGpReg, pinned).gp();
   __ dmb(ISH);
   Label retry;
...
@@ -981,12 +981,15 @@ void LiftoffAssembler::SpillRegister(LiftoffRegister reg) {
       // {clear_used} call below only clears one of them.
       cache_state_.dec_used(slot->reg().low());
       cache_state_.dec_used(slot->reg().high());
+      cache_state_.last_spilled_regs.set(slot->reg().low());
+      cache_state_.last_spilled_regs.set(slot->reg().high());
     }
     Spill(slot->offset(), slot->reg(), slot->type());
     slot->MakeStack();
     if (--remaining_uses == 0) break;
   }
   cache_state_.clear_used(reg);
+  cache_state_.last_spilled_regs.set(reg);
 }
 
 void LiftoffAssembler::set_num_locals(uint32_t num_locals) {
...
@@ -259,7 +259,6 @@ class LiftoffAssembler : public TurboAssembler {
         last_spilled_regs = {};
       }
       LiftoffRegister reg = unspilled.GetFirstRegSet();
-      last_spilled_regs.set(reg);
       return reg;
     }
@@ -355,6 +354,15 @@ class LiftoffAssembler : public TurboAssembler {
       LiftoffRegList candidates = kGpCacheRegList;
       Register low = pinned.set(GetUnusedRegister(candidates, pinned)).gp();
       Register high = GetUnusedRegister(candidates, pinned).gp();
+      if (low.code() > high.code()) {
+        // Establish the invariant that the register of the low word always has
+        // a lower code than the register of the high word. This guarantees that
+        // if a register pair of an input is reused for the result, the low
+        // word and high word registers are not swapped, i.e. the low word
+        // register of the result is not the high word register of the input,
+        // and vice versa.
+        std::swap(low, high);
+      }
       return LiftoffRegister::ForPair(low, high);
     } else if (kNeedS128RegPair && rc == kFpRegPair) {
       // kFpRegPair specific logic here because we need adjacent registers, not
...
// Copyright 2020 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
// Flags: --wasm-staging
load('test/mjsunit/wasm/wasm-module-builder.js');
const builder = new WasmModuleBuilder();
builder.addMemory(1, 1, false, true);
builder.addGlobal(kWasmI32, 1);
const sig = builder.addType(makeSig([kWasmI32, kWasmI64, kWasmI64, kWasmI64], [kWasmF32]));
// Generate function 1 (out of 3).
builder.addFunction(undefined, sig)
.addLocals({i32_count: 57}).addLocals({i64_count: 11})
.addBodyWithEnd([
// signature: f_illl
// body:
kExprLocalGet, 0x1b, // local.get
kExprLocalSet, 0x1c, // local.set
kExprI32Const, 0x00, // i32.const
kExprIf, kWasmStmt, // if @11
kExprGlobalGet, 0x00, // global.get
kExprLocalSet, 0x1e, // local.set
kExprBlock, kWasmStmt, // block @19
kExprGlobalGet, 0x00, // global.get
kExprLocalSet, 0x21, // local.set
kExprBlock, kWasmStmt, // block @25
kExprBlock, kWasmStmt, // block @27
kExprBlock, kWasmStmt, // block @29
kExprGlobalGet, 0x00, // global.get
kExprLocalSet, 0x0a, // local.set
kExprI32Const, 0x00, // i32.const
kExprLocalSet, 0x28, // local.set
kExprLocalGet, 0x00, // local.get
kExprLocalSet, 0x0b, // local.set
kExprI32Const, 0x00, // i32.const
kExprBrIf, 0x01, // br_if depth=1
kExprEnd, // end @47
kExprUnreachable, // unreachable
kExprEnd, // end @49
kExprI32Const, 0x01, // i32.const
kExprLocalSet, 0x36, // local.set
kExprI32Const, 0x00, // i32.const
kExprIf, kWasmStmt, // if @56
kExprEnd, // end @59
kExprLocalGet, 0x00, // local.get
kExprLocalSet, 0x10, // local.set
kExprI32Const, 0x00, // i32.const
kExprI32Eqz, // i32.eqz
kExprLocalSet, 0x38, // local.set
kExprBlock, kWasmStmt, // block @69
kExprI32Const, 0x7f, // i32.const
kExprI32Eqz, // i32.eqz
kExprLocalSet, 0x39, // local.set
kExprI32Const, 0x01, // i32.const
kExprIf, kWasmStmt, // if @78
kExprGlobalGet, 0x00, // global.get
kExprLocalSet, 0x11, // local.set
kExprI32Const, 0x00, // i32.const
kExprI32Eqz, // i32.eqz
kExprLocalSet, 0x12, // local.set
kExprGlobalGet, 0x00, // global.get
kExprLocalSet, 0x13, // local.set
kExprI32Const, 0x00, // i32.const
kExprI32Const, 0x01, // i32.const
kExprI32Sub, // i32.sub
kExprLocalSet, 0x3a, // local.set
kExprI32Const, 0x00, // i32.const
kAtomicPrefix, kExprI64AtomicLoad16U, 0x01, 0x04, // i64.atomic.load16_u
kExprDrop, // drop
kExprI64Const, 0x01, // i64.const
kExprLocalSet, 0x44, // local.set
kExprI64Const, 0x01, // i64.const
kExprLocalSet, 0x3e, // local.set
kExprElse, // else @115
kExprNop, // nop
kExprEnd, // end @117
kExprLocalGet, 0x40, // local.get
kExprLocalSet, 0x41, // local.set
kExprLocalGet, 0x41, // local.get
kExprI64Const, 0x4b, // i64.const
kExprI64Add, // i64.add
kExprDrop, // drop
kExprEnd, // end @128
kExprEnd, // end @129
kExprUnreachable, // unreachable
kExprEnd, // end @132
kExprUnreachable, // unreachable
kExprEnd, // end @134
kExprF32Const, 0x00, 0x00, 0x84, 0x42, // f32.const
kExprEnd, // end @140
]);
const instance = builder.instantiate();