Commit 59c55c1c authored by Hao Xu, committed by V8 LUCI CQ

[turbofan][x64] Avoid TruncateInt64ToInt32 when possible

Turbofan optimizes TruncateInt64ToInt32 by trying to merge it into
its input. This can avoid generating a movl instruction. However, when
the input is BitcastTaggedToWordForTagAndSmiBits, it fails to apply
this optimization and this results in a redundant move:

  Turbofan IR:
  149: Load[kRepTaggedSigned|kTypeInt32](...)
  150: BitcastTaggedToWordForTagAndSmiBits(149)
  151: TruncateInt64ToInt32(150)
  152: Word32And(151, 558)

  Assembly:
  103  movl r11,[r8+0x1f]
  107  movl r11,r11
  10a  andl r11,0x4

This CL fixes this and avoids the redundant movl instruction.

Change-Id: Id59d1ce1e3b139344f750e903726dd9f338cfdba
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3787681
Reviewed-by: Tobias Tebbi <tebbi@chromium.org>
Commit-Queue: Hao A Xu <hao.a.xu@intel.com>
Cr-Commit-Position: refs/heads/main@{#82156}
parent e553a440
......@@ -884,55 +884,50 @@ void InstructionSelector::VisitStackPointerGreaterThan(
namespace {
bool TryMergeTruncateInt64ToInt32IntoLoad(InstructionSelector* selector,
void TryMergeTruncateInt64ToInt32IntoLoad(InstructionSelector* selector,
Node* node, Node* load) {
if ((load->opcode() == IrOpcode::kLoad ||
load->opcode() == IrOpcode::kLoadImmutable) &&
selector->CanCover(node, load)) {
LoadRepresentation load_rep = LoadRepresentationOf(load->op());
MachineRepresentation rep = load_rep.representation();
InstructionCode opcode;
switch (rep) {
case MachineRepresentation::kBit: // Fall through.
case MachineRepresentation::kWord8:
opcode = load_rep.IsSigned() ? kX64Movsxbl : kX64Movzxbl;
break;
case MachineRepresentation::kWord16:
opcode = load_rep.IsSigned() ? kX64Movsxwl : kX64Movzxwl;
break;
case MachineRepresentation::kWord32:
case MachineRepresentation::kWord64:
case MachineRepresentation::kTaggedSigned:
case MachineRepresentation::kTagged:
case MachineRepresentation::kCompressed: // Fall through.
opcode = kX64Movl;
break;
default:
UNREACHABLE();
}
X64OperandGenerator g(selector);
LoadRepresentation load_rep = LoadRepresentationOf(load->op());
MachineRepresentation rep = load_rep.representation();
InstructionCode opcode;
switch (rep) {
case MachineRepresentation::kBit: // Fall through.
case MachineRepresentation::kWord8:
opcode = load_rep.IsSigned() ? kX64Movsxbl : kX64Movzxbl;
break;
case MachineRepresentation::kWord16:
opcode = load_rep.IsSigned() ? kX64Movsxwl : kX64Movzxwl;
break;
case MachineRepresentation::kWord32:
case MachineRepresentation::kWord64:
case MachineRepresentation::kTaggedSigned:
case MachineRepresentation::kTagged:
case MachineRepresentation::kCompressed: // Fall through.
opcode = kX64Movl;
break;
default:
UNREACHABLE();
}
X64OperandGenerator g(selector);
#ifdef V8_IS_TSAN
// On TSAN builds we require one scratch register. Because of this we also
// have to modify the inputs to take into account possible aliasing and use
// UseUniqueRegister which is not required for non-TSAN builds.
InstructionOperand temps[] = {g.TempRegister()};
size_t temp_count = arraysize(temps);
auto reg_kind = OperandGenerator::RegisterUseKind::kUseUniqueRegister;
// On TSAN builds we require one scratch register. Because of this we also
// have to modify the inputs to take into account possible aliasing and use
// UseUniqueRegister which is not required for non-TSAN builds.
InstructionOperand temps[] = {g.TempRegister()};
size_t temp_count = arraysize(temps);
auto reg_kind = OperandGenerator::RegisterUseKind::kUseUniqueRegister;
#else
InstructionOperand* temps = nullptr;
size_t temp_count = 0;
auto reg_kind = OperandGenerator::RegisterUseKind::kUseRegister;
InstructionOperand* temps = nullptr;
size_t temp_count = 0;
auto reg_kind = OperandGenerator::RegisterUseKind::kUseRegister;
#endif // V8_IS_TSAN
InstructionOperand outputs[] = {g.DefineAsRegister(node)};
size_t input_count = 0;
InstructionOperand inputs[3];
AddressingMode mode = g.GetEffectiveAddressMemoryOperand(
node->InputAt(0), inputs, &input_count, reg_kind);
opcode |= AddressingModeField::encode(mode);
selector->Emit(opcode, 1, outputs, input_count, inputs, temp_count, temps);
return true;
}
return false;
InstructionOperand outputs[] = {g.DefineAsRegister(node)};
size_t input_count = 0;
InstructionOperand inputs[3];
AddressingMode mode =
g.GetEffectiveAddressMemoryOperand(load, inputs, &input_count, reg_kind);
opcode |= AddressingModeField::encode(mode);
selector->Emit(opcode, 1, outputs, input_count, inputs, temp_count, temps);
}
// Shared routine for multiple 32-bit shift operations.
......@@ -1891,7 +1886,14 @@ void InstructionSelector::VisitTruncateInt64ToInt32(Node* node) {
// have to satisfy that condition.
X64OperandGenerator g(this);
Node* value = node->InputAt(0);
if (CanCover(node, value)) {
bool can_cover = false;
if (value->opcode() == IrOpcode::kBitcastTaggedToWordForTagAndSmiBits) {
can_cover = CanCover(node, value) && CanCover(value, value->InputAt(0));
value = value->InputAt(0);
} else {
can_cover = CanCover(node, value);
}
if (can_cover) {
switch (value->opcode()) {
case IrOpcode::kWord64Sar:
case IrOpcode::kWord64Shr: {
......@@ -1909,10 +1911,8 @@ void InstructionSelector::VisitTruncateInt64ToInt32(Node* node) {
}
case IrOpcode::kLoad:
case IrOpcode::kLoadImmutable: {
if (TryMergeTruncateInt64ToInt32IntoLoad(this, node, value)) {
return;
}
break;
TryMergeTruncateInt64ToInt32IntoLoad(this, node, value);
return;
}
default:
break;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment