Commit db97c402 authored by jyan, committed by Commit bot

S390: Optimize For Mul in TurboFan codegen

R=joransiu@ca.ibm.com, bjaideep@ca.ibm.com, michael_dawson@ca.ibm.com, mbrandy@us.ibm.com
BUG=

Review-Url: https://codereview.chromium.org/2265073003
Cr-Commit-Position: refs/heads/master@{#38801}
parent da5d713d
...@@ -27,6 +27,16 @@ class S390OperandConverter final : public InstructionOperandConverter { ...@@ -27,6 +27,16 @@ class S390OperandConverter final : public InstructionOperandConverter {
size_t OutputCount() { return instr_->OutputCount(); } size_t OutputCount() { return instr_->OutputCount(); }
bool Is64BitOperand(int index) {
return LocationOperand::cast(instr_->InputAt(index))->representation() ==
MachineRepresentation::kWord64;
}
bool Is32BitOperand(int index) {
return LocationOperand::cast(instr_->InputAt(index))->representation() ==
MachineRepresentation::kWord32;
}
bool CompareLogical() const { bool CompareLogical() const {
switch (instr_->flags_condition()) { switch (instr_->flags_condition()) {
case kUnsignedLessThan: case kUnsignedLessThan:
...@@ -104,12 +114,25 @@ class S390OperandConverter final : public InstructionOperandConverter { ...@@ -104,12 +114,25 @@ class S390OperandConverter final : public InstructionOperandConverter {
FrameOffset offset = frame_access_state()->GetFrameOffset(slot); FrameOffset offset = frame_access_state()->GetFrameOffset(slot);
return MemOperand(offset.from_stack_pointer() ? sp : fp, offset.offset()); return MemOperand(offset.from_stack_pointer() ? sp : fp, offset.offset());
} }
// Builds the memory operand addressing the stack slot that holds the input
// at |index|. The input is cast with AllocatedOperand::cast and its slot
// index is translated through SlotToMemOperand.
MemOperand InputStackSlot(size_t index) {
  const int slot = AllocatedOperand::cast(instr_->InputAt(index))->index();
  return SlotToMemOperand(slot);
}
}; };
static inline bool HasRegisterInput(Instruction* instr, int index) { static inline bool HasRegisterInput(Instruction* instr, int index) {
return instr->InputAt(index)->IsRegister(); return instr->InputAt(index)->IsRegister();
} }
// Reports whether input |index| of |instr| is an immediate operand.
static inline bool HasImmediateInput(Instruction* instr, size_t index) {
  const InstructionOperand* const operand = instr->InputAt(index);
  return operand->IsImmediate();
}
// Reports whether input |index| of |instr| is a stack-slot operand.
static inline bool HasStackSlotInput(Instruction* instr, size_t index) {
  const InstructionOperand* const operand = instr->InputAt(index);
  return operand->IsStackSlot();
}
namespace { namespace {
class OutOfLineLoadNAN32 final : public OutOfLineCode { class OutOfLineLoadNAN32 final : public OutOfLineCode {
...@@ -287,9 +310,11 @@ Condition FlagsConditionToCondition(FlagsCondition condition, ArchOpcode op) { ...@@ -287,9 +310,11 @@ Condition FlagsConditionToCondition(FlagsCondition condition, ArchOpcode op) {
if (HasRegisterInput(instr, 1)) { \ if (HasRegisterInput(instr, 1)) { \
__ asm_instr(i.OutputRegister(), i.InputRegister(0), \ __ asm_instr(i.OutputRegister(), i.InputRegister(0), \
i.InputRegister(1)); \ i.InputRegister(1)); \
} else { \ } else if (HasImmediateInput(instr, 1)) { \
__ asm_instr(i.OutputRegister(), i.InputRegister(0), \ __ asm_instr(i.OutputRegister(), i.InputRegister(0), \
i.InputImmediate(1)); \ i.InputImmediate(1)); \
} else { \
UNIMPLEMENTED(); \
} \ } \
} while (0) } while (0)
...@@ -1223,14 +1248,54 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -1223,14 +1248,54 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
} }
break; break;
case kS390_Mul32: case kS390_Mul32:
#if V8_TARGET_ARCH_S390X if (HasRegisterInput(instr, 1)) {
case kS390_Mul64: __ Mul32(i.InputRegister(0), i.InputRegister(1));
} else if (HasImmediateInput(instr, 1)) {
__ Mul32(i.InputRegister(0), i.InputImmediate(1));
} else if (HasStackSlotInput(instr, 1)) {
#ifdef V8_TARGET_ARCH_S390X
// Avoid endian-issue here:
// stg r1, 0(fp)
// ...
// msy r2, 0(fp) <-- This will read the upper 32 bits
__ lg(kScratchReg, i.InputStackSlot(1));
__ Mul32(i.InputRegister(0), kScratchReg);
#else
__ Mul32(i.InputRegister(0), i.InputStackSlot(1));
#endif #endif
__ Mul(i.OutputRegister(), i.InputRegister(0), i.InputRegister(1)); } else {
UNIMPLEMENTED();
}
break;
case kS390_Mul64:
if (HasRegisterInput(instr, 1)) {
__ Mul64(i.InputRegister(0), i.InputRegister(1));
} else if (HasImmediateInput(instr, 1)) {
__ Mul64(i.InputRegister(0), i.InputImmediate(1));
} else if (HasStackSlotInput(instr, 1)) {
__ Mul64(i.InputRegister(0), i.InputStackSlot(1));
} else {
UNIMPLEMENTED();
}
break; break;
case kS390_MulHigh32: case kS390_MulHigh32:
__ LoadRR(r1, i.InputRegister(0)); __ LoadRR(r1, i.InputRegister(0));
if (HasRegisterInput(instr, 1)) {
__ mr_z(r0, i.InputRegister(1)); __ mr_z(r0, i.InputRegister(1));
} else if (HasStackSlotInput(instr, 1)) {
#ifdef V8_TARGET_ARCH_S390X
// Avoid endian-issue here:
// stg r1, 0(fp)
// ...
// mfy r2, 0(fp) <-- This will read the upper 32 bits
__ lg(kScratchReg, i.InputStackSlot(1));
__ mr_z(r0, kScratchReg);
#else
__ mfy(r0, i.InputStackSlot(1));
#endif
} else {
UNIMPLEMENTED();
}
__ LoadW(i.OutputRegister(), r0); __ LoadW(i.OutputRegister(), r0);
break; break;
case kS390_Mul32WithHigh32: case kS390_Mul32WithHigh32:
...@@ -1241,7 +1306,22 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -1241,7 +1306,22 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; break;
case kS390_MulHighU32: case kS390_MulHighU32:
__ LoadRR(r1, i.InputRegister(0)); __ LoadRR(r1, i.InputRegister(0));
if (HasRegisterInput(instr, 1)) {
__ mlr(r0, i.InputRegister(1)); __ mlr(r0, i.InputRegister(1));
} else if (HasStackSlotInput(instr, 1)) {
#ifdef V8_TARGET_ARCH_S390X
// Avoid endian-issue here:
// stg r1, 0(fp)
// ...
// ml r2, 0(fp) <-- This will read the upper 32 bits
__ lg(kScratchReg, i.InputStackSlot(1));
__ mlr(r0, kScratchReg);
#else
__ ml(r0, i.InputStackSlot(1));
#endif
} else {
UNIMPLEMENTED();
}
__ LoadlW(i.OutputRegister(), r0); __ LoadlW(i.OutputRegister(), r0);
break; break;
case kS390_MulFloat: case kS390_MulFloat:
......
...@@ -35,6 +35,16 @@ class S390OperandGenerator final : public OperandGenerator { ...@@ -35,6 +35,16 @@ class S390OperandGenerator final : public OperandGenerator {
return UseRegister(node); return UseRegister(node);
} }
// Returns the constant carried by |node|, widened to 64 bits. Only
// kInt32Constant and kInt64Constant nodes are handled; any other opcode
// reaches UNIMPLEMENTED().
int64_t GetImmediate(Node* node) {
  switch (node->opcode()) {
    case IrOpcode::kInt32Constant:
      return OpParameter<int32_t>(node);
    case IrOpcode::kInt64Constant:
      return OpParameter<int64_t>(node);
    default:
      UNIMPLEMENTED();
      break;
  }
  return 0L;
}
bool CanBeImmediate(Node* node, ImmediateMode mode) { bool CanBeImmediate(Node* node, ImmediateMode mode) {
int64_t value; int64_t value;
if (node->opcode() == IrOpcode::kInt32Constant) if (node->opcode() == IrOpcode::kInt32Constant)
...@@ -132,6 +142,18 @@ class S390OperandGenerator final : public OperandGenerator { ...@@ -132,6 +142,18 @@ class S390OperandGenerator final : public OperandGenerator {
return kMode_MRR; return kMode_MRR;
} }
} }
// Returns true when the selector reports |node| as not live. Callers use
// this to decide whether to swap the operands of a commutative operation
// so that such a node ends up on the left.
bool CanBeBetterLeftOperand(Node* node) const {
  const bool is_live = selector()->IsLive(node);
  return !is_live;
}
// Looks up the machine representation recorded in the instruction sequence
// for the virtual register assigned to |node|.
MachineRepresentation GetRepresentation(Node* node) {
  const int virtual_register = selector()->GetVirtualRegister(node);
  return sequence()->GetRepresentation(virtual_register);
}
// Returns true when the representation recorded for |node| is kWord64.
bool Is64BitOperand(Node* node) {
  const MachineRepresentation rep = GetRepresentation(node);
  return rep == MachineRepresentation::kWord64;
}
}; };
namespace { namespace {
...@@ -182,13 +204,36 @@ void VisitBinop(InstructionSelector* selector, Node* node, ...@@ -182,13 +204,36 @@ void VisitBinop(InstructionSelector* selector, Node* node,
FlagsContinuation* cont) { FlagsContinuation* cont) {
S390OperandGenerator g(selector); S390OperandGenerator g(selector);
Matcher m(node); Matcher m(node);
Node* left = m.left().node();
Node* right = m.right().node();
InstructionOperand inputs[4]; InstructionOperand inputs[4];
size_t input_count = 0; size_t input_count = 0;
InstructionOperand outputs[2]; InstructionOperand outputs[2];
size_t output_count = 0; size_t output_count = 0;
inputs[input_count++] = g.UseRegister(m.left().node()); // TODO(turbofan): match complex addressing modes.
inputs[input_count++] = g.UseOperand(m.right().node(), operand_mode); if (left == right) {
// If both inputs refer to the same operand, enforce allocating a register
// for both of them to ensure that we don't end up generating code like
// this:
//
// mov rax, [rbp-0x10]
// add rax, [rbp-0x10]
// jo label
InstructionOperand const input = g.UseRegister(left);
inputs[input_count++] = input;
inputs[input_count++] = input;
} else if (g.CanBeImmediate(right, operand_mode)) {
inputs[input_count++] = g.UseRegister(left);
inputs[input_count++] = g.UseImmediate(right);
} else {
if (node->op()->HasProperty(Operator::kCommutative) &&
g.CanBeBetterLeftOperand(right)) {
std::swap(left, right);
}
inputs[input_count++] = g.UseRegister(left);
inputs[input_count++] = g.UseRegister(right);
}
if (cont->IsBranch()) { if (cont->IsBranch()) {
inputs[input_count++] = g.Label(cont->true_block()); inputs[input_count++] = g.Label(cont->true_block());
...@@ -1002,28 +1047,89 @@ void EmitInt32MulWithOverflow(InstructionSelector* selector, Node* node, ...@@ -1002,28 +1047,89 @@ void EmitInt32MulWithOverflow(InstructionSelector* selector, Node* node,
VisitCompare(selector, kS390_Cmp32, high32_operand, temp_operand, cont); VisitCompare(selector, kS390_Cmp32, high32_operand, temp_operand, cont);
} }
// Emits a two-operand multiply |opcode| for |node|. The result is defined
// "same as first" input, which matches the code generator multiplying into
// InputRegister(0).
//
// - If the right operand fits the kInt32Imm immediate mode it is encoded as
//   an immediate.
// - Otherwise the operands may be swapped when the right one is the better
//   left operand, and the second operand is passed with Use() so it is not
//   forced into a register.
void VisitMul(InstructionSelector* selector, Node* node, ArchOpcode opcode) {
  S390OperandGenerator g(selector);
  Int32BinopMatcher matcher(node);
  Node* lhs = matcher.left().node();
  Node* rhs = matcher.right().node();

  if (g.CanBeImmediate(rhs, kInt32Imm)) {
    selector->Emit(opcode, g.DefineSameAsFirst(node), g.UseRegister(lhs),
                   g.UseImmediate(rhs));
    return;
  }

  if (g.CanBeBetterLeftOperand(rhs)) {
    std::swap(lhs, rhs);
  }
  selector->Emit(opcode, g.DefineSameAsFirst(node), g.UseRegister(lhs),
                 g.Use(rhs));
}
} // namespace } // namespace
// Selects code for Int32MulWithOverflow. When projection 1 of |node| (the
// overflow output) exists, the overflow-checking sequence is emitted with a
// kNotEqual "set" continuation on that projection. Otherwise a plain 32-bit
// multiply is emitted.
void InstructionSelector::VisitInt32MulWithOverflow(Node* node) {
  Node* const ovf = NodeProperties::FindProjection(node, 1);
  if (ovf == nullptr) {
    VisitMul(this, node, kS390_Mul32);
    return;
  }
  FlagsContinuation cont = FlagsContinuation::ForSet(kNotEqual, ovf);
  EmitInt32MulWithOverflow(this, node, &cont);
}
void InstructionSelector::VisitInt32Mul(Node* node) { void InstructionSelector::VisitInt32Mul(Node* node) {
VisitRRR(this, kS390_Mul32, node); S390OperandGenerator g(this);
Int32BinopMatcher m(node);
Node* left = m.left().node();
Node* right = m.right().node();
if (g.CanBeImmediate(right, kInt32Imm) &&
base::bits::IsPowerOfTwo32(g.GetImmediate(right))) {
int power = 31 - base::bits::CountLeadingZeros32(g.GetImmediate(right));
Emit(kS390_ShiftLeft32, g.DefineSameAsFirst(node), g.UseRegister(left),
g.UseImmediate(power));
return;
}
VisitMul(this, node, kS390_Mul32);
} }
#if V8_TARGET_ARCH_S390X #if V8_TARGET_ARCH_S390X
// Selects code for a 64-bit integer multiply.
//
// When the right operand is a constant that fits the kInt32Imm immediate
// mode and is a power of two, the multiply is strength-reduced to a 64-bit
// left shift by log2(constant). Every other case is delegated to VisitMul
// with kS390_Mul64.
void InstructionSelector::VisitInt64Mul(Node* node) {
  S390OperandGenerator g(this);
  Int64BinopMatcher m(node);
  Node* left = m.left().node();
  Node* right = m.right().node();
  if (g.CanBeImmediate(right, kInt32Imm) &&
      base::bits::IsPowerOfTwo64(g.GetImmediate(right))) {
    // The leading-zero count is taken over 64 bits, so the index of the
    // single set bit is 63 - clz. Using 31 here (as the 32-bit variant does)
    // would produce a negative shift amount for any constant below 2^32.
    int power = 63 - base::bits::CountLeadingZeros64(g.GetImmediate(right));
    Emit(kS390_ShiftLeft64, g.DefineSameAsFirst(node), g.UseRegister(left),
         g.UseImmediate(power));
    return;
  }
  VisitMul(this, node, kS390_Mul64);
}
#endif #endif
void InstructionSelector::VisitInt32MulHigh(Node* node) { void InstructionSelector::VisitInt32MulHigh(Node* node) {
S390OperandGenerator g(this); S390OperandGenerator g(this);
Emit(kS390_MulHigh32, g.DefineAsRegister(node), Int32BinopMatcher m(node);
g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1))); Node* left = m.left().node();
Node* right = m.right().node();
if (g.CanBeBetterLeftOperand(right)) {
std::swap(left, right);
}
Emit(kS390_MulHigh32, g.DefineAsRegister(node), g.UseRegister(left),
g.Use(right));
} }
void InstructionSelector::VisitUint32MulHigh(Node* node) { void InstructionSelector::VisitUint32MulHigh(Node* node) {
S390OperandGenerator g(this); S390OperandGenerator g(this);
Emit(kS390_MulHighU32, g.DefineAsRegister(node), Int32BinopMatcher m(node);
g.UseRegister(node->InputAt(0)), g.UseRegister(node->InputAt(1))); Node* left = m.left().node();
Node* right = m.right().node();
if (g.CanBeBetterLeftOperand(right)) {
std::swap(left, right);
}
Emit(kS390_MulHighU32, g.DefineAsRegister(node), g.UseRegister(left),
g.Use(right));
} }
void InstructionSelector::VisitInt32Div(Node* node) { void InstructionSelector::VisitInt32Div(Node* node) {
...@@ -1721,15 +1827,6 @@ void InstructionSelector::VisitUint64LessThanOrEqual(Node* node) { ...@@ -1721,15 +1827,6 @@ void InstructionSelector::VisitUint64LessThanOrEqual(Node* node) {
} }
#endif #endif
void InstructionSelector::VisitInt32MulWithOverflow(Node* node) {
if (Node* ovf = NodeProperties::FindProjection(node, 1)) {
FlagsContinuation cont = FlagsContinuation::ForSet(kNotEqual, ovf);
return EmitInt32MulWithOverflow(this, node, &cont);
}
FlagsContinuation cont;
EmitInt32MulWithOverflow(this, node, &cont);
}
void InstructionSelector::VisitFloat32Equal(Node* node) { void InstructionSelector::VisitFloat32Equal(Node* node) {
FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node); FlagsContinuation cont = FlagsContinuation::ForSet(kEqual, node);
VisitFloat32Compare(this, node, &cont); VisitFloat32Compare(this, node, &cont);
......
...@@ -2098,9 +2098,15 @@ void Assembler::slgrk(Register r1, Register r2, Register r3) { ...@@ -2098,9 +2098,15 @@ void Assembler::slgrk(Register r1, Register r2, Register r3) {
// ---------------------------- // ----------------------------
// Multiply Register-Storage (64<32) // Multiply Register-Storage (64<32)
void Assembler::m(Register r1, const MemOperand& opnd) { void Assembler::m(Register r1, const MemOperand& opnd) {
DCHECK(r1.code() % 2 == 0);
rx_form(M, r1, opnd.rx(), opnd.rb(), opnd.offset()); rx_form(M, r1, opnd.rx(), opnd.rb(), opnd.offset());
} }
// Multiply Register-Storage (64<32), RXY-form encoding (opcode MFY).
// |r1| must be an even-numbered register; this is only checked in debug
// builds via DCHECK.
void Assembler::mfy(Register r1, const MemOperand& opnd) {
// Reject odd register codes: r1 is expected to be even.
DCHECK(r1.code() % 2 == 0);
rxy_form(MFY, r1, opnd.rx(), opnd.rb(), opnd.offset());
}
// Multiply Register (64<32) // Multiply Register (64<32)
void Assembler::mr_z(Register r1, Register r2) { void Assembler::mr_z(Register r1, Register r2) {
DCHECK(r1.code() % 2 == 0); DCHECK(r1.code() % 2 == 0);
......
...@@ -1055,6 +1055,7 @@ class Assembler : public AssemblerBase { ...@@ -1055,6 +1055,7 @@ class Assembler : public AssemblerBase {
// 32-bit Multiply Instructions // 32-bit Multiply Instructions
void m(Register r1, const MemOperand& opnd); void m(Register r1, const MemOperand& opnd);
void mfy(Register r1, const MemOperand& opnd);
void mr_z(Register r1, Register r2); void mr_z(Register r1, Register r2);
void ml(Register r1, const MemOperand& opnd); void ml(Register r1, const MemOperand& opnd);
void mlr(Register r1, Register r2); void mlr(Register r1, Register r2);
......
...@@ -907,6 +907,9 @@ bool Decoder::DecodeFourByte(Instruction* instr) { ...@@ -907,6 +907,9 @@ bool Decoder::DecodeFourByte(Instruction* instr) {
case LDGR: case LDGR:
Format(instr, "ldgr\t'f5,'r6"); Format(instr, "ldgr\t'f5,'r6");
break; break;
case MS:
Format(instr, "ms\t'r1,'d1('r2d,'r3)");
break;
case STE: case STE:
Format(instr, "ste\t'f1,'d1('r2d,'r3)"); Format(instr, "ste\t'f1,'d1('r2d,'r3)");
break; break;
...@@ -1358,6 +1361,12 @@ bool Decoder::DecodeSixByte(Instruction* instr) { ...@@ -1358,6 +1361,12 @@ bool Decoder::DecodeSixByte(Instruction* instr) {
case LEY: case LEY:
Format(instr, "ley\t'f1,'d2('r2d,'r3)"); Format(instr, "ley\t'f1,'d2('r2d,'r3)");
break; break;
case MSG:
Format(instr, "msg\t'r1,'d2('r2d,'r3)");
break;
case MSY:
Format(instr, "msy\t'r1,'d2('r2d,'r3)");
break;
case STEY: case STEY:
Format(instr, "stey\t'f1,'d2('r2d,'r3)"); Format(instr, "stey\t'f1,'d2('r2d,'r3)");
break; break;
......
...@@ -3697,6 +3697,36 @@ void MacroAssembler::mov(Register dst, const Operand& src) { ...@@ -3697,6 +3697,36 @@ void MacroAssembler::mov(Register dst, const Operand& src) {
#endif #endif
} }
// 32-bit multiply of |dst| by the memory operand |src1|, result in |dst|.
// Picks the instruction by the size of the displacement: ms when the offset
// fits in an unsigned 12-bit field, msy when it fits in a signed 20-bit
// field. Larger offsets are not supported and hit UNIMPLEMENTED().
void MacroAssembler::Mul32(Register dst, const MemOperand& src1) {
  if (is_uint12(src1.offset())) {
    ms(dst, src1);
    return;
  }
  if (is_int20(src1.offset())) {
    msy(dst, src1);
    return;
  }
  UNIMPLEMENTED();
}
// 32-bit multiply of |dst| by register |src1|; emits msr.
void MacroAssembler::Mul32(Register dst, Register src1) {
  msr(dst, src1);
}
// 32-bit multiply of |dst| by the immediate operand |src1|; emits msfi.
void MacroAssembler::Mul32(Register dst, const Operand& src1) {
  msfi(dst, src1);
}
// 64-bit multiply of |dst| by the memory operand |src1|, result in |dst|.
// Emits msg when the offset fits in a signed 20-bit field; larger offsets
// are not supported and hit UNIMPLEMENTED().
void MacroAssembler::Mul64(Register dst, const MemOperand& src1) {
  if (!is_int20(src1.offset())) {
    UNIMPLEMENTED();
  } else {
    msg(dst, src1);
  }
}
// 64-bit multiply of |dst| by register |src1|; emits msgr.
void MacroAssembler::Mul64(Register dst, Register src1) {
  msgr(dst, src1);
}
// 64-bit multiply of |dst| by the immediate operand |src1|; emits msgfi.
void MacroAssembler::Mul64(Register dst, const Operand& src1) {
  msgfi(dst, src1);
}
void MacroAssembler::Mul(Register dst, Register src1, Register src2) { void MacroAssembler::Mul(Register dst, Register src1, Register src2) {
if (dst.is(src2)) { if (dst.is(src2)) {
MulP(dst, src1); MulP(dst, src1);
......
...@@ -301,6 +301,12 @@ class MacroAssembler : public Assembler { ...@@ -301,6 +301,12 @@ class MacroAssembler : public Assembler {
void MulP(Register dst, Register src); void MulP(Register dst, Register src);
void MulP(Register dst, const MemOperand& opnd); void MulP(Register dst, const MemOperand& opnd);
void Mul(Register dst, Register src1, Register src2); void Mul(Register dst, Register src1, Register src2);
void Mul32(Register dst, const MemOperand& src1);
void Mul32(Register dst, Register src1);
void Mul32(Register dst, const Operand& src1);
void Mul64(Register dst, const MemOperand& src1);
void Mul64(Register dst, Register src1);
void Mul64(Register dst, const Operand& src1);
// Divide // Divide
void DivP(Register dividend, Register divider); void DivP(Register dividend, Register divider);
......
...@@ -6584,7 +6584,6 @@ EVALUATE(MR) { ...@@ -6584,7 +6584,6 @@ EVALUATE(MR) {
int32_t low_bits = product & 0x00000000FFFFFFFF; int32_t low_bits = product & 0x00000000FFFFFFFF;
set_low_register(r1, high_bits); set_low_register(r1, high_bits);
set_low_register(r1 + 1, low_bits); set_low_register(r1 + 1, low_bits);
set_low_register(r1, r1_val);
return length; return length;
} }
...@@ -6940,9 +6939,22 @@ EVALUATE(S) { ...@@ -6940,9 +6939,22 @@ EVALUATE(S) {
} }
EVALUATE(M) { EVALUATE(M) {
UNIMPLEMENTED(); DCHECK_OPCODE(M);
USE(instr); DECODE_RX_A_INSTRUCTION(x2, b2, r1, d2_val);
return 0; int64_t b2_val = (b2 == 0) ? 0 : get_register(b2);
int64_t x2_val = (x2 == 0) ? 0 : get_register(x2);
intptr_t addr = b2_val + x2_val + d2_val;
DCHECK(r1 % 2 == 0);
int32_t mem_val = ReadW(addr, instr);
int32_t r1_val = get_low_register<int32_t>(r1 + 1);
int64_t product =
static_cast<int64_t>(r1_val) * static_cast<int64_t>(mem_val);
int32_t high_bits = product >> 32;
r1_val = high_bits;
int32_t low_bits = product & 0x00000000FFFFFFFF;
set_low_register(r1, high_bits);
set_low_register(r1 + 1, low_bits);
return length;
} }
EVALUATE(D) { EVALUATE(D) {
...@@ -11156,9 +11168,21 @@ EVALUATE(SY) { ...@@ -11156,9 +11168,21 @@ EVALUATE(SY) {
} }
EVALUATE(MFY) { EVALUATE(MFY) {
UNIMPLEMENTED(); DCHECK_OPCODE(MFY);
USE(instr); DECODE_RXY_A_INSTRUCTION(r1, x2, b2, d2);
return 0; int64_t x2_val = (x2 == 0) ? 0 : get_register(x2);
int64_t b2_val = (b2 == 0) ? 0 : get_register(b2);
DCHECK(r1 % 2 == 0);
int32_t mem_val = ReadW(b2_val + x2_val + d2, instr);
int32_t r1_val = get_low_register<int32_t>(r1 + 1);
int64_t product =
static_cast<int64_t>(r1_val) * static_cast<int64_t>(mem_val);
int32_t high_bits = product >> 32;
r1_val = high_bits;
int32_t low_bits = product & 0x00000000FFFFFFFF;
set_low_register(r1, high_bits);
set_low_register(r1 + 1, low_bits);
return length;
} }
EVALUATE(ALY) { EVALUATE(ALY) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment