Commit f7a3ede0, authored by jyan, committed by Commit bot

s390: TF Codegen Optimization

List of items:
  1. Avoid zero-extending before subsequent 32-bit operations when the current operation either leaves the upper 32 bits unchanged or already zero-extends its result.
  2. Match complex addressing modes for binary operations where possible (e.g. use Add R,MEM).
  3. Detect the supported instruction forms in the selector (e.g. kAllowRRR, kAllowRM).
  4. Optimize sequence for Int32MulWithOverflow, Int32Div, etc.
  5. Remove Not32/Not64, which are the same as XOR.

R=bjaideep@ca.ibm.com, joransiu@ca.ibm.com
BUG=

Review-Url: https://codereview.chromium.org/2649113007
Cr-Commit-Position: refs/heads/master@{#42669}
parent c8691efb
This diff is collapsed.
......@@ -34,6 +34,7 @@ namespace compiler {
V(S390_RotLeftAndClear64) \
V(S390_RotLeftAndClearLeft64) \
V(S390_RotLeftAndClearRight64) \
V(S390_Lay) \
V(S390_Add32) \
V(S390_Add64) \
V(S390_AddPair) \
......@@ -46,6 +47,7 @@ namespace compiler {
V(S390_SubPair) \
V(S390_MulPair) \
V(S390_Mul32) \
V(S390_Mul32WithOverflow) \
V(S390_Mul32WithHigh32) \
V(S390_Mul64) \
V(S390_MulHigh32) \
......
......@@ -35,6 +35,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kS390_RotLeftAndClear64:
case kS390_RotLeftAndClearLeft64:
case kS390_RotLeftAndClearRight64:
case kS390_Lay:
case kS390_Add32:
case kS390_Add64:
case kS390_AddPair:
......@@ -47,6 +48,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kS390_SubFloat:
case kS390_SubDouble:
case kS390_Mul32:
case kS390_Mul32WithOverflow:
case kS390_Mul32WithHigh32:
case kS390_Mul64:
case kS390_MulHigh32:
......
......@@ -1892,6 +1892,10 @@ void Assembler::msg(Register r1, const MemOperand& opnd) {
rxy_form(MSG, r1, opnd.rx(), opnd.rb(), opnd.offset());
}
// Emits the MSGF instruction in RXY format for register r1 and the storage
// operand described by |opnd|.
void Assembler::msgf(Register r1, const MemOperand& opnd) {
  const auto x2 = opnd.rx();
  const auto b2 = opnd.rb();
  const auto d2 = opnd.offset();
  rxy_form(MSGF, r1, x2, b2, d2);
}
// --------------------------
// 32-bit Divide Instructions
// --------------------------
......@@ -1902,7 +1906,15 @@ void Assembler::d(Register r1, const MemOperand& opnd) {
// Divide Logical Register-Storage (32<-64)
// Emits DL exactly once, in RXY format. The earlier rx_form(DL, ...) emission
// was the pre-change encoding and must not be emitted alongside the RXY one;
// the simulator also decodes DL with DECODE_RXY_A_INSTRUCTION.
void Assembler::dl(Register r1, const MemOperand& opnd) {
  rxy_form(DL, r1, opnd.rx(), opnd.rb(), opnd.offset());
}
// Emits the DSG instruction in RXY format for register r1 and the storage
// operand described by |opnd|.
void Assembler::dsg(Register r1, const MemOperand& opnd) {
  const auto x2 = opnd.rx();
  const auto b2 = opnd.rb();
  const auto d2 = opnd.offset();
  rxy_form(DSG, r1, x2, b2, d2);
}
// Emits the DSGF instruction in RXY format for register r1 and the storage
// operand described by |opnd|.
void Assembler::dsgf(Register r1, const MemOperand& opnd) {
  const auto x2 = opnd.rx();
  const auto b2 = opnd.rb();
  const auto d2 = opnd.offset();
  rxy_form(DSGF, r1, x2, b2, d2);
}
// --------------------
......
......@@ -988,6 +988,9 @@ class Assembler : public AssemblerBase {
RI1_FORM(nill);
RI1_FORM(oill);
RXY_FORM(pfd);
// Declarations for the dsgf / msgf / dsg emitters, whose definitions take
// (Register r1, const MemOperand& opnd) and encode through rxy_form.
// NOTE(review): presumably RXY_FORM expands to exactly that member signature;
// confirm against the macro definition.
RXY_FORM(dsgf);
RXY_FORM(msgf);
RXY_FORM(dsg);
RXE_FORM(sdb);
RXY_FORM(slgf);
RS1_FORM(srdl);
......
......@@ -712,6 +712,9 @@ bool Decoder::DecodeFourByte(Instruction* instr) {
case XGRK:
Format(instr, "xgrk\t'r5,'r6,'r3");
break;
case CGFR:
Format(instr, "cgfr\t'r5,'r6");
break;
case CGR:
Format(instr, "cgr\t'r5,'r6");
break;
......@@ -775,6 +778,12 @@ bool Decoder::DecodeFourByte(Instruction* instr) {
case DSGR:
Format(instr, "dsgr\t'r5,'r6");
break;
case DSGFR:
Format(instr, "dsgfr\t'r5,'r6");
break;
case MSGFR:
Format(instr, "msgfr\t'r5,'r6");
break;
case LZDR:
Format(instr, "lzdr\t'f5");
break;
......@@ -1391,6 +1400,15 @@ bool Decoder::DecodeSixByte(Instruction* instr) {
case MSG:
Format(instr, "msg\t'r1,'d2('r2d,'r3)");
break;
case DSG:
Format(instr, "dsg\t'r1,'d2('r2d,'r3)");
break;
case DSGF:
Format(instr, "dsgf\t'r1,'d2('r2d,'r3)");
break;
case MSGF:
Format(instr, "msgf\t'r1,'d2('r2d,'r3)");
break;
case MSY:
Format(instr, "msy\t'r1,'d2('r2d,'r3)");
break;
......
......@@ -3270,6 +3270,53 @@ void MacroAssembler::Mul32(Register dst, const Operand& src1) {
msfi(dst, src1);
}
// Multiplies src1 by the 32-bit storage operand src2 and leaves bits 32..63 of
// the 64-bit product in the low half of dst (logical shift, so the upper half
// of dst ends up zero).
// NOTE(review): dst is written before src2 is read; presumably callers never
// pass a src2 whose base/index register is dst - confirm.
void MacroAssembler::MulHigh32(Register dst, Register src1,
                               const MemOperand& src2) {
  const Operand high_word_shift(32);
  lgfr(dst, src1);                    // dst <- src1 widened to 64 bits
  msgf(dst, src2);                    // dst <- dst * word at src2
  srlg(dst, dst, high_word_shift);    // keep only the high word of the product
}
// Multiplies src1 by src2 and leaves bits 32..63 of the 64-bit product in the
// low half of dst (logical shift, so the upper half of dst ends up zero).
void MacroAssembler::MulHigh32(Register dst, Register src1, Register src2) {
  // dst is overwritten by the first instruction, so when it aliases src2 the
  // operand roles are exchanged (multiplication is commutative).
  const bool dst_is_src2 = dst.is(src2);
  const Register first = dst_is_src2 ? src2 : src1;
  const Register second = dst_is_src2 ? src1 : src2;
  const Operand high_word_shift(32);
  lgfr(dst, first);
  msgfr(dst, second);
  srlg(dst, dst, high_word_shift);
}
// Multiplies src1 by the immediate src2 and leaves bits 32..63 of the 64-bit
// product in the low half of dst (logical shift, so the upper half of dst ends
// up zero).
void MacroAssembler::MulHigh32(Register dst, Register src1,
                               const Operand& src2) {
  const Operand high_word_shift(32);
  lgfr(dst, src1);                    // dst <- src1 widened to 64 bits
  msgfi(dst, src2);                   // dst <- dst * immediate
  srlg(dst, dst, high_word_shift);    // keep only the high word of the product
}
// Forms the 64-bit product of src1 and the 32-bit storage operand src2 in dst,
// then compares dst with the sign extension of its own low 32 bits. As the
// name states, overflow of the 32-bit multiply is signalled by an "unequal"
// condition code.
// NOTE(review): dst is written before src2 is read; presumably callers never
// pass a src2 whose base/index register is dst - confirm.
void MacroAssembler::Mul32WithOverflowIfCCUnequal(Register dst, Register src1,
                                                  const MemOperand& src2) {
  lgfr(dst, src1);  // dst <- src1 widened to 64 bits
  msgf(dst, src2);  // dst <- dst * word at src2
  cgfr(dst, dst);   // CC <- compare(dst, sign-extended low word of dst)
}
// Forms the 64-bit product of src1 and src2 in dst, then compares dst with the
// sign extension of its own low 32 bits. As the name states, overflow of the
// 32-bit multiply is signalled by an "unequal" condition code.
void MacroAssembler::Mul32WithOverflowIfCCUnequal(Register dst, Register src1,
                                                  Register src2) {
  // dst is overwritten by the first instruction, so when it aliases src2 the
  // operand roles are exchanged (multiplication is commutative).
  const bool dst_is_src2 = dst.is(src2);
  const Register first = dst_is_src2 ? src2 : src1;
  const Register second = dst_is_src2 ? src1 : src2;
  lgfr(dst, first);
  msgfr(dst, second);
  cgfr(dst, dst);
}
// Forms the 64-bit product of src1 and the immediate src2 in dst, then
// compares dst with the sign extension of its own low 32 bits. As the name
// states, overflow of the 32-bit multiply is signalled by an "unequal"
// condition code.
void MacroAssembler::Mul32WithOverflowIfCCUnequal(Register dst, Register src1,
                                                  const Operand& src2) {
  lgfr(dst, src1);   // dst <- src1 widened to 64 bits
  msgfi(dst, src2);  // dst <- dst * immediate
  cgfr(dst, dst);    // CC <- compare(dst, sign-extended low word of dst)
}
void MacroAssembler::Mul64(Register dst, const MemOperand& src1) {
if (is_int20(src1.offset())) {
msg(dst, src1);
......@@ -3362,6 +3409,12 @@ void MacroAssembler::Add32(Register dst, const Operand& opnd) {
afi(dst, opnd);
}
// Add 32-bit, register-immediate form (Register dst = Register dst +
// Immediate opnd). Explicitly named alias that forwards to
// Add32(Register, const Operand&).
void MacroAssembler::Add32_RI(Register dst, const Operand& opnd) {
  this->Add32(dst, opnd);
}
// Add Pointer Size (Register dst = Register dst + Immediate opnd)
void MacroAssembler::AddP(Register dst, const Operand& opnd) {
#if V8_TARGET_ARCH_S390X
......@@ -3386,6 +3439,13 @@ void MacroAssembler::Add32(Register dst, Register src, const Operand& opnd) {
Add32(dst, opnd);
}
// Add 32-bit, register-register-immediate form (Register dst = Register src +
// Immediate opnd). Explicitly named alias that forwards to
// Add32(Register, Register, const Operand&).
void MacroAssembler::Add32_RRI(Register dst, Register src,
                               const Operand& opnd) {
  this->Add32(dst, src, opnd);
}
// Add Pointer Size (Register dst = Register src + Immediate opnd)
void MacroAssembler::AddP(Register dst, Register src, const Operand& opnd) {
if (!dst.is(src)) {
......@@ -4134,12 +4194,24 @@ void MacroAssembler::Load(Register dst, const Operand& opnd) {
#else
lhi(dst, opnd);
#endif
} else {
} else if (is_int32(value)) {
#if V8_TARGET_ARCH_S390X
lgfi(dst, opnd);
#else
iilf(dst, opnd);
#endif
} else if (is_uint32(value)) {
#if V8_TARGET_ARCH_S390X
llilf(dst, opnd);
#else
iilf(dst, opnd);
#endif
} else {
int32_t hi_32 = static_cast<int64_t>(value) >> 32;
int32_t lo_32 = static_cast<int32_t>(value);
iihf(dst, Operand(hi_32));
iilf(dst, Operand(lo_32));
}
}
......
......@@ -245,8 +245,10 @@ class MacroAssembler : public Assembler {
// Add (Register - Immediate)
void Add32(Register dst, const Operand& imm);
// Explicitly named register-immediate alias; its definition simply forwards to
// Add32(dst, imm).
void Add32_RI(Register dst, const Operand& imm);
void AddP(Register dst, const Operand& imm);
void Add32(Register dst, Register src, const Operand& imm);
// Explicitly named register-register-immediate alias; its definition simply
// forwards to Add32(dst, src, imm).
void Add32_RRI(Register dst, Register src, const Operand& imm);
void AddP(Register dst, Register src, const Operand& imm);
// Add (Register - Register)
......@@ -282,8 +284,12 @@ class MacroAssembler : public Assembler {
// Subtract (Register - Immediate)
void Sub32(Register dst, const Operand& imm);
// Explicitly named register-immediate alias: forwards to Sub32(dst, imm).
void Sub32_RI(Register dst, const Operand& imm) {
  this->Sub32(dst, imm);
}
void SubP(Register dst, const Operand& imm);
void Sub32(Register dst, Register src, const Operand& imm);
// Explicitly named register-register-immediate alias: forwards to
// Sub32(dst, src, imm).
void Sub32_RRI(Register dst, Register src, const Operand& imm) {
  this->Sub32(dst, src, imm);
}
void SubP(Register dst, Register src, const Operand& imm);
// Subtract (Register - Register)
......@@ -316,6 +322,14 @@ class MacroAssembler : public Assembler {
void Mul32(Register dst, const MemOperand& src1);
void Mul32(Register dst, Register src1);
void Mul32(Register dst, const Operand& src1);
// High word of a 32x32 multiply: each overload loads src1 into dst (lgfr),
// multiplies dst by src2 (memory, register or immediate form) and then shifts
// dst right logically by 32, so dst ends up holding bits 32..63 of the
// product. The register overload swaps its sources when dst aliases src2.
void MulHigh32(Register dst, Register src1, const MemOperand& src2);
void MulHigh32(Register dst, Register src1, Register src2);
void MulHigh32(Register dst, Register src1, const Operand& src2);
// 32-bit multiply with overflow detection: each overload loads src1 into dst
// (lgfr), multiplies dst by src2 (memory, register or immediate form) and
// finishes with cgfr(dst, dst), i.e. a compare of dst against the sign
// extension of its own low 32 bits. Per the name, callers treat an "unequal"
// condition code as overflow. The register overload swaps its sources when
// dst aliases src2.
void Mul32WithOverflowIfCCUnequal(Register dst, Register src1,
const MemOperand& src2);
void Mul32WithOverflowIfCCUnequal(Register dst, Register src1, Register src2);
void Mul32WithOverflowIfCCUnequal(Register dst, Register src1,
const Operand& src2);
void Mul64(Register dst, const MemOperand& src1);
void Mul64(Register dst, Register src1);
void Mul64(Register dst, const Operand& src1);
......
......@@ -10117,15 +10117,26 @@ EVALUATE(SLGFR) {
}
// Simulates MSGFR: multiplies the 64-bit value in r1 by the sign-extended low
// 32 bits of r2 and writes the 64-bit product back to r1.
// The old UNIMPLEMENTED()/return 0 stub is dropped; it made this body
// unreachable.
EVALUATE(MSGFR) {
  DCHECK_OPCODE(MSGFR);
  DECODE_RRE_INSTRUCTION(r1, r2);
  int64_t r1_val = get_register(r1);
  // Only the low word of r2 participates; widen it with sign extension.
  int64_t r2_val = static_cast<int64_t>(get_low_register<int32_t>(r2));
  int64_t product = r1_val * r2_val;
  set_register(r1, product);
  return length;
}
// Simulates DSGFR: divides the 64-bit value in register r1 + 1 by the
// sign-extended low 32 bits of r2. The remainder goes to r1 and the quotient
// to r1 + 1, so r1 must name the even register of an even/odd pair.
// The old UNIMPLEMENTED()/return 0 stub is dropped; it made this body
// unreachable.
EVALUATE(DSGFR) {
  DCHECK_OPCODE(DSGFR);
  DECODE_RRE_INSTRUCTION(r1, r2);
  DCHECK(r1 % 2 == 0);
  int64_t r1_val = get_register(r1 + 1);
  int64_t r2_val = static_cast<int64_t>(get_low_register<int32_t>(r2));
  // NOTE(review): a zero divisor (or INT64_MIN / -1) is undefined behaviour
  // in the host division below; confirm generated code never reaches it.
  int64_t quotient = r1_val / r2_val;
  int64_t remainder = r1_val % r2_val;
  set_register(r1, remainder);
  set_register(r1 + 1, quotient);
  return length;
}
EVALUATE(KMAC) {
......@@ -10201,9 +10212,13 @@ EVALUATE(KMC) {
}
// Simulates CGFR: signed 64-bit compare of r1 against the sign-extended low
// 32 bits of r2; only the condition code is updated.
// The old UNIMPLEMENTED()/return 0 stub is dropped; it made this body
// unreachable.
EVALUATE(CGFR) {
  DCHECK_OPCODE(CGFR);
  DECODE_RRE_INSTRUCTION(r1, r2);
  // Compare (64 <- 32)
  int64_t r1_val = get_register(r1);
  int64_t r2_val = static_cast<int64_t>(get_low_register<int32_t>(r2));
  SetS390ConditionCode<int64_t>(r1_val, r2_val);
  return length;
}
EVALUATE(KIMD) {
......@@ -11031,15 +11046,34 @@ EVALUATE(SLGF) {
}
// Simulates MSGF: multiplies the 64-bit value in r1 by the word read from
// storage at b2 + x2 + d2 (widened to 64 bits) and writes the product to r1.
// The old UNIMPLEMENTED()/return 0 stub is dropped; it made this body
// unreachable.
EVALUATE(MSGF) {
  DCHECK_OPCODE(MSGF);
  DECODE_RXY_A_INSTRUCTION(r1, x2, b2, d2);
  // A register number of 0 in the base/index field contributes 0.
  int64_t b2_val = (b2 == 0) ? 0 : get_register(b2);
  int64_t x2_val = (x2 == 0) ? 0 : get_register(x2);
  intptr_t d2_val = d2;
  int64_t mem_val =
      static_cast<int64_t>(ReadW(b2_val + d2_val + x2_val, instr));
  int64_t r1_val = get_register(r1);
  int64_t product = r1_val * mem_val;
  set_register(r1, product);
  return length;
}
// Simulates DSGF: divides the 64-bit value in register r1 + 1 by the word read
// from storage at b2 + x2 + d2 (widened to 64 bits). The remainder goes to r1
// and the quotient to r1 + 1, so r1 must name the even register of a pair.
// The old UNIMPLEMENTED()/return 0 stub is dropped; it made this body
// unreachable.
EVALUATE(DSGF) {
  DCHECK_OPCODE(DSGF);
  DECODE_RXY_A_INSTRUCTION(r1, x2, b2, d2);
  DCHECK(r1 % 2 == 0);
  // A register number of 0 in the base/index field contributes 0.
  int64_t b2_val = (b2 == 0) ? 0 : get_register(b2);
  int64_t x2_val = (x2 == 0) ? 0 : get_register(x2);
  intptr_t d2_val = d2;
  int64_t mem_val =
      static_cast<int64_t>(ReadW(b2_val + d2_val + x2_val, instr));
  int64_t r1_val = get_register(r1 + 1);
  // NOTE(review): a zero divisor (or INT64_MIN / -1) is undefined behaviour
  // in the host division below; confirm generated code never reaches it.
  int64_t quotient = r1_val / mem_val;
  int64_t remainder = r1_val % mem_val;
  set_register(r1, remainder);
  set_register(r1 + 1, quotient);
  return length;
}
EVALUATE(LRVG) {
......@@ -11598,9 +11632,20 @@ EVALUATE(ML) {
}
// Simulates DL: unsigned divide of the low word of register r1 + 1 by the
// 32-bit word read from storage at b2 + x2 + d2. The remainder goes to the low
// word of r1 and the quotient to the low word of r1 + 1, so r1 must name the
// even register of an even/odd pair.
// Two fixes: the opcode check now names DL (it was copy-pasted as ML), and the
// old UNIMPLEMENTED()/return 0 stub is dropped because it made this body
// unreachable.
EVALUATE(DL) {
  DCHECK_OPCODE(DL);
  DECODE_RXY_A_INSTRUCTION(r1, x2, b2, d2);
  // A register number of 0 in the base/index field contributes 0.
  int64_t x2_val = (x2 == 0) ? 0 : get_register(x2);
  int64_t b2_val = (b2 == 0) ? 0 : get_register(b2);
  DCHECK(r1 % 2 == 0);
  uint32_t mem_val = ReadWU(b2_val + x2_val + d2, instr);
  // NOTE(review): only the low word of r1 + 1 is used as the dividend; the
  // even register is not folded in as a high half - confirm this matches the
  // code sequences the compiler emits for DL.
  uint32_t r1_val = get_low_register<uint32_t>(r1 + 1);
  uint64_t quotient =
      static_cast<uint64_t>(r1_val) / static_cast<uint64_t>(mem_val);
  uint64_t remainder =
      static_cast<uint64_t>(r1_val) % static_cast<uint64_t>(mem_val);
  set_low_register(r1, remainder);
  set_low_register(r1 + 1, quotient);
  return length;
}
EVALUATE(ALC) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment