Commit 37b461a9 authored by Miran.Karic's avatar Miran.Karic Committed by Commit Bot

MIPS64: Add optimizations to li and Dsubu macro.

Here we optimize Dsubu: instead of loading imm and subtracting, we load
-imm and perform an addition when loading -imm takes fewer instructions
than loading imm. Similarly, li is optimized by loading -imm and negating
it with dsubu, or by loading ~imm and inverting the bits with nor, when
one of these loads takes at least two instructions fewer than loading
imm, saving at least one instruction. Tests are adjusted to cover these
optimizations.

BUG=
TEST=cctest/test-assembler-mips/li_macro
     cctest/test-assembler-mips/Dsubu

Review-Url: https://codereview.chromium.org/2909913002
Cr-Commit-Position: refs/heads/master@{#46001}
parent 79fe6e3e
...@@ -658,16 +658,17 @@ void MacroAssembler::Subu(Register rd, Register rs, const Operand& rt) { ...@@ -658,16 +658,17 @@ void MacroAssembler::Subu(Register rd, Register rs, const Operand& rt) {
addiu(rd, rs, addiu(rd, rs,
static_cast<int32_t>( static_cast<int32_t>(
-rt.imm64_)); // No subiu instr, use addiu(x, y, -imm). -rt.imm64_)); // No subiu instr, use addiu(x, y, -imm).
} else if (-rt.imm64_ >> 16 == 0 && !MustUseReg(rt.rmode_)) {
// Use load -imm and addu when loading -imm generates one instruction.
DCHECK(!rs.is(at));
li(at, -rt.imm64_);
addu(rd, rs, at);
} else { } else {
// li handles the relocation.
DCHECK(!rs.is(at)); DCHECK(!rs.is(at));
li(at, rt); if (-rt.imm64_ >> 16 == 0 && !MustUseReg(rt.rmode_)) {
subu(rd, rs, at); // Use load -imm and addu when loading -imm generates one instruction.
li(at, -rt.imm64_);
addu(rd, rs, at);
} else {
// li handles the relocation.
li(at, rt);
subu(rd, rs, at);
}
} }
} }
} }
...@@ -676,19 +677,21 @@ void MacroAssembler::Subu(Register rd, Register rs, const Operand& rt) { ...@@ -676,19 +677,21 @@ void MacroAssembler::Subu(Register rd, Register rs, const Operand& rt) {
void MacroAssembler::Dsubu(Register rd, Register rs, const Operand& rt) { void MacroAssembler::Dsubu(Register rd, Register rs, const Operand& rt) {
if (rt.is_reg()) { if (rt.is_reg()) {
dsubu(rd, rs, rt.rm()); dsubu(rd, rs, rt.rm());
} else if (is_int16(-rt.imm64_) && !MustUseReg(rt.rmode_)) {
daddiu(rd, rs,
static_cast<int32_t>(
-rt.imm64_)); // No dsubiu instr, use daddiu(x, y, -imm).
} else { } else {
if (is_int16(-rt.imm64_) && !MustUseReg(rt.rmode_)) { DCHECK(!rs.is(at));
daddiu(rd, rs, int li_count = InstrCountForLi64Bit(rt.imm64_);
static_cast<int32_t>( int li_neg_count = InstrCountForLi64Bit(-rt.imm64_);
-rt.imm64_)); // No dsubiu instr, use daddiu(x, y, -imm). if (li_neg_count < li_count && !MustUseReg(rt.rmode_)) {
} else if (-rt.imm64_ >> 16 == 0 && !MustUseReg(rt.rmode_)) {
// Use load -imm and daddu when loading -imm generates one instruction. // Use load -imm and daddu when loading -imm generates one instruction.
DCHECK(!rs.is(at)); DCHECK(rt.imm64_ != std::numeric_limits<int32_t>::min());
li(at, -rt.imm64_); li(at, Operand(-rt.imm64_));
daddu(rd, rs, at); Daddu(rd, rs, at);
} else { } else {
// li handles the relocation. // li handles the relocation.
DCHECK(!rs.is(at));
li(at, rt); li(at, rt);
dsubu(rd, rs, at); dsubu(rd, rs, at);
} }
...@@ -1710,6 +1713,15 @@ void MacroAssembler::li(Register dst, Handle<Object> value, LiFlags mode) { ...@@ -1710,6 +1713,15 @@ void MacroAssembler::li(Register dst, Handle<Object> value, LiFlags mode) {
li(dst, Operand(value), mode); li(dst, Operand(value), mode);
} }
static inline int InstrCountForLiLower32Bit(int64_t value) {
if (!is_int16(static_cast<int32_t>(value)) && (value & kUpper16MaskOf64) &&
(value & kImm16Mask)) {
return 2;
} else {
return 1;
}
}
void MacroAssembler::LiLower32BitHelper(Register rd, Operand j) { void MacroAssembler::LiLower32BitHelper(Register rd, Operand j) {
if (is_int16(static_cast<int32_t>(j.imm64_))) { if (is_int16(static_cast<int32_t>(j.imm64_))) {
daddiu(rd, zero_reg, (j.imm64_ & kImm16Mask)); daddiu(rd, zero_reg, (j.imm64_ & kImm16Mask));
...@@ -1734,180 +1746,293 @@ static inline int InstrCountForLoadReplicatedConst32(int64_t value) { ...@@ -1734,180 +1746,293 @@ static inline int InstrCountForLoadReplicatedConst32(int64_t value) {
return INT_MAX; return INT_MAX;
} }
void MacroAssembler::li(Register rd, Operand j, LiFlags mode) { int MacroAssembler::InstrCountForLi64Bit(int64_t value) {
if (is_int32(value)) {
return InstrCountForLiLower32Bit(value);
} else {
int bit31 = value >> 31 & 0x1;
if ((value & kUpper16MaskOf64) == 0 && is_int16(value >> 32) &&
kArchVariant == kMips64r6) {
return 2;
} else if ((value & (kHigher16MaskOf64 | kUpper16MaskOf64)) == 0 &&
kArchVariant == kMips64r6) {
return 2;
} else if ((value & kImm16Mask) == 0 && is_int16((value >> 32) + bit31) &&
kArchVariant == kMips64r6) {
return 2;
} else if ((value & kImm16Mask) == 0 &&
((value >> 31) & 0x1ffff) == ((0x20000 - bit31) & 0x1ffff) &&
kArchVariant == kMips64r6) {
return 2;
} else if (is_int16(static_cast<int32_t>(value)) &&
is_int16((value >> 32) + bit31) && kArchVariant == kMips64r6) {
return 2;
} else if (is_int16(static_cast<int32_t>(value)) &&
((value >> 31) & 0x1ffff) == ((0x20000 - bit31) & 0x1ffff) &&
kArchVariant == kMips64r6) {
return 2;
} else if (base::bits::IsPowerOfTwo64(value + 1)) {
return 2;
} else {
int shift_cnt = base::bits::CountTrailingZeros64(value);
int rep32_count = InstrCountForLoadReplicatedConst32(value);
int64_t tmp = value >> shift_cnt;
if (is_uint16(tmp)) {
return 2;
} else if (is_int16(tmp)) {
return 2;
} else if (rep32_count < 3) {
return 2;
} else if (is_int32(tmp)) {
return 3;
} else {
shift_cnt = 16 + base::bits::CountTrailingZeros64(value >> 16);
tmp = value >> shift_cnt;
if (is_uint16(tmp)) {
return 3;
} else if (is_int16(tmp)) {
return 3;
} else if (rep32_count < 4) {
return 3;
} else if (kArchVariant == kMips64r6) {
int64_t imm = value;
int count = InstrCountForLiLower32Bit(imm);
imm = (imm >> 32) + bit31;
if (imm & kImm16Mask) {
count++;
}
imm = (imm >> 16) + (imm >> 15 & 0x1);
if (imm & kImm16Mask) {
count++;
}
return count;
} else {
if (is_int48(value)) {
int64_t k = value >> 16;
int count = InstrCountForLiLower32Bit(k) + 1;
if (value & kImm16Mask) {
count++;
}
return count;
} else {
int64_t k = value >> 32;
int count = InstrCountForLiLower32Bit(k);
if ((value >> 16) & kImm16Mask) {
count += 3;
if (value & kImm16Mask) {
count++;
}
} else {
count++;
if (value & kImm16Mask) {
count++;
}
}
return count;
}
}
}
}
}
UNREACHABLE();
return INT_MAX;
}
void MacroAssembler::li_optimized(Register rd, Operand j, LiFlags mode) {
DCHECK(!j.is_reg()); DCHECK(!j.is_reg());
DCHECK(!MustUseReg(j.rmode_));
DCHECK(mode == OPTIMIZE_SIZE);
BlockTrampolinePoolScope block_trampoline_pool(this); BlockTrampolinePoolScope block_trampoline_pool(this);
if (!MustUseReg(j.rmode_) && mode == OPTIMIZE_SIZE) { // Normal load of an immediate value which does not need Relocation Info.
// Normal load of an immediate value which does not need Relocation Info. if (is_int32(j.imm64_)) {
if (is_int32(j.imm64_)) { LiLower32BitHelper(rd, j);
LiLower32BitHelper(rd, j); } else {
int bit31 = j.imm64_ >> 31 & 0x1;
if ((j.imm64_ & kUpper16MaskOf64) == 0 && is_int16(j.imm64_ >> 32) &&
kArchVariant == kMips64r6) {
// 64-bit value which consists of an unsigned 16-bit value in its
// least significant 32-bits, and a signed 16-bit value in its
// most significant 32-bits.
ori(rd, zero_reg, j.imm64_ & kImm16Mask);
dahi(rd, j.imm64_ >> 32 & kImm16Mask);
} else if ((j.imm64_ & (kHigher16MaskOf64 | kUpper16MaskOf64)) == 0 &&
kArchVariant == kMips64r6) {
// 64-bit value which consists of an unsigned 16-bit value in its
// least significant 48-bits, and a signed 16-bit value in its
// most significant 16-bits.
ori(rd, zero_reg, j.imm64_ & kImm16Mask);
dati(rd, j.imm64_ >> 48 & kImm16Mask);
} else if ((j.imm64_ & kImm16Mask) == 0 &&
is_int16((j.imm64_ >> 32) + bit31) &&
kArchVariant == kMips64r6) {
// 16 LSBs (Least Significant Bits) all set to zero.
// 48 MSBs (Most Significant Bits) hold a signed 32-bit value.
lui(rd, j.imm64_ >> kLuiShift & kImm16Mask);
dahi(rd, ((j.imm64_ >> 32) + bit31) & kImm16Mask);
} else if ((j.imm64_ & kImm16Mask) == 0 &&
((j.imm64_ >> 31) & 0x1ffff) == ((0x20000 - bit31) & 0x1ffff) &&
kArchVariant == kMips64r6) {
// 16 LSBs all set to zero.
// 48 MSBs hold a signed value which can't be represented by signed
// 32-bit number, and the middle 16 bits are all zero, or all one.
lui(rd, j.imm64_ >> kLuiShift & kImm16Mask);
dati(rd, ((j.imm64_ >> 48) + bit31) & kImm16Mask);
} else if (is_int16(static_cast<int32_t>(j.imm64_)) &&
is_int16((j.imm64_ >> 32) + bit31) &&
kArchVariant == kMips64r6) {
// 32 LSBs contain a signed 16-bit number.
// 32 MSBs contain a signed 16-bit number.
daddiu(rd, zero_reg, j.imm64_ & kImm16Mask);
dahi(rd, ((j.imm64_ >> 32) + bit31) & kImm16Mask);
} else if (is_int16(static_cast<int32_t>(j.imm64_)) &&
((j.imm64_ >> 31) & 0x1ffff) == ((0x20000 - bit31) & 0x1ffff) &&
kArchVariant == kMips64r6) {
// 48 LSBs contain an unsigned 16-bit number.
// 16 MSBs contain a signed 16-bit number.
daddiu(rd, zero_reg, j.imm64_ & kImm16Mask);
dati(rd, ((j.imm64_ >> 48) + bit31) & kImm16Mask);
} else if (base::bits::IsPowerOfTwo64(j.imm64_ + 1)) {
// 64-bit values which have their "n" MSBs set to one, and their
// "64-n" LSBs set to zero. "n" must meet the restrictions 0 < n < 64.
int shift_cnt = 64 - base::bits::CountTrailingZeros64(j.imm64_ + 1);
daddiu(rd, zero_reg, -1);
if (shift_cnt < 32) {
dsrl(rd, rd, shift_cnt);
} else {
dsrl32(rd, rd, shift_cnt & 31);
}
} else { } else {
int bit31 = j.imm64_ >> 31 & 0x1; int shift_cnt = base::bits::CountTrailingZeros64(j.imm64_);
int rep32_count = InstrCountForLoadReplicatedConst32(j.imm64_); int rep32_count = InstrCountForLoadReplicatedConst32(j.imm64_);
if ((j.imm64_ & kUpper16MaskOf64) == 0 && is_int16(j.imm64_ >> 32) && int64_t tmp = j.imm64_ >> shift_cnt;
kArchVariant == kMips64r6) { if (is_uint16(tmp)) {
// 64-bit value which consists of an unsigned 16-bit value in its // Value can be computed by loading a 16-bit unsigned value, and
// least significant 32-bits, and a signed 16-bit value in its // then shifting left.
// most significant 32-bits. ori(rd, zero_reg, tmp & kImm16Mask);
ori(rd, zero_reg, j.imm64_ & kImm16Mask); if (shift_cnt < 32) {
dahi(rd, j.imm64_ >> 32 & kImm16Mask); dsll(rd, rd, shift_cnt);
} else if ((j.imm64_ & (kHigher16MaskOf64 | kUpper16MaskOf64)) == 0 && } else {
kArchVariant == kMips64r6) { dsll32(rd, rd, shift_cnt & 31);
// 64-bit value which consists of an unsigned 16-bit value in its }
// least significant 48-bits, and a signed 16-bit value in its } else if (is_int16(tmp)) {
// most significant 16-bits. // Value can be computed by loading a 16-bit signed value, and
ori(rd, zero_reg, j.imm64_ & kImm16Mask); // then shifting left.
dati(rd, j.imm64_ >> 48 & kImm16Mask); daddiu(rd, zero_reg, static_cast<int32_t>(tmp));
} else if ((j.imm64_ & kImm16Mask) == 0 &&
is_int16((j.imm64_ >> 32) + bit31) &&
kArchVariant == kMips64r6) {
// 16 LSBs (Least Significant Bits) all set to zero.
// 48 MSBs (Most Significant Bits) hold a signed 32-bit value.
lui(rd, j.imm64_ >> kLuiShift & kImm16Mask);
dahi(rd, ((j.imm64_ >> 32) + bit31) & kImm16Mask);
} else if ((j.imm64_ & kImm16Mask) == 0 &&
((j.imm64_ >> 31) & 0x1ffff) ==
((0x20000 - bit31) & 0x1ffff) &&
kArchVariant == kMips64r6) {
// 16 LSBs all set to zero.
// 48 MSBs hold a signed value which can't be represented by signed
// 32-bit number, and the middle 16 bits are all zero, or all one.
lui(rd, j.imm64_ >> kLuiShift & kImm16Mask);
dati(rd, ((j.imm64_ >> 48) + bit31) & kImm16Mask);
} else if (is_int16(static_cast<int32_t>(j.imm64_)) &&
is_int16((j.imm64_ >> 32) + bit31) &&
kArchVariant == kMips64r6) {
// 32 LSBs contain a signed 16-bit number.
// 32 MSBs contain a signed 16-bit number.
daddiu(rd, zero_reg, j.imm64_ & kImm16Mask);
dahi(rd, ((j.imm64_ >> 32) + bit31) & kImm16Mask);
} else if (is_int16(static_cast<int32_t>(j.imm64_)) &&
((j.imm64_ >> 31) & 0x1ffff) ==
((0x20000 - bit31) & 0x1ffff) &&
kArchVariant == kMips64r6) {
// 48 LSBs contain an unsigned 16-bit number.
// 16 MSBs contain a signed 16-bit number.
daddiu(rd, zero_reg, j.imm64_ & kImm16Mask);
dati(rd, ((j.imm64_ >> 48) + bit31) & kImm16Mask);
} else if (base::bits::IsPowerOfTwo64(j.imm64_ + 1)) {
// 64-bit values which have their "n" MSBs set to one, and their
// "64-n" LSBs set to zero. "n" must meet the restrictions 0 < n < 64.
int shift_cnt = 64 - base::bits::CountTrailingZeros64(j.imm64_ + 1);
daddiu(rd, zero_reg, -1);
if (shift_cnt < 32) { if (shift_cnt < 32) {
dsrl(rd, rd, shift_cnt); dsll(rd, rd, shift_cnt);
} else { } else {
dsrl32(rd, rd, shift_cnt & 31); dsll32(rd, rd, shift_cnt & 31);
}
} else if (rep32_count < 3) {
// Value being loaded has 32 LSBs equal to the 32 MSBs, and the
// value loaded into the 32 LSBs can be loaded with a single
// MIPS instruction.
LiLower32BitHelper(rd, j);
Dins(rd, rd, 32, 32);
} else if (is_int32(tmp)) {
// Loads with 3 instructions.
// Value can be computed by loading a 32-bit signed value, and
// then shifting left.
lui(rd, tmp >> kLuiShift & kImm16Mask);
ori(rd, rd, tmp & kImm16Mask);
if (shift_cnt < 32) {
dsll(rd, rd, shift_cnt);
} else {
dsll32(rd, rd, shift_cnt & 31);
} }
} else { } else {
int shift_cnt = base::bits::CountTrailingZeros64(j.imm64_); shift_cnt = 16 + base::bits::CountTrailingZeros64(j.imm64_ >> 16);
int64_t tmp = j.imm64_ >> shift_cnt; tmp = j.imm64_ >> shift_cnt;
if (is_uint16(tmp)) { if (is_uint16(tmp)) {
// Value can be computed by loading a 16-bit unsigned value, and // Value can be computed by loading a 16-bit unsigned value,
// then shifting left. // shifting left, and "or"ing in another 16-bit unsigned value.
ori(rd, zero_reg, tmp & kImm16Mask); ori(rd, zero_reg, tmp & kImm16Mask);
if (shift_cnt < 32) { if (shift_cnt < 32) {
dsll(rd, rd, shift_cnt); dsll(rd, rd, shift_cnt);
} else { } else {
dsll32(rd, rd, shift_cnt & 31); dsll32(rd, rd, shift_cnt & 31);
} }
ori(rd, rd, j.imm64_ & kImm16Mask);
} else if (is_int16(tmp)) { } else if (is_int16(tmp)) {
// Value can be computed by loading a 16-bit signed value, and // Value can be computed by loading a 16-bit signed value,
// then shifting left. // shifting left, and "or"ing in a 16-bit unsigned value.
daddiu(rd, zero_reg, static_cast<int32_t>(tmp)); daddiu(rd, zero_reg, static_cast<int32_t>(tmp));
if (shift_cnt < 32) { if (shift_cnt < 32) {
dsll(rd, rd, shift_cnt); dsll(rd, rd, shift_cnt);
} else { } else {
dsll32(rd, rd, shift_cnt & 31); dsll32(rd, rd, shift_cnt & 31);
} }
} else if (rep32_count < 3) { ori(rd, rd, j.imm64_ & kImm16Mask);
} else if (rep32_count < 4) {
// Value being loaded has 32 LSBs equal to the 32 MSBs, and the // Value being loaded has 32 LSBs equal to the 32 MSBs, and the
// value loaded into the 32 LSBs can be loaded with a single // value in the 32 LSBs requires 2 MIPS instructions to load.
// MIPS instruction.
LiLower32BitHelper(rd, j); LiLower32BitHelper(rd, j);
Dins(rd, rd, 32, 32); Dins(rd, rd, 32, 32);
} else if (is_int32(tmp)) { } else if (kArchVariant == kMips64r6) {
// Loads with 3 instructions. // Loads with 3-4 instructions.
// Value can be computed by loading a 32-bit signed value, and // Catch-all case to get any other 64-bit values which aren't
// then shifting left. // handled by special cases above.
lui(rd, tmp >> kLuiShift & kImm16Mask); int64_t imm = j.imm64_;
ori(rd, rd, tmp & kImm16Mask); LiLower32BitHelper(rd, j);
if (shift_cnt < 32) { imm = (imm >> 32) + bit31;
dsll(rd, rd, shift_cnt); if (imm & kImm16Mask) {
} else { dahi(rd, imm & kImm16Mask);
dsll32(rd, rd, shift_cnt & 31); }
imm = (imm >> 16) + (imm >> 15 & 0x1);
if (imm & kImm16Mask) {
dati(rd, imm & kImm16Mask);
} }
} else { } else {
shift_cnt = 16 + base::bits::CountTrailingZeros64(j.imm64_ >> 16); if (is_int48(j.imm64_)) {
tmp = j.imm64_ >> shift_cnt; Operand k = Operand(j.imm64_ >> 16);
if (is_uint16(tmp)) { LiLower32BitHelper(rd, k);
// Value can be computed by loading a 16-bit unsigned value, dsll(rd, rd, 16);
// shifting left, and "or"ing in another 16-bit unsigned value. if (j.imm64_ & kImm16Mask) {
ori(rd, zero_reg, tmp & kImm16Mask); ori(rd, rd, j.imm64_ & kImm16Mask);
if (shift_cnt < 32) {
dsll(rd, rd, shift_cnt);
} else {
dsll32(rd, rd, shift_cnt & 31);
}
ori(rd, rd, j.imm64_ & kImm16Mask);
} else if (is_int16(tmp)) {
// Value can be computed by loading a 16-bit signed value,
// shifting left, and "or"ing in a 16-bit unsigned value.
daddiu(rd, zero_reg, static_cast<int32_t>(tmp));
if (shift_cnt < 32) {
dsll(rd, rd, shift_cnt);
} else {
dsll32(rd, rd, shift_cnt & 31);
}
ori(rd, rd, j.imm64_ & kImm16Mask);
} else if (rep32_count < 4) {
// Value being loaded has 32 LSBs equal to the 32 MSBs, and the
// value in the 32 LSBs requires 2 MIPS instructions to load.
LiLower32BitHelper(rd, j);
Dins(rd, rd, 32, 32);
} else if (kArchVariant == kMips64r6) {
// Loads with 3-4 instructions.
// Catch-all case to get any other 64-bit values which aren't
// handled by special cases above.
int64_t imm = j.imm64_;
LiLower32BitHelper(rd, j);
imm = (imm >> 32) + bit31;
if (imm & kImm16Mask) {
dahi(rd, imm & kImm16Mask);
}
imm = (imm >> 16) + (imm >> 15 & 0x1);
if (imm & kImm16Mask) {
dati(rd, imm & kImm16Mask);
} }
} else { } else {
if (is_int48(j.imm64_)) { Operand k = Operand(j.imm64_ >> 32);
Operand k = Operand(j.imm64_ >> 16); LiLower32BitHelper(rd, k);
LiLower32BitHelper(rd, k); if ((j.imm64_ >> 16) & kImm16Mask) {
dsll(rd, rd, 16);
ori(rd, rd, (j.imm64_ >> 16) & kImm16Mask);
dsll(rd, rd, 16); dsll(rd, rd, 16);
if (j.imm64_ & kImm16Mask) { if (j.imm64_ & kImm16Mask) {
ori(rd, rd, j.imm64_ & kImm16Mask); ori(rd, rd, j.imm64_ & kImm16Mask);
} }
} else { } else {
Operand k = Operand(j.imm64_ >> 32); dsll32(rd, rd, 0);
LiLower32BitHelper(rd, k); if (j.imm64_ & kImm16Mask) {
if ((j.imm64_ >> 16) & kImm16Mask) { ori(rd, rd, j.imm64_ & kImm16Mask);
dsll(rd, rd, 16);
ori(rd, rd, (j.imm64_ >> 16) & kImm16Mask);
dsll(rd, rd, 16);
if (j.imm64_ & kImm16Mask) {
ori(rd, rd, j.imm64_ & kImm16Mask);
}
} else {
dsll32(rd, rd, 0);
if (j.imm64_ & kImm16Mask) {
ori(rd, rd, j.imm64_ & kImm16Mask);
}
} }
} }
} }
} }
} }
} }
}
}
void MacroAssembler::li(Register rd, Operand j, LiFlags mode) {
DCHECK(!j.is_reg());
BlockTrampolinePoolScope block_trampoline_pool(this);
if (!MustUseReg(j.rmode_) && mode == OPTIMIZE_SIZE) {
int li_count = InstrCountForLi64Bit(j.imm64_);
int li_neg_count = InstrCountForLi64Bit(-j.imm64_);
int li_not_count = InstrCountForLi64Bit(~j.imm64_);
// Loading -MIN_INT64 could cause problems, but loading MIN_INT64 takes only
// two instructions so no need to check for this.
if (li_neg_count <= li_not_count && li_neg_count < li_count - 1) {
DCHECK(j.imm64_ != std::numeric_limits<int64_t>::min());
li_optimized(rd, Operand(-j.imm64_), mode);
Dsubu(rd, zero_reg, rd);
} else if (li_neg_count > li_not_count && li_not_count < li_count - 1) {
DCHECK(j.imm64_ != std::numeric_limits<int64_t>::min());
li_optimized(rd, Operand(~j.imm64_), mode);
nor(rd, rd, rd);
} else {
li_optimized(rd, j, mode);
}
} else if (MustUseReg(j.rmode_)) { } else if (MustUseReg(j.rmode_)) {
RecordRelocInfo(j.rmode_, j.imm64_); RecordRelocInfo(j.rmode_, j.imm64_);
lui(rd, (j.imm64_ >> 32) & kImm16Mask); lui(rd, (j.imm64_ >> 32) & kImm16Mask);
......
...@@ -740,6 +740,8 @@ class MacroAssembler: public Assembler { ...@@ -740,6 +740,8 @@ class MacroAssembler: public Assembler {
// Load int32 in the rd register. // Load int32 in the rd register.
void li(Register rd, Operand j, LiFlags mode = OPTIMIZE_SIZE); void li(Register rd, Operand j, LiFlags mode = OPTIMIZE_SIZE);
void li_optimized(Register rd, Operand j, LiFlags mode = OPTIMIZE_SIZE);
static int InstrCountForLi64Bit(int64_t value);
inline void LiLower32BitHelper(Register rd, Operand j); inline void LiLower32BitHelper(Register rd, Operand j);
inline void li(Register rd, int64_t j, LiFlags mode = OPTIMIZE_SIZE) { inline void li(Register rd, int64_t j, LiFlags mode = OPTIMIZE_SIZE) {
li(rd, Operand(j), mode); li(rd, Operand(j), mode);
......
...@@ -5071,6 +5071,9 @@ uint64_t run_li_macro(uint64_t imm, LiFlags mode, int32_t num_instr = 0) { ...@@ -5071,6 +5071,9 @@ uint64_t run_li_macro(uint64_t imm, LiFlags mode, int32_t num_instr = 0) {
assm.GetCode(isolate, &desc); assm.GetCode(isolate, &desc);
Handle<Code> code = isolate->factory()->NewCode( Handle<Code> code = isolate->factory()->NewCode(
desc, Code::ComputeFlags(Code::STUB), Handle<Code>()); desc, Code::ComputeFlags(Code::STUB), Handle<Code>());
#ifdef OBJECT_PRINT
code->Print(std::cout);
#endif
F2 f = FUNCTION_CAST<F2>(code->entry()); F2 f = FUNCTION_CAST<F2>(code->entry());
uint64_t res = reinterpret_cast<uint64_t>( uint64_t res = reinterpret_cast<uint64_t>(
...@@ -5123,8 +5126,10 @@ TEST(li_macro) { ...@@ -5123,8 +5126,10 @@ TEST(li_macro) {
{0x00000001fffffffe, 4, 2}, // max_uint32 << 1 {0x00000001fffffffe, 4, 2}, // max_uint32 << 1
// r2 - lui + ori + dsll + ori // r2 - lui + ori + dsll + ori
// r6 - daddiu + dahi // r6 - daddiu + dahi
{0x0000fffffffffffe, 5, 2}, // max_uint48 - 1 {0x0000fffffffffffe, 4, 2}, // max_uint48 - 1
// r2 - ori + dsll + ori + dsll + ori // r2 - daddiu + dsll32 + ori + dsubu
// Loading imm directly would require ori + dsll + ori + dsll + ori.
// Optimized by loading -imm and using dsubu to get imm.
// r6 - daddiu + dati // r6 - daddiu + dati
{0xffffffff00000000, 2, 2}, // max_uint32 << 32 {0xffffffff00000000, 2, 2}, // max_uint32 << 32
// r2 - daddiu + dsll32 // r2 - daddiu + dsll32
...@@ -5151,6 +5156,9 @@ TEST(li_macro) { ...@@ -5151,6 +5156,9 @@ TEST(li_macro) {
{0xffff8000ffff0000, 3, 2}, {0xffff8000ffff0000, 3, 2},
// r2 - lui + ori + dsll // r2 - lui + ori + dsll
// r6 - lui + dahi // r6 - lui + dahi
{0x0000ffffffff0000, 4, 2},
// r2 - ori + dsll + ori + dsll
// r6 - lui + dati
{0x1234ffff80000000, 3, 2}, {0x1234ffff80000000, 3, 2},
// r2 - lui + ori + dsll // r2 - lui + ori + dsll
// r6 - lui + dati // r6 - lui + dati
...@@ -5160,8 +5168,10 @@ TEST(li_macro) { ...@@ -5160,8 +5168,10 @@ TEST(li_macro) {
{0xffff8000ffff8000, 2, 2}, {0xffff8000ffff8000, 2, 2},
// r2 - daddiu + dinsu // r2 - daddiu + dinsu
// r6 - daddiu + dahi // r6 - daddiu + dahi
{0xffff0000ffff8000, 5, 3}, {0xffff0000ffff8000, 4, 3},
// r2 - lui + dsll + ori + dsll + ori // r2 - ori + dsll32 + ori + dsubu
// Loading imm directly would require lui + dsll + ori + dsll + ori.
// Optimized by loading -imm and using dsubu to get imm.
// r6 - daddiu + dahi + dati // r6 - daddiu + dahi + dati
{0x8000000080000000, 2, 2}, {0x8000000080000000, 2, 2},
// lui + dinsu // lui + dinsu
...@@ -5180,11 +5190,15 @@ TEST(li_macro) { ...@@ -5180,11 +5190,15 @@ TEST(li_macro) {
{0x1ffffabcd, 4, 2}, {0x1ffffabcd, 4, 2},
// r2 - lui + ori + dsll + ori // r2 - lui + ori + dsll + ori
// r6 - daddiu + dahi // r6 - daddiu + dahi
{0xffffffffabcd, 5, 2}, {0xffffffffabcd, 4, 2},
// r2 - ori + dsll + ori + dsll + ori // r2 - daddiu + dsll32 + ori + dsubu
// Loading imm directly would require ori + dsll + ori + dsll + ori.
// Optimized by loading -imm and using dsubu to get imm.
// r6 - daddiu + dati // r6 - daddiu + dati
{0x1ffffffffabcd, 6, 2}, {0x1ffffffffabcd, 4, 2},
// r2 - lui + ori + dsll + ori + dsll + ori // r2 - daddiu + dsll32 + ori + dsubu
// Loading imm directly would require lui + ori + dsll + ori + dsll + ori.
// Optimized by loading -imm and using dsubu to get imm.
// r6 - daddiu + dati // r6 - daddiu + dati
{0xffff7fff80010000, 5, 2}, {0xffff7fff80010000, 5, 2},
// r2 - lui + ori + dsll + ori + dsll // r2 - lui + ori + dsll + ori + dsll
...@@ -5209,6 +5223,12 @@ TEST(li_macro) { ...@@ -5209,6 +5223,12 @@ TEST(li_macro) {
// r2 - lui + ori + dsll + ori + dsll + ori instruction sequence, // r2 - lui + ori + dsll + ori + dsll + ori instruction sequence,
// r6 - lui + ori + dahi + dati. // r6 - lui + ori + dahi + dati.
// Load using full instruction sequence. // Load using full instruction sequence.
{0xffff0000ffffffff, 3, 3},
// r2 - ori + dsll32 + nor
// Loading imm directly would require lui + dsll + ori + dsll + ori.
// Optimized by loading ~imm and using nor to get imm. Loading -imm would
// require one instruction more.
// r6 - daddiu + dahi + dati
}; };
size_t nr_test_cases = sizeof(tc) / sizeof(TestCase_li); size_t nr_test_cases = sizeof(tc) / sizeof(TestCase_li);
...@@ -6199,6 +6219,9 @@ uint64_t run_Subu(uint64_t imm, int32_t num_instr) { ...@@ -6199,6 +6219,9 @@ uint64_t run_Subu(uint64_t imm, int32_t num_instr) {
assm.GetCode(isolate, &desc); assm.GetCode(isolate, &desc);
Handle<Code> code = isolate->factory()->NewCode( Handle<Code> code = isolate->factory()->NewCode(
desc, Code::ComputeFlags(Code::STUB), Handle<Code>()); desc, Code::ComputeFlags(Code::STUB), Handle<Code>());
#ifdef OBJECT_PRINT
code->Print(std::cout);
#endif
F2 f = FUNCTION_CAST<F2>(code->entry()); F2 f = FUNCTION_CAST<F2>(code->entry());
uint64_t res = reinterpret_cast<uint64_t>( uint64_t res = reinterpret_cast<uint64_t>(
...@@ -6278,6 +6301,9 @@ uint64_t run_Dsubu(uint64_t imm, int32_t num_instr) { ...@@ -6278,6 +6301,9 @@ uint64_t run_Dsubu(uint64_t imm, int32_t num_instr) {
assm.GetCode(isolate, &desc); assm.GetCode(isolate, &desc);
Handle<Code> code = isolate->factory()->NewCode( Handle<Code> code = isolate->factory()->NewCode(
desc, Code::ComputeFlags(Code::STUB), Handle<Code>()); desc, Code::ComputeFlags(Code::STUB), Handle<Code>());
#ifdef OBJECT_PRINT
code->Print(std::cout);
#endif
F2 f = FUNCTION_CAST<F2>(code->entry()); F2 f = FUNCTION_CAST<F2>(code->entry());
uint64_t res = reinterpret_cast<uint64_t>( uint64_t res = reinterpret_cast<uint64_t>(
...@@ -6341,6 +6367,12 @@ TEST(Dsubu) { ...@@ -6341,6 +6367,12 @@ TEST(Dsubu) {
// r6 - ori + dati + dsubu. // r6 - ori + dati + dsubu.
// The result of 0 - min_int64 equals max_int64 + 1, which wraps around to // The result of 0 - min_int64 equals max_int64 + 1, which wraps around to
// min_int64 again. // min_int64 again.
{0xffff0000ffffffff, 0x0000ffff00000001, 4},
// The test case above generates:
// r2 - ori + dsrl32 + ori + daddu instruction sequence,
// r6 - daddiu + dahi + dati + dsubu.
// For r2 loading imm would take more instructions than loading -imm so we
// can load -imm and add with daddu.
}; };
size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseDsubu); size_t nr_test_cases = sizeof(tc) / sizeof(TestCaseDsubu);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment