Commit f2da7ce0 authored by Lu Yahan, committed by V8 LUCI CQ

[riscv64] Delete s10 from scratch_list

s10 is a callee-saved register, yet it was included in the scratch list.
cctest can acquire scratch registers without going through the JSEntry function, which is what saves and restores callee-saved registers. As a result, cctest could crash when generated code used s10 as scratch.
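
For illustration, a hedged sketch of the failure mode (Caller, Compute, and
CallGeneratedCode are hypothetical names, not code from this CL). Under the
RISC-V ABI a callee must preserve s0-s11, so a C++ caller may keep a live
value in s10 across a call:

    // Sketch only: cctest-style direct invocation of generated code.
    int Caller() {
      int live = Compute();     // the compiler may keep `live` in s10, since
                                // the ABI guarantees callees preserve it
      CallGeneratedCode(code);  // hypothetical direct call: no JSEntry
                                // prologue/epilogue saves s10, so generated
                                // code that acquired s10 as scratch clobbers
                                // `live`
      return live;              // wrong value or crash
    }

Code entered through JSEntry is unaffected because JSEntry pushes and pops
all callee-saved registers around the generated frame.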

Bug: v8:12124
Change-Id: I62c3582ad490681d5efb24e8bfe0884006d42e66
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3103425
Reviewed-by: Ji Qiu <qiuji@iscas.ac.cn>
Commit-Queue: Ji Qiu <qiuji@iscas.ac.cn>
Auto-Submit: Yahan Lu <yahan@iscas.ac.cn>
Cr-Commit-Position: refs/heads/main@{#76375}
parent 67113424
@@ -429,7 +429,6 @@ void BaselineAssembler::Switch(Register reg, int case_value_base,
// Mostly copied from code-generator-riscv64.cc
ScratchRegisterScope scope(this);
- Register temp = scope.AcquireScratch();
Label table;
__ Branch(&fallthrough, AsMasmCondition(Condition::kUnsignedGreaterThanEqual),
reg, Operand(int64_t(num_labels)));
@@ -438,12 +437,12 @@ void BaselineAssembler::Switch(Register reg, int case_value_base,
DCHECK(is_int32(imm64));
int32_t Hi20 = (((int32_t)imm64 + 0x800) >> 12);
int32_t Lo12 = (int32_t)imm64 << 20 >> 20;
- __ auipc(temp, Hi20); // Read PC + Hi20 into t6
- __ addi(temp, temp, Lo12); // jump PC + Hi20 + Lo12
+ __ auipc(t6, Hi20); // Read PC + Hi20 into t6
+ __ addi(t6, t6, Lo12); // jump PC + Hi20 + Lo12
int entry_size_log2 = 3;
- __ CalcScaledAddress(temp, temp, reg, entry_size_log2);
- __ Jump(temp);
+ __ CalcScaledAddress(t6, t6, reg, entry_size_log2);
+ __ Jump(t6);
{
TurboAssembler::BlockTrampolinePoolScope(masm());
__ BlockTrampolinePoolFor(num_labels * kInstrSize * 2);
......
@@ -401,17 +401,15 @@ void Builtins::Generate_ResumeGeneratorTrampoline(MacroAssembler* masm) {
a3, FieldMemOperand(a4, JSFunction::kSharedFunctionInfoOffset));
__ Lhu(a3,
FieldMemOperand(a3, SharedFunctionInfo::kFormalParameterCountOffset));
- UseScratchRegisterScope temps(masm);
- Register scratch = temps.Acquire();
__ LoadTaggedPointerField(
- scratch,
+ t1,
FieldMemOperand(a1, JSGeneratorObject::kParametersAndRegistersOffset));
{
Label done_loop, loop;
__ bind(&loop);
__ Sub64(a3, a3, Operand(1));
__ Branch(&done_loop, lt, a3, Operand(zero_reg), Label::Distance::kNear);
- __ CalcScaledAddress(kScratchReg, scratch, a3, kTaggedSizeLog2);
+ __ CalcScaledAddress(kScratchReg, t1, a3, kTaggedSizeLog2);
__ LoadAnyTaggedField(
kScratchReg, FieldMemOperand(kScratchReg, FixedArray::kHeaderSize));
__ Push(kScratchReg);
......
@@ -207,7 +207,7 @@ void Assembler::AllocateAndInstallRequestedHeapObjects(Isolate* isolate) {
Assembler::Assembler(const AssemblerOptions& options,
std::unique_ptr<AssemblerBuffer> buffer)
: AssemblerBase(options, std::move(buffer)),
- scratch_register_list_(t3.bit() | t5.bit() | s10.bit()),
+ scratch_register_list_(t3.bit() | t5.bit()),
constpool_(this) {
reloc_info_writer.Reposition(buffer_start_ + buffer_->size(), pc_);
......
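The scratch_register_list_ edited above backs UseScratchRegisterScope, so
Acquire() can now only hand out t3 or t5, both caller-saved temporaries. A
minimal usage sketch (assumed typical pattern, not taken from this CL):

    UseScratchRegisterScope temps(this);  // `this` is the assembler
    Register scratch = temps.Acquire();   // returns t3 or t5; never the
                                          // callee-saved s10 after this CL
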
@@ -1057,7 +1057,10 @@ void TurboAssembler::CalcScaledAddress(Register rd, Register rt, Register rs,
// ------------Pseudo-instructions-------------
// Change endianness
- void TurboAssembler::ByteSwap(Register rd, Register rs, int operand_size) {
+ void TurboAssembler::ByteSwap(Register rd, Register rs, int operand_size,
+ Register scratch) {
+ DCHECK_NE(scratch, rs);
+ DCHECK_NE(scratch, rd);
DCHECK(operand_size == 4 || operand_size == 8);
if (operand_size == 4) {
// Uint32_t x1 = 0x00FF00FF;
@@ -1068,7 +1071,7 @@ void TurboAssembler::ByteSwap(Register rd, Register rs, int operand_size) {
DCHECK((rd != t6) && (rs != t6));
Register x0 = temps.Acquire();
Register x1 = temps.Acquire();
- Register x2 = temps.Acquire();
+ Register x2 = scratch;
li(x1, 0x00FF00FF);
slliw(x0, rs, 16);
srliw(rd, rs, 16);
@@ -1090,7 +1093,7 @@ void TurboAssembler::ByteSwap(Register rd, Register rs, int operand_size) {
DCHECK((rd != t6) && (rs != t6));
Register x0 = temps.Acquire();
Register x1 = temps.Acquire();
- Register x2 = temps.Acquire();
+ Register x2 = scratch;
li(x1, 0x0000FFFF0000FFFFl);
slli(x0, rs, 32);
srli(rd, rs, 32);
@@ -1193,20 +1196,19 @@ void TurboAssembler::UnalignedLoadHelper(Register rd, const MemOperand& rs) {
}
template <int NBYTES>
- void TurboAssembler::UnalignedFLoadHelper(FPURegister frd,
- const MemOperand& rs) {
+ void TurboAssembler::UnalignedFLoadHelper(FPURegister frd, const MemOperand& rs,
+ Register scratch_base) {
DCHECK(NBYTES == 4 || NBYTES == 8);
+ DCHECK_NE(scratch_base, rs.rm());
BlockTrampolinePoolScope block_trampoline_pool(this);
MemOperand source = rs;
- UseScratchRegisterScope temps(this);
- Register scratch_base = temps.Acquire();
if (NeedAdjustBaseAndOffset(rs, OffsetAccessType::TWO_ACCESSES, NBYTES - 1)) {
// Adjust offset for two accesses and check if offset + 3 fits into int12.
- DCHECK(scratch_base != rs.rm());
AdjustBaseAndOffset(&source, scratch_base, OffsetAccessType::TWO_ACCESSES,
NBYTES - 1);
}
+ UseScratchRegisterScope temps(this);
Register scratch_other = temps.Acquire();
Register scratch = temps.Acquire();
DCHECK(scratch != rs.rm() && scratch_other != scratch &&
@@ -1258,10 +1260,10 @@ void TurboAssembler::UnalignedStoreHelper(Register rd, const MemOperand& rs,
template <int NBYTES>
void TurboAssembler::UnalignedFStoreHelper(FPURegister frd,
- const MemOperand& rs) {
+ const MemOperand& rs,
+ Register scratch) {
DCHECK(NBYTES == 8 || NBYTES == 4);
- UseScratchRegisterScope temps(this);
- Register scratch = temps.Acquire();
+ DCHECK_NE(scratch, rs.rm());
if (NBYTES == 4) {
fmv_x_w(scratch, frd);
} else {
@@ -1354,20 +1356,28 @@ void MacroAssembler::StoreWordPair(Register rd, const MemOperand& rs) {
Sw(scratch, MemOperand(rs.rm(), rs.offset() + kSystemPointerSize / 2));
}
- void TurboAssembler::ULoadFloat(FPURegister fd, const MemOperand& rs) {
- UnalignedFLoadHelper<4>(fd, rs);
+ void TurboAssembler::ULoadFloat(FPURegister fd, const MemOperand& rs,
+ Register scratch) {
+ DCHECK_NE(scratch, rs.rm());
+ UnalignedFLoadHelper<4>(fd, rs, scratch);
}
- void TurboAssembler::UStoreFloat(FPURegister fd, const MemOperand& rs) {
- UnalignedFStoreHelper<4>(fd, rs);
+ void TurboAssembler::UStoreFloat(FPURegister fd, const MemOperand& rs,
+ Register scratch) {
+ DCHECK_NE(scratch, rs.rm());
+ UnalignedFStoreHelper<4>(fd, rs, scratch);
}
- void TurboAssembler::ULoadDouble(FPURegister fd, const MemOperand& rs) {
- UnalignedFLoadHelper<8>(fd, rs);
+ void TurboAssembler::ULoadDouble(FPURegister fd, const MemOperand& rs,
+ Register scratch) {
+ DCHECK_NE(scratch, rs.rm());
+ UnalignedFLoadHelper<8>(fd, rs, scratch);
}
- void TurboAssembler::UStoreDouble(FPURegister fd, const MemOperand& rs) {
- UnalignedFStoreHelper<8>(fd, rs);
+ void TurboAssembler::UStoreDouble(FPURegister fd, const MemOperand& rs,
+ Register scratch) {
+ DCHECK_NE(scratch, rs.rm());
+ UnalignedFStoreHelper<8>(fd, rs, scratch);
}
void TurboAssembler::Lb(Register rd, const MemOperand& rs) {
@@ -2620,7 +2630,9 @@ void TurboAssembler::Ctz64(Register rd, Register rs) {
}
}
- void TurboAssembler::Popcnt32(Register rd, Register rs) {
+ void TurboAssembler::Popcnt32(Register rd, Register rs, Register scratch) {
+ DCHECK_NE(scratch, rs);
+ DCHECK_NE(scratch, rd);
// https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
//
// A generalization of the best bit counting method to integers of
@@ -2644,7 +2656,6 @@ void TurboAssembler::Popcnt32(Register rd, Register rs) {
uint32_t shift = 24;
UseScratchRegisterScope temps(this);
BlockTrampolinePoolScope block_trampoline_pool(this);
- Register scratch = temps.Acquire();
Register scratch2 = temps.Acquire();
Register value = temps.Acquire();
DCHECK((rd != value) && (rs != value));
@@ -2669,7 +2680,9 @@ void TurboAssembler::Popcnt32(Register rd, Register rs) {
Srl32(rd, rd, shift);
}
- void TurboAssembler::Popcnt64(Register rd, Register rs) {
+ void TurboAssembler::Popcnt64(Register rd, Register rs, Register scratch) {
+ DCHECK_NE(scratch, rs);
+ DCHECK_NE(scratch, rd);
// uint64_t B0 = 0x5555555555555555l; // (T)~(T)0/3
// uint64_t B1 = 0x3333333333333333l; // (T)~(T)0/15*3
// uint64_t B2 = 0x0F0F0F0F0F0F0F0Fl; // (T)~(T)0/255*15
@@ -2679,7 +2692,6 @@ void TurboAssembler::Popcnt64(Register rd, Register rs) {
uint64_t shift = 24;
UseScratchRegisterScope temps(this);
BlockTrampolinePoolScope block_trampoline_pool(this);
- Register scratch = temps.Acquire();
Register scratch2 = temps.Acquire();
Register value = temps.Acquire();
DCHECK((rd != value) && (rs != value));
@@ -3549,9 +3561,9 @@ void MacroAssembler::PushStackHandler() {
// Link the current handler as the next handler.
UseScratchRegisterScope temps(this);
Register handler_address = temps.Acquire();
- Register handler = temps.Acquire();
li(handler_address,
ExternalReference::Create(IsolateAddressId::kHandlerAddress, isolate()));
+ Register handler = temps.Acquire();
Ld(handler, MemOperand(handler_address));
push(handler);
@@ -3813,18 +3825,19 @@ void MacroAssembler::InvokeFunctionWithNewTarget(
// Contract with called JS functions requires that function is passed in a1.
DCHECK_EQ(function, a1);
Register expected_parameter_count = a2;
- UseScratchRegisterScope temps(this);
- Register temp_reg = temps.Acquire();
- LoadTaggedPointerField(
- temp_reg,
- FieldMemOperand(function, JSFunction::kSharedFunctionInfoOffset));
- LoadTaggedPointerField(cp,
- FieldMemOperand(function, JSFunction::kContextOffset));
- // The argument count is stored as uint16_t
- Lhu(expected_parameter_count,
- FieldMemOperand(temp_reg,
- SharedFunctionInfo::kFormalParameterCountOffset));
+ {
+ UseScratchRegisterScope temps(this);
+ Register temp_reg = temps.Acquire();
+ LoadTaggedPointerField(
+ temp_reg,
+ FieldMemOperand(function, JSFunction::kSharedFunctionInfoOffset));
+ LoadTaggedPointerField(
+ cp, FieldMemOperand(function, JSFunction::kContextOffset));
+ // The argument count is stored as uint16_t
+ Lhu(expected_parameter_count,
+ FieldMemOperand(temp_reg,
+ SharedFunctionInfo::kFormalParameterCountOffset));
+ }
InvokeFunctionCode(function, new_target, expected_parameter_count,
actual_parameter_count, type);
}
......
@@ -571,8 +571,8 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void Clz64(Register rd, Register rs);
void Ctz32(Register rd, Register rs);
void Ctz64(Register rd, Register rs);
- void Popcnt32(Register rd, Register rs);
- void Popcnt64(Register rd, Register rs);
+ void Popcnt32(Register rd, Register rs, Register scratch);
+ void Popcnt64(Register rd, Register rs, Register scratch);
// Bit field starts at bit pos and extending for size bits is extracted from
// rs and stored zero/sign-extended and right-justified in rt
@@ -591,7 +591,8 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void Neg_d(FPURegister fd, FPURegister fs);
// Change endianness
- void ByteSwap(Register dest, Register src, int operand_size);
+ void ByteSwap(Register dest, Register src, int operand_size,
+ Register scratch);
void Clear_if_nan_d(Register rd, FPURegister fs);
void Clear_if_nan_s(Register rd, FPURegister fs);
@@ -606,9 +607,11 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
Register scratch_other = no_reg);
template <int NBYTES>
- void UnalignedFLoadHelper(FPURegister frd, const MemOperand& rs);
+ void UnalignedFLoadHelper(FPURegister frd, const MemOperand& rs,
+ Register scratch);
template <int NBYTES>
- void UnalignedFStoreHelper(FPURegister frd, const MemOperand& rs);
+ void UnalignedFStoreHelper(FPURegister frd, const MemOperand& rs,
+ Register scratch);
template <typename Reg_T, typename Func>
void AlignedLoadHelper(Reg_T target, const MemOperand& rs, Func generator);
@@ -632,11 +635,11 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void Uld(Register rd, const MemOperand& rs);
void Usd(Register rd, const MemOperand& rs);
- void ULoadFloat(FPURegister fd, const MemOperand& rs);
- void UStoreFloat(FPURegister fd, const MemOperand& rs);
+ void ULoadFloat(FPURegister fd, const MemOperand& rs, Register scratch);
+ void UStoreFloat(FPURegister fd, const MemOperand& rs, Register scratch);
- void ULoadDouble(FPURegister fd, const MemOperand& rs);
- void UStoreDouble(FPURegister fd, const MemOperand& rs);
+ void ULoadDouble(FPURegister fd, const MemOperand& rs, Register scratch);
+ void UStoreDouble(FPURegister fd, const MemOperand& rs, Register scratch);
void Lb(Register rd, const MemOperand& rs);
void Lbu(Register rd, const MemOperand& rs);
......
@@ -1054,12 +1054,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kRiscvPopcnt32: {
Register src = i.InputRegister(0);
Register dst = i.OutputRegister();
- __ Popcnt32(dst, src);
+ __ Popcnt32(dst, src, kScratchReg);
} break;
case kRiscvPopcnt64: {
Register src = i.InputRegister(0);
Register dst = i.OutputRegister();
- __ Popcnt64(dst, src);
+ __ Popcnt64(dst, src, kScratchReg);
} break;
case kRiscvShl32:
if (instr->InputAt(1)->IsRegister()) {
@@ -1573,7 +1573,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kRiscvULoadFloat: {
- __ ULoadFloat(i.OutputSingleRegister(), i.MemoryOperand());
+ __ ULoadFloat(i.OutputSingleRegister(), i.MemoryOperand(), kScratchReg);
break;
}
case kRiscvStoreFloat: {
@@ -1593,14 +1593,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
if (ft == kDoubleRegZero && !__ IsSingleZeroRegSet()) {
__ LoadFPRImmediate(kDoubleRegZero, 0.0f);
}
- __ UStoreFloat(ft, operand);
+ __ UStoreFloat(ft, operand, kScratchReg);
break;
}
case kRiscvLoadDouble:
__ LoadDouble(i.OutputDoubleRegister(), i.MemoryOperand());
break;
case kRiscvULoadDouble:
- __ ULoadDouble(i.OutputDoubleRegister(), i.MemoryOperand());
+ __ ULoadDouble(i.OutputDoubleRegister(), i.MemoryOperand(), kScratchReg);
break;
case kRiscvStoreDouble: {
FPURegister ft = i.InputOrZeroDoubleRegister(2);
@@ -1615,7 +1615,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
if (ft == kDoubleRegZero && !__ IsDoubleZeroRegSet()) {
__ LoadFPRImmediate(kDoubleRegZero, 0.0);
}
- __ UStoreDouble(ft, i.MemoryOperand());
+ __ UStoreDouble(ft, i.MemoryOperand(), kScratchReg);
break;
}
case kRiscvSync: {
@@ -1671,11 +1671,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kRiscvByteSwap64: {
- __ ByteSwap(i.OutputRegister(0), i.InputRegister(0), 8);
+ __ ByteSwap(i.OutputRegister(0), i.InputRegister(0), 8, kScratchReg);
break;
}
case kRiscvByteSwap32: {
- __ ByteSwap(i.OutputRegister(0), i.InputRegister(0), 4);
+ __ ByteSwap(i.OutputRegister(0), i.InputRegister(0), 4, kScratchReg);
break;
}
case kAtomicLoadInt8:
......
@@ -79,16 +79,16 @@ inline MemOperand GetMemOp(LiftoffAssembler* assm, Register addr,
if (is_uint31(offset_imm)) {
int32_t offset_imm32 = static_cast<int32_t>(offset_imm);
if (offset == no_reg) return MemOperand(addr, offset_imm32);
- assm->Add64(kScratchReg, addr, offset);
- return MemOperand(kScratchReg, offset_imm32);
+ assm->Add64(kScratchReg2, addr, offset);
+ return MemOperand(kScratchReg2, offset_imm32);
}
// Offset immediate does not fit in 31 bits.
- assm->li(kScratchReg, offset_imm);
- assm->Add64(kScratchReg, kScratchReg, addr);
+ assm->li(kScratchReg2, offset_imm);
+ assm->Add64(kScratchReg2, kScratchReg2, addr);
if (offset != no_reg) {
- assm->Add64(kScratchReg, kScratchReg, offset);
+ assm->Add64(kScratchReg2, kScratchReg2, offset);
}
- return MemOperand(kScratchReg, 0);
+ return MemOperand(kScratchReg2, 0);
}
inline void Load(LiftoffAssembler* assm, LiftoffRegister dst, MemOperand src,
@@ -128,10 +128,10 @@ inline void Store(LiftoffAssembler* assm, Register base, int32_t offset,
assm->Usd(src.gp(), dst);
break;
case kF32:
- assm->UStoreFloat(src.fp(), dst);
+ assm->UStoreFloat(src.fp(), dst, kScratchReg);
break;
case kF64:
- assm->UStoreDouble(src.fp(), dst);
+ assm->UStoreDouble(src.fp(), dst, kScratchReg);
break;
default:
UNREACHABLE();
@@ -552,10 +552,10 @@ void LiftoffAssembler::Load(LiftoffRegister dst, Register src_addr,
TurboAssembler::Uld(dst.gp(), src_op);
break;
case LoadType::kF32Load:
- TurboAssembler::ULoadFloat(dst.fp(), src_op);
+ TurboAssembler::ULoadFloat(dst.fp(), src_op, kScratchReg);
break;
case LoadType::kF64Load:
- TurboAssembler::ULoadDouble(dst.fp(), src_op);
+ TurboAssembler::ULoadDouble(dst.fp(), src_op, kScratchReg);
break;
default:
UNREACHABLE();
@@ -607,10 +607,10 @@ void LiftoffAssembler::Store(Register dst_addr, Register offset_reg,
TurboAssembler::Usd(src.gp(), dst_op);
break;
case StoreType::kF32Store:
- TurboAssembler::UStoreFloat(src.fp(), dst_op);
+ TurboAssembler::UStoreFloat(src.fp(), dst_op, kScratchReg);
break;
case StoreType::kF64Store:
- TurboAssembler::UStoreDouble(src.fp(), dst_op);
+ TurboAssembler::UStoreDouble(src.fp(), dst_op, kScratchReg);
break;
default:
UNREACHABLE();
@@ -1072,7 +1072,7 @@ void LiftoffAssembler::emit_i64_ctz(LiftoffRegister dst, LiftoffRegister src) {
bool LiftoffAssembler::emit_i64_popcnt(LiftoffRegister dst,
LiftoffRegister src) {
- TurboAssembler::Popcnt64(dst.gp(), src.gp());
+ TurboAssembler::Popcnt64(dst.gp(), src.gp(), kScratchReg);
return true;
}
@@ -1154,7 +1154,7 @@ void LiftoffAssembler::emit_i32_ctz(Register dst, Register src) {
}
bool LiftoffAssembler::emit_i32_popcnt(Register dst, Register src) {
- TurboAssembler::Popcnt32(dst, src);
+ TurboAssembler::Popcnt32(dst, src, kScratchReg);
return true;
}
......
@@ -937,8 +937,8 @@ TEST(Uld) {
}
auto fn = [](MacroAssembler& masm, int32_t in_offset, int32_t out_offset) {
- __ ULoadFloat(fa0, MemOperand(a0, in_offset));
- __ UStoreFloat(fa0, MemOperand(a0, out_offset));
+ __ ULoadFloat(fa0, MemOperand(a0, in_offset), t0);
+ __ UStoreFloat(fa0, MemOperand(a0, out_offset), t0);
};
TEST(ULoadFloat) {
@@ -971,8 +971,8 @@ TEST(ULoadDouble) {
char* buffer_middle = memory_buffer + (kBufferSize / 2);
auto fn = [](MacroAssembler& masm, int32_t in_offset, int32_t out_offset) {
- __ ULoadDouble(fa0, MemOperand(a0, in_offset));
- __ UStoreDouble(fa0, MemOperand(a0, out_offset));
+ __ ULoadDouble(fa0, MemOperand(a0, in_offset), t0);
+ __ UStoreDouble(fa0, MemOperand(a0, out_offset), t0);
};
FOR_FLOAT64_INPUTS(i) {
@@ -1376,9 +1376,9 @@ TEST(Ctz64) {
TEST(ByteSwap) {
CcTest::InitializeVM();
- auto fn0 = [](MacroAssembler& masm) { __ ByteSwap(a0, a0, 4); };
+ auto fn0 = [](MacroAssembler& masm) { __ ByteSwap(a0, a0, 4, t0); };
CHECK_EQ((int32_t)0x89ab'cdef, GenAndRunTest<int32_t>(0xefcd'ab89, fn0));
- auto fn1 = [](MacroAssembler& masm) { __ ByteSwap(a0, a0, 8); };
+ auto fn1 = [](MacroAssembler& masm) { __ ByteSwap(a0, a0, 8, t0); };
CHECK_EQ((int64_t)0x0123'4567'89ab'cdef,
GenAndRunTest<int64_t>(0xefcd'ab89'6745'2301, fn1));
}
@@ -1411,17 +1411,17 @@ TEST(Dpopcnt) {
for (int i = 0; i < 7; i++) {
// Load constant.
__ li(a3, Operand(in[i]));
- __ Popcnt64(a5, a3);
+ __ Popcnt64(a5, a3, t0);
__ Sd(a5, MemOperand(a4));
__ Add64(a4, a4, Operand(kSystemPointerSize));
}
__ li(a3, Operand(in[7]));
- __ Popcnt64(a5, a3);
+ __ Popcnt64(a5, a3, t0);
__ Sd(a5, MemOperand(a4));
__ Add64(a4, a4, Operand(kSystemPointerSize));
__ li(a3, Operand(in[8]));
- __ Popcnt64(a5, a3);
+ __ Popcnt64(a5, a3, t0);
__ Sd(a5, MemOperand(a4));
__ Add64(a4, a4, Operand(kSystemPointerSize));
};
@@ -1462,18 +1462,18 @@ TEST(Popcnt) {
for (int i = 0; i < 6; i++) {
// Load constant.
__ li(a3, Operand(in[i]));
- __ Popcnt32(a5, a3);
+ __ Popcnt32(a5, a3, t0);
__ Sd(a5, MemOperand(a4));
__ Add64(a4, a4, Operand(kSystemPointerSize));
}
__ li(a3, Operand(in[6]));
- __ Popcnt64(a5, a3);
+ __ Popcnt64(a5, a3, t0);
__ Sd(a5, MemOperand(a4));
__ Add64(a4, a4, Operand(kSystemPointerSize));
__ li(a3, Operand(in[7]));
- __ Popcnt64(a5, a3);
+ __ Popcnt64(a5, a3, t0);
__ Sd(a5, MemOperand(a4));
__ Add64(a4, a4, Operand(kSystemPointerSize));
};
......
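As the test updates above show, callers now pass an explicit caller-saved
scratch register. A usage sketch adapted from the diffs (t0 as the scratch
choice, zero offset assumed):

    __ Popcnt64(a5, a3, t0);                    // scratch must differ from rd/rs
    __ ByteSwap(a0, a0, 8, t0);
    __ ULoadFloat(fa0, MemOperand(a0, 0), t0);  // scratch must differ from base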