Commit 3b34eeaf authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][arm] Remove some usages of TempSimd128Register

We can use UseScratchRegisterScope instead of requiring a
TempSimd128Register in the instruction-selector. This reduces register
pressure a little bit (when combined with unique register constraints).

Drive-by cleanup of some variable names in code-generator, s/tmp2/tmp/
when there is only 1 tmp.

Bug: v8:11384
Change-Id: I00a365624cbabeaeeaf78d1d08f0eb284c7e44ac
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2705523
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72851}
parent 0fecb303
...@@ -515,8 +515,9 @@ void ComputePoisonedAddressForLoad(CodeGenerator* codegen, ...@@ -515,8 +515,9 @@ void ComputePoisonedAddressForLoad(CodeGenerator* codegen,
if (instr->InputAt(1)->IsImmediate()) { \ if (instr->InputAt(1)->IsImmediate()) { \
__ asm_imm(dt, dst, src, i.InputInt##width(1)); \ __ asm_imm(dt, dst, src, i.InputInt##width(1)); \
} else { \ } else { \
QwNeonRegister tmp = i.TempSimd128Register(0); \ UseScratchRegisterScope temps(tasm()); \
Register shift = i.TempRegister(1); \ Simd128Register tmp = temps.AcquireQ(); \
Register shift = temps.Acquire(); \
constexpr int mask = (1 << width) - 1; \ constexpr int mask = (1 << width) - 1; \
__ and_(shift, i.InputRegister(1), Operand(mask)); \ __ and_(shift, i.InputRegister(1), Operand(mask)); \
__ vdup(sz, tmp, shift); \ __ vdup(sz, tmp, shift); \
...@@ -534,8 +535,9 @@ void ComputePoisonedAddressForLoad(CodeGenerator* codegen, ...@@ -534,8 +535,9 @@ void ComputePoisonedAddressForLoad(CodeGenerator* codegen,
if (instr->InputAt(1)->IsImmediate()) { \ if (instr->InputAt(1)->IsImmediate()) { \
__ asm_imm(dt, dst, src, i.InputInt##width(1)); \ __ asm_imm(dt, dst, src, i.InputInt##width(1)); \
} else { \ } else { \
QwNeonRegister tmp = i.TempSimd128Register(0); \ UseScratchRegisterScope temps(tasm()); \
Register shift = i.TempRegister(1); \ Simd128Register tmp = temps.AcquireQ(); \
Register shift = temps.Acquire(); \
constexpr int mask = (1 << width) - 1; \ constexpr int mask = (1 << width) - 1; \
__ and_(shift, i.InputRegister(1), Operand(mask)); \ __ and_(shift, i.InputRegister(1), Operand(mask)); \
__ vdup(sz, tmp, shift); \ __ vdup(sz, tmp, shift); \
...@@ -2111,11 +2113,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2111,11 +2113,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; break;
} }
case kArmI64x2Mul: { case kArmI64x2Mul: {
UseScratchRegisterScope temps(tasm());
QwNeonRegister dst = i.OutputSimd128Register(); QwNeonRegister dst = i.OutputSimd128Register();
QwNeonRegister left = i.InputSimd128Register(0); QwNeonRegister left = i.InputSimd128Register(0);
QwNeonRegister right = i.InputSimd128Register(1); QwNeonRegister right = i.InputSimd128Register(1);
QwNeonRegister tmp1 = i.TempSimd128Register(0); QwNeonRegister tmp1 = i.TempSimd128Register(0);
QwNeonRegister tmp2 = i.TempSimd128Register(1); QwNeonRegister tmp2 = temps.AcquireQ();
// This algorithm uses vector operations to perform 64-bit integer // This algorithm uses vector operations to perform 64-bit integer
// multiplication by splitting it into a high and low 32-bit integers. // multiplication by splitting it into a high and low 32-bit integers.
...@@ -2543,19 +2546,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2543,19 +2546,20 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
} }
case kArmI32x4BitMask: { case kArmI32x4BitMask: {
Register dst = i.OutputRegister(); Register dst = i.OutputRegister();
UseScratchRegisterScope temps(tasm());
Simd128Register src = i.InputSimd128Register(0); Simd128Register src = i.InputSimd128Register(0);
Simd128Register tmp2 = i.TempSimd128Register(0); Simd128Register tmp = temps.AcquireQ();
Simd128Register mask = i.TempSimd128Register(1); Simd128Register mask = i.TempSimd128Register(0);
__ vshr(NeonS32, tmp2, src, 31); __ vshr(NeonS32, tmp, src, 31);
// Set i-th bit of each lane i. When AND with tmp, the lanes that // Set i-th bit of each lane i. When AND with tmp, the lanes that
// are signed will have i-th bit set, unsigned will be 0. // are signed will have i-th bit set, unsigned will be 0.
__ vmov(mask.low(), Double(uint64_t{0x0000'0002'0000'0001})); __ vmov(mask.low(), Double(uint64_t{0x0000'0002'0000'0001}));
__ vmov(mask.high(), Double(uint64_t{0x0000'0008'0000'0004})); __ vmov(mask.high(), Double(uint64_t{0x0000'0008'0000'0004}));
__ vand(tmp2, mask, tmp2); __ vand(tmp, mask, tmp);
__ vpadd(Neon32, tmp2.low(), tmp2.low(), tmp2.high()); __ vpadd(Neon32, tmp.low(), tmp.low(), tmp.high());
__ vpadd(Neon32, tmp2.low(), tmp2.low(), kDoubleRegZero); __ vpadd(Neon32, tmp.low(), tmp.low(), kDoubleRegZero);
__ VmovLow(dst, tmp2.low()); __ VmovLow(dst, tmp.low());
break; break;
} }
case kArmI32x4DotI16x8S: { case kArmI32x4DotI16x8S: {
...@@ -2748,21 +2752,22 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2748,21 +2752,22 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; break;
} }
case kArmI16x8BitMask: { case kArmI16x8BitMask: {
UseScratchRegisterScope temps(tasm());
Register dst = i.OutputRegister(); Register dst = i.OutputRegister();
Simd128Register src = i.InputSimd128Register(0); Simd128Register src = i.InputSimd128Register(0);
Simd128Register tmp2 = i.TempSimd128Register(0); Simd128Register tmp = temps.AcquireQ();
Simd128Register mask = i.TempSimd128Register(1); Simd128Register mask = i.TempSimd128Register(0);
__ vshr(NeonS16, tmp2, src, 15); __ vshr(NeonS16, tmp, src, 15);
// Set i-th bit of each lane i. When AND with tmp, the lanes that // Set i-th bit of each lane i. When AND with tmp, the lanes that
// are signed will have i-th bit set, unsigned will be 0. // are signed will have i-th bit set, unsigned will be 0.
__ vmov(mask.low(), Double(uint64_t{0x0008'0004'0002'0001})); __ vmov(mask.low(), Double(uint64_t{0x0008'0004'0002'0001}));
__ vmov(mask.high(), Double(uint64_t{0x0080'0040'0020'0010})); __ vmov(mask.high(), Double(uint64_t{0x0080'0040'0020'0010}));
__ vand(tmp2, mask, tmp2); __ vand(tmp, mask, tmp);
__ vpadd(Neon16, tmp2.low(), tmp2.low(), tmp2.high()); __ vpadd(Neon16, tmp.low(), tmp.low(), tmp.high());
__ vpadd(Neon16, tmp2.low(), tmp2.low(), tmp2.low()); __ vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
__ vpadd(Neon16, tmp2.low(), tmp2.low(), tmp2.low()); __ vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
__ vmov(NeonU16, dst, tmp2.low(), 0); __ vmov(NeonU16, dst, tmp.low(), 0);
break; break;
} }
case kArmI16x8Q15MulRSatS: { case kArmI16x8Q15MulRSatS: {
...@@ -2907,23 +2912,24 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2907,23 +2912,24 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; break;
} }
case kArmI8x16BitMask: { case kArmI8x16BitMask: {
UseScratchRegisterScope temps(tasm());
Register dst = i.OutputRegister(); Register dst = i.OutputRegister();
Simd128Register src = i.InputSimd128Register(0); Simd128Register src = i.InputSimd128Register(0);
Simd128Register tmp2 = i.TempSimd128Register(0); Simd128Register tmp = temps.AcquireQ();
Simd128Register mask = i.TempSimd128Register(1); Simd128Register mask = i.TempSimd128Register(0);
__ vshr(NeonS8, tmp2, src, 7); __ vshr(NeonS8, tmp, src, 7);
// Set i-th bit of each lane i. When AND with tmp, the lanes that // Set i-th bit of each lane i. When AND with tmp, the lanes that
// are signed will have i-th bit set, unsigned will be 0. // are signed will have i-th bit set, unsigned will be 0.
__ vmov(mask.low(), Double(uint64_t{0x8040'2010'0804'0201})); __ vmov(mask.low(), Double(uint64_t{0x8040'2010'0804'0201}));
__ vmov(mask.high(), Double(uint64_t{0x8040'2010'0804'0201})); __ vmov(mask.high(), Double(uint64_t{0x8040'2010'0804'0201}));
__ vand(tmp2, mask, tmp2); __ vand(tmp, mask, tmp);
__ vext(mask, tmp2, tmp2, 8); __ vext(mask, tmp, tmp, 8);
__ vzip(Neon8, mask, tmp2); __ vzip(Neon8, mask, tmp);
__ vpadd(Neon16, tmp2.low(), tmp2.low(), tmp2.high()); __ vpadd(Neon16, tmp.low(), tmp.low(), tmp.high());
__ vpadd(Neon16, tmp2.low(), tmp2.low(), tmp2.low()); __ vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
__ vpadd(Neon16, tmp2.low(), tmp2.low(), tmp2.low()); __ vpadd(Neon16, tmp.low(), tmp.low(), tmp.low());
__ vmov(NeonU16, dst, tmp2.low(), 0); __ vmov(NeonU16, dst, tmp.low(), 0);
break; break;
} }
case kArmSignSelect: { case kArmSignSelect: {
......
...@@ -108,10 +108,7 @@ void VisitSimdShiftRRR(InstructionSelector* selector, ArchOpcode opcode, ...@@ -108,10 +108,7 @@ void VisitSimdShiftRRR(InstructionSelector* selector, ArchOpcode opcode,
g.UseImmediate(node->InputAt(1))); g.UseImmediate(node->InputAt(1)));
} }
} else { } else {
InstructionOperand temps[] = {g.TempSimd128Register(), g.TempRegister()}; VisitRRR(selector, opcode, node);
selector->Emit(opcode, g.DefineAsRegister(node),
g.UseUniqueRegister(node->InputAt(0)),
g.UseRegister(node->InputAt(1)), arraysize(temps), temps);
} }
} }
...@@ -2816,8 +2813,7 @@ void InstructionSelector::VisitI64x2Neg(Node* node) { ...@@ -2816,8 +2813,7 @@ void InstructionSelector::VisitI64x2Neg(Node* node) {
void InstructionSelector::VisitI64x2Mul(Node* node) { void InstructionSelector::VisitI64x2Mul(Node* node) {
ArmOperandGenerator g(this); ArmOperandGenerator g(this);
InstructionOperand temps[] = {g.TempSimd128Register(), InstructionOperand temps[] = {g.TempSimd128Register()};
g.TempSimd128Register()};
Emit(kArmI64x2Mul, g.DefineAsRegister(node), Emit(kArmI64x2Mul, g.DefineAsRegister(node),
g.UseUniqueRegister(node->InputAt(0)), g.UseUniqueRegister(node->InputAt(0)),
g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps); g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps);
...@@ -3025,8 +3021,7 @@ namespace { ...@@ -3025,8 +3021,7 @@ namespace {
template <ArchOpcode opcode> template <ArchOpcode opcode>
void VisitBitMask(InstructionSelector* selector, Node* node) { void VisitBitMask(InstructionSelector* selector, Node* node) {
ArmOperandGenerator g(selector); ArmOperandGenerator g(selector);
InstructionOperand temps[] = {g.TempSimd128Register(), InstructionOperand temps[] = {g.TempSimd128Register()};
g.TempSimd128Register()};
selector->Emit(opcode, g.DefineAsRegister(node), selector->Emit(opcode, g.DefineAsRegister(node),
g.UseRegister(node->InputAt(0)), arraysize(temps), temps); g.UseRegister(node->InputAt(0)), arraysize(temps), temps);
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment