Commit ebdc9882 authored by Ng Zhi An, committed by V8 LUCI CQ

[wasm-simd] Improve i8x16 shift ins-sel and temps usage

We no longer require dst == src (output = input[0]) in all cases, only
when AVX is not supported. This can help remove an extra move when AVX
is supported. Also, in many cases (when input[1], the shift amount, is an
immediate), we require fewer temporary registers.

Bug: v8:11589
Change-Id: I0d272df12de54f55b4c7a0a330c38ccaca82e927
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3092553
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#76286}
parent c4e4868e
...@@ -3111,30 +3111,29 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3111,30 +3111,29 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
} }
case kIA32I8x16Shl: { case kIA32I8x16Shl: {
XMMRegister dst = i.OutputSimd128Register(); XMMRegister dst = i.OutputSimd128Register();
// TODO(zhin): remove this restriction from instruction-selector. XMMRegister src = i.InputSimd128Register(0);
DCHECK_EQ(dst, i.InputSimd128Register(0)); DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
Register tmp = i.TempRegister(0); Register tmp = i.TempRegister(0);
XMMRegister tmp_simd = i.TempSimd128Register(1);
if (HasImmediateInput(instr, 1)) { if (HasImmediateInput(instr, 1)) {
__ I8x16Shl(dst, i.InputSimd128Register(0), i.InputInt3(1), tmp, __ I8x16Shl(dst, src, i.InputInt3(1), tmp, kScratchDoubleReg);
kScratchDoubleReg);
} else { } else {
__ I8x16Shl(dst, i.InputSimd128Register(0), i.InputRegister(1), tmp, XMMRegister tmp_simd = i.TempSimd128Register(1);
kScratchDoubleReg, tmp_simd); __ I8x16Shl(dst, src, i.InputRegister(1), tmp, kScratchDoubleReg,
tmp_simd);
} }
break; break;
} }
case kIA32I8x16ShrS: { case kIA32I8x16ShrS: {
XMMRegister dst = i.OutputSimd128Register(); XMMRegister dst = i.OutputSimd128Register();
// TODO(zhin): remove this restriction from instruction-selector. XMMRegister src = i.InputSimd128Register(0);
DCHECK_EQ(dst, i.InputSimd128Register(0)); DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
if (HasImmediateInput(instr, 1)) { if (HasImmediateInput(instr, 1)) {
__ I8x16ShrS(dst, i.InputSimd128Register(0), i.InputInt3(1), __ I8x16ShrS(dst, src, i.InputInt3(1), kScratchDoubleReg);
kScratchDoubleReg);
} else { } else {
__ I8x16ShrS(dst, i.InputSimd128Register(0), i.InputRegister(1), __ I8x16ShrS(dst, src, i.InputRegister(1), i.TempRegister(0),
i.TempRegister(0), kScratchDoubleReg, kScratchDoubleReg, i.TempSimd128Register(1));
i.TempSimd128Register(1));
} }
break; break;
} }
...@@ -3237,16 +3236,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3237,16 +3236,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
} }
case kIA32I8x16ShrU: { case kIA32I8x16ShrU: {
XMMRegister dst = i.OutputSimd128Register(); XMMRegister dst = i.OutputSimd128Register();
// TODO(zhin): remove this restriction from instruction-selector. XMMRegister src = i.InputSimd128Register(0);
DCHECK_EQ(dst, i.InputSimd128Register(0)); DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
Register tmp = i.ToRegister(instr->TempAt(0)); Register tmp = i.TempRegister(0);
if (HasImmediateInput(instr, 1)) { if (HasImmediateInput(instr, 1)) {
__ I8x16ShrU(dst, i.InputSimd128Register(0), i.InputInt3(1), tmp, __ I8x16ShrU(dst, src, i.InputInt3(1), tmp, kScratchDoubleReg);
kScratchDoubleReg);
} else { } else {
__ I8x16ShrU(dst, i.InputSimd128Register(0), i.InputRegister(1), tmp, __ I8x16ShrU(dst, src, i.InputRegister(1), tmp, kScratchDoubleReg,
kScratchDoubleReg, i.TempSimd128Register(1)); i.TempSimd128Register(1));
} }
break; break;
......
...@@ -389,14 +389,28 @@ void VisitRROSimdShift(InstructionSelector* selector, Node* node, ...@@ -389,14 +389,28 @@ void VisitRROSimdShift(InstructionSelector* selector, Node* node,
} }
} }
void VisitRROI8x16SimdShift(InstructionSelector* selector, Node* node, void VisitI8x16Shift(InstructionSelector* selector, Node* node,
ArchOpcode opcode) { ArchOpcode opcode) {
IA32OperandGenerator g(selector); IA32OperandGenerator g(selector);
InstructionOperand operand0 = g.UseUniqueRegister(node->InputAt(0)); InstructionOperand output = CpuFeatures::IsSupported(AVX)
InstructionOperand operand1 = g.UseUniqueRegister(node->InputAt(1)); ? g.UseRegister(node)
InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()}; : g.DefineSameAsFirst(node);
selector->Emit(opcode, g.DefineSameAsFirst(node), operand0, operand1,
arraysize(temps), temps); if (g.CanBeImmediate(node->InputAt(1))) {
if (opcode == kIA32I8x16ShrS) {
selector->Emit(opcode, output, g.UseRegister(node->InputAt(0)),
g.UseImmediate(node->InputAt(1)));
} else {
InstructionOperand temps[] = {g.TempRegister()};
selector->Emit(opcode, output, g.UseRegister(node->InputAt(0)),
g.UseImmediate(node->InputAt(1)), arraysize(temps), temps);
}
} else {
InstructionOperand operand0 = g.UseUniqueRegister(node->InputAt(0));
InstructionOperand operand1 = g.UseUniqueRegister(node->InputAt(1));
InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()};
selector->Emit(opcode, output, operand0, operand1, arraysize(temps), temps);
}
} }
} // namespace } // namespace
...@@ -2651,38 +2665,15 @@ void InstructionSelector::VisitI8x16UConvertI16x8(Node* node) { ...@@ -2651,38 +2665,15 @@ void InstructionSelector::VisitI8x16UConvertI16x8(Node* node) {
} }
void InstructionSelector::VisitI8x16Shl(Node* node) { void InstructionSelector::VisitI8x16Shl(Node* node) {
IA32OperandGenerator g(this); VisitI8x16Shift(this, node, kIA32I8x16Shl);
if (g.CanBeImmediate(node->InputAt(1))) {
InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()};
this->Emit(kIA32I8x16Shl, g.DefineSameAsFirst(node),
g.UseRegister(node->InputAt(0)),
g.UseImmediate(node->InputAt(1)), arraysize(temps), temps);
} else {
VisitRROI8x16SimdShift(this, node, kIA32I8x16Shl);
}
} }
void InstructionSelector::VisitI8x16ShrS(Node* node) { void InstructionSelector::VisitI8x16ShrS(Node* node) {
IA32OperandGenerator g(this); VisitI8x16Shift(this, node, kIA32I8x16ShrS);
if (g.CanBeImmediate(node->InputAt(1))) {
this->Emit(kIA32I8x16ShrS, g.DefineSameAsFirst(node),
g.UseRegister(node->InputAt(0)),
g.UseImmediate(node->InputAt(1)));
} else {
VisitRROI8x16SimdShift(this, node, kIA32I8x16ShrS);
}
} }
void InstructionSelector::VisitI8x16ShrU(Node* node) { void InstructionSelector::VisitI8x16ShrU(Node* node) {
IA32OperandGenerator g(this); VisitI8x16Shift(this, node, kIA32I8x16ShrU);
if (g.CanBeImmediate(node->InputAt(1))) {
InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()};
this->Emit(kIA32I8x16ShrU, g.DefineSameAsFirst(node),
g.UseRegister(node->InputAt(0)),
g.UseImmediate(node->InputAt(1)), arraysize(temps), temps);
} else {
VisitRROI8x16SimdShift(this, node, kIA32I8x16ShrU);
}
} }
void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) { void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) {
......
...@@ -3492,32 +3492,26 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3492,32 +3492,26 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
} }
case kX64I8x16Shl: { case kX64I8x16Shl: {
XMMRegister dst = i.OutputSimd128Register(); XMMRegister dst = i.OutputSimd128Register();
// TODO(zhin): remove this restriction from instruction-selector. XMMRegister src = i.InputSimd128Register(0);
DCHECK_EQ(dst, i.InputSimd128Register(0)); DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
// Temp registers for shift mask and additional moves to XMM registers.
Register tmp = i.TempRegister(0);
XMMRegister tmp_simd = i.TempSimd128Register(1);
if (HasImmediateInput(instr, 1)) { if (HasImmediateInput(instr, 1)) {
__ I8x16Shl(dst, i.InputSimd128Register(0), i.InputInt3(1), tmp, __ I8x16Shl(dst, src, i.InputInt3(1), kScratchRegister,
kScratchDoubleReg); kScratchDoubleReg);
} else { } else {
__ I8x16Shl(dst, i.InputSimd128Register(0), i.InputRegister(1), tmp, __ I8x16Shl(dst, src, i.InputRegister(1), kScratchRegister,
kScratchDoubleReg, tmp_simd); kScratchDoubleReg, i.TempSimd128Register(0));
} }
break; break;
} }
case kX64I8x16ShrS: { case kX64I8x16ShrS: {
XMMRegister dst = i.OutputSimd128Register(); XMMRegister dst = i.OutputSimd128Register();
// TODO(zhin): remove this restriction from instruction-selector. XMMRegister src = i.InputSimd128Register(0);
DCHECK_EQ(dst, i.InputSimd128Register(0)); DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
if (HasImmediateInput(instr, 1)) { if (HasImmediateInput(instr, 1)) {
__ I8x16ShrS(dst, i.InputSimd128Register(0), i.InputInt3(1), __ I8x16ShrS(dst, src, i.InputInt3(1), kScratchDoubleReg);
kScratchDoubleReg);
} else { } else {
// TODO(zhin): use kScratchRegister instead of TempRegister. __ I8x16ShrS(dst, src, i.InputRegister(1), kScratchRegister,
__ I8x16ShrS(dst, i.InputSimd128Register(0), i.InputRegister(1), kScratchDoubleReg, i.TempSimd128Register(0));
i.TempRegister(0), kScratchDoubleReg,
i.TempSimd128Register(1));
} }
break; break;
} }
...@@ -3573,16 +3567,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3573,16 +3567,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
} }
case kX64I8x16ShrU: { case kX64I8x16ShrU: {
XMMRegister dst = i.OutputSimd128Register(); XMMRegister dst = i.OutputSimd128Register();
// TODO(zhin): remove this restriction from instruction-selector. XMMRegister src = i.InputSimd128Register(0);
DCHECK_EQ(dst, i.InputSimd128Register(0)); DCHECK_IMPLIES(!CpuFeatures::IsSupported(AVX), dst == src);
// TODO(zhin): use kScratchRegister instead of tmp.
Register tmp = i.TempRegister(0);
if (HasImmediateInput(instr, 1)) { if (HasImmediateInput(instr, 1)) {
__ I8x16ShrU(dst, i.InputSimd128Register(0), i.InputInt3(1), tmp, __ I8x16ShrU(dst, src, i.InputInt3(1), kScratchRegister,
kScratchDoubleReg); kScratchDoubleReg);
} else { } else {
__ I8x16ShrU(dst, i.InputSimd128Register(0), i.InputRegister(1), tmp, __ I8x16ShrU(dst, src, i.InputRegister(1), kScratchRegister,
kScratchDoubleReg, i.TempSimd128Register(1)); kScratchDoubleReg, i.TempSimd128Register(0));
} }
break; break;
} }
......
...@@ -3047,6 +3047,7 @@ VISIT_ATOMIC_BINOP(Xor) ...@@ -3047,6 +3047,7 @@ VISIT_ATOMIC_BINOP(Xor)
#define SIMD_NARROW_SHIFT_OPCODES(V) \ #define SIMD_NARROW_SHIFT_OPCODES(V) \
V(I8x16Shl) \ V(I8x16Shl) \
V(I8x16ShrS) \
V(I8x16ShrU) V(I8x16ShrU)
void InstructionSelector::VisitS128Const(Node* node) { void InstructionSelector::VisitS128Const(Node* node) {
...@@ -3176,19 +3177,19 @@ SIMD_SHIFT_OPCODES(VISIT_SIMD_SHIFT) ...@@ -3176,19 +3177,19 @@ SIMD_SHIFT_OPCODES(VISIT_SIMD_SHIFT)
#undef VISIT_SIMD_SHIFT #undef VISIT_SIMD_SHIFT
#undef SIMD_SHIFT_OPCODES #undef SIMD_SHIFT_OPCODES
#define VISIT_SIMD_NARROW_SHIFT(Opcode) \ #define VISIT_SIMD_NARROW_SHIFT(Opcode) \
void InstructionSelector::Visit##Opcode(Node* node) { \ void InstructionSelector::Visit##Opcode(Node* node) { \
X64OperandGenerator g(this); \ X64OperandGenerator g(this); \
InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()}; \ InstructionOperand output = \
if (g.CanBeImmediate(node->InputAt(1))) { \ IsSupported(AVX) ? g.UseRegister(node) : g.DefineSameAsFirst(node); \
Emit(kX64##Opcode, g.DefineSameAsFirst(node), \ if (g.CanBeImmediate(node->InputAt(1))) { \
g.UseRegister(node->InputAt(0)), g.UseImmediate(node->InputAt(1)), \ Emit(kX64##Opcode, output, g.UseRegister(node->InputAt(0)), \
arraysize(temps), temps); \ g.UseImmediate(node->InputAt(1))); \
} else { \ } else { \
Emit(kX64##Opcode, g.DefineSameAsFirst(node), \ InstructionOperand temps[] = {g.TempSimd128Register()}; \
g.UseUniqueRegister(node->InputAt(0)), \ Emit(kX64##Opcode, output, g.UseUniqueRegister(node->InputAt(0)), \
g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps); \ g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps); \
} \ } \
} }
SIMD_NARROW_SHIFT_OPCODES(VISIT_SIMD_NARROW_SHIFT) SIMD_NARROW_SHIFT_OPCODES(VISIT_SIMD_NARROW_SHIFT)
#undef VISIT_SIMD_NARROW_SHIFT #undef VISIT_SIMD_NARROW_SHIFT
...@@ -3328,19 +3329,6 @@ void InstructionSelector::VisitI32x4UConvertF32x4(Node* node) { ...@@ -3328,19 +3329,6 @@ void InstructionSelector::VisitI32x4UConvertF32x4(Node* node) {
g.UseRegister(node->InputAt(0)), arraysize(temps), temps); g.UseRegister(node->InputAt(0)), arraysize(temps), temps);
} }
void InstructionSelector::VisitI8x16ShrS(Node* node) {
X64OperandGenerator g(this);
if (g.CanBeImmediate(node->InputAt(1))) {
Emit(kX64I8x16ShrS, g.DefineSameAsFirst(node),
g.UseRegister(node->InputAt(0)), g.UseImmediate(node->InputAt(1)));
} else {
InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()};
Emit(kX64I8x16ShrS, g.DefineSameAsFirst(node),
g.UseUniqueRegister(node->InputAt(0)),
g.UseUniqueRegister(node->InputAt(1)), arraysize(temps), temps);
}
}
void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) { void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) {
UNREACHABLE(); UNREACHABLE();
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment