Commit 97f0c9f4, authored by Ng Zhi An, committed by Commit Bot

[wasm-simd] Optimize codegen when shift is constant

Define a macro in code-generator-ia32 to help identify cases when the
shift value is an immediate/constant. In those cases we can directly
emit the shifts without any masking, since the instruction selector
would have modulo-ed the shift value. We also don't need any temporaries
in this case.

Bug: v8:10115
Change-Id: I3cdef493fd7c365c733a85ad31b86e2d258b6429
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2037649
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#66162}
parent ff4e5da5
......@@ -350,7 +350,14 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_PACKED_OP3(Pavgw, pavgw)
#undef AVX_PACKED_OP3
AVX_PACKED_OP3_WITH_TYPE(Psllw, psllw, XMMRegister, uint8_t)
AVX_PACKED_OP3_WITH_TYPE(Pslld, pslld, XMMRegister, uint8_t)
AVX_PACKED_OP3_WITH_TYPE(Psllq, psllq, XMMRegister, uint8_t)
AVX_PACKED_OP3_WITH_TYPE(Psrlw, psrlw, XMMRegister, uint8_t)
AVX_PACKED_OP3_WITH_TYPE(Psrld, psrld, XMMRegister, uint8_t)
AVX_PACKED_OP3_WITH_TYPE(Psrlq, psrlq, XMMRegister, uint8_t)
AVX_PACKED_OP3_WITH_TYPE(Psraw, psraw, XMMRegister, uint8_t)
AVX_PACKED_OP3_WITH_TYPE(Psrad, psrad, XMMRegister, uint8_t)
#undef AVX_PACKED_OP3_WITH_TYPE
// Non-SSE2 instructions.
......
......@@ -493,6 +493,22 @@ class OutOfLineRecordWrite final : public OutOfLineCode {
__ cmov(zero, dst, tmp); \
} while (false)
// Emits the packed SIMD shift `opcode` on the output register, which must
// alias SIMD input 0 (checked with DCHECK_EQ). `width` is the number of
// significant bits of the shift count (e.g. 6 for 64-bit lanes); it is also
// token-pasted into the name of the InputInt<width> accessor.
//  - Immediate shift count: the shift is emitted directly with the constant;
//    no masking code is generated and no temporaries are read.
//  - Register shift count: the count is masked to `width` bits, moved into
//    SIMD temp 0, and that temp is used as the shift operand.
// NOTE(review): the register path masks the shift input register in place,
// i.e. it overwrites an input operand -- confirm the instruction selector
// reserves that register accordingly.
#define ASSEMBLE_SIMD_SHIFT(opcode, width)                        \
  do {                                                            \
    XMMRegister simd_dst = i.OutputSimd128Register();             \
    DCHECK_EQ(simd_dst, i.InputSimd128Register(0));               \
    if (!HasImmediateInput(instr, 1)) {                           \
      constexpr int kCountMask = (1 << width) - 1;                \
      Register count = i.InputRegister(1);                        \
      XMMRegister simd_count = i.TempSimd128Register(0);          \
      __ and_(count, Immediate(kCountMask));                      \
      __ Movd(simd_count, count);                                 \
      __ opcode(simd_dst, simd_dst, simd_count);                  \
    } else {                                                      \
      __ opcode(simd_dst, simd_dst,                               \
                static_cast<byte>(i.InputInt##width(1)));         \
    }                                                             \
  } while (false)
void CodeGenerator::AssembleDeconstructFrame() {
__ mov(esp, ebp);
__ pop(ebp);
......@@ -2021,12 +2037,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kIA32I64x2Shl: {
XMMRegister tmp = i.TempSimd128Register(0);
Register shift = i.InputRegister(1);
// Take shift value modulo 64.
__ and_(shift, Immediate(63));
__ Movd(tmp, shift);
__ Psllq(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
ASSEMBLE_SIMD_SHIFT(Psllq, 6);
break;
}
case kIA32I64x2ShrS: {
......@@ -2086,12 +2097,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kIA32I64x2ShrU: {
XMMRegister tmp = i.TempSimd128Register(0);
Register shift = i.InputRegister(1);
// Take shift value modulo 64.
__ and_(shift, Immediate(63));
__ Movd(tmp, shift);
__ Psrlq(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
ASSEMBLE_SIMD_SHIFT(Psrlq, 6);
break;
}
case kSSEF32x4Splat: {
......@@ -2488,21 +2494,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kIA32I32x4Shl: {
XMMRegister tmp = i.TempSimd128Register(0);
Register shift = i.InputRegister(1);
// Take shift value modulo 32.
__ and_(shift, 31);
__ Movd(tmp, shift);
__ Pslld(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
ASSEMBLE_SIMD_SHIFT(Pslld, 5);
break;
}
case kIA32I32x4ShrS: {
XMMRegister tmp = i.TempSimd128Register(0);
Register shift = i.InputRegister(1);
// Take shift value modulo 32.
__ and_(shift, 31);
__ Movd(tmp, shift);
__ Psrad(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
ASSEMBLE_SIMD_SHIFT(Psrad, 5);
break;
}
case kSSEI32x4Add: {
......@@ -2696,12 +2692,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kIA32I32x4ShrU: {
XMMRegister tmp = i.TempSimd128Register(0);
Register shift = i.InputRegister(1);
// Take shift value modulo 32.
__ and_(shift, 31);
__ movd(tmp, shift);
__ Psrld(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
ASSEMBLE_SIMD_SHIFT(Psrld, 5);
break;
}
case kSSEI32x4MinU: {
......@@ -2819,21 +2810,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kIA32I16x8Shl: {
XMMRegister tmp = i.TempSimd128Register(0);
Register shift = i.InputRegister(1);
// Take shift value modulo 16.
__ and_(shift, 15);
__ Movd(tmp, shift);
__ Psllw(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
ASSEMBLE_SIMD_SHIFT(Psllw, 4);
break;
}
case kIA32I16x8ShrS: {
XMMRegister tmp = i.TempSimd128Register(0);
Register shift = i.InputRegister(1);
// Take shift value modulo 16.
__ and_(shift, 15);
__ Movd(tmp, shift);
__ Psraw(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
ASSEMBLE_SIMD_SHIFT(Psraw, 4);
break;
}
case kSSEI16x8SConvertI32x4: {
......@@ -3001,12 +2982,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kIA32I16x8ShrU: {
XMMRegister tmp = i.TempSimd128Register(0);
Register shift = i.InputRegister(1);
// Take shift value modulo 16.
__ and_(shift, 15);
__ Movd(tmp, shift);
__ Psrlw(i.OutputSimd128Register(), i.InputSimd128Register(0), tmp);
ASSEMBLE_SIMD_SHIFT(Psrlw, 4);
break;
}
case kSSEI16x8UConvertI32x4: {
......
......@@ -307,11 +307,17 @@ void VisitRRISimd(InstructionSelector* selector, Node* node,
// Selects operands for a SIMD shift instruction whose shift amount is the
// node's input 1, and emits exactly one instruction for the node.
//  - If the shift amount can be encoded as an immediate, it is passed as an
//    immediate operand and no temporaries are requested.
//  - Otherwise both inputs are placed in unique registers and one SIMD
//    temporary is reserved for the code generator.
// In both cases the output is constrained to the same register as input 0.
void VisitRROSimdShift(InstructionSelector* selector, Node* node,
                       ArchOpcode opcode) {
  IA32OperandGenerator g(selector);
  if (g.CanBeImmediate(node->InputAt(1))) {
    // Constant shift: no register for the count and no temp needed.
    selector->Emit(opcode, g.DefineSameAsFirst(node),
                   g.UseRegister(node->InputAt(0)),
                   g.UseImmediate(node->InputAt(1)));
  } else {
    // Variable shift: the count lives in a general-purpose register and a
    // SIMD temp is made available to the code generator.
    InstructionOperand operand0 = g.UseUniqueRegister(node->InputAt(0));
    InstructionOperand operand1 = g.UseUniqueRegister(node->InputAt(1));
    InstructionOperand temps[] = {g.TempSimd128Register()};
    selector->Emit(opcode, g.DefineSameAsFirst(node), operand0, operand1,
                   arraysize(temps), temps);
  }
}
void VisitRROI8x16SimdRightShift(InstructionSelector* selector, Node* node,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment