Commit 97014f60 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][ia32] Unify SSE and AVX impl for i8x16.shl

Delegate to macro assembler functions to pick between SSE or AVX. This
simplifies the optimization of constant shifts later.

Bug: v8:10115
Change-Id: If0b7dc83a68f2d7839c65527a3b6a62310ace6dd
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2103443
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#66753}
parent 20eb2e44
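The "delegate to macro assembler" pattern referenced above is, roughly, a wrapper that emits the AVX encoding when available and falls back to the SSE encoding otherwise. A minimal C++ sketch of that dispatch, assuming the V8-style names CpuFeatures, CpuFeatureScope, and the lowercase psllw/vpsllw emitters (an illustration of the idea, not the exact AVX_OP3_XO macro body):

void TurboAssembler::Psllw(XMMRegister dst, XMMRegister src, XMMRegister shift) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);  // guard AVX-only encodings
    vpsllw(dst, src, shift);           // non-destructive three-operand AVX form
  } else {
    DCHECK_EQ(dst, src);               // SSE form is destructive: dst aliases src
    psllw(dst, shift);                 // two-operand SSE form
  }
}

With Pand, Packuswb, Psrlw, and Psllw all routed through such wrappers, the code generator no longer needs separate kSSE.../kAVX... cases, which is what lets the single kIA32I8x16Shl case below serve both paths.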
@@ -305,6 +305,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP3_XO(Packsswb, packsswb)
AVX_OP3_XO(Packuswb, packuswb)
AVX_OP3_XO(Paddusb, paddusb)
+ AVX_OP3_XO(Pand, pand)
AVX_OP3_XO(Pcmpeqb, pcmpeqb)
AVX_OP3_XO(Pcmpeqw, pcmpeqw)
AVX_OP3_XO(Pcmpeqd, pcmpeqd)
@@ -3146,7 +3146,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
break;
}
- case kSSEI8x16Shl: {
+ case kIA32I8x16Shl: {
XMMRegister dst = i.OutputSimd128Register();
DCHECK_EQ(dst, i.InputSimd128Register(0));
Register shift = i.InputRegister(1);
@@ -3155,36 +3155,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
// Take shift value modulo 8.
__ and_(shift, 7);
// Mask off the unwanted bits before word-shifting.
- __ pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
- __ mov(tmp, shift);
- __ add(tmp, Immediate(8));
- __ movd(tmp_simd, tmp);
- __ psrlw(kScratchDoubleReg, tmp_simd);
- __ packuswb(kScratchDoubleReg, kScratchDoubleReg);
- __ pand(dst, kScratchDoubleReg);
- __ movd(tmp_simd, shift);
- __ psllw(dst, tmp_simd);
- break;
- }
- case kAVXI8x16Shl: {
- CpuFeatureScope avx_scope(tasm(), AVX);
- XMMRegister dst = i.OutputSimd128Register();
- XMMRegister src = i.InputSimd128Register(0);
- Register shift = i.InputRegister(1);
- Register tmp = i.ToRegister(instr->TempAt(0));
- XMMRegister tmp_simd = i.TempSimd128Register(1);
- // Take shift value modulo 8.
- __ and_(shift, 7);
- // Mask off the unwanted bits before word-shifting.
- __ vpcmpeqw(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
+ __ Pcmpeqw(kScratchDoubleReg, kScratchDoubleReg);
__ mov(tmp, shift);
__ add(tmp, Immediate(8));
- __ movd(tmp_simd, tmp);
- __ vpsrlw(kScratchDoubleReg, kScratchDoubleReg, tmp_simd);
- __ vpackuswb(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
- __ vpand(dst, src, kScratchDoubleReg);
- __ movd(tmp_simd, shift);
- __ vpsllw(dst, dst, tmp_simd);
+ __ Movd(tmp_simd, tmp);
+ __ Psrlw(kScratchDoubleReg, kScratchDoubleReg, tmp_simd);
+ __ Packuswb(kScratchDoubleReg, kScratchDoubleReg);
+ __ Pand(dst, kScratchDoubleReg);
+ __ Movd(tmp_simd, shift);
+ __ Psllw(dst, dst, tmp_simd);
break;
}
case kIA32I8x16ShrS: {
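What the unified sequence computes: SSE/AVX have no 8-bit left shift, so the code shifts 16-bit lanes and first clears the bits that would leak from each byte into its neighbour. The all-ones register shifted right by (shift + 8) leaves 0xFF >> shift in each word's low byte, and packuswb broadcasts that byte mask across all 16 lanes. A scalar C++ model of one lane, mirroring the masking trick (names are mine, for illustration only):

#include <cstdint>
#include <cstdio>

// Models wasm i8x16.shl for a single byte lane.
uint8_t I8x16ShlLane(uint8_t lane, uint32_t shift) {
  shift &= 7;                                                    // and_(shift, 7)
  uint8_t mask = static_cast<uint8_t>(0xFFFFu >> (shift + 8));   // pcmpeqw + psrlw + packuswb
  return static_cast<uint8_t>((lane & mask) << shift);           // pand + psllw
}

int main() {
  printf("%02x\n", I8x16ShlLane(0xFF, 3));  // prints f8: bits shifted past bit 7 are dropped
}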
@@ -289,8 +289,7 @@ namespace compiler {
V(SSEI8x16SConvertI16x8) \
V(AVXI8x16SConvertI16x8) \
V(IA32I8x16Neg) \
- V(SSEI8x16Shl) \
- V(AVXI8x16Shl) \
+ V(IA32I8x16Shl) \
V(IA32I8x16ShrS) \
V(SSEI8x16Add) \
V(AVXI8x16Add) \
@@ -270,8 +270,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kSSEI8x16SConvertI16x8:
case kAVXI8x16SConvertI16x8:
case kIA32I8x16Neg:
- case kSSEI8x16Shl:
- case kAVXI8x16Shl:
+ case kIA32I8x16Shl:
case kIA32I8x16ShrS:
case kSSEI8x16Add:
case kAVXI8x16Add:
@@ -2451,13 +2451,8 @@ void InstructionSelector::VisitI8x16Shl(Node* node) {
InstructionOperand operand0 = g.UseUniqueRegister(node->InputAt(0));
InstructionOperand operand1 = g.UseUniqueRegister(node->InputAt(1));
InstructionOperand temps[] = {g.TempRegister(), g.TempSimd128Register()};
- if (IsSupported(AVX)) {
- Emit(kAVXI8x16Shl, g.DefineAsRegister(node), operand0, operand1,
- arraysize(temps), temps);
- } else {
- Emit(kSSEI8x16Shl, g.DefineSameAsFirst(node), operand0, operand1,
+ Emit(kIA32I8x16Shl, g.DefineSameAsFirst(node), operand0, operand1,
arraysize(temps), temps);
- }
}
void InstructionSelector::VisitInt32AbsWithOverflow(Node* node) {
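One consequence of the unification, visible in the selector hunk above: the AVX path previously used g.DefineAsRegister, since the three-operand vpand/vpsllw forms can write a fresh destination, while the merged kIA32I8x16Shl keeps the stricter g.DefineSameAsFirst so the same register constraints also satisfy the destructive SSE encodings. This matches the DCHECK_EQ(dst, i.InputSimd128Register(0)) retained in the code generator.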