Commit fd535190 authored by Ng Zhi An's avatar Ng Zhi An Committed by Commit Bot

[wasm-simd] AVX codegen for some conversion opcodes

Bug: v8:9561
Change-Id: Ie3231038312495c2d8f77062ee5b81b2b55ab4d7
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1980502Reviewed-by: 's avatarDeepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#65617}
parent c855532a
......@@ -1495,6 +1495,12 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
emit(mask);
}
void vpalignr(XMMRegister dst, XMMRegister src1, XMMRegister src2,
uint8_t imm8) {
vinstr(0x0F, dst, src1, src2, k66, k0F3A, kWIG);
emit(imm8);
}
void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2);
void vps(byte op, XMMRegister dst, XMMRegister src1, Operand src2);
void vps(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
......
......@@ -218,6 +218,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP_SSE3(Movddup, movddup)
AVX_OP_SSSE3(Pshufb, pshufb)
AVX_OP_SSSE3(Psignd, psignd)
AVX_OP_SSSE3(Palignr, palignr)
AVX_OP_SSE4_1(Pmulld, pmulld)
AVX_OP_SSE4_1(Pminsd, pminsd)
AVX_OP_SSE4_1(Pminud, pminud)
......
......@@ -2958,14 +2958,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
case kX64I32x4SConvertI16x8Low: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
__ pmovsxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ Pmovsxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kX64I32x4SConvertI16x8High: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register();
__ palignr(dst, i.InputSimd128Register(0), 8);
__ pmovsxwd(dst, dst);
__ Palignr(dst, i.InputSimd128Register(0), static_cast<uint8_t>(8));
__ Pmovsxwd(dst, dst);
break;
}
case kX64I32x4Neg: {
......@@ -3080,15 +3079,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64I32x4UConvertI16x8Low: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
__ pmovzxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ Pmovzxwd(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kX64I32x4UConvertI16x8High: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register();
__ palignr(dst, i.InputSimd128Register(0), 8);
__ pmovzxwd(dst, dst);
__ Palignr(dst, i.InputSimd128Register(0), static_cast<uint8_t>(8));
__ Pmovzxwd(dst, dst);
break;
}
case kX64I32x4ShrU: {
......@@ -3169,15 +3166,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64I16x8SConvertI8x16Low: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
__ pmovsxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
__ Pmovsxbw(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kX64I16x8SConvertI8x16High: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register();
__ palignr(dst, i.InputSimd128Register(0), 8);
__ pmovsxbw(dst, dst);
__ Palignr(dst, i.InputSimd128Register(0), static_cast<uint8_t>(8));
__ Pmovsxbw(dst, dst);
break;
}
case kX64I16x8Neg: {
......@@ -3281,10 +3276,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64I16x8UConvertI8x16High: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
XMMRegister dst = i.OutputSimd128Register();
__ palignr(dst, i.InputSimd128Register(0), 8);
__ pmovzxbw(dst, dst);
__ Palignr(dst, i.InputSimd128Register(0), static_cast<uint8_t>(8));
__ Pmovzxbw(dst, dst);
break;
}
case kX64I16x8ShrU: {
......
......@@ -953,6 +953,12 @@ int DisassemblerX64::AVXInstruction(byte* data) {
current += PrintRightXMMOperand(current);
AppendToBuffer(",0x%x", *current++);
break;
case 0x0F:
AppendToBuffer("vpalignr %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
AppendToBuffer(",0x%x", *current++);
break;
case 0x14:
AppendToBuffer("vpextrb ");
current += PrintRightByteOperand(current);
......
......@@ -780,6 +780,7 @@ TEST(DisasmX64) {
__ vshufps(xmm3, xmm2, xmm3, 3);
__ vpblendw(xmm1, xmm2, xmm3, 23);
__ vpblendw(xmm1, xmm2, Operand(rbx, rcx, times_4, 10000), 23);
__ vpalignr(xmm1, xmm2, xmm3, 4);
__ vmovddup(xmm1, xmm2);
__ vmovddup(xmm1, Operand(rbx, rcx, times_4, 10000));
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment