Commit 237e8c2c authored by Deepti Gandluri, committed by Commit Bot

[wasm-simd] Use movddup instead of pshufd for 64x2 splats

Performance is comparable on newer hardware; movddup performs slightly
better on older chips.

Change-Id: Ic3248dd2807bf2c49311cba45ba4f0e8baa47730
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1715981
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Commit-Queue: Deepti Gandluri <gdeepti@chromium.org>
Cr-Commit-Position: refs/heads/master@{#62968}
parent 8a5a1a68
......@@ -4719,6 +4719,26 @@ void Assembler::lddqu(XMMRegister dst, Operand src) {
emit_sse_operand(dst, src);
}
// Emits a MOVDDUP instruction with an XMM register as the source operand.
//
// Bytes are written in this order: the 0xF2 prefix, whatever
// emit_optional_rex_32(dst, src) produces, the two opcode bytes 0x0F 0x12,
// and finally the operand encoding from emit_sse_operand(dst, src).
//
// dst: destination XMM register.
// src: source XMM register.
//
// The DCHECK requires SSE3 to be enabled on this assembler.
void Assembler::movddup(XMMRegister dst, XMMRegister src) {
DCHECK(IsEnabled(SSE3));
// NOTE(review): presumably guarantees buffer room for the bytes emitted
// below -- confirm against EnsureSpace.
EnsureSpace ensure_space(this);
// Prefix byte; it is emitted before the (optional) REX byte.
emit(0xF2);
// NOTE(review): by its name, emits a REX prefix only when dst/src need one
// -- confirm against the helper's definition.
emit_optional_rex_32(dst, src);
// Two-byte opcode: 0x0F escape followed by 0x12.
emit(0x0F);
emit(0x12);
emit_sse_operand(dst, src);
}
// Emits a MOVDDUP instruction with an Operand as the source.
//
// Bytes are written in this order: the 0xF2 prefix, whatever
// emit_optional_rex_32(dst, src) produces, the two opcode bytes 0x0F 0x12,
// and finally the operand encoding from emit_sse_operand(dst, src).
//
// dst: destination XMM register.
// src: source Operand.
//
// The DCHECK requires SSE3 to be enabled on this assembler.
void Assembler::movddup(XMMRegister dst, Operand src) {
DCHECK(IsEnabled(SSE3));
// NOTE(review): presumably guarantees buffer room for the bytes emitted
// below -- confirm against EnsureSpace.
EnsureSpace ensure_space(this);
// Prefix byte; it is emitted before the (optional) REX byte.
emit(0xF2);
// NOTE(review): by its name, emits a REX prefix only when dst/src need one
// -- confirm against the helper's definition.
emit_optional_rex_32(dst, src);
// Two-byte opcode: 0x0F escape followed by 0x12.
emit(0x0F);
emit(0x12);
emit_sse_operand(dst, src);
}
void Assembler::psrldq(XMMRegister dst, uint8_t shift) {
EnsureSpace ensure_space(this);
emit(0x66);
......
......@@ -916,6 +916,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
// SSE3
void lddqu(XMMRegister dst, Operand src);
void movddup(XMMRegister dst, Operand src);
void movddup(XMMRegister dst, XMMRegister src);
// SSSE3
void ssse3_instr(XMMRegister dst, XMMRegister src, byte prefix, byte escape1,
......
......@@ -2256,11 +2256,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64F64x2Splat: {
CpuFeatureScope sse_scope(tasm(), SSE3);
XMMRegister dst = i.OutputSimd128Register();
if (instr->InputAt(0)->IsFPRegister()) {
__ pshufd(dst, i.InputDoubleRegister(0), 0x44);
__ movddup(dst, i.InputDoubleRegister(0));
} else {
__ pshufd(dst, i.InputOperand(0), 0x44);
__ movddup(dst, i.InputOperand(0));
}
break;
}
......@@ -2478,13 +2479,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
case kX64I64x2Splat: {
CpuFeatureScope sse_scope(tasm(), SSE3);
XMMRegister dst = i.OutputSimd128Register();
if (instr->InputAt(0)->IsRegister()) {
__ movq(dst, i.InputRegister(0));
} else {
__ movq(dst, i.InputOperand(0));
}
__ pshufd(dst, dst, 0x44);
__ movddup(dst, dst);
break;
}
case kX64I64x2ExtractLane: {
......
......@@ -1991,6 +1991,11 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
AppendToBuffer("%s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
}
} else if (opcode == 0x12) {
int mod, regop, rm;
get_modrm(*current, &mod, &regop, &rm);
AppendToBuffer("movddup %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
} else if (opcode == 0x2A) {
// CVTSI2SD: integer to XMM double conversion.
int mod, regop, rm;
......
......@@ -517,6 +517,8 @@ TEST(DisasmX64) {
__ haddps(xmm1, xmm0);
__ haddps(xmm1, Operand(rbx, rcx, times_4, 10000));
__ lddqu(xmm1, Operand(rdx, 4));
__ movddup(xmm1, Operand(rax, 5));
__ movddup(xmm1, xmm2);
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment