Commit 4972b2c8 authored by Ng Zhi An, committed by Commit Bot

Add AVX for movddup and pinsrq

Bug: v8:9561
Change-Id: I39a3148570664909eb08f1559b2cb418477a6c15
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1948717
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Cr-Commit-Position: refs/heads/master@{#65322}
parent 77da0c80
......@@ -3524,6 +3524,14 @@ void Assembler::movmskps(Register dst, XMMRegister src) {
// AVX instructions
// Emits the AVX (VEX-encoded) register-to-register form of movddup.
// The encoding produced here is: a VEX prefix (128-bit length, F2 prefix,
// 0F opcode map, W ignored), the opcode byte 0x12, then the dst/src operand
// encoding.
void Assembler::vmovddup(XMMRegister dst, XMMRegister src) {
// Callers must have AVX enabled before emitting this instruction.
DCHECK(IsEnabled(AVX));
// NOTE(review): presumably reserves room in the code buffer for the bytes
// emitted below -- confirm against EnsureSpace.
EnsureSpace ensure_space(this);
// xmm0 is passed as the middle register argument; this instruction has no
// second source register. NOTE(review): presumably this fills the unused
// VEX.vvvv field -- confirm against emit_vex_prefix.
emit_vex_prefix(dst, xmm0, src, kL128, kF2, k0F, kWIG);
emit(0x12);
emit_sse_operand(dst, src);
}
void Assembler::vmovddup(XMMRegister dst, Operand src) {
DCHECK(IsEnabled(AVX));
EnsureSpace ensure_space(this);
......
......@@ -1118,6 +1118,7 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void movlhps(XMMRegister dst, XMMRegister src);
// AVX instructions
void vmovddup(XMMRegister dst, XMMRegister src);
void vmovddup(XMMRegister dst, Operand src);
void vbroadcastss(XMMRegister dst, Operand src);
void vfmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
......@@ -1628,6 +1629,16 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
vinstr(0x22, dst, src1, src2, k66, k0F3A, kW0);
emit(imm8);
}
// AVX vpinsrq taking the inserted value from a general-purpose register.
// Emits opcode 0x22 with the 66 prefix, the 0F3A opcode map and W1 through
// vinstr, followed by imm8 as a trailing immediate byte.
void vpinsrq(XMMRegister dst, XMMRegister src1, Register src2, int8_t imm8) {
// The general-purpose register is handed to vinstr as an XMMRegister that
// carries the same register code, so the register-register overload of
// vinstr can be used for the encoding.
vinstr(0x22, dst, src1, XMMRegister::from_code(src2.code()), k66, k0F3A,
kW1);
emit(imm8);
}
// AVX vpinsrq taking the inserted value from a memory operand.
// Uses the same opcode (0x22), prefix (66) and opcode map (0F3A) as the
// register-source overload, with W1 selected, and appends imm8 as the
// trailing immediate byte.
void vpinsrq(XMMRegister dst, XMMRegister src1, Operand src2, int8_t imm8) {
vinstr(0x22, dst, src1, src2, k66, k0F3A, kW1);
// The immediate must follow the operand bytes written by vinstr.
emit(imm8);
}
void vpshufd(XMMRegister dst, XMMRegister src, uint8_t imm8) {
vinstr(0x70, dst, xmm0, src, k66, k0F, kWIG);
emit(imm8);
......
......@@ -215,6 +215,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP_SSE4_1(Pmaxud, pmaxud)
AVX_OP_SSE4_1(Extractps, extractps)
AVX_OP_SSE4_1(Insertps, insertps)
AVX_OP_SSE4_1(Pinsrq, pinsrq)
#undef AVX_OP
......
......@@ -2265,9 +2265,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
CpuFeatureScope sse_scope(tasm(), SSE3);
XMMRegister dst = i.OutputSimd128Register();
if (instr->InputAt(0)->IsFPRegister()) {
__ movddup(dst, i.InputDoubleRegister(0));
__ Movddup(dst, i.InputDoubleRegister(0));
} else {
__ movddup(dst, i.InputOperand(0));
__ Movddup(dst, i.InputOperand(0));
}
break;
}
......@@ -2275,9 +2275,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
CpuFeatureScope sse_scope(tasm(), SSE4_1);
if (instr->InputAt(2)->IsFPRegister()) {
__ movq(kScratchRegister, i.InputDoubleRegister(2));
__ pinsrq(i.OutputSimd128Register(), kScratchRegister, i.InputInt8(1));
__ Pinsrq(i.OutputSimd128Register(), kScratchRegister, i.InputInt8(1));
} else {
__ pinsrq(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
__ Pinsrq(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
}
break;
}
......@@ -2628,11 +2628,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
CpuFeatureScope sse_scope(tasm(), SSE3);
XMMRegister dst = i.OutputSimd128Register();
if (HasRegisterInput(instr, 0)) {
__ movq(dst, i.InputRegister(0));
__ Movq(dst, i.InputRegister(0));
} else {
__ movq(dst, i.InputOperand(0));
__ Movq(dst, i.InputOperand(0));
}
__ movddup(dst, dst);
__ Movddup(dst, dst);
break;
}
case kX64I64x2ExtractLane: {
......@@ -2643,7 +2643,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kX64I64x2ReplaceLane: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
if (HasRegisterInput(instr, 2)) {
__ pinsrq(i.OutputSimd128Register(), i.InputRegister(2),
__ Pinsrq(i.OutputSimd128Register(), i.InputRegister(2),
i.InputInt8(1));
} else {
__ pinsrq(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
......
......@@ -763,11 +763,14 @@ TEST(DisasmX64) {
__ vpinsrw(xmm1, xmm2, Operand(rbx, rcx, times_4, 10000), 5);
__ vpinsrd(xmm1, xmm2, rax, 2);
__ vpinsrd(xmm1, xmm2, Operand(rbx, rcx, times_4, 10000), 2);
__ vpinsrq(xmm1, xmm2, rax, 9);
__ vpinsrq(xmm1, xmm2, Operand(rbx, rcx, times_4, 10000), 9);
__ vpshufd(xmm1, xmm2, 85);
__ vpshuflw(xmm1, xmm2, 85);
__ vpshuflw(xmm1, Operand(rbx, rcx, times_4, 10000), 85);
__ vshufps(xmm3, xmm2, xmm3, 3);
__ vmovddup(xmm1, xmm2);
__ vmovddup(xmm1, Operand(rbx, rcx, times_4, 10000));
__ vbroadcastss(xmm1, Operand(rbx, rcx, times_4, 10000));
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment