Commit acc96e1f authored by Ng Zhi An, committed by Commit Bot

[wasm-simd] Add AVX for movlhps and some avx codegen

Define movlhps inline via sse_instr, add a VEX-encoded vmovlhps (with
disassembler and test coverage), and move several F64x2/I64x2 code
sequences onto the macro-assembler wrappers (Movq, Pextrq, Pinsrq,
Movapd, Pxor, Psubq, Movlhps) so that AVX encodings are used when
available.

Bug: v8:9561
Change-Id: I18c832737cbea89e08af2ca166de7b01b7fe51b0
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1986256
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#65674}
parent 6bef631d
@@ -3919,14 +3919,6 @@ void Assembler::movups(Operand dst, XMMRegister src) {
   emit_sse_operand(src, dst);
 }
 
-void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
-  EnsureSpace ensure_space(this);
-  emit_optional_rex_32(dst, src);
-  emit(0x0F);
-  emit(0x16);
-  emit_sse_operand(dst, src);
-}
-
 void Assembler::sse_instr(XMMRegister dst, XMMRegister src, byte escape,
                           byte opcode) {
   EnsureSpace ensure_space(this);
......
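The deleted body is redundant rather than lost: it matched, byte for byte, what the shared sse_instr helper emits. A minimal sketch of that helper, reconstructed from the deleted lines above and the signature visible in context (escape = 0x0F, opcode = 0x16 for movlhps):

void Assembler::sse_instr(XMMRegister dst, XMMRegister src, byte escape,
                          byte opcode) {
  EnsureSpace ensure_space(this);
  emit_optional_rex_32(dst, src);  // REX prefix only if xmm8-xmm15 involved
  emit(escape);                    // 0x0F for movlhps
  emit(opcode);                    // 0x16 for movlhps
  emit_sse_operand(dst, src);      // ModRM byte
}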
@@ -1133,7 +1133,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
   void pshuflw(XMMRegister dst, XMMRegister src, uint8_t shuffle);
   void pshuflw(XMMRegister dst, Operand src, uint8_t shuffle);
-  void movlhps(XMMRegister dst, XMMRegister src);
+  void movlhps(XMMRegister dst, XMMRegister src) {
+    sse_instr(dst, src, 0x0F, 0x16);
+  }
 
   // AVX instruction
   void vmovddup(XMMRegister dst, XMMRegister src);
@@ -1228,6 +1230,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
   SSE2_INSTRUCTION_LIST_SHIFT_IMM(AVX_SSE2_SHIFT_IMM)
 #undef AVX_SSE2_SHIFT_IMM
 
+  void vmovlhps(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
+    vinstr(0x16, dst, src1, src2, kNone, k0F, kWIG);
+  }
   void vcvtss2sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
     vinstr(0x5a, dst, src1, src2, kF3, k0F, kWIG);
   }
......
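The vinstr arguments pin down the VEX encoding of vmovlhps: opcode 0x16 in the 0F escape map (k0F), no mandatory SIMD prefix (kNone), and VEX.W ignored (kWIG). Semantically the three-operand AVX form is non-destructive; a sketch of the lane movement, with illustrative register choices:

// dst[63:0]   = src1[63:0]  (low quadword of the first source)
// dst[127:64] = src2[63:0]  (low quadword of the second source moves high)
__ vmovlhps(xmm0, xmm1, xmm2);  // xmm0 = { xmm1.low64, xmm2.low64 }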
@@ -186,6 +186,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
   AVX_OP(Por, por)
   AVX_OP(Pxor, pxor)
   AVX_OP(Psubd, psubd)
+  AVX_OP(Psubq, psubq)
   AVX_OP(Pslld, pslld)
   AVX_OP(Pavgb, pavgb)
   AVX_OP(Pavgw, pavgw)
@@ -215,6 +216,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
   AVX_OP(Pshufd, pshufd)
   AVX_OP(Cmpps, cmpps)
   AVX_OP(Cmppd, cmppd)
+  AVX_OP(Movlhps, movlhps)
   AVX_OP_SSE3(Movddup, movddup)
   AVX_OP_SSSE3(Pshufb, pshufb)
   AVX_OP_SSSE3(Psignd, psignd)
......
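AVX_OP(Movlhps, movlhps) gives code generation a capitalized wrapper that selects the best available encoding at runtime. This is not the literal macro expansion (which routes through helper templates in this header), but a sketch of the dispatch it amounts to:

void Movlhps(XMMRegister dst, XMMRegister src) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vmovlhps(dst, dst, src);  // AVX form: reuse dst as the first source
  } else {
    movlhps(dst, src);        // SSE fallback, destructive two-operand form
  }
}

The same pattern is why the lowercase-to-capitalized renames in the code generator below are more than cosmetic: each call site now emits VEX forms on AVX hardware instead of being pinned to the SSE encoding.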
@@ -2274,7 +2274,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
     case kX64F64x2ReplaceLane: {
       CpuFeatureScope sse_scope(tasm(), SSE4_1);
       if (instr->InputAt(2)->IsFPRegister()) {
-        __ movq(kScratchRegister, i.InputDoubleRegister(2));
+        __ Movq(kScratchRegister, i.InputDoubleRegister(2));
         __ Pinsrq(i.OutputSimd128Register(), kScratchRegister, i.InputInt8(1));
       } else {
         __ Pinsrq(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
@@ -2304,15 +2304,15 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       DCHECK_EQ(dst, i.InputSimd128Register(0));
       // Extract high quadword.
-      __ pextrq(tmp, dst, 1);
+      __ Pextrq(tmp, dst, static_cast<int8_t>(1));
       // We cannot convert directly into dst, as the next call to Cvtqui2sd will
       // zero it out, so be careful to make sure dst is unique to tmp_xmm.
       __ Cvtqui2sd(tmp_xmm, tmp);
       // Extract low quadword and convert.
-      __ movq(tmp, dst);
+      __ Movq(tmp, dst);
       __ Cvtqui2sd(dst, tmp);
       // Move converted high quadword to top of dst.
-      __ movlhps(dst, tmp_xmm);
+      __ Movlhps(dst, tmp_xmm);
       break;
     }
     case kX64F64x2ExtractLane: {
@@ -2650,7 +2650,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
         __ Pinsrq(i.OutputSimd128Register(), i.InputRegister(2),
                   i.InputInt8(1));
       } else {
-        __ pinsrq(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
+        __ Pinsrq(i.OutputSimd128Register(), i.InputOperand(2), i.InputInt8(1));
       }
       break;
     }
@@ -2658,11 +2658,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       XMMRegister dst = i.OutputSimd128Register();
       XMMRegister src = i.InputSimd128Register(0);
       if (dst == src) {
-        __ movapd(kScratchDoubleReg, src);
+        __ Movapd(kScratchDoubleReg, src);
         src = kScratchDoubleReg;
       }
-      __ pxor(dst, dst);
-      __ psubq(dst, src);
+      __ Pxor(dst, dst);
+      __ Psubq(dst, src);
       break;
     }
     case kX64I64x2Shl: {
......
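As a sanity model for the kX64F64x2UConvertI64x2 sequence above: each unsigned 64-bit lane is pulled into a general-purpose register, converted to double, and the converted high lane is merged back with Movlhps. The hypothetical plain C++ below (names invented for illustration) mirrors that dataflow on a two-lane struct:

#include <cstdint>
#include <cstring>

struct Simd128 { uint64_t lane[2]; };  // one 128-bit register, two quadwords

void F64x2UConvertI64x2(Simd128* dst) {
  double hi = static_cast<double>(dst->lane[1]);  // Pextrq + Cvtqui2sd (tmp_xmm)
  double lo = static_cast<double>(dst->lane[0]);  // Movq + Cvtqui2sd (dst)
  std::memcpy(&dst->lane[0], &lo, sizeof lo);     // converted low lane in place
  std::memcpy(&dst->lane[1], &hi, sizeof hi);     // Movlhps: tmp_xmm.low64 -> dst.high64
}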
@@ -1297,6 +1297,11 @@ int DisassemblerX64::AVXInstruction(byte* data) {
       current += PrintRightXMMOperand(current);
       AppendToBuffer(",%s", NameOfXMMRegister(regop));
       break;
+    case 0x16:
+      AppendToBuffer("vmovlhps %s,%s,", NameOfXMMRegister(regop),
+                     NameOfXMMRegister(vvvv));
+      current += PrintRightXMMOperand(current);
+      break;
     case 0x28:
       AppendToBuffer("vmovaps %s,", NameOfXMMRegister(regop));
       current += PrintRightXMMOperand(current);
......
@@ -663,6 +663,7 @@ TEST(DisasmX64) {
     __ vmovups(xmm5, xmm1);
     __ vmovups(xmm5, Operand(rdx, 4));
     __ vmovups(Operand(rdx, 4), xmm5);
+    __ vmovlhps(xmm1, xmm3, xmm5);
     __ vandps(xmm0, xmm9, xmm2);
     __ vandps(xmm9, xmm1, Operand(rbx, rcx, times_4, 10000));
......
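For the new test line, the encoding should come out to roughly c5 e0 16 cd: a two-byte VEX prefix (vvvv holding inverted xmm3, L=0 for 128-bit, pp=00 for no SIMD prefix), opcode 0x16, and a ModRM byte selecting xmm1 and xmm5. Fed back through the new case 0x16 in the disassembler, the format string above would print:

vmovlhps xmm1,xmm3,xmm5

(The byte sequence is worked out by hand from the VEX encoding rules, not copied from the test's output.)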