Commit 8158c8c0 authored by Zhi An Ng, committed by Commit Bot

[wasm-simd][ia32] Remove some SSE<->AVX transitions

Our codegen currently mixes SSE and AVX instructions, which can cause
SSE<->AVX transition delays on some CPUs; ideally we should stick to a
single encoding. Add AVX versions of movss and movsd, and use the
macro-assembler methods to generate the AVX instructions when they are
supported, as sketched below.

Bug: v8:11190
Change-Id: Iff7c0fb892cea85731f880ac2895480621b3092f
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2554257
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#71349}
parent 86991d05
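
For context on the transition delays: on several Intel microarchitectures,
switching between legacy-SSE and VEX-encoded instructions while the upper
YMM state is dirty forces an internal state save/restore that can cost tens
of cycles. The macro-assembler methods avoid the mix by dispatching on CPU
support, roughly like this (a minimal sketch following V8's CpuFeatures
conventions, not the verbatim source):

  // Prefer the VEX (AVX) encoding when available so that, e.g.,
  //   vaddps xmm0, xmm1, xmm2   ; VEX-encoded
  //   movss  xmm3, [eax]        ; legacy SSE <- potential transition stall
  // becomes a uniformly VEX-encoded sequence.
  void TurboAssembler::Movss(XMMRegister dst, Operand src) {
    if (CpuFeatures::IsSupported(AVX)) {
      CpuFeatureScope scope(this, AVX);
      vmovss(dst, src);  // VEX-encoded
    } else {
      movss(dst, src);   // legacy SSE
    }
  }
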
......
@@ -3079,6 +3079,16 @@ void Assembler::sse4_instr(XMMRegister dst, Operand src, byte prefix,
emit_sse_operand(dst, src);
}

void Assembler::vinstr(byte op, XMMRegister dst, XMMRegister src1,
XMMRegister src2, SIMDPrefix pp, LeadingOpcode m,
VexW w) {
DCHECK(IsEnabled(AVX));
EnsureSpace ensure_space(this);
emit_vex_prefix(src1, kL128, pp, m, w);
EMIT(op);
emit_sse_operand(dst, src2);
}

void Assembler::vinstr(byte op, XMMRegister dst, XMMRegister src1, Operand src2,
SIMDPrefix pp, LeadingOpcode m, VexW w) {
DCHECK(IsEnabled(AVX));
......
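
As a concrete check of what these helpers emit, encoding
vmovss(xmm1, xmm2, xmm3) by hand (opcode 0x10, pp = kF3, kL128) under the
standard two-byte VEX rules gives:

  c5 ea 10 cb
  // c5: two-byte VEX prefix
  // ea: R=1, vvvv = ~2 = 1101 (xmm2), L=0 (128-bit), pp=10 (F3)
  // 10: opcode
  // cb: ModRM mod=11, reg=1 (xmm1), rm=3 (xmm3)

(Worked from the VEX encoding rules for illustration; not output captured
from the assembler.)
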
......
@@ -1334,10 +1334,29 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void vsqrtpd(XMMRegister dst, Operand src) {
vinstr(0x51, dst, xmm0, src, k66, k0F, kWIG);
}
void vmovss(Operand dst, XMMRegister src) {
vinstr(0x11, src, xmm0, dst, kF3, k0F, kWIG);
}
void vmovss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vinstr(0x10, dst, src1, src2, kF3, k0F, kWIG);
}
void vmovss(XMMRegister dst, Operand src) {
vinstr(0x10, dst, xmm0, src, kF3, k0F, kWIG);
}
void vmovsd(Operand dst, XMMRegister src) {
vinstr(0x11, src, xmm0, dst, kF2, k0F, kWIG);
}
void vmovsd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vinstr(0x10, dst, src1, src2, kF2, k0F, kWIG);
}
void vmovsd(XMMRegister dst, Operand src) {
vinstr(0x10, dst, xmm0, src, kF2, k0F, kWIG);
}
void vmovaps(XMMRegister dst, XMMRegister src) { vmovaps(dst, Operand(src)); }
void vmovaps(XMMRegister dst, Operand src) { vps(0x28, dst, xmm0, src); }
void vmovapd(XMMRegister dst, XMMRegister src) { vmovapd(dst, Operand(src)); }
void vmovapd(XMMRegister dst, Operand src) { vpd(0x28, dst, xmm0, src); }
void vmovups(Operand dst, XMMRegister src) { vps(0x11, src, xmm0, dst); }
void vmovups(XMMRegister dst, XMMRegister src) { vmovups(dst, Operand(src)); }
void vmovups(XMMRegister dst, Operand src) { vps(0x10, dst, xmm0, src); }
void vmovupd(XMMRegister dst, Operand src) { vpd(0x10, dst, xmm0, src); }
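
Note the operand shapes above: in the VEX encoding, the register-to-register
form of movss/movsd takes a second source whose upper lanes merge into the
destination, so src1 travels in the VEX.vvvv field; the memory forms pass
xmm0 to vinstr, which encodes as the all-ones vvvv value required when the
field is unused. Hypothetical calls (register and operand choices are
illustrative):

  vmovss(xmm1, xmm2, xmm3);       // xmm1[31:0] = xmm3[31:0], upper bits from xmm2
  vmovss(xmm1, Operand(eax, 0));  // scalar load; vvvv unused (xmm0 placeholder)
  vmovss(Operand(eax, 0), xmm1);  // scalar store, opcode 0x11
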
......
@@ -1806,6 +1825,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
byte escape2, byte opcode);
void sse4_instr(XMMRegister dst, Operand src, byte prefix, byte escape1,
byte escape2, byte opcode);
void vinstr(byte op, XMMRegister dst, XMMRegister src1, XMMRegister src2,
SIMDPrefix pp, LeadingOpcode m, VexW w);
void vinstr(byte op, XMMRegister dst, XMMRegister src1, Operand src2,
SIMDPrefix pp, LeadingOpcode m, VexW w);
// Most BMI instructions are similar.
......
......
@@ -285,6 +285,10 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
} \
}
AVX_OP2_WITH_TYPE(Movss, movss, Operand, XMMRegister)
AVX_OP2_WITH_TYPE(Movss, movss, XMMRegister, Operand)
AVX_OP2_WITH_TYPE(Movsd, movsd, Operand, XMMRegister)
AVX_OP2_WITH_TYPE(Movsd, movsd, XMMRegister, Operand)
AVX_OP2_WITH_TYPE(Rcpps, rcpps, XMMRegister, const Operand&)
AVX_OP2_WITH_TYPE(Rsqrtps, rsqrtps, XMMRegister, const Operand&)
AVX_OP2_WITH_TYPE(Movdqu, movdqu, XMMRegister, Operand)
......
@@ -300,6 +304,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP2_WITH_TYPE(Sqrtpd, sqrtpd, XMMRegister, XMMRegister)
AVX_OP2_WITH_TYPE(Sqrtpd, sqrtpd, XMMRegister, const Operand&)
AVX_OP2_WITH_TYPE(Movaps, movaps, XMMRegister, XMMRegister)
AVX_OP2_WITH_TYPE(Movups, movups, XMMRegister, Operand)
AVX_OP2_WITH_TYPE(Movups, movups, XMMRegister, XMMRegister)
AVX_OP2_WITH_TYPE(Movups, movups, Operand, XMMRegister)
AVX_OP2_WITH_TYPE(Movapd, movapd, XMMRegister, XMMRegister)
AVX_OP2_WITH_TYPE(Movapd, movapd, XMMRegister, const Operand&)
AVX_OP2_WITH_TYPE(Movupd, movupd, XMMRegister, const Operand&)
......
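
The closing braces visible above are the tail of AVX_OP2_WITH_TYPE, which
generalizes the dispatch sketched earlier to arbitrary two-operand
instructions; its definition is along these lines (a reconstruction from the
visible fragment and V8's usual pattern, so details may differ):

  #define AVX_OP2_WITH_TYPE(macro_name, name, dst_type, src_type) \
    void macro_name(dst_type dst, src_type src) {                 \
      if (CpuFeatures::IsSupported(AVX)) {                        \
        CpuFeatureScope scope(this, AVX);                         \
        v##name(dst, src);  /* VEX-encoded form */                \
      } else {                                                    \
        name(dst, src);     /* legacy SSE form */                 \
      }                                                           \
    }

Each instantiation added here (Movss, Movsd, Movups with the various operand
shapes) only compiles if both the SSE and the AVX assembler methods exist,
which is why the vmovss/vmovsd overloads above were needed.
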
......
@@ -863,6 +863,16 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
int mod, regop, rm, vvvv = vex_vreg();
get_modrm(*current, &mod, &regop, &rm);
switch (opcode) {
case 0x10:
AppendToBuffer("vmovsd %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0x11:
AppendToBuffer("vmovsd ");
current += PrintRightXMMOperand(current);
AppendToBuffer(",%s", NameOfXMMRegister(regop));
break;
case 0x12:
AppendToBuffer("vmovddup %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
......
@@ -920,6 +930,16 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
int mod, regop, rm, vvvv = vex_vreg();
get_modrm(*current, &mod, &regop, &rm);
switch (opcode) {
case 0x10:
AppendToBuffer("vmovss %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0x11:
AppendToBuffer("vmovss ");
current += PrintRightXMMOperand(current);
AppendToBuffer(",%s", NameOfXMMRegister(regop));
break;
case 0x51:
AppendToBuffer("vsqrtss %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
......
@@ -1078,6 +1098,11 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
AppendToBuffer("vmovups %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
break;
case 0x11:
AppendToBuffer("vmovups ");
current += PrintRightXMMOperand(current);
AppendToBuffer(",%s", NameOfXMMRegister(regop));
break;
case 0x28:
AppendToBuffer("vmovaps %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
......
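
Given the format strings above, the loads print as three-operand forms with
the vvvv register in the middle, and the stores print memory-first; for
example, the byte sequences below should disassemble as shown (expected
output worked out by hand, not captured from a test run):

  c5 ea 10 cb   ->  vmovss xmm1,xmm2,xmm3
  c5 fa 11 08   ->  vmovss [eax],xmm1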