Commit 8ee7d962 authored by Deepti Gandluri, committed by Commit Bot

Fix intermixing of AVX/SSE instructions

 - Add vmovdqu to the assembler
 - Fix bugs in macro assembler for instructions with immediates
 - Fix codegen

Bug: v8:9499
Change-Id: Id9a521561ed5481eb617b2d97e4af933aac7a54e
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1707577
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Deepti Gandluri <gdeepti@chromium.org>
Cr-Commit-Position: refs/heads/master@{#62792}
parent 58d1e842
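The CL description does not spell out the motivation, but the underlying issue is that mixing VEX-encoded (AVX) and legacy SSE instructions in one instruction stream can trigger costly SSE/AVX transition stalls on Intel CPUs, so generated code should commit to a single encoding. A minimal sketch of the pattern this change enforces, using the capitalized macro-assembler mnemonics from the diff below (EmitFloat64Neg itself is a hypothetical helper, not V8 code):

void EmitFloat64Neg(TurboAssembler* tasm, XMMRegister out,
                    XMMRegister scratch) {
  tasm->Pcmpeqd(scratch, scratch);  // scratch = all ones
  tasm->Psllq(scratch, 63);         // keep only each lane's sign bit
  tasm->Xorpd(out, scratch);        // flip the sign bit of the double
}

Each wrapper checks CpuFeatures::IsSupported(AVX) once and emits either the VEX or the legacy form, so the whole sequence stays in one encoding domain.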
@@ -4187,6 +4187,22 @@ void Assembler::vmovq(Register dst, XMMRegister src) {
  emit_sse_operand(src, dst);
}

void Assembler::vmovdqu(XMMRegister dst, Operand src) {
  // VEX.128.F3.0F.WIG 6F /r: vmovdqu xmm, m128 (unaligned load).
  DCHECK(IsEnabled(AVX));
  EnsureSpace ensure_space(this);
  emit_vex_prefix(dst, xmm0, src, kL128, kF3, k0F, kWIG);
  emit(0x6F);
  emit_sse_operand(dst, src);
}

void Assembler::vmovdqu(Operand dst, XMMRegister src) {
  // VEX.128.F3.0F.WIG 7F /r: vmovdqu m128, xmm (unaligned store).
  DCHECK(IsEnabled(AVX));
  EnsureSpace ensure_space(this);
  emit_vex_prefix(src, xmm0, dst, kL128, kF3, k0F, kWIG);
  emit(0x7F);
  emit_sse_operand(src, dst);
}

void Assembler::vinstr(byte op, XMMRegister dst, XMMRegister src1,
                       XMMRegister src2, SIMDPrefix pp, LeadingOpcode m,
                       VexW w) {
...
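For reference, the two overloads map directly onto the Intel encodings VEX.128.F3.0F.WIG 6F /r (vmovdqu xmm, m128, the load) and VEX.128.F3.0F.WIG 7F /r (vmovdqu m128, xmm, the store), which is where the kF3, k0F, kWIG arguments to emit_vex_prefix come from. A hedged usage sketch of the new entry points; masm here is a hypothetical Assembler, and real call sites go through the Movdqu macro instead:

if (CpuFeatures::IsSupported(AVX)) {
  CpuFeatureScope scope(&masm, AVX);
  masm.vmovdqu(xmm1, Operand(rsp, 0));  // 128-bit unaligned load
  masm.vmovdqu(Operand(rsp, 0), xmm1);  // 128-bit unaligned store
}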
@@ -1305,6 +1305,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
  }
  void vmovsd(XMMRegister dst, Operand src) { vsd(0x10, dst, xmm0, src); }
  void vmovsd(Operand dst, XMMRegister src) { vsd(0x11, src, xmm0, dst); }
  void vmovdqu(XMMRegister dst, Operand src);
  void vmovdqu(Operand dst, XMMRegister src);
#define AVX_SP_3(instr, opcode) \
  AVX_S_3(instr, opcode)        \
...
@@ -1771,6 +1771,42 @@ void TurboAssembler::Pinsrd(XMMRegister dst, Operand src, int8_t imm8) {
  }
}

void TurboAssembler::Psllq(XMMRegister dst, byte imm8) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpsllq(dst, dst, imm8);
  } else {
    psllq(dst, imm8);
  }
}

void TurboAssembler::Psrlq(XMMRegister dst, byte imm8) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpsrlq(dst, dst, imm8);
  } else {
    psrlq(dst, imm8);
  }
}

void TurboAssembler::Pslld(XMMRegister dst, byte imm8) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpslld(dst, dst, imm8);
  } else {
    pslld(dst, imm8);
  }
}

void TurboAssembler::Psrld(XMMRegister dst, byte imm8) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope scope(this, AVX);
    vpsrld(dst, dst, imm8);
  } else {
    psrld(dst, imm8);
  }
}

void TurboAssembler::Lzcntl(Register dst, Register src) {
  if (CpuFeatures::IsSupported(LZCNT)) {
    CpuFeatureScope scope(this, LZCNT);
...
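Note that these four shift wrappers are written out by hand rather than generated through AVX_OP; a plausible reading (an assumption on my part, the CL only says "fix bugs ... for instructions with immediates") is that the generic AvxHelper dispatch did not resolve correctly for the byte-immediate overloads. An illustrative call sequence, not taken from the CL, building an abs() mask for doubles without mixing encodings (tasm is a hypothetical TurboAssembler&):

tasm.Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);  // all ones
tasm.Psrlq(kScratchDoubleReg, 1);  // clear each lane's top bit: f64 abs mask
tasm.Andpd(dst, kScratchDoubleReg);
// With AVX this emits vpcmpeqd/vpsrlq/vandpd; otherwise pcmpeqd/psrlq/andpd.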
@@ -80,7 +80,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
  template <typename Dst, typename... Args>
  struct AvxHelper {
    Assembler* assm;
    // Call a method where the AVX version expects the dst argument to be
    // duplicated.
    template <void (Assembler::*avx)(Dst, Dst, Args...),
              void (Assembler::*no_avx)(Dst, Args...)>
@@ -93,7 +93,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
      }
    }
    // Call a method where the AVX version expects no duplicated dst argument.
    template <void (Assembler::*avx)(Dst, Args...),
              void (Assembler::*no_avx)(Dst, Args...)>
    void emit(Dst dst, Args... args) {
@@ -127,11 +127,8 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
  AVX_OP(Movmskpd, movmskpd)
  AVX_OP(Movss, movss)
  AVX_OP(Movsd, movsd)
  AVX_OP(Movdqu, movdqu)
  AVX_OP(Pcmpeqd, pcmpeqd)
  AVX_OP(Pslld, pslld)
  AVX_OP(Psllq, psllq)
  AVX_OP(Psrld, psrld)
  AVX_OP(Psrlq, psrlq)
  AVX_OP(Addss, addss)
  AVX_OP(Addsd, addsd)
  AVX_OP(Mulsd, mulsd)
@@ -370,6 +367,11 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
  void Pinsrd(XMMRegister dst, Register src, int8_t imm8);
  void Pinsrd(XMMRegister dst, Operand src, int8_t imm8);

  void Psllq(XMMRegister dst, byte imm8);
  void Psrlq(XMMRegister dst, byte imm8);
  void Pslld(XMMRegister dst, byte imm8);
  void Psrld(XMMRegister dst, byte imm8);

  void CompareRoot(Register with, RootIndex index);
  void CompareRoot(Operand with, RootIndex index);
...
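For context, each AVX_OP(Macro, name) line expands to a forwarding template built on AvxHelper, roughly like the following simplified sketch (a paraphrase of the macro body, not its exact text in the tree):

template <typename Dst, typename... Args>
void Movdqu(Dst dst, Args... args) {
  AvxHelper<Dst, Args...>{this}
      .template emit<&Assembler::vmovdqu, &Assembler::movdqu>(dst, args...);
}

Overload resolution on AvxHelper::emit then picks the variant that duplicates dst for three-operand AVX instructions, and the plain variant for instructions like vmovdqu that keep the SSE operand shape.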
@@ -1318,16 +1318,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      break;
    case kSSEFloat32Abs: {
      // TODO(bmeurer): Use RIP relative 128-bit constants.
      __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
      __ Psrlq(kScratchDoubleReg, 33);
      __ Andps(i.OutputDoubleRegister(), kScratchDoubleReg);
      break;
    }
    case kSSEFloat32Neg: {
      // TODO(bmeurer): Use RIP relative 128-bit constants.
      __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
      __ Psllq(kScratchDoubleReg, 31);
      __ Xorps(i.OutputDoubleRegister(), kScratchDoubleReg);
      break;
    }
    case kSSEFloat32Sqrt:
@@ -1528,16 +1528,16 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
    }
    case kSSEFloat64Abs: {
      // TODO(bmeurer): Use RIP relative 128-bit constants.
      __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
      __ Psrlq(kScratchDoubleReg, 1);
      __ Andpd(i.OutputDoubleRegister(), kScratchDoubleReg);
      break;
    }
    case kSSEFloat64Neg: {
      // TODO(bmeurer): Use RIP relative 128-bit constants.
      __ Pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
      __ Psllq(kScratchDoubleReg, 63);
      __ Xorpd(i.OutputDoubleRegister(), kScratchDoubleReg);
      break;
    }
    case kSSEFloat64Sqrt:
@@ -2021,11 +2021,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
    case kX64Movss:
      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
      if (instr->HasOutput()) {
        __ Movss(i.OutputDoubleRegister(), i.MemoryOperand());
      } else {
        size_t index = 0;
        Operand operand = i.MemoryOperand(&index);
        __ Movss(operand, i.InputDoubleRegister(index));
      }
      break;
    case kX64Movsd: {
@@ -2054,11 +2054,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      CpuFeatureScope sse_scope(tasm(), SSSE3);
      EmitOOLTrapIfNeeded(zone(), this, opcode, instr, i, __ pc_offset());
      if (instr->HasOutput()) {
        __ Movdqu(i.OutputSimd128Register(), i.MemoryOperand());
      } else {
        size_t index = 0;
        Operand operand = i.MemoryOperand(&index);
        __ Movdqu(operand, i.InputSimd128Register(index));
      }
      break;
    }
@@ -2080,7 +2080,7 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
      if (instr->InputAt(0)->IsRegister()) {
        __ Movd(i.OutputDoubleRegister(), i.InputRegister(0));
      } else {
        __ Movss(i.OutputDoubleRegister(), i.InputOperand(0));
      }
      break;
    case kX64BitcastLD:
@@ -4076,7 +4076,7 @@ void CodeGenerator::AssembleConstructFrame() {
    int slot_idx = 0;
    for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
      if (!((1 << i) & saves_fp)) continue;
      __ Movdqu(Operand(rsp, kQuadWordSize * slot_idx),
                XMMRegister::from_code(i));
      slot_idx++;
    }
@@ -4118,7 +4118,7 @@ void CodeGenerator::AssembleReturn(InstructionOperand* pop) {
    int slot_idx = 0;
    for (int i = 0; i < XMMRegister::kNumRegisters; i++) {
      if (!((1 << i) & saves_fp)) continue;
      __ Movdqu(XMMRegister::from_code(i),
                Operand(rsp, kQuadWordSize * slot_idx));
      slot_idx++;
    }
...