Commit ca9f4dab authored by Zhang, Shiyu; committed by Commit Bot

[x64] Use scratch double register to break dependency for vcvtsi2sd and vcvtsi2ss

Change-Id: I3c83a3e9b0a9aa58d31968dbcb5bcb483eb47926
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1965927
Commit-Queue: Shiyu Zhang <shiyu.zhang@intel.com>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#65512}
parent 9cceda90
...@@ -726,8 +726,7 @@ void TurboAssembler::Cvtsd2ss(XMMRegister dst, Operand src) { ...@@ -726,8 +726,7 @@ void TurboAssembler::Cvtsd2ss(XMMRegister dst, Operand src) {
void TurboAssembler::Cvtlsi2sd(XMMRegister dst, Register src) { void TurboAssembler::Cvtlsi2sd(XMMRegister dst, Register src) {
if (CpuFeatures::IsSupported(AVX)) { if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX); CpuFeatureScope scope(this, AVX);
vxorpd(dst, dst, dst); vcvtlsi2sd(dst, kScratchDoubleReg, src);
vcvtlsi2sd(dst, dst, src);
} else { } else {
xorpd(dst, dst); xorpd(dst, dst);
cvtlsi2sd(dst, src); cvtlsi2sd(dst, src);
...@@ -737,8 +736,7 @@ void TurboAssembler::Cvtlsi2sd(XMMRegister dst, Register src) { ...@@ -737,8 +736,7 @@ void TurboAssembler::Cvtlsi2sd(XMMRegister dst, Register src) {
void TurboAssembler::Cvtlsi2sd(XMMRegister dst, Operand src) { void TurboAssembler::Cvtlsi2sd(XMMRegister dst, Operand src) {
if (CpuFeatures::IsSupported(AVX)) { if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX); CpuFeatureScope scope(this, AVX);
vxorpd(dst, dst, dst); vcvtlsi2sd(dst, kScratchDoubleReg, src);
vcvtlsi2sd(dst, dst, src);
} else { } else {
xorpd(dst, dst); xorpd(dst, dst);
cvtlsi2sd(dst, src); cvtlsi2sd(dst, src);
...@@ -748,8 +746,7 @@ void TurboAssembler::Cvtlsi2sd(XMMRegister dst, Operand src) { ...@@ -748,8 +746,7 @@ void TurboAssembler::Cvtlsi2sd(XMMRegister dst, Operand src) {
void TurboAssembler::Cvtlsi2ss(XMMRegister dst, Register src) { void TurboAssembler::Cvtlsi2ss(XMMRegister dst, Register src) {
if (CpuFeatures::IsSupported(AVX)) { if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX); CpuFeatureScope scope(this, AVX);
vxorps(dst, dst, dst); vcvtlsi2ss(dst, kScratchDoubleReg, src);
vcvtlsi2ss(dst, dst, src);
} else { } else {
xorps(dst, dst); xorps(dst, dst);
cvtlsi2ss(dst, src); cvtlsi2ss(dst, src);
...@@ -759,8 +756,7 @@ void TurboAssembler::Cvtlsi2ss(XMMRegister dst, Register src) { ...@@ -759,8 +756,7 @@ void TurboAssembler::Cvtlsi2ss(XMMRegister dst, Register src) {
void TurboAssembler::Cvtlsi2ss(XMMRegister dst, Operand src) { void TurboAssembler::Cvtlsi2ss(XMMRegister dst, Operand src) {
if (CpuFeatures::IsSupported(AVX)) { if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX); CpuFeatureScope scope(this, AVX);
vxorps(dst, dst, dst); vcvtlsi2ss(dst, kScratchDoubleReg, src);
vcvtlsi2ss(dst, dst, src);
} else { } else {
xorps(dst, dst); xorps(dst, dst);
cvtlsi2ss(dst, src); cvtlsi2ss(dst, src);
...@@ -770,8 +766,7 @@ void TurboAssembler::Cvtlsi2ss(XMMRegister dst, Operand src) { ...@@ -770,8 +766,7 @@ void TurboAssembler::Cvtlsi2ss(XMMRegister dst, Operand src) {
void TurboAssembler::Cvtqsi2ss(XMMRegister dst, Register src) { void TurboAssembler::Cvtqsi2ss(XMMRegister dst, Register src) {
if (CpuFeatures::IsSupported(AVX)) { if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX); CpuFeatureScope scope(this, AVX);
vxorps(dst, dst, dst); vcvtqsi2ss(dst, kScratchDoubleReg, src);
vcvtqsi2ss(dst, dst, src);
} else { } else {
xorps(dst, dst); xorps(dst, dst);
cvtqsi2ss(dst, src); cvtqsi2ss(dst, src);
...@@ -781,8 +776,7 @@ void TurboAssembler::Cvtqsi2ss(XMMRegister dst, Register src) { ...@@ -781,8 +776,7 @@ void TurboAssembler::Cvtqsi2ss(XMMRegister dst, Register src) {
void TurboAssembler::Cvtqsi2ss(XMMRegister dst, Operand src) { void TurboAssembler::Cvtqsi2ss(XMMRegister dst, Operand src) {
if (CpuFeatures::IsSupported(AVX)) { if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX); CpuFeatureScope scope(this, AVX);
vxorps(dst, dst, dst); vcvtqsi2ss(dst, kScratchDoubleReg, src);
vcvtqsi2ss(dst, dst, src);
} else { } else {
xorps(dst, dst); xorps(dst, dst);
cvtqsi2ss(dst, src); cvtqsi2ss(dst, src);
...@@ -792,8 +786,7 @@ void TurboAssembler::Cvtqsi2ss(XMMRegister dst, Operand src) { ...@@ -792,8 +786,7 @@ void TurboAssembler::Cvtqsi2ss(XMMRegister dst, Operand src) {
void TurboAssembler::Cvtqsi2sd(XMMRegister dst, Register src) { void TurboAssembler::Cvtqsi2sd(XMMRegister dst, Register src) {
if (CpuFeatures::IsSupported(AVX)) { if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX); CpuFeatureScope scope(this, AVX);
vxorpd(dst, dst, dst); vcvtqsi2sd(dst, kScratchDoubleReg, src);
vcvtqsi2sd(dst, dst, src);
} else { } else {
xorpd(dst, dst); xorpd(dst, dst);
cvtqsi2sd(dst, src); cvtqsi2sd(dst, src);
...@@ -803,8 +796,7 @@ void TurboAssembler::Cvtqsi2sd(XMMRegister dst, Register src) { ...@@ -803,8 +796,7 @@ void TurboAssembler::Cvtqsi2sd(XMMRegister dst, Register src) {
void TurboAssembler::Cvtqsi2sd(XMMRegister dst, Operand src) { void TurboAssembler::Cvtqsi2sd(XMMRegister dst, Operand src) {
if (CpuFeatures::IsSupported(AVX)) { if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX); CpuFeatureScope scope(this, AVX);
vxorpd(dst, dst, dst); vcvtqsi2sd(dst, kScratchDoubleReg, src);
vcvtqsi2sd(dst, dst, src);
} else { } else {
xorpd(dst, dst); xorpd(dst, dst);
cvtqsi2sd(dst, src); cvtqsi2sd(dst, src);
......
...@@ -291,12 +291,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { ...@@ -291,12 +291,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void Cvttss2si(Register dst, Operand src); void Cvttss2si(Register dst, Operand src);
void Cvttss2siq(Register dst, XMMRegister src); void Cvttss2siq(Register dst, XMMRegister src);
void Cvttss2siq(Register dst, Operand src); void Cvttss2siq(Register dst, Operand src);
void Cvtqsi2ss(XMMRegister dst, Register src);
void Cvtqsi2ss(XMMRegister dst, Operand src);
void Cvtqsi2sd(XMMRegister dst, Register src);
void Cvtqsi2sd(XMMRegister dst, Operand src);
void Cvtlsi2ss(XMMRegister dst, Register src);
void Cvtlsi2ss(XMMRegister dst, Operand src);
void Cvtlui2ss(XMMRegister dst, Register src); void Cvtlui2ss(XMMRegister dst, Register src);
void Cvtlui2ss(XMMRegister dst, Operand src); void Cvtlui2ss(XMMRegister dst, Operand src);
void Cvtlui2sd(XMMRegister dst, Register src); void Cvtlui2sd(XMMRegister dst, Register src);
...@@ -310,9 +304,17 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { ...@@ -310,9 +304,17 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void Cvttss2uiq(Register dst, Operand src, Label* fail = nullptr); void Cvttss2uiq(Register dst, Operand src, Label* fail = nullptr);
void Cvttss2uiq(Register dst, XMMRegister src, Label* fail = nullptr); void Cvttss2uiq(Register dst, XMMRegister src, Label* fail = nullptr);
// cvtsi2sd instruction only writes to the low 64-bit of dst register, which // cvtsi2sd and cvtsi2ss instructions only write to the low 64/32-bit of dst
// hinders register renaming and makes dependence chains longer. So we use // register, which hinders register renaming and makes dependence chains
// xorpd to clear the dst register before cvtsi2sd to solve this issue. // longer. So we use xorpd to clear the dst register before cvtsi2sd for
// non-AVX and a scratch XMM register as first src for AVX to solve this
// issue.
void Cvtqsi2ss(XMMRegister dst, Register src);
void Cvtqsi2ss(XMMRegister dst, Operand src);
void Cvtqsi2sd(XMMRegister dst, Register src);
void Cvtqsi2sd(XMMRegister dst, Operand src);
void Cvtlsi2ss(XMMRegister dst, Register src);
void Cvtlsi2ss(XMMRegister dst, Operand src);
void Cvtlsi2sd(XMMRegister dst, Register src); void Cvtlsi2sd(XMMRegister dst, Register src);
void Cvtlsi2sd(XMMRegister dst, Operand src); void Cvtlsi2sd(XMMRegister dst, Operand src);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment