Commit ca9f4dab authored by Zhang, Shiyu; committed by Commit Bot

[x64] Use scratch double register to break dependency for vcvtsi2sd and vcvtsi2ss

Change-Id: I3c83a3e9b0a9aa58d31968dbcb5bcb483eb47926
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1965927
Commit-Queue: Shiyu Zhang <shiyu.zhang@intel.com>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#65512}
parent 9cceda90
......@@ -726,8 +726,7 @@ void TurboAssembler::Cvtsd2ss(XMMRegister dst, Operand src) {
void TurboAssembler::Cvtlsi2sd(XMMRegister dst, Register src) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vxorpd(dst, dst, dst);
vcvtlsi2sd(dst, dst, src);
vcvtlsi2sd(dst, kScratchDoubleReg, src);
} else {
xorpd(dst, dst);
cvtlsi2sd(dst, src);
......@@ -737,8 +736,7 @@ void TurboAssembler::Cvtlsi2sd(XMMRegister dst, Register src) {
void TurboAssembler::Cvtlsi2sd(XMMRegister dst, Operand src) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vxorpd(dst, dst, dst);
vcvtlsi2sd(dst, dst, src);
vcvtlsi2sd(dst, kScratchDoubleReg, src);
} else {
xorpd(dst, dst);
cvtlsi2sd(dst, src);
......@@ -748,8 +746,7 @@ void TurboAssembler::Cvtlsi2sd(XMMRegister dst, Operand src) {
void TurboAssembler::Cvtlsi2ss(XMMRegister dst, Register src) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vxorps(dst, dst, dst);
vcvtlsi2ss(dst, dst, src);
vcvtlsi2ss(dst, kScratchDoubleReg, src);
} else {
xorps(dst, dst);
cvtlsi2ss(dst, src);
......@@ -759,8 +756,7 @@ void TurboAssembler::Cvtlsi2ss(XMMRegister dst, Register src) {
void TurboAssembler::Cvtlsi2ss(XMMRegister dst, Operand src) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vxorps(dst, dst, dst);
vcvtlsi2ss(dst, dst, src);
vcvtlsi2ss(dst, kScratchDoubleReg, src);
} else {
xorps(dst, dst);
cvtlsi2ss(dst, src);
......@@ -770,8 +766,7 @@ void TurboAssembler::Cvtlsi2ss(XMMRegister dst, Operand src) {
void TurboAssembler::Cvtqsi2ss(XMMRegister dst, Register src) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vxorps(dst, dst, dst);
vcvtqsi2ss(dst, dst, src);
vcvtqsi2ss(dst, kScratchDoubleReg, src);
} else {
xorps(dst, dst);
cvtqsi2ss(dst, src);
......@@ -781,8 +776,7 @@ void TurboAssembler::Cvtqsi2ss(XMMRegister dst, Register src) {
void TurboAssembler::Cvtqsi2ss(XMMRegister dst, Operand src) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vxorps(dst, dst, dst);
vcvtqsi2ss(dst, dst, src);
vcvtqsi2ss(dst, kScratchDoubleReg, src);
} else {
xorps(dst, dst);
cvtqsi2ss(dst, src);
......@@ -792,8 +786,7 @@ void TurboAssembler::Cvtqsi2ss(XMMRegister dst, Operand src) {
void TurboAssembler::Cvtqsi2sd(XMMRegister dst, Register src) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vxorpd(dst, dst, dst);
vcvtqsi2sd(dst, dst, src);
vcvtqsi2sd(dst, kScratchDoubleReg, src);
} else {
xorpd(dst, dst);
cvtqsi2sd(dst, src);
......@@ -803,8 +796,7 @@ void TurboAssembler::Cvtqsi2sd(XMMRegister dst, Register src) {
void TurboAssembler::Cvtqsi2sd(XMMRegister dst, Operand src) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vxorpd(dst, dst, dst);
vcvtqsi2sd(dst, dst, src);
vcvtqsi2sd(dst, kScratchDoubleReg, src);
} else {
xorpd(dst, dst);
cvtqsi2sd(dst, src);
......
......@@ -291,12 +291,6 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void Cvttss2si(Register dst, Operand src);
void Cvttss2siq(Register dst, XMMRegister src);
void Cvttss2siq(Register dst, Operand src);
void Cvtqsi2ss(XMMRegister dst, Register src);
void Cvtqsi2ss(XMMRegister dst, Operand src);
void Cvtqsi2sd(XMMRegister dst, Register src);
void Cvtqsi2sd(XMMRegister dst, Operand src);
void Cvtlsi2ss(XMMRegister dst, Register src);
void Cvtlsi2ss(XMMRegister dst, Operand src);
void Cvtlui2ss(XMMRegister dst, Register src);
void Cvtlui2ss(XMMRegister dst, Operand src);
void Cvtlui2sd(XMMRegister dst, Register src);
......@@ -310,9 +304,17 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void Cvttss2uiq(Register dst, Operand src, Label* fail = nullptr);
void Cvttss2uiq(Register dst, XMMRegister src, Label* fail = nullptr);
// cvtsi2sd instruction only writes to the low 64-bit of dst register, which
// hinders register renaming and makes dependence chains longer. So we use
// xorpd to clear the dst register before cvtsi2sd to solve this issue.
// cvtsi2sd and cvtsi2ss only write the low 64 and 32 bits of the dst
// register, respectively, which hinders register renaming and makes
// dependence chains longer. To break this dependency, the non-AVX path clears
// dst with xorpd/xorps before the conversion, and the AVX path passes a
// scratch XMM register as the first source operand.
void Cvtqsi2ss(XMMRegister dst, Register src);
void Cvtqsi2ss(XMMRegister dst, Operand src);
void Cvtqsi2sd(XMMRegister dst, Register src);
void Cvtqsi2sd(XMMRegister dst, Operand src);
void Cvtlsi2ss(XMMRegister dst, Register src);
void Cvtlsi2ss(XMMRegister dst, Operand src);
void Cvtlsi2sd(XMMRegister dst, Register src);
void Cvtlsi2sd(XMMRegister dst, Operand src);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment