Commit c887e40c authored by Clemens Hammacher's avatar Clemens Hammacher Committed by Commit Bot

[assembler][ia32] Don't clobber random registers

The fallback for {Pinsrd} and {Pextrd} for the non-avx and non-sse
variant clobbered the {xmm0} register. This CL fixes this by storing
the values on the stack and modifying them there instead.
The alternative would have been to pass in a scratch register. But this
path is not commonly used and we cannot express in the API whether the
scratch register is needed or not. So we would sometimes have to spill a
register to pass it as scratch register even though it is then unused.

R=titzer@chromium.org
CC=​mstarzinger@chromium.org

Bug: v8:6600
Change-Id: Ieae53b892cc55eed4fcfa3d0e7f82f3e1afe72be
Reviewed-on: https://chromium-review.googlesource.com/1219633Reviewed-by: 's avatarMichael Starzinger <mstarzinger@chromium.org>
Reviewed-by: 's avatarBen Titzer <titzer@chromium.org>
Commit-Queue: Clemens Hammacher <clemensh@chromium.org>
Cr-Commit-Position: refs/heads/master@{#55800}
parent ae9a577c
...@@ -1463,10 +1463,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -1463,10 +1463,10 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
} }
break; break;
case kSSEFloat64InsertLowWord32: case kSSEFloat64InsertLowWord32:
__ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0, true); __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
break; break;
case kSSEFloat64InsertHighWord32: case kSSEFloat64InsertHighWord32:
__ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1, true); __ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
break; break;
case kSSEFloat64LoadLowWord32: case kSSEFloat64LoadLowWord32:
__ movd(i.OutputDoubleRegister(), i.InputOperand(0)); __ movd(i.OutputDoubleRegister(), i.InputOperand(0));
......
...@@ -1509,37 +1509,44 @@ void TurboAssembler::Pextrd(Register dst, XMMRegister src, int8_t imm8) { ...@@ -1509,37 +1509,44 @@ void TurboAssembler::Pextrd(Register dst, XMMRegister src, int8_t imm8) {
pextrd(dst, src, imm8); pextrd(dst, src, imm8);
return; return;
} }
DCHECK_LT(imm8, 4); // Without AVX or SSE, we can only have 64-bit values in xmm registers.
pshufd(xmm0, src, imm8); // We don't have an xmm scratch register, so move the data via the stack. This
movd(dst, xmm0); // path is rarely required, so it's acceptable to be slow.
DCHECK_LT(imm8, 2);
sub(esp, Immediate(kDoubleSize));
movsd(Operand(esp, 0), src);
mov(dst, Operand(esp, imm8 * kUInt32Size));
add(esp, Immediate(kDoubleSize));
} }
void TurboAssembler::Pinsrd(XMMRegister dst, Operand src, int8_t imm8, void TurboAssembler::Pinsrd(XMMRegister dst, Operand src, int8_t imm8) {
bool is_64_bits) { if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vpinsrd(dst, dst, src, imm8);
return;
}
if (CpuFeatures::IsSupported(SSE4_1)) { if (CpuFeatures::IsSupported(SSE4_1)) {
CpuFeatureScope sse_scope(this, SSE4_1); CpuFeatureScope sse_scope(this, SSE4_1);
pinsrd(dst, src, imm8); pinsrd(dst, src, imm8);
return; return;
} }
if (is_64_bits) { // Without AVX or SSE, we can only have 64-bit values in xmm registers.
movd(xmm0, src); // We don't have an xmm scratch register, so move the data via the stack. This
if (imm8 == 1) { // path is rarely required, so it's acceptable to be slow.
punpckldq(dst, xmm0); DCHECK_LT(imm8, 2);
} else { sub(esp, Immediate(kDoubleSize));
DCHECK_EQ(0, imm8); // Write original content of {dst} to the stack.
psrlq(dst, 32); movsd(Operand(esp, 0), dst);
punpckldq(xmm0, dst); // Overwrite the portion specified in {imm8}.
movaps(dst, xmm0); if (src.is_reg_only()) {
} mov(Operand(esp, imm8 * kUInt32Size), src.reg());
} else { } else {
DCHECK_LT(imm8, 4); movss(dst, src);
push(eax); movss(Operand(esp, imm8 * kUInt32Size), dst);
mov(eax, src);
pinsrw(dst, eax, imm8 * 2);
shr(eax, 16);
pinsrw(dst, eax, imm8 * 2 + 1);
pop(eax);
} }
// Load back the full value into {dst}.
movsd(dst, Operand(esp, 0));
add(esp, Immediate(kDoubleSize));
} }
void TurboAssembler::Lzcnt(Register dst, Operand src) { void TurboAssembler::Lzcnt(Register dst, Operand src) {
......
...@@ -401,12 +401,10 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { ...@@ -401,12 +401,10 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void Pextrb(Register dst, XMMRegister src, int8_t imm8); void Pextrb(Register dst, XMMRegister src, int8_t imm8);
void Pextrw(Register dst, XMMRegister src, int8_t imm8); void Pextrw(Register dst, XMMRegister src, int8_t imm8);
void Pextrd(Register dst, XMMRegister src, int8_t imm8); void Pextrd(Register dst, XMMRegister src, int8_t imm8);
void Pinsrd(XMMRegister dst, Register src, int8_t imm8, void Pinsrd(XMMRegister dst, Register src, int8_t imm8) {
bool is_64_bits = false) { Pinsrd(dst, Operand(src), imm8);
Pinsrd(dst, Operand(src), imm8, is_64_bits);
} }
void Pinsrd(XMMRegister dst, Operand src, int8_t imm8, void Pinsrd(XMMRegister dst, Operand src, int8_t imm8);
bool is_64_bits = false);
// Expression support // Expression support
// cvtsi2sd instruction only writes to the low 64-bit of dst register, which // cvtsi2sd instruction only writes to the low 64-bit of dst register, which
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment