Commit f331901d authored by Ng Zhi An, committed by V8 LUCI CQ

[ia32] Use AVX for some Float ops if supported

By delegating to the macro-assembler, emit AVX instructions for some
float opcodes (float sqrt, round, conversions to and from int,
extract/insert/load word).

Since they now support AVX, we rename the instruction ops to remove the
SSE prefix, changing it to be IA32.

Bug: v8:12148
Change-Id: Ib488f03928756e7d85ab78e6cb28eb869e0641f9
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3131374
Reviewed-by: Adam Klein <adamk@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/main@{#76755}
parent 5bf82ba4
......@@ -3167,6 +3167,11 @@ void Assembler::vpinsrd(XMMRegister dst, XMMRegister src1, Operand src2,
EMIT(offset);
}
// Emits AVX VROUNDSD (VEX.66.0F3A.WIG 0B /r ib): round the low double in
// src2 according to `mode`, merging the upper bits from src1 into dst.
void Assembler::vroundsd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
RoundingMode mode) {
vinstr(0x0b, dst, src1, src2, k66, k0F3A, kWIG);
// imm8: bits [1:0] select the rounding mode; bit 3 suppresses the
// precision (inexact) exception.
EMIT(static_cast<byte>(mode) | 0x8); // Mask precision exception.
}
void Assembler::vroundps(XMMRegister dst, XMMRegister src, RoundingMode mode) {
vinstr(0x08, dst, xmm0, Operand(src), k66, k0F3A, kWIG);
EMIT(static_cast<byte>(mode) | 0x8); // Mask precision exception.
......
......@@ -1501,6 +1501,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
}
void vpinsrd(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t offset);
void vroundsd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
RoundingMode mode);
void vroundps(XMMRegister dst, XMMRegister src, RoundingMode mode);
void vroundpd(XMMRegister dst, XMMRegister src, RoundingMode mode);
......@@ -1528,6 +1530,34 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
// Emits AVX VCVTTPD2DQ (66 0F E6): convert packed doubles in src to packed
// signed 32-bit integers in dst, truncating toward zero. This is a
// two-operand instruction, so xmm0 fills the unused vvvv source slot.
void vcvttpd2dq(XMMRegister dst, XMMRegister src) {
vinstr(0xE6, dst, xmm0, src, k66, k0F, kWIG);
}
// Emits AVX VCVTTSD2SI (F2 0F 2C): convert the scalar double in src to a
// signed 32-bit integer in the general-purpose register dst, truncating
// toward zero.
void vcvttsd2si(Register dst, XMMRegister src) {
// vinstr encodes XMM register operands; alias the GP register's code as
// an XMM index so the shared encoder emits the correct modrm reg field.
XMMRegister idst = XMMRegister::from_code(dst.code());
vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW0);
}
// Memory-operand form of VCVTTSD2SI (F2 0F 2C): truncating convert of the
// scalar double at src to a signed 32-bit integer in dst.
void vcvttsd2si(Register dst, Operand src) {
// Alias the GP destination as an XMM register index for the shared
// VEX encoder (see the register-operand overload above).
XMMRegister idst = XMMRegister::from_code(dst.code());
vinstr(0x2c, idst, xmm0, src, kF2, k0F, kW0);
}
// Emits AVX VCVTSD2SS (F2 0F 5A): convert the scalar double in src2 to a
// single-precision float, merging the upper bits from src1 into dst.
void vcvtsd2ss(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vinstr(0x5a, dst, src1, src2, kF2, k0F, kWIG);
}
// Memory-operand form of VCVTSD2SS (F2 0F 5A): convert the scalar double at
// src2 to a single-precision float, merging upper bits from src1 into dst.
void vcvtsd2ss(XMMRegister dst, XMMRegister src1, Operand src2) {
vinstr(0x5a, dst, src1, src2, kF2, k0F, kWIG);
}
// Emits AVX VCVTSS2SD (F3 0F 5A): convert the scalar float in src2 to a
// double, merging the upper bits from src1 into dst. Same opcode byte as
// vcvtsd2ss; the F3 prefix selects the ss->sd direction.
void vcvtss2sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vinstr(0x5a, dst, src1, src2, kF3, k0F, kWIG);
}
// Memory-operand form of VCVTSS2SD (F3 0F 5A): convert the scalar float at
// src2 to a double, merging the upper bits from src1 into dst.
void vcvtss2sd(XMMRegister dst, XMMRegister src1, Operand src2) {
vinstr(0x5a, dst, src1, src2, kF3, k0F, kWIG);
}
// Emits AVX VCVTTSS2SI (F3 0F 2C): convert the scalar float in src to a
// signed 32-bit integer in the general-purpose register dst, truncating
// toward zero.
void vcvttss2si(Register dst, XMMRegister src) {
// Alias the GP destination as an XMM register index for the shared
// VEX encoder (same trick as vcvttsd2si).
XMMRegister idst = XMMRegister::from_code(dst.code());
vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW0);
}
// Memory-operand form of VCVTTSS2SI (F3 0F 2C): truncating convert of the
// scalar float at src to a signed 32-bit integer in dst.
void vcvttss2si(Register dst, Operand src) {
// Alias the GP destination as an XMM register index for the shared
// VEX encoder (see the register-operand overload above).
XMMRegister idst = XMMRegister::from_code(dst.code());
vinstr(0x2c, idst, xmm0, src, kF3, k0F, kW0);
}
void vmovddup(XMMRegister dst, Operand src) {
vinstr(0x12, dst, xmm0, src, kF2, k0F, kWIG);
......
......@@ -218,7 +218,11 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
AVX_OP(Cvtdq2ps, cvtdq2ps)
AVX_OP(Cvtpd2ps, cvtpd2ps)
AVX_OP(Cvtps2pd, cvtps2pd)
AVX_OP(Cvtsd2ss, cvtsd2ss)
AVX_OP(Cvtss2sd, cvtss2sd)
AVX_OP(Cvttps2dq, cvttps2dq)
AVX_OP(Cvttsd2si, cvttsd2si)
AVX_OP(Cvttss2si, cvttss2si)
AVX_OP(Divpd, divpd)
AVX_OP(Divps, divps)
AVX_OP(Divsd, divsd)
......@@ -355,6 +359,7 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
AVX_OP_SSE4_1(Ptest, ptest)
AVX_OP_SSE4_1(Roundpd, roundpd)
AVX_OP_SSE4_1(Roundps, roundps)
AVX_OP_SSE4_1(Roundsd, roundsd)
void F64x2ExtractLane(DoubleRegister dst, XMMRegister src, uint8_t lane);
void F64x2ReplaceLane(XMMRegister dst, XMMRegister src, DoubleRegister rep,
......
......@@ -1410,72 +1410,71 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ mov(esp, tmp);
break;
}
case kSSEFloat64Sqrt:
__ sqrtsd(i.OutputDoubleRegister(), i.InputOperand(0));
case kIA32Float64Sqrt:
__ Sqrtsd(i.OutputDoubleRegister(), i.InputOperand(0));
break;
case kSSEFloat64Round: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
case kIA32Float64Round: {
RoundingMode const mode =
static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
__ roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
__ Roundsd(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
break;
}
case kSSEFloat32ToFloat64:
__ cvtss2sd(i.OutputDoubleRegister(), i.InputOperand(0));
case kIA32Float32ToFloat64:
__ Cvtss2sd(i.OutputDoubleRegister(), i.InputOperand(0));
break;
case kSSEFloat64ToFloat32:
__ cvtsd2ss(i.OutputDoubleRegister(), i.InputOperand(0));
case kIA32Float64ToFloat32:
__ Cvtsd2ss(i.OutputDoubleRegister(), i.InputOperand(0));
break;
case kSSEFloat32ToInt32:
__ cvttss2si(i.OutputRegister(), i.InputOperand(0));
case kIA32Float32ToInt32:
__ Cvttss2si(i.OutputRegister(), i.InputOperand(0));
break;
case kSSEFloat32ToUint32:
case kIA32Float32ToUint32:
__ Cvttss2ui(i.OutputRegister(), i.InputOperand(0),
i.TempSimd128Register(0));
break;
case kSSEFloat64ToInt32:
__ cvttsd2si(i.OutputRegister(), i.InputOperand(0));
case kIA32Float64ToInt32:
__ Cvttsd2si(i.OutputRegister(), i.InputOperand(0));
break;
case kSSEFloat64ToUint32:
case kIA32Float64ToUint32:
__ Cvttsd2ui(i.OutputRegister(), i.InputOperand(0),
i.TempSimd128Register(0));
break;
case kSSEInt32ToFloat32:
__ cvtsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
case kIA32Int32ToFloat32:
__ Cvtsi2ss(i.OutputDoubleRegister(), i.InputOperand(0));
break;
case kSSEUint32ToFloat32:
case kIA32Uint32ToFloat32:
__ Cvtui2ss(i.OutputDoubleRegister(), i.InputOperand(0),
i.TempRegister(0));
break;
case kSSEInt32ToFloat64:
__ cvtsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
case kIA32Int32ToFloat64:
__ Cvtsi2sd(i.OutputDoubleRegister(), i.InputOperand(0));
break;
case kSSEUint32ToFloat64:
case kIA32Uint32ToFloat64:
__ Cvtui2sd(i.OutputDoubleRegister(), i.InputOperand(0),
i.TempRegister(0));
break;
case kSSEFloat64ExtractLowWord32:
case kIA32Float64ExtractLowWord32:
if (instr->InputAt(0)->IsFPStackSlot()) {
__ mov(i.OutputRegister(), i.InputOperand(0));
} else {
__ movd(i.OutputRegister(), i.InputDoubleRegister(0));
__ Movd(i.OutputRegister(), i.InputDoubleRegister(0));
}
break;
case kSSEFloat64ExtractHighWord32:
case kIA32Float64ExtractHighWord32:
if (instr->InputAt(0)->IsFPStackSlot()) {
__ mov(i.OutputRegister(), i.InputOperand(0, kDoubleSize / 2));
} else {
__ Pextrd(i.OutputRegister(), i.InputDoubleRegister(0), 1);
}
break;
case kSSEFloat64InsertLowWord32:
case kIA32Float64InsertLowWord32:
__ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 0);
break;
case kSSEFloat64InsertHighWord32:
case kIA32Float64InsertHighWord32:
__ Pinsrd(i.OutputDoubleRegister(), i.InputOperand(1), 1);
break;
case kSSEFloat64LoadLowWord32:
__ movd(i.OutputDoubleRegister(), i.InputOperand(0));
case kIA32Float64LoadLowWord32:
__ Movd(i.OutputDoubleRegister(), i.InputOperand(0));
break;
case kFloat32Add: {
__ Addss(i.OutputDoubleRegister(), i.InputDoubleRegister(0),
......
......@@ -56,23 +56,23 @@ namespace compiler {
V(SSEFloat64Max) \
V(SSEFloat32Min) \
V(SSEFloat64Min) \
V(SSEFloat64Sqrt) \
V(SSEFloat64Round) \
V(SSEFloat32ToFloat64) \
V(SSEFloat64ToFloat32) \
V(SSEFloat32ToInt32) \
V(SSEFloat32ToUint32) \
V(SSEFloat64ToInt32) \
V(SSEFloat64ToUint32) \
V(SSEInt32ToFloat32) \
V(SSEUint32ToFloat32) \
V(SSEInt32ToFloat64) \
V(SSEUint32ToFloat64) \
V(SSEFloat64ExtractLowWord32) \
V(SSEFloat64ExtractHighWord32) \
V(SSEFloat64InsertLowWord32) \
V(SSEFloat64InsertHighWord32) \
V(SSEFloat64LoadLowWord32) \
V(IA32Float64Sqrt) \
V(IA32Float64Round) \
V(IA32Float32ToFloat64) \
V(IA32Float64ToFloat32) \
V(IA32Float32ToInt32) \
V(IA32Float32ToUint32) \
V(IA32Float64ToInt32) \
V(IA32Float64ToUint32) \
V(IA32Int32ToFloat32) \
V(IA32Uint32ToFloat32) \
V(IA32Int32ToFloat64) \
V(IA32Uint32ToFloat64) \
V(IA32Float64ExtractLowWord32) \
V(IA32Float64ExtractHighWord32) \
V(IA32Float64InsertLowWord32) \
V(IA32Float64InsertHighWord32) \
V(IA32Float64LoadLowWord32) \
V(SSEFloat64SilenceNaN) \
V(Float32Add) \
V(Float32Sub) \
......
......@@ -57,23 +57,23 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kSSEFloat64Max:
case kSSEFloat32Min:
case kSSEFloat64Min:
case kSSEFloat64Sqrt:
case kSSEFloat64Round:
case kSSEFloat32ToFloat64:
case kSSEFloat64ToFloat32:
case kSSEFloat32ToInt32:
case kSSEFloat32ToUint32:
case kSSEFloat64ToInt32:
case kSSEFloat64ToUint32:
case kSSEInt32ToFloat32:
case kSSEUint32ToFloat32:
case kSSEInt32ToFloat64:
case kSSEUint32ToFloat64:
case kSSEFloat64ExtractLowWord32:
case kSSEFloat64ExtractHighWord32:
case kSSEFloat64InsertLowWord32:
case kSSEFloat64InsertHighWord32:
case kSSEFloat64LoadLowWord32:
case kIA32Float64Sqrt:
case kIA32Float64Round:
case kIA32Float32ToFloat64:
case kIA32Float64ToFloat32:
case kIA32Float32ToInt32:
case kIA32Float32ToUint32:
case kIA32Float64ToInt32:
case kIA32Float64ToUint32:
case kIA32Int32ToFloat32:
case kIA32Uint32ToFloat32:
case kIA32Int32ToFloat64:
case kIA32Uint32ToFloat64:
case kIA32Float64ExtractLowWord32:
case kIA32Float64ExtractHighWord32:
case kIA32Float64InsertLowWord32:
case kIA32Float64InsertHighWord32:
case kIA32Float64LoadLowWord32:
case kSSEFloat64SilenceNaN:
case kFloat32Add:
case kFloat32Sub:
......@@ -422,17 +422,17 @@ int InstructionScheduler::GetInstructionLatency(const Instruction* instr) {
return 5;
case kFloat32Mul:
return 4;
case kSSEFloat32ToFloat64:
case kSSEFloat64ToFloat32:
case kIA32Float32ToFloat64:
case kIA32Float64ToFloat32:
return 6;
case kSSEFloat32Round:
case kSSEFloat64Round:
case kSSEFloat32ToInt32:
case kSSEFloat64ToInt32:
case kIA32Float64Round:
case kIA32Float32ToInt32:
case kIA32Float64ToInt32:
return 8;
case kSSEFloat32ToUint32:
case kIA32Float32ToUint32:
return 21;
case kSSEFloat64ToUint32:
case kIA32Float64ToUint32:
return 15;
case kIA32Idiv:
return 33;
......@@ -443,7 +443,7 @@ int InstructionScheduler::GetInstructionLatency(const Instruction* instr) {
case kFloat64Div:
return 63;
case kSSEFloat32Sqrt:
case kSSEFloat64Sqrt:
case kIA32Float64Sqrt:
return 25;
case kSSEFloat64Mod:
return 50;
......
......@@ -1123,53 +1123,53 @@ void InstructionSelector::VisitWord32Ror(Node* node) {
VisitShift(this, node, kIA32Ror);
}
#define RO_OP_LIST(V) \
V(Word32Clz, kIA32Lzcnt) \
V(Word32Ctz, kIA32Tzcnt) \
V(Word32Popcnt, kIA32Popcnt) \
V(ChangeFloat32ToFloat64, kSSEFloat32ToFloat64) \
V(RoundInt32ToFloat32, kSSEInt32ToFloat32) \
V(ChangeInt32ToFloat64, kSSEInt32ToFloat64) \
V(TruncateFloat32ToInt32, kSSEFloat32ToInt32) \
V(ChangeFloat64ToInt32, kSSEFloat64ToInt32) \
V(TruncateFloat64ToFloat32, kSSEFloat64ToFloat32) \
V(RoundFloat64ToInt32, kSSEFloat64ToInt32) \
V(BitcastFloat32ToInt32, kIA32BitcastFI) \
V(BitcastInt32ToFloat32, kIA32BitcastIF) \
V(Float32Sqrt, kSSEFloat32Sqrt) \
V(Float64Sqrt, kSSEFloat64Sqrt) \
V(Float64ExtractLowWord32, kSSEFloat64ExtractLowWord32) \
V(Float64ExtractHighWord32, kSSEFloat64ExtractHighWord32) \
V(SignExtendWord8ToInt32, kIA32Movsxbl) \
V(SignExtendWord16ToInt32, kIA32Movsxwl) \
#define RO_OP_LIST(V) \
V(Word32Clz, kIA32Lzcnt) \
V(Word32Ctz, kIA32Tzcnt) \
V(Word32Popcnt, kIA32Popcnt) \
V(ChangeFloat32ToFloat64, kIA32Float32ToFloat64) \
V(RoundInt32ToFloat32, kIA32Int32ToFloat32) \
V(ChangeInt32ToFloat64, kIA32Int32ToFloat64) \
V(TruncateFloat32ToInt32, kIA32Float32ToInt32) \
V(ChangeFloat64ToInt32, kIA32Float64ToInt32) \
V(TruncateFloat64ToFloat32, kIA32Float64ToFloat32) \
V(RoundFloat64ToInt32, kIA32Float64ToInt32) \
V(BitcastFloat32ToInt32, kIA32BitcastFI) \
V(BitcastInt32ToFloat32, kIA32BitcastIF) \
V(Float32Sqrt, kSSEFloat32Sqrt) \
V(Float64Sqrt, kIA32Float64Sqrt) \
V(Float64ExtractLowWord32, kIA32Float64ExtractLowWord32) \
V(Float64ExtractHighWord32, kIA32Float64ExtractHighWord32) \
V(SignExtendWord8ToInt32, kIA32Movsxbl) \
V(SignExtendWord16ToInt32, kIA32Movsxwl) \
V(F64x2Sqrt, kIA32F64x2Sqrt)
#define RO_WITH_TEMP_OP_LIST(V) V(ChangeUint32ToFloat64, kSSEUint32ToFloat64)
#define RO_WITH_TEMP_SIMD_OP_LIST(V) \
V(TruncateFloat32ToUint32, kSSEFloat32ToUint32) \
V(ChangeFloat64ToUint32, kSSEFloat64ToUint32) \
V(TruncateFloat64ToUint32, kSSEFloat64ToUint32)
#define RR_OP_LIST(V) \
V(TruncateFloat64ToWord32, kArchTruncateDoubleToI) \
V(Float32RoundDown, kSSEFloat32Round | MiscField::encode(kRoundDown)) \
V(Float64RoundDown, kSSEFloat64Round | MiscField::encode(kRoundDown)) \
V(Float32RoundUp, kSSEFloat32Round | MiscField::encode(kRoundUp)) \
V(Float64RoundUp, kSSEFloat64Round | MiscField::encode(kRoundUp)) \
V(Float32RoundTruncate, kSSEFloat32Round | MiscField::encode(kRoundToZero)) \
V(Float64RoundTruncate, kSSEFloat64Round | MiscField::encode(kRoundToZero)) \
V(Float32RoundTiesEven, \
kSSEFloat32Round | MiscField::encode(kRoundToNearest)) \
V(Float64RoundTiesEven, \
kSSEFloat64Round | MiscField::encode(kRoundToNearest)) \
V(F32x4Ceil, kIA32F32x4Round | MiscField::encode(kRoundUp)) \
V(F32x4Floor, kIA32F32x4Round | MiscField::encode(kRoundDown)) \
V(F32x4Trunc, kIA32F32x4Round | MiscField::encode(kRoundToZero)) \
V(F32x4NearestInt, kIA32F32x4Round | MiscField::encode(kRoundToNearest)) \
V(F64x2Ceil, kIA32F64x2Round | MiscField::encode(kRoundUp)) \
V(F64x2Floor, kIA32F64x2Round | MiscField::encode(kRoundDown)) \
V(F64x2Trunc, kIA32F64x2Round | MiscField::encode(kRoundToZero)) \
#define RO_WITH_TEMP_OP_LIST(V) V(ChangeUint32ToFloat64, kIA32Uint32ToFloat64)
#define RO_WITH_TEMP_SIMD_OP_LIST(V) \
V(TruncateFloat32ToUint32, kIA32Float32ToUint32) \
V(ChangeFloat64ToUint32, kIA32Float64ToUint32) \
V(TruncateFloat64ToUint32, kIA32Float64ToUint32)
#define RR_OP_LIST(V) \
V(TruncateFloat64ToWord32, kArchTruncateDoubleToI) \
V(Float32RoundDown, kSSEFloat32Round | MiscField::encode(kRoundDown)) \
V(Float64RoundDown, kIA32Float64Round | MiscField::encode(kRoundDown)) \
V(Float32RoundUp, kSSEFloat32Round | MiscField::encode(kRoundUp)) \
V(Float64RoundUp, kIA32Float64Round | MiscField::encode(kRoundUp)) \
V(Float32RoundTruncate, kSSEFloat32Round | MiscField::encode(kRoundToZero)) \
V(Float64RoundTruncate, kIA32Float64Round | MiscField::encode(kRoundToZero)) \
V(Float32RoundTiesEven, \
kSSEFloat32Round | MiscField::encode(kRoundToNearest)) \
V(Float64RoundTiesEven, \
kIA32Float64Round | MiscField::encode(kRoundToNearest)) \
V(F32x4Ceil, kIA32F32x4Round | MiscField::encode(kRoundUp)) \
V(F32x4Floor, kIA32F32x4Round | MiscField::encode(kRoundDown)) \
V(F32x4Trunc, kIA32F32x4Round | MiscField::encode(kRoundToZero)) \
V(F32x4NearestInt, kIA32F32x4Round | MiscField::encode(kRoundToNearest)) \
V(F64x2Ceil, kIA32F64x2Round | MiscField::encode(kRoundUp)) \
V(F64x2Floor, kIA32F64x2Round | MiscField::encode(kRoundDown)) \
V(F64x2Trunc, kIA32F64x2Round | MiscField::encode(kRoundToZero)) \
V(F64x2NearestInt, kIA32F64x2Round | MiscField::encode(kRoundToNearest))
#define RRO_FLOAT_OP_LIST(V) \
......@@ -1347,7 +1347,7 @@ void InstructionSelector::VisitUint32Mod(Node* node) {
void InstructionSelector::VisitRoundUint32ToFloat32(Node* node) {
IA32OperandGenerator g(this);
InstructionOperand temps[] = {g.TempRegister()};
Emit(kSSEUint32ToFloat32, g.DefineAsRegister(node), g.Use(node->InputAt(0)),
Emit(kIA32Uint32ToFloat32, g.DefineAsRegister(node), g.Use(node->InputAt(0)),
arraysize(temps), temps);
}
......@@ -1965,10 +1965,10 @@ void InstructionSelector::VisitFloat64InsertLowWord32(Node* node) {
Float64Matcher mleft(left);
if (mleft.HasResolvedValue() &&
(bit_cast<uint64_t>(mleft.ResolvedValue()) >> 32) == 0u) {
Emit(kSSEFloat64LoadLowWord32, g.DefineAsRegister(node), g.Use(right));
Emit(kIA32Float64LoadLowWord32, g.DefineAsRegister(node), g.Use(right));
return;
}
Emit(kSSEFloat64InsertLowWord32, g.DefineSameAsFirst(node),
Emit(kIA32Float64InsertLowWord32, g.DefineSameAsFirst(node),
g.UseRegister(left), g.Use(right));
}
......@@ -1976,7 +1976,7 @@ void InstructionSelector::VisitFloat64InsertHighWord32(Node* node) {
IA32OperandGenerator g(this);
Node* left = node->InputAt(0);
Node* right = node->InputAt(1);
Emit(kSSEFloat64InsertHighWord32, g.DefineSameAsFirst(node),
Emit(kIA32Float64InsertHighWord32, g.DefineSameAsFirst(node),
g.UseRegister(left), g.Use(right));
}
......
......@@ -818,6 +818,13 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
AppendToBuffer(",%d", Imm8_U(current));
current++;
break;
case 0x0b:
AppendToBuffer("vroundsd %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
AppendToBuffer(",%d", Imm8_U(current));
current++;
break;
case 0x0E:
AppendToBuffer("vpblendw %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
......@@ -910,6 +917,10 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
AppendToBuffer("vmovddup %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
break;
case 0x2c:
AppendToBuffer("vcvttsd2si %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
break;
case 0x51:
AppendToBuffer("vsqrtsd %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
......@@ -925,6 +936,11 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0x5a:
AppendToBuffer("vcvtsd2ss %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0x5C:
AppendToBuffer("vsubsd %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
......@@ -977,6 +993,10 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
AppendToBuffer("vmovshdup %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
break;
case 0x2c:
AppendToBuffer("vcvttss2si %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
break;
case 0x51:
AppendToBuffer("vsqrtss %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
......@@ -992,6 +1012,11 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0x5a:
AppendToBuffer("vcvtss2sd %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0x5B:
AppendToBuffer("vcvttps2dq %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
......
......@@ -822,6 +822,15 @@ TEST(DisasmIa320) {
__ vcvttps2dq(xmm1, Operand(ebx, ecx, times_4, 10000));
__ vcvttpd2dq(xmm1, xmm0);
__ vcvtsd2ss(xmm2, xmm3, Operand(ebx, ecx, times_4, 10000));
__ vcvtsd2ss(xmm2, xmm3, xmm6);
__ vcvtss2sd(xmm2, xmm3, Operand(ebx, ecx, times_1, 10000));
__ vcvtss2sd(xmm2, xmm3, xmm6);
__ vcvttsd2si(eax, Operand(ebx, ecx, times_4, 10000));
__ vcvttsd2si(ebx, xmm6);
__ vcvttss2si(eax, Operand(ebx, ecx, times_4, 10000));
__ vcvttss2si(ebx, xmm6);
__ vmovddup(xmm1, xmm2);
__ vmovddup(xmm1, Operand(ebx, ecx, times_4, 10000));
__ vmovshdup(xmm1, xmm2);
......@@ -841,6 +850,8 @@ TEST(DisasmIa320) {
__ vpcmpgtq(xmm0, xmm1, xmm2);
__ vroundsd(xmm0, xmm3, xmm2, kRoundDown);
#define EMIT_SSE2_AVXINSTR(instruction, notUsed1, notUsed2, notUsed3) \
__ v##instruction(xmm7, xmm5, xmm1); \
__ v##instruction(xmm7, xmm5, Operand(edx, 4));
......
......@@ -95,7 +95,7 @@ TEST_F(InstructionSelectorTest, ChangeFloat32ToFloat64WithParameter) {
m.Return(m.ChangeFloat32ToFloat64(m.Parameter(0)));
Stream s = m.Build();
ASSERT_EQ(1U, s.size());
EXPECT_EQ(kSSEFloat32ToFloat64, s[0]->arch_opcode());
EXPECT_EQ(kIA32Float32ToFloat64, s[0]->arch_opcode());
EXPECT_EQ(1U, s[0]->InputCount());
EXPECT_EQ(1U, s[0]->OutputCount());
}
......@@ -106,7 +106,7 @@ TEST_F(InstructionSelectorTest, TruncateFloat64ToFloat32WithParameter) {
m.Return(m.TruncateFloat64ToFloat32(m.Parameter(0)));
Stream s = m.Build();
ASSERT_EQ(1U, s.size());
EXPECT_EQ(kSSEFloat64ToFloat32, s[0]->arch_opcode());
EXPECT_EQ(kIA32Float64ToFloat32, s[0]->arch_opcode());
EXPECT_EQ(1U, s[0]->InputCount());
EXPECT_EQ(1U, s[0]->OutputCount());
}
......@@ -161,7 +161,7 @@ TEST_F(InstructionSelectorTest, ChangeUint32ToFloat64WithParameter) {
m.Return(m.ChangeUint32ToFloat64(m.Parameter(0)));
Stream s = m.Build();
ASSERT_EQ(1U, s.size());
EXPECT_EQ(kSSEUint32ToFloat64, s[0]->arch_opcode());
EXPECT_EQ(kIA32Uint32ToFloat64, s[0]->arch_opcode());
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.