Commit c2ac4811 authored by Ng Zhi An, committed by V8 LUCI CQ

[ia32] Use AVX if supported

This is a follow-up to https://crrev.com/c/3131374 to support more
instructions: float32 sqrt, cmp, and round, and float64 cmp.

Rename the opcodes since they are no longer SSE specific.

Bug: v8:12148
Change-Id: Ie5f74bc1b4510092cbfbcb7e420ef82cb1c39a14
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3154983
Reviewed-by: Adam Klein <adamk@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/main@{#76777}
parent 0f980ffa
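At the macro-assembler level, "use AVX if supported" means that each capitalized wrapper used below (Ucomiss, Ucomisd, Roundss, Sqrtss, ...) decides at code-generation time whether to emit the VEX-encoded AVX form or fall back to the legacy SSE encoding. The following is a minimal, self-contained C++ sketch of that dispatch pattern; HasAVX and the printf-based "emitters" are placeholders for illustration, not V8 APIs.

#include <cstdio>

// Placeholder for a one-time CPUID-based feature probe.
static bool HasAVX() { return true; }

// Macro-assembler style wrapper: callers always use the capitalized name,
// and the wrapper picks the best encoding the host CPU offers.
static void Ucomiss(const char* dst, const char* src) {
  if (HasAVX()) {
    std::printf("vucomiss %s,%s   ; VEX-encoded AVX form\n", dst, src);
  } else {
    std::printf("ucomiss %s,%s    ; legacy SSE encoding\n", dst, src);
  }
}

int main() {
  Ucomiss("xmm0", "xmm1");  // prints the AVX form on a CPU reporting AVX
  return 0;
}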
@@ -3104,6 +3104,11 @@ void Assembler::vroundsd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
vinstr(0x0b, dst, src1, src2, k66, k0F3A, kWIG);
EMIT(static_cast<byte>(mode) | 0x8); // Mask precision exception.
}
void Assembler::vroundss(XMMRegister dst, XMMRegister src1, XMMRegister src2,
RoundingMode mode) {
vinstr(0x0a, dst, src1, src2, k66, k0F3A, kWIG);
EMIT(static_cast<byte>(mode) | 0x8); // Mask precision exception.
}
void Assembler::vroundps(XMMRegister dst, XMMRegister src, RoundingMode mode) {
vinstr(0x08, dst, xmm0, Operand(src), k66, k0F3A, kWIG);
EMIT(static_cast<byte>(mode) | 0x8); // Mask precision exception.
......
@@ -1439,6 +1439,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void vroundsd(XMMRegister dst, XMMRegister src1, XMMRegister src2,
RoundingMode mode);
void vroundss(XMMRegister dst, XMMRegister src1, XMMRegister src2,
RoundingMode mode);
void vroundps(XMMRegister dst, XMMRegister src, RoundingMode mode);
void vroundpd(XMMRegister dst, XMMRegister src, RoundingMode mode);
@@ -1530,6 +1532,19 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void vpmovmskb(Register dst, XMMRegister src);
void vucomisd(XMMRegister dst, XMMRegister src) {
vinstr(0x2E, dst, xmm0, src, k66, k0F, kWIG);
}
void vucomisd(XMMRegister dst, Operand src) {
vinstr(0x2E, dst, xmm0, src, k66, k0F, kWIG);
}
void vucomiss(XMMRegister dst, XMMRegister src) {
vinstr(0x2E, dst, xmm0, src, kNone, k0F, kWIG);
}
void vucomiss(XMMRegister dst, Operand src) {
vinstr(0x2E, dst, xmm0, src, kNone, k0F, kWIG);
}
// BMI instruction
void andn(Register dst, Register src1, Register src2) {
andn(dst, src1, Operand(src2));
......
@@ -306,6 +306,8 @@ class V8_EXPORT_PRIVATE TurboAssembler
AVX_OP(Pcmpeqb, pcmpeqb)
AVX_OP(Pcmpeqw, pcmpeqw)
AVX_OP(Pcmpeqd, pcmpeqd)
AVX_OP(Ucomisd, ucomisd)
AVX_OP(Ucomiss, ucomiss)
AVX_OP_SSE4_1(Pcmpeqq, pcmpeqq)
// Macro for instructions that have 2 operands for AVX version and 1 operand for
......
@@ -363,6 +363,7 @@ class V8_EXPORT_PRIVATE SharedTurboAssembler : public TurboAssemblerBase {
AVX_OP_SSE4_1(Roundpd, roundpd)
AVX_OP_SSE4_1(Roundps, roundps)
AVX_OP_SSE4_1(Roundsd, roundsd)
AVX_OP_SSE4_1(Roundss, roundss)
void F64x2ExtractLane(DoubleRegister dst, XMMRegister src, uint8_t lane);
void F64x2ReplaceLane(XMMRegister dst, XMMRegister src, DoubleRegister rep,
......
@@ -63,7 +63,6 @@ class V8_EXPORT_PRIVATE TurboAssembler
using SharedTurboAssemblerBase<TurboAssembler>::SharedTurboAssemblerBase;
AVX_OP(Ucomisd, ucomisd)
AVX_OP(Ucomiss, ucomiss)
AVX_OP_SSE4_1(Roundss, roundss)
// Define movq here instead of using AVX_OP. movq is defined using templates
// and there is a function template `void movq(P1)`, while technically
......
@@ -1255,21 +1255,21 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kIA32LFence:
__ lfence();
break;
case kSSEFloat32Cmp:
__ ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
case kIA32Float32Cmp:
__ Ucomiss(i.InputDoubleRegister(0), i.InputOperand(1));
break;
case kSSEFloat32Sqrt:
__ sqrtss(i.OutputDoubleRegister(), i.InputOperand(0));
case kIA32Float32Sqrt:
__ Sqrtss(i.OutputDoubleRegister(), i.InputOperand(0));
break;
case kSSEFloat32Round: {
case kIA32Float32Round: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
RoundingMode const mode =
static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
__ roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
__ Roundss(i.OutputDoubleRegister(), i.InputDoubleRegister(0), mode);
break;
}
case kSSEFloat64Cmp:
__ ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
case kIA32Float64Cmp:
__ Ucomisd(i.InputDoubleRegister(0), i.InputOperand(1));
break;
case kSSEFloat32Max: {
Label compare_swap, done_compare;
......
@@ -47,10 +47,10 @@ namespace compiler {
V(IA32Bswap) \
V(IA32MFence) \
V(IA32LFence) \
V(SSEFloat32Cmp) \
V(SSEFloat32Sqrt) \
V(SSEFloat32Round) \
V(SSEFloat64Cmp) \
V(IA32Float32Cmp) \
V(IA32Float32Sqrt) \
V(IA32Float32Round) \
V(IA32Float64Cmp) \
V(SSEFloat64Mod) \
V(SSEFloat32Max) \
V(SSEFloat64Max) \
......
@@ -48,10 +48,10 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32Popcnt:
case kIA32Bswap:
case kIA32Lea:
case kSSEFloat32Cmp:
case kSSEFloat32Sqrt:
case kSSEFloat32Round:
case kSSEFloat64Cmp:
case kIA32Float32Cmp:
case kIA32Float32Sqrt:
case kIA32Float32Round:
case kIA32Float64Cmp:
case kSSEFloat64Mod:
case kSSEFloat32Max:
case kSSEFloat64Max:
@@ -406,8 +406,8 @@ int InstructionScheduler::GetInstructionLatency(const Instruction* instr) {
case kIA32Imul:
case kIA32ImulHigh:
return 5;
case kSSEFloat32Cmp:
case kSSEFloat64Cmp:
case kIA32Float32Cmp:
case kIA32Float64Cmp:
return 9;
case kFloat32Add:
case kFloat32Sub:
@@ -425,7 +425,7 @@ int InstructionScheduler::GetInstructionLatency(const Instruction* instr) {
case kIA32Float32ToFloat64:
case kIA32Float64ToFloat32:
return 6;
case kSSEFloat32Round:
case kIA32Float32Round:
case kIA32Float64Round:
case kIA32Float32ToInt32:
case kIA32Float64ToInt32:
@@ -442,7 +442,7 @@ int InstructionScheduler::GetInstructionLatency(const Instruction* instr) {
return 35;
case kFloat64Div:
return 63;
case kSSEFloat32Sqrt:
case kIA32Float32Sqrt:
case kIA32Float64Sqrt:
return 25;
case kSSEFloat64Mod:
......
@@ -1136,7 +1136,7 @@ void InstructionSelector::VisitWord32Ror(Node* node) {
V(RoundFloat64ToInt32, kIA32Float64ToInt32) \
V(BitcastFloat32ToInt32, kIA32BitcastFI) \
V(BitcastInt32ToFloat32, kIA32BitcastIF) \
V(Float32Sqrt, kSSEFloat32Sqrt) \
V(Float32Sqrt, kIA32Float32Sqrt) \
V(Float64Sqrt, kIA32Float64Sqrt) \
V(Float64ExtractLowWord32, kIA32Float64ExtractLowWord32) \
V(Float64ExtractHighWord32, kIA32Float64ExtractHighWord32) \
@@ -1153,14 +1153,14 @@ void InstructionSelector::VisitWord32Ror(Node* node) {
#define RR_OP_LIST(V) \
V(TruncateFloat64ToWord32, kArchTruncateDoubleToI) \
V(Float32RoundDown, kSSEFloat32Round | MiscField::encode(kRoundDown)) \
V(Float32RoundDown, kIA32Float32Round | MiscField::encode(kRoundDown)) \
V(Float64RoundDown, kIA32Float64Round | MiscField::encode(kRoundDown)) \
V(Float32RoundUp, kSSEFloat32Round | MiscField::encode(kRoundUp)) \
V(Float32RoundUp, kIA32Float32Round | MiscField::encode(kRoundUp)) \
V(Float64RoundUp, kIA32Float64Round | MiscField::encode(kRoundUp)) \
V(Float32RoundTruncate, kSSEFloat32Round | MiscField::encode(kRoundToZero)) \
V(Float32RoundTruncate, kIA32Float32Round | MiscField::encode(kRoundToZero)) \
V(Float64RoundTruncate, kIA32Float64Round | MiscField::encode(kRoundToZero)) \
V(Float32RoundTiesEven, \
kSSEFloat32Round | MiscField::encode(kRoundToNearest)) \
kIA32Float32Round | MiscField::encode(kRoundToNearest)) \
V(Float64RoundTiesEven, \
kIA32Float64Round | MiscField::encode(kRoundToNearest)) \
V(F32x4Ceil, kIA32F32x4Round | MiscField::encode(kRoundUp)) \
@@ -1622,7 +1622,7 @@ void VisitFloat32Compare(InstructionSelector* selector, Node* node,
FlagsContinuation* cont) {
Node* const left = node->InputAt(0);
Node* const right = node->InputAt(1);
VisitCompare(selector, kSSEFloat32Cmp, right, left, cont, false);
VisitCompare(selector, kIA32Float32Cmp, right, left, cont, false);
}
// Shared routine for multiple float64 compare operations (inputs commuted).
@@ -1630,7 +1630,7 @@ void VisitFloat64Compare(InstructionSelector* selector, Node* node,
FlagsContinuation* cont) {
Node* const left = node->InputAt(0);
Node* const right = node->InputAt(1);
VisitCompare(selector, kSSEFloat64Cmp, right, left, cont, false);
VisitCompare(selector, kIA32Float64Cmp, right, left, cont, false);
}
// Shared routine for multiple word compare operations.
......
@@ -818,6 +818,13 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
AppendToBuffer(",%d", Imm8_U(current));
current++;
break;
case 0x0a:
AppendToBuffer("vroundss %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
AppendToBuffer(",%d", Imm8_U(current));
current++;
break;
case 0x0b:
AppendToBuffer("vroundsd %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
@@ -1170,6 +1177,10 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
AppendToBuffer("vmovaps %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
break;
case 0x2e:
AppendToBuffer("vucomiss %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
break;
case 0x50:
AppendToBuffer("vmovmskps %s,%s", NameOfCPURegister(regop),
NameOfXMMRegister(rm));
@@ -1275,6 +1286,10 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
AppendToBuffer("vmovapd %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
break;
case 0x2e:
AppendToBuffer("vucomisd %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
break;
case 0x50:
AppendToBuffer("vmovmskpd %s,%s", NameOfCPURegister(regop),
NameOfXMMRegister(rm));
......
@@ -658,6 +658,10 @@ TEST(DisasmIa320) {
__ vmaxss(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vsqrtss(xmm0, xmm1, xmm2);
__ vsqrtss(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
__ vucomisd(xmm0, xmm1);
__ vucomisd(xmm0, Operand(ebx, ecx, times_4, 10000));
__ vucomiss(xmm0, xmm1);
__ vucomiss(xmm0, Operand(ebx, ecx, times_4, 10000));
__ vandps(xmm0, xmm1, xmm2);
__ vandps(xmm0, xmm1, Operand(ebx, ecx, times_4, 10000));
@@ -823,6 +827,7 @@ TEST(DisasmIa320) {
__ vpcmpgtq(xmm0, xmm1, xmm2);
__ vroundsd(xmm0, xmm3, xmm2, kRoundDown);
__ vroundss(xmm0, xmm3, xmm2, kRoundDown);
#define EMIT_SSE2_AVXINSTR(instruction, notUsed1, notUsed2, notUsed3) \
__ v##instruction(xmm7, xmm5, xmm1); \
......