Commit fda438c6 authored by Zhi An Ng, committed by Commit Bot

[ia32] Unify I32x4SConvertF32x4 SSE and AVX opcodes

Drive-by cleanup: IWYU for macro-assembler-ia32.cc.

IWYU added src/heap/basic-memory-chunk.h which failed a presubmit, so I
updated src/DEPS to allow for including it.

Bug: v8:11217,v8:7490
Change-Id: I63662bfb2b34e354e94f6052edfcb92f1341da58
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2583675
Reviewed-by: Bill Budge <bbudge@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#71729}
parent d628e5e1
...@@ -8,6 +8,7 @@ include_rules = [ ...@@ -8,6 +8,7 @@ include_rules = [
"+src/compiler/code-assembler.h", "+src/compiler/code-assembler.h",
"+src/compiler/wasm-compiler.h", "+src/compiler/wasm-compiler.h",
"-src/heap", "-src/heap",
"+src/heap/basic-memory-chunk.h",
"+src/heap/combined-heap.h", "+src/heap/combined-heap.h",
"+src/heap/embedder-tracing.h", "+src/heap/embedder-tracing.h",
"+src/heap/factory.h", "+src/heap/factory.h",
......
...@@ -4,24 +4,58 @@ ...@@ -4,24 +4,58 @@
#if V8_TARGET_ARCH_IA32 #if V8_TARGET_ARCH_IA32
#include <stdint.h>
#include "include/v8-internal.h"
#include "src/base/bits.h" #include "src/base/bits.h"
#include "src/base/division-by-constant.h" #include "src/base/logging.h"
#include "src/base/utils/random-number-generator.h" #include "src/base/macros.h"
#include "src/codegen/callable.h" #include "src/base/platform/platform.h"
#include "src/builtins/builtins.h"
#include "src/codegen/assembler.h"
#include "src/codegen/bailout-reason.h"
#include "src/codegen/code-factory.h" #include "src/codegen/code-factory.h"
#include "src/codegen/cpu-features.h" #include "src/codegen/cpu-features.h"
#include "src/codegen/external-reference-table.h" #include "src/codegen/external-reference.h"
#include "src/codegen/ia32/assembler-ia32-inl.h" #include "src/codegen/ia32/assembler-ia32.h"
#include "src/codegen/ia32/register-ia32.h"
#include "src/codegen/interface-descriptors.h"
#include "src/codegen/label.h"
#include "src/codegen/macro-assembler.h" #include "src/codegen/macro-assembler.h"
#include "src/debug/debug.h" #include "src/codegen/register.h"
#include "src/codegen/reglist.h"
#include "src/codegen/reloc-info.h"
#include "src/codegen/turbo-assembler.h"
#include "src/common/globals.h"
#include "src/deoptimizer/deoptimizer.h"
#include "src/execution/frame-constants.h" #include "src/execution/frame-constants.h"
#include "src/execution/frames-inl.h" #include "src/execution/frames.h"
#include "src/execution/isolate-data.h"
#include "src/execution/isolate.h"
#include "src/flags/flags.h"
#include "src/handles/handles-inl.h"
#include "src/handles/handles.h"
#include "src/heap/basic-memory-chunk.h"
#include "src/heap/factory-inl.h"
#include "src/heap/factory.h"
#include "src/heap/memory-chunk.h" #include "src/heap/memory-chunk.h"
#include "src/init/bootstrapper.h"
#include "src/logging/counters.h" #include "src/logging/counters.h"
#include "src/objects/code.h"
#include "src/objects/contexts.h"
#include "src/objects/fixed-array.h"
#include "src/objects/heap-object.h"
#include "src/objects/js-function.h"
#include "src/objects/map.h"
#include "src/objects/objects.h"
#include "src/objects/oddball.h"
#include "src/objects/shared-function-info.h"
#include "src/objects/slots-inl.h"
#include "src/objects/smi.h"
#include "src/roots/roots-inl.h"
#include "src/roots/roots.h"
#include "src/runtime/runtime.h" #include "src/runtime/runtime.h"
#include "src/snapshot/embedded/embedded-data.h" #include "src/snapshot/embedded/embedded-data.h"
#include "src/snapshot/snapshot.h" #include "src/utils/utils.h"
// Satisfy cpplint check, but don't include platform-specific header. It is // Satisfy cpplint check, but don't include platform-specific header. It is
// included recursively via macro-assembler.h. // included recursively via macro-assembler.h.
...@@ -1506,6 +1540,17 @@ void TurboAssembler::Move(XMMRegister dst, uint64_t src) { ...@@ -1506,6 +1540,17 @@ void TurboAssembler::Move(XMMRegister dst, uint64_t src) {
} }
} }
// Emits a packed single-precision equality compare of src1 with src2 and
// leaves the result in dst.
//
// With AVX the non-destructive three-operand vcmpeqps is emitted directly.
// Without AVX only the destructive two-operand cmpeqps is available, so one
// of the two sources has to be in dst before the compare is emitted.
void TurboAssembler::Cmpeqps(XMMRegister dst, XMMRegister src1,
                             XMMRegister src2) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope avx_scope(this, AVX);
    vcmpeqps(dst, src1, src2);
    return;
  }

  if (dst == src2) {
    // dst already holds src2. Copying src1 into dst first would destroy src2
    // and end up comparing src1 with itself. The equality predicate is
    // symmetric, so compare dst (== src2) against src1 instead.
    cmpeqps(dst, src1);
    return;
  }

  // Skip the copy when src1 is already in dst.
  if (dst != src1) movaps(dst, src1);
  cmpeqps(dst, src2);
}
void TurboAssembler::Pshufhw(XMMRegister dst, Operand src, uint8_t shuffle) { void TurboAssembler::Pshufhw(XMMRegister dst, Operand src, uint8_t shuffle) {
if (CpuFeatures::IsSupported(AVX)) { if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX); CpuFeatureScope scope(this, AVX);
......
...@@ -280,6 +280,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { ...@@ -280,6 +280,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
// may be bigger than 2^16 - 1. Requires a scratch register. // may be bigger than 2^16 - 1. Requires a scratch register.
void Ret(int bytes_dropped, Register scratch); void Ret(int bytes_dropped, Register scratch);
// Three-operand cmpeqps: compares src1 with src2 and writes the result to
// dst. Emits vcmpeqps when AVX is supported; otherwise falls back to the
// destructive two-operand SSE cmpeqps, with dst serving as both an input and
// the output of that instruction.
void Cmpeqps(XMMRegister dst, XMMRegister src1, XMMRegister src2);
void Pshufhw(XMMRegister dst, XMMRegister src, uint8_t shuffle) { void Pshufhw(XMMRegister dst, XMMRegister src, uint8_t shuffle) {
Pshufhw(dst, Operand(src), shuffle); Pshufhw(dst, Operand(src), shuffle);
} }
...@@ -435,6 +438,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { ...@@ -435,6 +438,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_PACKED_OP3(Pmuludq, pmuludq) AVX_PACKED_OP3(Pmuludq, pmuludq)
AVX_PACKED_OP3(Pavgb, pavgb) AVX_PACKED_OP3(Pavgb, pavgb)
AVX_PACKED_OP3(Pavgw, pavgw) AVX_PACKED_OP3(Pavgw, pavgw)
AVX_PACKED_OP3(Pand, pand)
#undef AVX_PACKED_OP3 #undef AVX_PACKED_OP3
AVX_PACKED_OP3_WITH_TYPE(Psllw, psllw, XMMRegister, uint8_t) AVX_PACKED_OP3_WITH_TYPE(Psllw, psllw, XMMRegister, uint8_t)
......
...@@ -2640,40 +2640,21 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2640,40 +2640,21 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1)); __ Pextrd(i.OutputRegister(), i.InputSimd128Register(0), i.InputInt8(1));
break; break;
} }
case kSSEI32x4SConvertF32x4: { case kIA32I32x4SConvertF32x4: {
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
XMMRegister dst = i.OutputSimd128Register();
// NAN->0
__ movaps(kScratchDoubleReg, dst);
__ cmpeqps(kScratchDoubleReg, kScratchDoubleReg);
__ pand(dst, kScratchDoubleReg);
// Set top bit if >= 0 (but not -0.0!)
__ pxor(kScratchDoubleReg, dst);
// Convert
__ cvttps2dq(dst, dst);
// Set top bit if >=0 is now < 0
__ pand(kScratchDoubleReg, dst);
__ psrad(kScratchDoubleReg, 31);
// Set positive overflow lanes to 0x7FFFFFFF
__ pxor(dst, kScratchDoubleReg);
break;
}
case kAVXI32x4SConvertF32x4: {
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister dst = i.OutputSimd128Register(); XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0); XMMRegister src = i.InputSimd128Register(0);
// NAN->0 // NAN->0
__ vcmpeqps(kScratchDoubleReg, src, src); __ Cmpeqps(kScratchDoubleReg, src, src);
__ vpand(dst, src, kScratchDoubleReg); __ Pand(dst, src, kScratchDoubleReg);
// Set top bit if >= 0 (but not -0.0!) // Set top bit if >= 0 (but not -0.0!)
__ vpxor(kScratchDoubleReg, kScratchDoubleReg, dst); __ Pxor(kScratchDoubleReg, dst);
// Convert // Convert
__ vcvttps2dq(dst, dst); __ Cvttps2dq(dst, dst);
// Set top bit if >=0 is now < 0 // Set top bit if >=0 is now < 0
__ vpand(kScratchDoubleReg, kScratchDoubleReg, dst); __ Pand(kScratchDoubleReg, dst);
__ vpsrad(kScratchDoubleReg, kScratchDoubleReg, 31); __ Psrad(kScratchDoubleReg, kScratchDoubleReg, 31);
// Set positive overflow lanes to 0x7FFFFFFF // Set positive overflow lanes to 0x7FFFFFFF
__ vpxor(dst, dst, kScratchDoubleReg); __ Pxor(dst, kScratchDoubleReg);
break; break;
} }
case kIA32I32x4SConvertI16x8Low: { case kIA32I32x4SConvertI16x8Low: {
......
...@@ -194,8 +194,7 @@ namespace compiler { ...@@ -194,8 +194,7 @@ namespace compiler {
V(IA32F32x4Round) \ V(IA32F32x4Round) \
V(IA32I32x4Splat) \ V(IA32I32x4Splat) \
V(IA32I32x4ExtractLane) \ V(IA32I32x4ExtractLane) \
V(SSEI32x4SConvertF32x4) \ V(IA32I32x4SConvertF32x4) \
V(AVXI32x4SConvertF32x4) \
V(IA32I32x4SConvertI16x8Low) \ V(IA32I32x4SConvertI16x8Low) \
V(IA32I32x4SConvertI16x8High) \ V(IA32I32x4SConvertI16x8High) \
V(IA32I32x4Neg) \ V(IA32I32x4Neg) \
......
...@@ -173,8 +173,7 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -173,8 +173,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32F32x4Round: case kIA32F32x4Round:
case kIA32I32x4Splat: case kIA32I32x4Splat:
case kIA32I32x4ExtractLane: case kIA32I32x4ExtractLane:
case kSSEI32x4SConvertF32x4: case kIA32I32x4SConvertF32x4:
case kAVXI32x4SConvertF32x4:
case kIA32I32x4SConvertI16x8Low: case kIA32I32x4SConvertI16x8Low:
case kIA32I32x4SConvertI16x8High: case kIA32I32x4SConvertI16x8High:
case kIA32I32x4Neg: case kIA32I32x4Neg:
......
...@@ -2439,7 +2439,7 @@ void InstructionSelector::VisitF32x4UConvertI32x4(Node* node) { ...@@ -2439,7 +2439,7 @@ void InstructionSelector::VisitF32x4UConvertI32x4(Node* node) {
} }
void InstructionSelector::VisitI32x4SConvertF32x4(Node* node) { void InstructionSelector::VisitI32x4SConvertF32x4(Node* node) {
VisitRRSimd(this, node, kAVXI32x4SConvertF32x4, kSSEI32x4SConvertF32x4); VisitRRSimd(this, node, kIA32I32x4SConvertF32x4);
} }
void InstructionSelector::VisitI32x4UConvertF32x4(Node* node) { void InstructionSelector::VisitI32x4UConvertF32x4(Node* node) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment