Commit 054dd559 authored by jing.bao, committed by Commit Bot

[ia32][wasm] Add I32x4SConvertF32x4, I32x4UConvertF32x4

Change-Id: Ic2c4f02d5e451c4a3a6612ae91e5cc8231d62448
Reviewed-on: https://chromium-review.googlesource.com/1119773
Commit-Queue: Jing Bao <jing.bao@intel.com>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#54307}
parent a0d8c293
......@@ -2010,6 +2010,42 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputOperand(2), i.InputInt8(1));
break;
}
case kSSEI32x4SConvertF32x4: {
// Signed saturating truncation of 4 floats to 4 int32 lanes, in place.
// NaN -> 0, positive overflow -> INT32_MAX, negative overflow -> INT32_MIN.
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
XMMRegister dst = i.OutputSimd128Register();
// NAN->0
// cmpeqps(x, x) yields all-ones exactly in non-NaN lanes (NaN != NaN),
// so the pand zeroes NaN lanes and leaves the rest unchanged.
__ movaps(kScratchDoubleReg, dst);
__ cmpeqps(kScratchDoubleReg, kScratchDoubleReg);
__ pand(dst, kScratchDoubleReg);
// Set top bit if >= 0 (but not -0.0!)
// scratch = ~dst in non-NaN lanes: its sign bit is set iff the lane's
// float sign bit was clear, i.e. the lane was >= +0.0.
__ pxor(kScratchDoubleReg, dst);
// Convert
// cvttps2dq writes 0x80000000 (INT32_MIN) into lanes that overflow.
__ cvttps2dq(dst, dst);
// Set top bit if >=0 is now < 0
// A lane that was >= 0 but converted to a negative int overflowed
// positively; psrad broadcasts that sign bit across the whole lane.
__ pand(kScratchDoubleReg, dst);
__ psrad(kScratchDoubleReg, 31);
// Set positive overflow lanes to 0x7FFFFFFF
// 0x80000000 ^ 0xFFFFFFFF == 0x7FFFFFFF; other lanes XOR with 0.
__ pxor(dst, kScratchDoubleReg);
break;
}
case kAVXI32x4SConvertF32x4: {
// AVX variant of the signed saturating f32x4 -> i32x4 truncation.
// NaN -> 0, positive overflow -> INT32_MAX, negative overflow -> INT32_MIN.
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
// NAN->0
// vcmpeqps(src, src) is all-ones exactly in non-NaN lanes, so the vpand
// copies src into dst with NaN lanes zeroed.
__ vcmpeqps(kScratchDoubleReg, src, src);
__ vpand(dst, src, kScratchDoubleReg);
// Set top bit if >= 0 (but not -0.0!)
// scratch = ~dst in non-NaN lanes: sign bit set iff lane was >= +0.0.
__ vpxor(kScratchDoubleReg, kScratchDoubleReg, dst);
// Convert
// vcvttps2dq writes 0x80000000 (INT32_MIN) into lanes that overflow.
__ vcvttps2dq(dst, dst);
// Set top bit if >=0 is now < 0
// Lanes that were >= 0 but became negative ints overflowed positively.
__ vpand(kScratchDoubleReg, kScratchDoubleReg, dst);
__ vpsrad(kScratchDoubleReg, kScratchDoubleReg, 31);
// Set positive overflow lanes to 0x7FFFFFFF
// 0x80000000 ^ 0xFFFFFFFF == 0x7FFFFFFF; other lanes XOR with 0.
__ vpxor(dst, dst, kScratchDoubleReg);
break;
}
case kIA32I32x4SConvertI16x8Low: {
__ Pmovsxwd(i.OutputSimd128Register(), i.InputOperand(0));
break;
......@@ -2179,6 +2215,61 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vpcmpeqd(i.OutputSimd128Register(), kScratchDoubleReg, src2);
break;
}
case kSSEI32x4UConvertF32x4: {
// Unsigned saturating truncation of 4 floats to 4 uint32 lanes, in place.
// NaN and negative lanes -> 0, positive overflow -> 0xFFFFFFFF.
// Strategy: convert the low (signed) half directly, convert
// (src - max_signed) for the high half, and add the two.
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope sse_scope(tasm(), SSE4_1);  // pmaxsd requires SSE4.1
XMMRegister dst = i.OutputSimd128Register();
XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
// NAN->0, negative->0
// maxps returns the second operand when either input is NaN, so
// maxps(dst, 0) maps NaN lanes to 0 and clamps negatives to 0.
__ pxor(kScratchDoubleReg, kScratchDoubleReg);
__ maxps(dst, kScratchDoubleReg);
// scratch: float representation of max_signed
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ psrld(kScratchDoubleReg, 1);                    // 0x7fffffff
__ cvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // 0x4f000000
// tmp: convert (src-max_signed).
// Positive overflow lanes -> 0x7FFFFFFF
// Negative lanes -> 0
__ movaps(tmp, dst);
__ subps(tmp, kScratchDoubleReg);
// scratch = all-ones where max_signed <= (src - max_signed), i.e. where
// the unsigned result itself overflows 2^32-1.
__ cmpleps(kScratchDoubleReg, tmp);
// cvttps2dq gives 0x80000000 for those overflow lanes; XOR with the
// all-ones mask turns that into 0x7FFFFFFF.
__ cvttps2dq(tmp, tmp);
__ pxor(tmp, kScratchDoubleReg);
// Lanes below max_signed went negative in tmp; clamp them back to 0.
__ pxor(kScratchDoubleReg, kScratchDoubleReg);
__ pmaxsd(tmp, kScratchDoubleReg);
// convert. Overflow lanes above max_signed will be 0x80000000
__ cvttps2dq(dst, dst);
// Add (src-max_signed) for overflow lanes.
// 0x80000000 + (src - max_signed) reconstructs the unsigned value.
__ paddd(dst, tmp);
break;
}
case kAVXI32x4UConvertF32x4: {
// AVX variant of the unsigned saturating f32x4 -> i32x4 truncation.
// NaN and negative lanes -> 0, positive overflow -> 0xFFFFFFFF.
// Same algorithm as the SSE case: convert the signed part directly,
// convert (src - max_signed) for the part above INT32_MAX, then add.
DCHECK_EQ(i.OutputSimd128Register(), i.InputSimd128Register(0));
CpuFeatureScope avx_scope(tasm(), AVX);
XMMRegister dst = i.OutputSimd128Register();
XMMRegister tmp = i.ToSimd128Register(instr->TempAt(0));
// NAN->0, negative->0
// vmaxps returns the second source when either input is NaN, so
// vmaxps(dst, dst, 0) maps NaN to 0 and clamps negatives to 0.
__ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
__ vmaxps(dst, dst, kScratchDoubleReg);
// scratch: float representation of max_signed
__ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
__ vpsrld(kScratchDoubleReg, kScratchDoubleReg, 1);  // 0x7fffffff
__ vcvtdq2ps(kScratchDoubleReg, kScratchDoubleReg);  // 0x4f000000
// tmp: convert (src-max_signed).
// Positive overflow lanes -> 0x7FFFFFFF
// Negative lanes -> 0
__ vsubps(tmp, dst, kScratchDoubleReg);
// scratch = all-ones where max_signed <= (src - max_signed), i.e. where
// the unsigned result overflows 2^32-1.
__ vcmpleps(kScratchDoubleReg, kScratchDoubleReg, tmp);
// vcvttps2dq gives 0x80000000 on overflow; XOR with the all-ones mask
// turns that into 0x7FFFFFFF.
__ vcvttps2dq(tmp, tmp);
__ vpxor(tmp, tmp, kScratchDoubleReg);
// Lanes below max_signed went negative in tmp; clamp them back to 0.
__ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
__ vpmaxsd(tmp, tmp, kScratchDoubleReg);
// convert. Overflow lanes above max_signed will be 0x80000000
__ vcvttps2dq(dst, dst);
// Add (src-max_signed) for overflow lanes.
// 0x80000000 + (src - max_signed) reconstructs the unsigned value.
__ vpaddd(dst, dst, tmp);
break;
}
case kIA32I32x4UConvertI16x8Low: {
__ Pmovzxwd(i.OutputSimd128Register(), i.InputOperand(0));
break;
......
......@@ -154,6 +154,8 @@ namespace compiler {
V(IA32I32x4ExtractLane) \
V(SSEI32x4ReplaceLane) \
V(AVXI32x4ReplaceLane) \
V(SSEI32x4SConvertF32x4) \
V(AVXI32x4SConvertF32x4) \
V(IA32I32x4SConvertI16x8Low) \
V(IA32I32x4SConvertI16x8High) \
V(IA32I32x4Neg) \
......@@ -181,6 +183,8 @@ namespace compiler {
V(AVXI32x4GtS) \
V(SSEI32x4GeS) \
V(AVXI32x4GeS) \
V(SSEI32x4UConvertF32x4) \
V(AVXI32x4UConvertF32x4) \
V(IA32I32x4UConvertI16x8Low) \
V(IA32I32x4UConvertI16x8High) \
V(SSEI32x4ShrU) \
......
......@@ -136,6 +136,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32I32x4ExtractLane:
case kSSEI32x4ReplaceLane:
case kAVXI32x4ReplaceLane:
case kSSEI32x4SConvertF32x4:
case kAVXI32x4SConvertF32x4:
case kIA32I32x4SConvertI16x8Low:
case kIA32I32x4SConvertI16x8High:
case kIA32I32x4Neg:
......@@ -163,6 +165,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXI32x4GtS:
case kSSEI32x4GeS:
case kAVXI32x4GeS:
case kSSEI32x4UConvertF32x4:
case kAVXI32x4UConvertF32x4:
case kIA32I32x4UConvertI16x8Low:
case kIA32I32x4UConvertI16x8High:
case kSSEI32x4ShrU:
......
......@@ -1877,6 +1877,19 @@ void InstructionSelector::VisitF32x4UConvertI32x4(Node* node) {
VisitRRSimd(this, node, kAVXF32x4UConvertI32x4, kSSEF32x4UConvertI32x4);
}
// Select the signed f32x4 -> i32x4 conversion. Plain reg-reg form: unlike
// the unsigned variant below, no extra temp register is needed, so this can
// delegate to the shared VisitRRSimd helper (AVX opcode first, SSE fallback).
void InstructionSelector::VisitI32x4SConvertF32x4(Node* node) {
VisitRRSimd(this, node, kAVXI32x4SConvertF32x4, kSSEI32x4SConvertF32x4);
}
// Select the unsigned f32x4 -> i32x4 conversion. The code-generator
// sequence needs one scratch XMM register in addition to the fixed scratch
// double register, so this cannot use the plain VisitRRSimd helper; the
// output is also constrained to reuse the input register (same-as-first).
void InstructionSelector::VisitI32x4UConvertF32x4(Node* node) {
IA32OperandGenerator g(this);
InstructionCode code = IsSupported(AVX) ? kAVXI32x4UConvertF32x4
                                        : kSSEI32x4UConvertF32x4;
// One Simd128 temp, clobbered by the generated conversion sequence.
InstructionOperand scratch[] = {g.TempSimd128Register()};
Emit(code, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)),
     arraysize(scratch), scratch);
}
void InstructionSelector::VisitI8x16Mul(Node* node) {
IA32OperandGenerator g(this);
InstructionOperand operand0 = g.UseUniqueRegister(node->InputAt(0));
......
......@@ -2392,7 +2392,7 @@ void InstructionSelector::VisitWord64AtomicCompareExchange(Node* node) {
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64
!V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitI32x4SConvertF32x4(Node* node) {
UNIMPLEMENTED();
}
......@@ -2400,11 +2400,7 @@ void InstructionSelector::VisitI32x4SConvertF32x4(Node* node) {
void InstructionSelector::VisitI32x4UConvertF32x4(Node* node) {
UNIMPLEMENTED();
}
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
// && !V8_TARGET_ARCH_MIPS64
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitI32x4SConvertI16x8Low(Node* node) {
UNIMPLEMENTED();
}
......@@ -2451,11 +2447,7 @@ void InstructionSelector::VisitI8x16SConvertI16x8(Node* node) {
void InstructionSelector::VisitI8x16UConvertI16x8(Node* node) {
UNIMPLEMENTED();
}
#endif // !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS
// && !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
#if !V8_TARGET_ARCH_ARM && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_MIPS && \
!V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitI8x16Shl(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitI8x16ShrS(Node* node) { UNIMPLEMENTED(); }
......
......@@ -821,7 +821,7 @@ WASM_SIMD_TEST(I8x16ReplaceLane) {
}
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
V8_TARGET_ARCH_MIPS64
V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
int32_t ConvertToInt(double val, bool unsigned_integer) {
if (std::isnan(val)) return 0;
......@@ -860,11 +860,7 @@ WASM_SIMD_TEST(I32x4ConvertF32x4) {
CHECK_EQ(1, r.Call(*i, signed_value, unsigned_value));
}
}
#endif // V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS ||
// V8_TARGET_ARCH_MIPS64
#if V8_TARGET_ARCH_ARM || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_MIPS || \
V8_TARGET_ARCH_MIPS64 || V8_TARGET_ARCH_IA32
// Tests both signed and unsigned conversion from I16x8 (unpacking).
WASM_SIMD_TEST(I32x4ConvertI16x8) {
WasmRunner<int32_t, int32_t, int32_t, int32_t, int32_t> r(execution_mode,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.