Commit 840b7609 authored by Bill Budge's avatar Bill Budge Committed by Commit Bot

wasm][simd][x64] Fix the other float vector min and max sequences

- Uses fast path technique to speed up F64x2Min/Max on x64.

Bug: v8:8639
Change-Id: I766752ba9c515bbeb94709460429a71d9f34fd2e
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2232940
Commit-Queue: Bill Budge <bbudge@chromium.org>
Reviewed-by: 's avatarZhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#68217}
parent 2f7f90b5
......@@ -215,6 +215,27 @@ class OutOfLineF32x4Min final : public OutOfLineCode {
XMMRegister const error_;
};
class OutOfLineF64x2Min final : public OutOfLineCode {
public:
OutOfLineF64x2Min(CodeGenerator* gen, XMMRegister result, XMMRegister error)
: OutOfLineCode(gen), result_(result), error_(error) {}
void Generate() final {
// |result| is the partial result, |kScratchDoubleReg| is the error.
// propagate -0's and NaNs (possibly non-canonical) from the error.
__ Orpd(error_, result_);
// Canonicalize NaNs by quieting and clearing the payload.
__ Cmppd(result_, error_, int8_t{3});
__ Orpd(error_, result_);
__ Psrlq(result_, 13);
__ Andnpd(result_, error_);
}
private:
XMMRegister const result_;
XMMRegister const error_;
};
class OutOfLineF32x4Max final : public OutOfLineCode {
public:
OutOfLineF32x4Max(CodeGenerator* gen, XMMRegister result, XMMRegister error)
......@@ -238,6 +259,29 @@ class OutOfLineF32x4Max final : public OutOfLineCode {
XMMRegister const error_;
};
class OutOfLineF64x2Max final : public OutOfLineCode {
public:
OutOfLineF64x2Max(CodeGenerator* gen, XMMRegister result, XMMRegister error)
: OutOfLineCode(gen), result_(result), error_(error) {}
void Generate() final {
// |result| is the partial result, |kScratchDoubleReg| is the error.
// Propagate NaNs (possibly non-canonical).
__ Orpd(result_, error_);
// Propagate sign errors and (subtle) quiet NaNs.
__ Subpd(result_, error_);
// Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
__ Cmppd(error_, result_, int8_t{3});
__ Psrlq(error_, byte{13});
__ Andnpd(error_, result_);
__ Movapd(result_, error_);
}
private:
XMMRegister const result_;
XMMRegister const error_;
};
class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
public:
OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
......@@ -2372,18 +2416,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
XMMRegister src1 = i.InputSimd128Register(1),
dst = i.OutputSimd128Register();
DCHECK_EQ(dst, i.InputSimd128Register(0));
// The minpd instruction doesn't propagate NaNs and +0's in its first
// operand. Perform minpd in both orders, merge the resuls, and adjust.
// The minpd instruction doesn't propagate NaNs and -0's in its first
// operand. Perform minpd in both orders and compare results. Handle the
// unlikely case of discrepancies out of line.
__ Movapd(kScratchDoubleReg, src1);
__ Minpd(kScratchDoubleReg, dst);
__ Minpd(dst, src1);
// propagate -0's and NaNs, which may be non-canonical.
__ Orpd(kScratchDoubleReg, dst);
// Canonicalize NaNs by quieting and clearing the payload.
__ Cmppd(dst, kScratchDoubleReg, int8_t{3});
__ Orpd(kScratchDoubleReg, dst);
__ Psrlq(dst, 13);
__ Andnpd(dst, kScratchDoubleReg);
// Most likely there is no difference and we're done.
__ Xorpd(kScratchDoubleReg, dst);
__ Ptest(kScratchDoubleReg, kScratchDoubleReg);
auto ool = new (zone()) OutOfLineF64x2Min(this, dst, kScratchDoubleReg);
__ j(not_zero, ool->entry());
__ bind(ool->exit());
break;
}
case kX64F64x2Max: {
......@@ -2391,20 +2435,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
dst = i.OutputSimd128Register();
DCHECK_EQ(dst, i.InputSimd128Register(0));
// The maxpd instruction doesn't propagate NaNs and +0's in its first
// operand. Perform maxpd in both orders, merge the resuls, and adjust.
// operand. Perform maxpd in both orders and compare results. Handle the
// unlikely case of discrepancies out of line.
__ Movapd(kScratchDoubleReg, src1);
__ Maxpd(kScratchDoubleReg, dst);
__ Maxpd(dst, src1);
// Find discrepancies.
__ Xorpd(dst, kScratchDoubleReg);
// Propagate NaNs, which may be non-canonical.
__ Orpd(kScratchDoubleReg, dst);
// Propagate sign discrepancy and (subtle) quiet NaNs.
__ Subpd(kScratchDoubleReg, dst);
// Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
__ Cmppd(dst, kScratchDoubleReg, int8_t{3});
__ Psrlq(dst, 13);
__ Andnpd(dst, kScratchDoubleReg);
// Most likely there is no difference and we're done.
__ Xorpd(kScratchDoubleReg, dst);
__ Ptest(kScratchDoubleReg, kScratchDoubleReg);
auto ool = new (zone()) OutOfLineF64x2Max(this, dst, kScratchDoubleReg);
__ j(not_zero, ool->entry());
__ bind(ool->exit());
break;
}
case kX64F64x2Eq: {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment