Commit 840b7609, authored by Bill Budge, committed by Commit Bot

[wasm][simd][x64] Fix the other float vector min and max sequences

- Uses fast path technique to speed up F64x2Min/Max on x64.

Bug: v8:8639
Change-Id: I766752ba9c515bbeb94709460429a71d9f34fd2e
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2232940
Commit-Queue: Bill Budge <bbudge@chromium.org>
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#68217}
parent 2f7f90b5
...@@ -215,6 +215,27 @@ class OutOfLineF32x4Min final : public OutOfLineCode { ...@@ -215,6 +215,27 @@ class OutOfLineF32x4Min final : public OutOfLineCode {
XMMRegister const error_; XMMRegister const error_;
}; };
// Out-of-line slow path for the F64x2Min code sequence. The inline sequence
// (the kX64F64x2Min case) performs minpd in both operand orders, XORs the two
// results, and branches here only when that XOR has at least one bit set.
class OutOfLineF64x2Min final : public OutOfLineCode {
public:
// |result| holds one of the two minpd results (the partial result) and
// receives the final value. |error| holds the XOR of the two minpd results
// and is clobbered. The caller visible in this change passes
// kScratchDoubleReg as |error|.
OutOfLineF64x2Min(CodeGenerator* gen, XMMRegister result, XMMRegister error)
: OutOfLineCode(gen), result_(result), error_(error) {}
// Emits the fix-up sequence. The comments below assume the macro-assembler
// wrappers behave like the x64 instructions of the same name.
void Generate() final {
// |result_| is the partial result, |error_| is the error (the XOR of the
// two minpd results).
// Propagate -0's and NaNs (possibly non-canonical) from the error:
// (a ^ b) | a == a | b, so |error_| becomes the OR of both minpd results.
__ Orpd(error_, result_);
// Canonicalize NaNs by quieting and clearing the payload.
// Predicate 3 is "unordered": |result_| becomes an all-ones mask in lanes
// where either operand is NaN, and zero elsewhere.
__ Cmppd(result_, error_, int8_t{3});
// Force NaN lanes of |error_| to all-ones.
__ Orpd(error_, result_);
// Shift the mask right so that only the low 51 bits (the NaN payload below
// the quiet bit) remain set in NaN lanes.
__ Psrlq(result_, 13);
// result_ = ~result_ & error_: NaN lanes keep only their top 13 bits (sign,
// exponent, quiet bit); all other lanes take the merged value unchanged.
__ Andnpd(result_, error_);
}
private:
// Partial result on entry, final result on exit.
XMMRegister const result_;
// Discrepancy between the two minpd orders on entry; used as scratch.
XMMRegister const error_;
};
class OutOfLineF32x4Max final : public OutOfLineCode { class OutOfLineF32x4Max final : public OutOfLineCode {
public: public:
OutOfLineF32x4Max(CodeGenerator* gen, XMMRegister result, XMMRegister error) OutOfLineF32x4Max(CodeGenerator* gen, XMMRegister result, XMMRegister error)
...@@ -238,6 +259,29 @@ class OutOfLineF32x4Max final : public OutOfLineCode { ...@@ -238,6 +259,29 @@ class OutOfLineF32x4Max final : public OutOfLineCode {
XMMRegister const error_; XMMRegister const error_;
}; };
// Out-of-line slow path for the F64x2Max code sequence. The inline sequence
// (the kX64F64x2Max case) performs maxpd in both operand orders, XORs the two
// results, and branches here only when that XOR has at least one bit set.
class OutOfLineF64x2Max final : public OutOfLineCode {
public:
// |result| holds one of the two maxpd results (the partial result) and
// receives the final value. |error| holds the XOR of the two maxpd results
// and is clobbered. The caller visible in this change passes
// kScratchDoubleReg as |error|.
OutOfLineF64x2Max(CodeGenerator* gen, XMMRegister result, XMMRegister error)
: OutOfLineCode(gen), result_(result), error_(error) {}
// Emits the fix-up sequence. The comments below assume the macro-assembler
// wrappers behave like the x64 instructions of the same name.
void Generate() final {
// |result_| is the partial result, |error_| is the error (the XOR of the
// two maxpd results).
// Propagate NaNs (possibly non-canonical): (a ^ b) | a == a | b, so
// |result_| becomes the OR of both maxpd results.
__ Orpd(result_, error_);
// Propagate sign errors and (subtle) quiet NaNs.
// NOTE(review): presumably relies on the subtraction quieting any NaN lane
// and removing the discrepancy bits elsewhere -- confirm against the
// F32x4Max sequence.
__ Subpd(result_, error_);
// Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
// Predicate 3 is "unordered": |error_| becomes an all-ones mask in NaN
// lanes, and zero elsewhere.
__ Cmppd(error_, result_, int8_t{3});
// Keep only the low 51 bits (payload below the quiet bit) set in the mask.
__ Psrlq(error_, byte{13});
// error_ = ~error_ & result_: clears the payload bits of NaN lanes and
// leaves the remaining lanes of |result_| untouched.
__ Andnpd(error_, result_);
// Andnpd wrote its output to |error_|; copy it back to the result register.
__ Movapd(result_, error_);
}
private:
// Partial result on entry, final result on exit.
XMMRegister const result_;
// Discrepancy between the two maxpd orders on entry; used as scratch.
XMMRegister const error_;
};
class OutOfLineTruncateDoubleToI final : public OutOfLineCode { class OutOfLineTruncateDoubleToI final : public OutOfLineCode {
public: public:
OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result, OutOfLineTruncateDoubleToI(CodeGenerator* gen, Register result,
...@@ -2372,18 +2416,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2372,18 +2416,18 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
XMMRegister src1 = i.InputSimd128Register(1), XMMRegister src1 = i.InputSimd128Register(1),
dst = i.OutputSimd128Register(); dst = i.OutputSimd128Register();
DCHECK_EQ(dst, i.InputSimd128Register(0)); DCHECK_EQ(dst, i.InputSimd128Register(0));
// The minpd instruction doesn't propagate NaNs and +0's in its first // The minpd instruction doesn't propagate NaNs and -0's in its first
// operand. Perform minpd in both orders, merge the resuls, and adjust. // operand. Perform minpd in both orders and compare results. Handle the
// unlikely case of discrepancies out of line.
__ Movapd(kScratchDoubleReg, src1); __ Movapd(kScratchDoubleReg, src1);
__ Minpd(kScratchDoubleReg, dst); __ Minpd(kScratchDoubleReg, dst);
__ Minpd(dst, src1); __ Minpd(dst, src1);
// propagate -0's and NaNs, which may be non-canonical. // Most likely there is no difference and we're done.
__ Orpd(kScratchDoubleReg, dst); __ Xorpd(kScratchDoubleReg, dst);
// Canonicalize NaNs by quieting and clearing the payload. __ Ptest(kScratchDoubleReg, kScratchDoubleReg);
__ Cmppd(dst, kScratchDoubleReg, int8_t{3}); auto ool = new (zone()) OutOfLineF64x2Min(this, dst, kScratchDoubleReg);
__ Orpd(kScratchDoubleReg, dst); __ j(not_zero, ool->entry());
__ Psrlq(dst, 13); __ bind(ool->exit());
__ Andnpd(dst, kScratchDoubleReg);
break; break;
} }
case kX64F64x2Max: { case kX64F64x2Max: {
...@@ -2391,20 +2435,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2391,20 +2435,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
dst = i.OutputSimd128Register(); dst = i.OutputSimd128Register();
DCHECK_EQ(dst, i.InputSimd128Register(0)); DCHECK_EQ(dst, i.InputSimd128Register(0));
// The maxpd instruction doesn't propagate NaNs and +0's in its first // The maxpd instruction doesn't propagate NaNs and +0's in its first
// operand. Perform maxpd in both orders, merge the resuls, and adjust. // operand. Perform maxpd in both orders and compare results. Handle the
// unlikely case of discrepancies out of line.
__ Movapd(kScratchDoubleReg, src1); __ Movapd(kScratchDoubleReg, src1);
__ Maxpd(kScratchDoubleReg, dst); __ Maxpd(kScratchDoubleReg, dst);
__ Maxpd(dst, src1); __ Maxpd(dst, src1);
// Find discrepancies. // Most likely there is no difference and we're done.
__ Xorpd(dst, kScratchDoubleReg); __ Xorpd(kScratchDoubleReg, dst);
// Propagate NaNs, which may be non-canonical. __ Ptest(kScratchDoubleReg, kScratchDoubleReg);
__ Orpd(kScratchDoubleReg, dst); auto ool = new (zone()) OutOfLineF64x2Max(this, dst, kScratchDoubleReg);
// Propagate sign discrepancy and (subtle) quiet NaNs. __ j(not_zero, ool->entry());
__ Subpd(kScratchDoubleReg, dst); __ bind(ool->exit());
// Canonicalize NaNs by clearing the payload. Sign is non-deterministic.
__ Cmppd(dst, kScratchDoubleReg, int8_t{3});
__ Psrlq(dst, 13);
__ Andnpd(dst, kScratchDoubleReg);
break; break;
} }
case kX64F64x2Eq: { case kX64F64x2Eq: {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment