Commit 8a4194dd authored by Zhao Jiazhong, committed by Commit Bot

[mips64][wasm-simd] Fix codegen for floating-point min/max ops

dst may be the same register as src0 or src1, so it must not be
overwritten while the values of src0 and src1 are still needed.

Also, NaN results were not properly canonicalized; this CL adds
fmin/fmax instructions to canonicalize the result.

Change-Id: Ia65829015eb6c4de160298719d694ca9490883b7
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2465775
Reviewed-by: Zhi An Ng <zhin@chromium.org>
Commit-Queue: Zhao Jiazhong <zhaojiazhong-hf@loongson.cn>
Cr-Commit-Position: refs/heads/master@{#70519}
parent 752aba9d
......@@ -2196,9 +2196,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Simd128Register src1 = i.InputSimd128Register(1);
Simd128Register scratch0 = kSimd128RegZero;
Simd128Register scratch1 = kSimd128ScratchReg;
// MSA follows IEEE 754-2008 comparison rules:
// 1. All NaN-related comparisons are false.
// 2. +0.0 equals -0.0.
// If inputs are -0.0 and +0.0, then write -0.0 to scratch1.
// scratch1 = (src0 == src1) ? (src0 | src1) : (src1 | src1).
......@@ -2208,9 +2205,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
// scratch0 = isNaN(src0) ? src0 : scratch1.
__ fseq_d(scratch0, src0, src0);
__ bsel_v(scratch0, src0, scratch1);
// dst = (src0 < scratch0) ? src0 : scratch0.
__ fslt_d(dst, src0, scratch0);
__ bsel_v(dst, scratch0, src0);
// scratch1 = (src0 < scratch0) ? src0 : scratch0.
__ fslt_d(scratch1, src0, scratch0);
__ bsel_v(scratch1, scratch0, src0);
// Canonicalize the result.
__ fmin_d(dst, scratch1, scratch1);
break;
}
case kMips64F64x2Max: {
......@@ -2220,9 +2219,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Simd128Register src1 = i.InputSimd128Register(1);
Simd128Register scratch0 = kSimd128RegZero;
Simd128Register scratch1 = kSimd128ScratchReg;
// MSA follows IEEE 754-2008 comparison rules:
// 1. All NaN-related comparisons are false.
// 2. +0.0 equals -0.0.
// If inputs are -0.0 and +0.0, then write +0.0 to scratch1.
// scratch1 = (src0 == src1) ? (src0 & src1) : (src1 & src1).
......@@ -2232,9 +2228,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
// scratch0 = isNaN(src0) ? src0 : scratch1.
__ fseq_d(scratch0, src0, src0);
__ bsel_v(scratch0, src0, scratch1);
// dst = (scratch0 < src0) ? src0 : scratch0.
__ fslt_d(dst, scratch0, src0);
__ bsel_v(dst, scratch0, src0);
// scratch1 = (scratch0 < src0) ? src0 : scratch0.
__ fslt_d(scratch1, scratch0, src0);
__ bsel_v(scratch1, scratch0, src0);
// Canonicalize the result.
__ fmax_d(dst, scratch1, scratch1);
break;
}
case kMips64F64x2Eq: {
......@@ -2590,9 +2588,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Simd128Register src1 = i.InputSimd128Register(1);
Simd128Register scratch0 = kSimd128RegZero;
Simd128Register scratch1 = kSimd128ScratchReg;
// MSA follows IEEE 754-2008 comparison rules:
// 1. All NaN-related comparisons are false.
// 2. +0.0 equals -0.0.
// If inputs are -0.0 and +0.0, then write +0.0 to scratch1.
// scratch1 = (src0 == src1) ? (src0 & src1) : (src1 & src1).
......@@ -2602,9 +2597,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
// scratch0 = isNaN(src0) ? src0 : scratch1.
__ fseq_w(scratch0, src0, src0);
__ bsel_v(scratch0, src0, scratch1);
// dst = (scratch0 < src0) ? src0 : scratch0.
__ fslt_w(dst, scratch0, src0);
__ bsel_v(dst, scratch0, src0);
// scratch1 = (scratch0 < src0) ? src0 : scratch0.
__ fslt_w(scratch1, scratch0, src0);
__ bsel_v(scratch1, scratch0, src0);
// Canonicalize the result.
__ fmax_w(dst, scratch1, scratch1);
break;
}
case kMips64F32x4Min: {
......@@ -2614,9 +2611,6 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Simd128Register src1 = i.InputSimd128Register(1);
Simd128Register scratch0 = kSimd128RegZero;
Simd128Register scratch1 = kSimd128ScratchReg;
// MSA follows IEEE 754-2008 comparison rules:
// 1. All NaN-related comparisons are false.
// 2. +0.0 equals -0.0.
// If inputs are -0.0 and +0.0, then write -0.0 to scratch1.
// scratch1 = (src0 == src1) ? (src0 | src1) : (src1 | src1).
......@@ -2626,9 +2620,11 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
// scratch0 = isNaN(src0) ? src0 : scratch1.
__ fseq_w(scratch0, src0, src0);
__ bsel_v(scratch0, src0, scratch1);
// dst = (src0 < scratch0) ? src0 : scratch0.
__ fslt_w(dst, src0, scratch0);
__ bsel_v(dst, scratch0, src0);
// scratch1 = (src0 < scratch0) ? src0 : scratch0.
__ fslt_w(scratch1, src0, scratch0);
__ bsel_v(scratch1, scratch0, src0);
// Canonicalize the result.
__ fmin_w(dst, scratch1, scratch1);
break;
}
case kMips64F32x4Eq: {
......
......@@ -2265,6 +2265,8 @@ void LiftoffAssembler::emit_f32x4_min(LiftoffRegister dst, LiftoffRegister lhs,
// dst = (scratch1 <= scratch0) ? scratch1 : scratch0.
fsle_w(dst_msa, scratch1, scratch0);
bsel_v(dst_msa, scratch0, scratch1);
// Canonicalize the result.
fmin_w(dst_msa, dst_msa, dst_msa);
}
void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
......@@ -2285,6 +2287,8 @@ void LiftoffAssembler::emit_f32x4_max(LiftoffRegister dst, LiftoffRegister lhs,
// dst = (scratch0 <= scratch1) ? scratch1 : scratch0.
fsle_w(dst_msa, scratch0, scratch1);
bsel_v(dst_msa, scratch0, scratch1);
// Canonicalize the result.
fmax_w(dst_msa, dst_msa, dst_msa);
}
void LiftoffAssembler::emit_f32x4_pmin(LiftoffRegister dst, LiftoffRegister lhs,
......@@ -2384,6 +2388,8 @@ void LiftoffAssembler::emit_f64x2_min(LiftoffRegister dst, LiftoffRegister lhs,
// dst = (scratch1 <= scratch0) ? scratch1 : scratch0.
fsle_d(dst_msa, scratch1, scratch0);
bsel_v(dst_msa, scratch0, scratch1);
// Canonicalize the result.
fmin_d(dst_msa, dst_msa, dst_msa);
}
void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
......@@ -2404,6 +2410,8 @@ void LiftoffAssembler::emit_f64x2_max(LiftoffRegister dst, LiftoffRegister lhs,
// dst = (scratch0 <= scratch1) ? scratch1 : scratch0.
fsle_d(dst_msa, scratch0, scratch1);
bsel_v(dst_msa, scratch0, scratch1);
// Canonicalize the result.
fmax_d(dst_msa, dst_msa, dst_msa);
}
void LiftoffAssembler::emit_f64x2_pmin(LiftoffRegister dst, LiftoffRegister lhs,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment