Commit dfe7c465 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][arm64] Prototype double precision conversion

Prototype these 6 instructions on arm64:

- f64x2.convert_low_i32x4_s
- f64x2.convert_low_i32x4_u
- i32x4.trunc_sat_f64x2_s_zero
- i32x4.trunc_sat_f64x2_u_zero
- f32x4.demote_f64x2_zero
- f64x2.promote_low_f32x4

Drive-by fix:

- The f64x2.promote_low_f32x4 test accessed the result global out of
  bounds; the result only holds 2 doubles.
- fcvtn in the simulator needs to clear the top bits of the Q register.

Bug: v8:11265
Change-Id: Icfb3338942f0d0374448fdcfef3847a6e3ce8ff6
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2644066
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Reviewed-by: Jakob Kummerow <jkummerow@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72373}
parent cf380f59
......@@ -261,8 +261,10 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
V(faddp, Faddp) \
V(fcvtas, Fcvtas) \
V(fcvtau, Fcvtau) \
V(fcvtl, Fcvtl) \
V(fcvtms, Fcvtms) \
V(fcvtmu, Fcvtmu) \
V(fcvtn, Fcvtn) \
V(fcvtns, Fcvtns) \
V(fcvtnu, Fcvtnu) \
V(fcvtps, Fcvtps) \
......
......@@ -1955,6 +1955,40 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(0).Format(narrow));
break;
}
case kArm64F64x2ConvertLowI32x4S: {
VRegister dst = i.OutputSimd128Register().V2D();
__ Sxtl(dst, i.InputSimd128Register(0).V2S());
__ Scvtf(dst, dst);
break;
}
case kArm64F64x2ConvertLowI32x4U: {
VRegister dst = i.OutputSimd128Register().V2D();
__ Uxtl(dst, i.InputSimd128Register(0).V2S());
__ Ucvtf(dst, dst);
break;
}
case kArm64I32x4TruncSatF64x2SZero: {
VRegister dst = i.OutputSimd128Register();
__ Fcvtzs(dst.V2D(), i.InputSimd128Register(0).V2D());
__ Sqxtn(dst.V2S(), dst.V2D());
break;
}
case kArm64I32x4TruncSatF64x2UZero: {
VRegister dst = i.OutputSimd128Register();
__ Fcvtzu(dst.V2D(), i.InputSimd128Register(0).V2D());
__ Uqxtn(dst.V2S(), dst.V2D());
break;
}
case kArm64F32x4DemoteF64x2Zero: {
__ Fcvtn(i.OutputSimd128Register().V2S(),
i.InputSimd128Register(0).V2D());
break;
}
case kArm64F64x2PromoteLowF32x4: {
__ Fcvtl(i.OutputSimd128Register().V2D(),
i.InputSimd128Register(0).V2S());
break;
}
case kArm64F64x2Splat: {
__ Dup(i.OutputSimd128Register().V2D(), i.InputSimd128Register(0).D(), 0);
break;
......
......@@ -198,6 +198,9 @@ namespace compiler {
V(Arm64F64x2Qfms) \
V(Arm64F64x2Pmin) \
V(Arm64F64x2Pmax) \
V(Arm64F64x2ConvertLowI32x4S) \
V(Arm64F64x2ConvertLowI32x4U) \
V(Arm64F64x2PromoteLowF32x4) \
V(Arm64F32x4Splat) \
V(Arm64F32x4ExtractLane) \
V(Arm64F32x4ReplaceLane) \
......@@ -223,6 +226,7 @@ namespace compiler {
V(Arm64F32x4Qfms) \
V(Arm64F32x4Pmin) \
V(Arm64F32x4Pmax) \
V(Arm64F32x4DemoteF64x2Zero) \
V(Arm64I64x2Splat) \
V(Arm64I64x2ExtractLane) \
V(Arm64I64x2ReplaceLane) \
......@@ -263,6 +267,8 @@ namespace compiler {
V(Arm64I32x4Abs) \
V(Arm64I32x4BitMask) \
V(Arm64I32x4DotI16x8S) \
V(Arm64I32x4TruncSatF64x2SZero) \
V(Arm64I32x4TruncSatF64x2UZero) \
V(Arm64I16x8Splat) \
V(Arm64I16x8ExtractLaneU) \
V(Arm64I16x8ExtractLaneS) \
......
......@@ -163,6 +163,9 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64F64x2Qfms:
case kArm64F64x2Pmin:
case kArm64F64x2Pmax:
case kArm64F64x2ConvertLowI32x4S:
case kArm64F64x2ConvertLowI32x4U:
case kArm64F64x2PromoteLowF32x4:
case kArm64F32x4Splat:
case kArm64F32x4ExtractLane:
case kArm64F32x4ReplaceLane:
......@@ -188,6 +191,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64F32x4Qfms:
case kArm64F32x4Pmin:
case kArm64F32x4Pmax:
case kArm64F32x4DemoteF64x2Zero:
case kArm64I64x2Splat:
case kArm64I64x2ExtractLane:
case kArm64I64x2ReplaceLane:
......@@ -232,6 +236,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArm64I32x4Abs:
case kArm64I32x4BitMask:
case kArm64I32x4DotI16x8S:
case kArm64I32x4TruncSatF64x2SZero:
case kArm64I32x4TruncSatF64x2UZero:
case kArm64I16x8Splat:
case kArm64I16x8ExtractLaneU:
case kArm64I16x8ExtractLaneS:
......
......@@ -3399,36 +3399,42 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I16x8) \
V(I8x16)
#define SIMD_UNOP_LIST(V) \
V(F64x2Abs, kArm64F64x2Abs) \
V(F64x2Neg, kArm64F64x2Neg) \
V(F64x2Sqrt, kArm64F64x2Sqrt) \
V(F32x4SConvertI32x4, kArm64F32x4SConvertI32x4) \
V(F32x4UConvertI32x4, kArm64F32x4UConvertI32x4) \
V(F32x4Abs, kArm64F32x4Abs) \
V(F32x4Neg, kArm64F32x4Neg) \
V(F32x4Sqrt, kArm64F32x4Sqrt) \
V(F32x4RecipApprox, kArm64F32x4RecipApprox) \
V(F32x4RecipSqrtApprox, kArm64F32x4RecipSqrtApprox) \
V(I64x2Neg, kArm64I64x2Neg) \
V(I64x2BitMask, kArm64I64x2BitMask) \
V(I32x4SConvertF32x4, kArm64I32x4SConvertF32x4) \
V(I32x4Neg, kArm64I32x4Neg) \
V(I32x4UConvertF32x4, kArm64I32x4UConvertF32x4) \
V(I32x4Abs, kArm64I32x4Abs) \
V(I32x4BitMask, kArm64I32x4BitMask) \
V(I16x8Neg, kArm64I16x8Neg) \
V(I16x8Abs, kArm64I16x8Abs) \
V(I16x8BitMask, kArm64I16x8BitMask) \
V(I8x16Neg, kArm64I8x16Neg) \
V(I8x16Abs, kArm64I8x16Abs) \
V(I8x16BitMask, kArm64I8x16BitMask) \
V(S128Not, kArm64S128Not) \
V(V32x4AnyTrue, kArm64V128AnyTrue) \
V(V32x4AllTrue, kArm64V32x4AllTrue) \
V(V16x8AnyTrue, kArm64V128AnyTrue) \
V(V16x8AllTrue, kArm64V16x8AllTrue) \
V(V8x16AnyTrue, kArm64V128AnyTrue) \
#define SIMD_UNOP_LIST(V) \
V(F64x2Abs, kArm64F64x2Abs) \
V(F64x2Neg, kArm64F64x2Neg) \
V(F64x2Sqrt, kArm64F64x2Sqrt) \
V(F64x2ConvertLowI32x4S, kArm64F64x2ConvertLowI32x4S) \
V(F64x2ConvertLowI32x4U, kArm64F64x2ConvertLowI32x4U) \
V(F64x2PromoteLowF32x4, kArm64F64x2PromoteLowF32x4) \
V(F32x4SConvertI32x4, kArm64F32x4SConvertI32x4) \
V(F32x4UConvertI32x4, kArm64F32x4UConvertI32x4) \
V(F32x4Abs, kArm64F32x4Abs) \
V(F32x4Neg, kArm64F32x4Neg) \
V(F32x4Sqrt, kArm64F32x4Sqrt) \
V(F32x4RecipApprox, kArm64F32x4RecipApprox) \
V(F32x4RecipSqrtApprox, kArm64F32x4RecipSqrtApprox) \
V(F32x4DemoteF64x2Zero, kArm64F32x4DemoteF64x2Zero) \
V(I64x2Neg, kArm64I64x2Neg) \
V(I64x2BitMask, kArm64I64x2BitMask) \
V(I32x4SConvertF32x4, kArm64I32x4SConvertF32x4) \
V(I32x4Neg, kArm64I32x4Neg) \
V(I32x4UConvertF32x4, kArm64I32x4UConvertF32x4) \
V(I32x4Abs, kArm64I32x4Abs) \
V(I32x4BitMask, kArm64I32x4BitMask) \
V(I32x4TruncSatF64x2SZero, kArm64I32x4TruncSatF64x2SZero) \
V(I32x4TruncSatF64x2UZero, kArm64I32x4TruncSatF64x2UZero) \
V(I16x8Neg, kArm64I16x8Neg) \
V(I16x8Abs, kArm64I16x8Abs) \
V(I16x8BitMask, kArm64I16x8BitMask) \
V(I8x16Neg, kArm64I8x16Neg) \
V(I8x16Abs, kArm64I8x16Abs) \
V(I8x16BitMask, kArm64I8x16BitMask) \
V(S128Not, kArm64S128Not) \
V(V32x4AnyTrue, kArm64V128AnyTrue) \
V(V32x4AllTrue, kArm64V32x4AllTrue) \
V(V16x8AnyTrue, kArm64V128AnyTrue) \
V(V16x8AllTrue, kArm64V16x8AllTrue) \
V(V8x16AnyTrue, kArm64V128AnyTrue) \
V(V8x16AllTrue, kArm64V8x16AllTrue)
#define SIMD_SHIFT_OP_LIST(V) \
......
......@@ -2800,7 +2800,7 @@ void InstructionSelector::VisitI64x2SignSelect(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM64
// && !V8_TARGET_ARCH_ARM
#if !V8_TARGET_ARCH_X64
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64
void InstructionSelector::VisitF64x2ConvertLowI32x4S(Node* node) {
UNIMPLEMENTED();
}
......@@ -2819,7 +2819,7 @@ void InstructionSelector::VisitI32x4TruncSatF64x2SZero(Node* node) {
void InstructionSelector::VisitI32x4TruncSatF64x2UZero(Node* node) {
UNIMPLEMENTED();
}
#endif //! V8_TARGET_ARCH_X64
#endif //! V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64
#if !V8_TARGET_ARCH_X64
// TODO(v8:11297) Prototype i32x4.widen_i8x16_u
......
......@@ -3856,6 +3856,7 @@ LogicVRegister Simulator::fcvtn(VectorFormat vform, LogicVRegister dst,
dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven));
}
}
dst.ClearForWrite(vform);
return dst;
}
......
......@@ -10888,6 +10888,26 @@ TEST(fcvtmu) {
CHECK_EQUAL_64(0x0UL, x30);
}
TEST(fcvtn) {
INIT_V8();
SETUP();
START();
double src[2] = {1.0f, 1.0f};
uintptr_t src_base = reinterpret_cast<uintptr_t>(src);
__ Mov(x0, src_base);
__ Ldr(q0, MemOperand(x0, 0));
__ Fcvtn(q0.V2S(), q0.V2D());
END();
RUN();
// Ensure top half is cleared.
CHECK_EQUAL_128(0, 0x3f800000'3f800000, q0);
}
TEST(fcvtns) {
INIT_V8();
SETUP();
......
......@@ -1273,7 +1273,7 @@ WASM_SIMD_TEST(F64x2NearestInt) {
}
// TODO(v8:11265): Prototyping double precision conversions.
#if V8_TARGET_ARCH_X64
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
template <typename SrcType>
void RunF64x2ConvertLowI32x4Test(TestExecutionTier execution_tier,
LowerSimd lower_simd, WasmOpcode opcode) {
......@@ -1388,13 +1388,9 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2PromoteLowF32x4) {
double actual = ReadLittleEndianValue<double>(&g[i]);
CheckDoubleResult(x, x, expected, actual, true);
}
for (int i = 2; i < 4; i++) {
double actual = ReadLittleEndianValue<double>(&g[i]);
CheckDoubleResult(x, x, 0, actual, true);
}
}
}
#endif // V8_TARGET_ARCH_X64
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
void RunF64x2BinOpTest(TestExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, DoubleBinOp expected_op) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment