Commit f22837db authored by Ng Zhi An, committed by Commit Bot

[wasm-simd] Implement f64x2 splat, extract_lane, and replace_lane for arm

Bug: v8:9813
Change-Id: I9ab0d0aafb0a2620a317d99c10f56dbcaa7fdf04
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1849206
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Cr-Commit-Position: refs/heads/master@{#64339}
parent 02a06c4d
...@@ -1125,6 +1125,12 @@ void TurboAssembler::ExtractLane(SwVfpRegister dst, QwNeonRegister src, ...@@ -1125,6 +1125,12 @@ void TurboAssembler::ExtractLane(SwVfpRegister dst, QwNeonRegister src,
VmovExtended(dst.code(), s_code); VmovExtended(dst.code(), s_code);
} }
// Extracts the 64-bit lane `lane` of the NEON register `src` into `dst`.
// On ARM, the Q register with code q aliases the two consecutive D
// registers 2q and 2q+1, so the requested lane is itself a D register.
void TurboAssembler::ExtractLane(DwVfpRegister dst, QwNeonRegister src,
                                 int lane) {
  DwVfpRegister double_source =
      DwVfpRegister::from_code(src.code() * 2 + lane);
  vmov(dst, double_source);
}
void TurboAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src, void TurboAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src,
Register src_lane, NeonDataType dt, int lane) { Register src_lane, NeonDataType dt, int lane) {
Move(dst, src); Move(dst, src);
...@@ -1145,6 +1151,13 @@ void TurboAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src, ...@@ -1145,6 +1151,13 @@ void TurboAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src,
VmovExtended(s_code, src_lane.code()); VmovExtended(s_code, src_lane.code());
} }
// Produces in `dst` a copy of `src` with 64-bit lane `lane` replaced by
// `src_lane`. Move may do nothing when dst and src are identical; the
// target lane is then overwritten via the aliasing D register (a Q
// register with code q aliases D registers 2q and 2q+1).
void TurboAssembler::ReplaceLane(QwNeonRegister dst, QwNeonRegister src,
                                 DwVfpRegister src_lane, int lane) {
  Move(dst, src);
  DwVfpRegister lane_reg = DwVfpRegister::from_code(dst.code() * 2 + lane);
  vmov(lane_reg, src_lane);
}
void TurboAssembler::LslPair(Register dst_low, Register dst_high, void TurboAssembler::LslPair(Register dst_low, Register dst_high,
Register src_low, Register src_high, Register src_low, Register src_high,
Register shift) { Register shift) {
......
...@@ -440,10 +440,13 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { ...@@ -440,10 +440,13 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void ExtractLane(Register dst, QwNeonRegister src, NeonDataType dt, int lane); void ExtractLane(Register dst, QwNeonRegister src, NeonDataType dt, int lane);
void ExtractLane(Register dst, DwVfpRegister src, NeonDataType dt, int lane); void ExtractLane(Register dst, DwVfpRegister src, NeonDataType dt, int lane);
void ExtractLane(SwVfpRegister dst, QwNeonRegister src, int lane); void ExtractLane(SwVfpRegister dst, QwNeonRegister src, int lane);
void ExtractLane(DwVfpRegister dst, QwNeonRegister src, int lane);
void ReplaceLane(QwNeonRegister dst, QwNeonRegister src, Register src_lane, void ReplaceLane(QwNeonRegister dst, QwNeonRegister src, Register src_lane,
NeonDataType dt, int lane); NeonDataType dt, int lane);
void ReplaceLane(QwNeonRegister dst, QwNeonRegister src, void ReplaceLane(QwNeonRegister dst, QwNeonRegister src,
SwVfpRegister src_lane, int lane); SwVfpRegister src_lane, int lane);
void ReplaceLane(QwNeonRegister dst, QwNeonRegister src,
DwVfpRegister src_lane, int lane);
// Register move. May do nothing if the registers are identical. // Register move. May do nothing if the registers are identical.
void Move(Register dst, Smi smi); void Move(Register dst, Smi smi);
......
...@@ -1763,6 +1763,23 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -1763,6 +1763,23 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ and_(i.OutputRegister(0), i.InputRegister(0), __ and_(i.OutputRegister(0), i.InputRegister(0),
Operand(kSpeculationPoisonRegister)); Operand(kSpeculationPoisonRegister));
break; break;
case kArmF64x2Splat: {
Simd128Register dst = i.OutputSimd128Register();
DoubleRegister src = i.InputDoubleRegister(0);
__ Move(dst.low(), src);
__ Move(dst.high(), src);
break;
}
case kArmF64x2ExtractLane: {
__ ExtractLane(i.OutputDoubleRegister(), i.InputSimd128Register(0),
i.InputInt8(1));
break;
}
case kArmF64x2ReplaceLane: {
__ ReplaceLane(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputDoubleRegister(2), i.InputInt8(1));
break;
}
case kArmF32x4Splat: { case kArmF32x4Splat: {
int src_code = i.InputFloatRegister(0).code(); int src_code = i.InputFloatRegister(0).code();
__ vdup(Neon32, i.OutputSimd128Register(), __ vdup(Neon32, i.OutputSimd128Register(),
......
...@@ -128,6 +128,9 @@ namespace compiler { ...@@ -128,6 +128,9 @@ namespace compiler {
V(ArmPeek) \ V(ArmPeek) \
V(ArmDmbIsh) \ V(ArmDmbIsh) \
V(ArmDsbIsb) \ V(ArmDsbIsb) \
V(ArmF64x2Splat) \
V(ArmF64x2ExtractLane) \
V(ArmF64x2ReplaceLane) \
V(ArmF32x4Splat) \ V(ArmF32x4Splat) \
V(ArmF32x4ExtractLane) \ V(ArmF32x4ExtractLane) \
V(ArmF32x4ReplaceLane) \ V(ArmF32x4ReplaceLane) \
......
...@@ -108,6 +108,9 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -108,6 +108,9 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kArmFloat32Min: case kArmFloat32Min:
case kArmFloat64Min: case kArmFloat64Min:
case kArmFloat64SilenceNaN: case kArmFloat64SilenceNaN:
case kArmF64x2Splat:
case kArmF64x2ExtractLane:
case kArmF64x2ReplaceLane:
case kArmF32x4Splat: case kArmF32x4Splat:
case kArmF32x4ExtractLane: case kArmF32x4ExtractLane:
case kArmF32x4ReplaceLane: case kArmF32x4ReplaceLane:
......
...@@ -2498,6 +2498,7 @@ void InstructionSelector::VisitS128Zero(Node* node) { ...@@ -2498,6 +2498,7 @@ void InstructionSelector::VisitS128Zero(Node* node) {
VisitRR(this, kArm##Type##Splat, node); \ VisitRR(this, kArm##Type##Splat, node); \
} }
SIMD_TYPE_LIST(SIMD_VISIT_SPLAT) SIMD_TYPE_LIST(SIMD_VISIT_SPLAT)
SIMD_VISIT_SPLAT(F64x2)
#undef SIMD_VISIT_SPLAT #undef SIMD_VISIT_SPLAT
#define SIMD_VISIT_EXTRACT_LANE(Type) \ #define SIMD_VISIT_EXTRACT_LANE(Type) \
...@@ -2505,6 +2506,7 @@ SIMD_TYPE_LIST(SIMD_VISIT_SPLAT) ...@@ -2505,6 +2506,7 @@ SIMD_TYPE_LIST(SIMD_VISIT_SPLAT)
VisitRRI(this, kArm##Type##ExtractLane, node); \ VisitRRI(this, kArm##Type##ExtractLane, node); \
} }
SIMD_TYPE_LIST(SIMD_VISIT_EXTRACT_LANE) SIMD_TYPE_LIST(SIMD_VISIT_EXTRACT_LANE)
SIMD_VISIT_EXTRACT_LANE(F64x2)
#undef SIMD_VISIT_EXTRACT_LANE #undef SIMD_VISIT_EXTRACT_LANE
#define SIMD_VISIT_REPLACE_LANE(Type) \ #define SIMD_VISIT_REPLACE_LANE(Type) \
...@@ -2512,6 +2514,7 @@ SIMD_TYPE_LIST(SIMD_VISIT_EXTRACT_LANE) ...@@ -2512,6 +2514,7 @@ SIMD_TYPE_LIST(SIMD_VISIT_EXTRACT_LANE)
VisitRRIR(this, kArm##Type##ReplaceLane, node); \ VisitRRIR(this, kArm##Type##ReplaceLane, node); \
} }
SIMD_TYPE_LIST(SIMD_VISIT_REPLACE_LANE) SIMD_TYPE_LIST(SIMD_VISIT_REPLACE_LANE)
SIMD_VISIT_REPLACE_LANE(F64x2)
#undef SIMD_VISIT_REPLACE_LANE #undef SIMD_VISIT_REPLACE_LANE
#undef SIMD_TYPE_LIST #undef SIMD_TYPE_LIST
......
...@@ -2638,9 +2638,6 @@ void InstructionSelector::VisitI64x2ReplaceLaneI32Pair(Node* node) { ...@@ -2638,9 +2638,6 @@ void InstructionSelector::VisitI64x2ReplaceLaneI32Pair(Node* node) {
#if !V8_TARGET_ARCH_X64 #if !V8_TARGET_ARCH_X64
#if !V8_TARGET_ARCH_ARM64 #if !V8_TARGET_ARCH_ARM64
#if !V8_TARGET_ARCH_IA32 #if !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitF64x2Splat(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2ExtractLane(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2ReplaceLane(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Abs(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Abs(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Neg(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Neg(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Sqrt(Node* node) { UNIMPLEMENTED(); } void InstructionSelector::VisitF64x2Sqrt(Node* node) { UNIMPLEMENTED(); }
......
...@@ -1088,6 +1088,7 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2GeU) { ...@@ -1088,6 +1088,7 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2GeU) {
UnsignedGreaterEqual); UnsignedGreaterEqual);
} }
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 #endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
WASM_SIMD_TEST_NO_LOWERING(F64x2Splat) { WASM_SIMD_TEST_NO_LOWERING(F64x2Splat) {
WasmRunner<int32_t, double> r(execution_tier, lower_simd); WasmRunner<int32_t, double> r(execution_tier, lower_simd);
...@@ -1111,18 +1112,6 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Splat) { ...@@ -1111,18 +1112,6 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Splat) {
} }
} }
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
WASM_SIMD_TEST_NO_LOWERING(F64x2ExtractLaneWithI64x2) {
WasmRunner<int64_t> r(execution_tier, lower_simd);
BUILD(r, WASM_IF_ELSE_L(
WASM_F64_EQ(WASM_SIMD_F64x2_EXTRACT_LANE(
0, WASM_SIMD_I64x2_SPLAT(WASM_I64V(1e15))),
WASM_F64_REINTERPRET_I64(WASM_I64V(1e15))),
WASM_I64V(1), WASM_I64V(0)));
CHECK_EQ(1, r.Call());
}
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
WASM_SIMD_TEST_NO_LOWERING(F64x2ExtractLane) { WASM_SIMD_TEST_NO_LOWERING(F64x2ExtractLane) {
WasmRunner<double, double> r(execution_tier, lower_simd); WasmRunner<double, double> r(execution_tier, lower_simd);
byte param1 = 0; byte param1 = 0;
...@@ -1145,18 +1134,6 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2ExtractLane) { ...@@ -1145,18 +1134,6 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2ExtractLane) {
} }
} }
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
WASM_SIMD_TEST_NO_LOWERING(I64x2ExtractWithF64x2) {
WasmRunner<int64_t> r(execution_tier, lower_simd);
BUILD(r, WASM_IF_ELSE_L(
WASM_I64_EQ(WASM_SIMD_I64x2_EXTRACT_LANE(
0, WASM_SIMD_F64x2_SPLAT(WASM_F64(1e15))),
WASM_I64_REINTERPRET_F64(WASM_F64(1e15))),
WASM_I64V(1), WASM_I64V(0)));
CHECK_EQ(1, r.Call());
}
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
WASM_SIMD_TEST_NO_LOWERING(F64x2ReplaceLane) { WASM_SIMD_TEST_NO_LOWERING(F64x2ReplaceLane) {
WasmRunner<int32_t> r(execution_tier, lower_simd); WasmRunner<int32_t> r(execution_tier, lower_simd);
// Set up a global to hold input/output vector. // Set up a global to hold input/output vector.
...@@ -1176,6 +1153,29 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2ReplaceLane) { ...@@ -1176,6 +1153,29 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2ReplaceLane) {
} }
} }
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
// Checks that f64x2.extract_lane reads the raw lane bits: lane 0 of an
// i64x2 splat of 1e15, extracted as f64, must compare equal to the f64
// reinterpretation of the same i64 bit pattern.
WASM_SIMD_TEST_NO_LOWERING(F64x2ExtractLaneWithI64x2) {
WasmRunner<int64_t> r(execution_tier, lower_simd);
BUILD(r, WASM_IF_ELSE_L(
WASM_F64_EQ(WASM_SIMD_F64x2_EXTRACT_LANE(
0, WASM_SIMD_I64x2_SPLAT(WASM_I64V(1e15))),
WASM_F64_REINTERPRET_I64(WASM_I64V(1e15))),
WASM_I64V(1), WASM_I64V(0)));
// The wasm function returns 1 iff the two f64 values compared equal.
CHECK_EQ(1, r.Call());
}
// The converse direction: lane 0 of an f64x2 splat of 1e15, extracted as
// i64, must equal the i64 reinterpretation of the same f64 bit pattern.
WASM_SIMD_TEST_NO_LOWERING(I64x2ExtractWithF64x2) {
WasmRunner<int64_t> r(execution_tier, lower_simd);
BUILD(r, WASM_IF_ELSE_L(
WASM_I64_EQ(WASM_SIMD_I64x2_EXTRACT_LANE(
0, WASM_SIMD_F64x2_SPLAT(WASM_F64(1e15))),
WASM_I64_REINTERPRET_F64(WASM_F64(1e15))),
WASM_I64V(1), WASM_I64V(0)));
// The wasm function returns 1 iff the two i64 values compared equal.
CHECK_EQ(1, r.Call());
}
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
bool IsExtreme(double x) { bool IsExtreme(double x) {
double abs_x = std::fabs(x); double abs_x = std::fabs(x);
const double kSmallFloatThreshold = 1.0e-298; const double kSmallFloatThreshold = 1.0e-298;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment.