Commit 1215f2a8 authored by Zhi An Ng's avatar Zhi An Ng Committed by Commit Bot

[wasm-simd][arm64][liftoff] Implement extended multiply

The codegen is the same as on TurboFan, using Smull, Umull, Smull2, and
Umull2. The rest of the changes are adding skeleton functions to the
different archs, which bailout for now. Actual codegen will come in
future patches.

Bug: v8:11262
Change-Id: I06f0b018b85b5cca29382ab730510959e1a81ab6
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2589064Reviewed-by: 's avatarClemens Backes <clemensb@chromium.org>
Reviewed-by: 's avatarDeepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#71859}
parent 20feaf9a
......@@ -2851,6 +2851,30 @@ void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
vmlal(NeonU32, dst_neon, tmp1.low(), tmp2.low());
}
void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i64x2_extmul_low_i32x4_s unsupported");
}
void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i64x2_extmul_low_i32x4_u unsupported");
}
void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i64x2_extmul_high_i32x4_s unsupported");
}
void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i64x2_extmul_high_i32x4_u unsupported");
}
void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
LiftoffRegister src) {
vdup(Neon32, liftoff::GetSimd128Register(dst), src.gp());
......@@ -3016,6 +3040,30 @@ void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst,
vpadd(Neon32, dest.high(), scratch.low(), scratch.high());
}
void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i32x4_extmul_low_i16x8_s unsupported");
}
void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i32x4_extmul_low_i16x8_u unsupported");
}
void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i32x4_extmul_high_i16x8_s unsupported");
}
void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i32x4_extmul_high_i16x8_u unsupported");
}
void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
LiftoffRegister src) {
vdup(Neon16, liftoff::GetSimd128Register(dst), src.gp());
......@@ -3201,6 +3249,30 @@ void LiftoffAssembler::emit_i16x8_replace_lane(LiftoffRegister dst,
imm_lane_idx);
}
void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i16x8.extmul_low_i8x16_s unsupported");
}
void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i16x8.extmul_low_i8x16_u unsupported");
}
void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i16x8.extmul_high_i8x16_s unsupported");
}
void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i16x8_extmul_high_i8x16_u unsupported");
}
void LiftoffAssembler::emit_i8x16_shuffle(LiftoffRegister dst,
LiftoffRegister lhs,
LiftoffRegister rhs,
......
......@@ -1981,6 +1981,30 @@ void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
Add(dst.fp().V2D(), dst.fp().V2D(), tmp1.V2D());
}
void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
Smull(dst.fp().V2D(), src1.fp().V2S(), src2.fp().V2S());
}
void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
Umull(dst.fp().V2D(), src1.fp().V2S(), src2.fp().V2S());
}
void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
Smull2(dst.fp().V2D(), src1.fp().V4S(), src2.fp().V4S());
}
void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
Umull2(dst.fp().V2D(), src1.fp().V4S(), src2.fp().V4S());
}
void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
LiftoffRegister src) {
Dup(dst.fp().V4S(), src.gp().W());
......@@ -2122,6 +2146,30 @@ void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst,
Addp(dst.fp().V4S(), tmp1, tmp2);
}
void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
Smull(dst.fp().V4S(), src1.fp().V4H(), src2.fp().V4H());
}
void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
Umull(dst.fp().V4S(), src1.fp().V4H(), src2.fp().V4H());
}
void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
Smull2(dst.fp().V4S(), src1.fp().V8H(), src2.fp().V8H());
}
void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
Umull2(dst.fp().V4S(), src1.fp().V8H(), src2.fp().V8H());
}
void LiftoffAssembler::emit_i16x8_splat(LiftoffRegister dst,
LiftoffRegister src) {
Dup(dst.fp().V8H(), src.gp().W());
......@@ -2800,6 +2848,30 @@ void LiftoffAssembler::emit_i16x8_abs(LiftoffRegister dst,
Abs(dst.fp().V8H(), src.fp().V8H());
}
void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
Smull(dst.fp().V8H(), src1.fp().V8B(), src2.fp().V8B());
}
void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
Umull(dst.fp().V8H(), src1.fp().V8B(), src2.fp().V8B());
}
void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
Smull2(dst.fp().V8H(), src1.fp().V16B(), src2.fp().V16B());
}
void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
Umull2(dst.fp().V8H(), src1.fp().V16B(), src2.fp().V16B());
}
void LiftoffAssembler::emit_i32x4_abs(LiftoffRegister dst,
LiftoffRegister src) {
Abs(dst.fp().V4S(), src.fp().V4S());
......
......@@ -3539,6 +3539,30 @@ void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst,
this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
}
void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i16x8.extmul_low_i8x16_s unsupported");
}
void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i16x8.extmul_low_i8x16_u unsupported");
}
void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i16x8.extmul_high_i8x16_s unsupported");
}
void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i16x8_extmul_high_i8x16_u unsupported");
}
void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
LiftoffRegister src) {
if (dst.fp() == src.fp()) {
......@@ -3656,6 +3680,30 @@ void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst,
this, dst, lhs, rhs);
}
void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i32x4_extmul_low_i16x8_s unsupported");
}
void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i32x4_extmul_low_i16x8_u unsupported");
}
void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i32x4_extmul_high_i16x8_s unsupported");
}
void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i32x4_extmul_high_i16x8_u unsupported");
}
void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst,
LiftoffRegister src) {
DoubleRegister reg =
......@@ -3780,6 +3828,30 @@ void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
Paddq(dst.fp(), dst.fp(), tmp2.fp());
}
void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i64x2_extmul_low_i32x4_s unsupported");
}
void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i64x2_extmul_low_i32x4_u unsupported");
}
void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i64x2_extmul_high_i32x4_s unsupported");
}
void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i64x2_extmul_high_i32x4_u unsupported");
}
void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst,
LiftoffRegister src) {
if (dst.fp() == src.fp()) {
......
......@@ -1020,6 +1020,18 @@ class LiftoffAssembler : public TurboAssembler {
LiftoffRegister rhs);
inline void emit_i16x8_max_u(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_i16x8_extmul_low_i8x16_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2);
inline void emit_i16x8_extmul_low_i8x16_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2);
inline void emit_i16x8_extmul_high_i8x16_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2);
inline void emit_i16x8_extmul_high_i8x16_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2);
inline void emit_i32x4_neg(LiftoffRegister dst, LiftoffRegister src);
inline void emit_v32x4_anytrue(LiftoffRegister dst, LiftoffRegister src);
inline void emit_v32x4_alltrue(LiftoffRegister dst, LiftoffRegister src);
......@@ -1052,6 +1064,18 @@ class LiftoffAssembler : public TurboAssembler {
LiftoffRegister rhs);
inline void emit_i32x4_dot_i16x8_s(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_i32x4_extmul_low_i16x8_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2);
inline void emit_i32x4_extmul_low_i16x8_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2);
inline void emit_i32x4_extmul_high_i16x8_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2);
inline void emit_i32x4_extmul_high_i16x8_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2);
inline void emit_i64x2_neg(LiftoffRegister dst, LiftoffRegister src);
inline void emit_i64x2_shl(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
......@@ -1071,6 +1095,18 @@ class LiftoffAssembler : public TurboAssembler {
LiftoffRegister rhs);
inline void emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_i64x2_extmul_low_i32x4_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2);
inline void emit_i64x2_extmul_low_i32x4_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2);
inline void emit_i64x2_extmul_high_i32x4_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2);
inline void emit_i64x2_extmul_high_i32x4_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2);
inline void emit_f32x4_abs(LiftoffRegister dst, LiftoffRegister src);
inline void emit_f32x4_neg(LiftoffRegister dst, LiftoffRegister src);
inline void emit_f32x4_sqrt(LiftoffRegister dst, LiftoffRegister src);
......
......@@ -2876,6 +2876,18 @@ class LiftoffCompiler {
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i16x8_max_s);
case wasm::kExprI16x8MaxU:
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i16x8_max_u);
case wasm::kExprI16x8ExtMulLowI8x16S:
return EmitBinOp<kS128, kS128>(
&LiftoffAssembler::emit_i16x8_extmul_low_i8x16_s);
case wasm::kExprI16x8ExtMulLowI8x16U:
return EmitBinOp<kS128, kS128>(
&LiftoffAssembler::emit_i16x8_extmul_low_i8x16_u);
case wasm::kExprI16x8ExtMulHighI8x16S:
return EmitBinOp<kS128, kS128>(
&LiftoffAssembler::emit_i16x8_extmul_high_i8x16_s);
case wasm::kExprI16x8ExtMulHighI8x16U:
return EmitBinOp<kS128, kS128>(
&LiftoffAssembler::emit_i16x8_extmul_high_i8x16_u);
case wasm::kExprI32x4Neg:
return EmitUnOp<kS128, kS128>(&LiftoffAssembler::emit_i32x4_neg);
case wasm::kExprV32x4AnyTrue:
......@@ -2910,6 +2922,18 @@ class LiftoffCompiler {
case wasm::kExprI32x4DotI16x8S:
return EmitBinOp<kS128, kS128>(
&LiftoffAssembler::emit_i32x4_dot_i16x8_s);
case wasm::kExprI32x4ExtMulLowI16x8S:
return EmitBinOp<kS128, kS128>(
&LiftoffAssembler::emit_i32x4_extmul_low_i16x8_s);
case wasm::kExprI32x4ExtMulLowI16x8U:
return EmitBinOp<kS128, kS128>(
&LiftoffAssembler::emit_i32x4_extmul_low_i16x8_u);
case wasm::kExprI32x4ExtMulHighI16x8S:
return EmitBinOp<kS128, kS128>(
&LiftoffAssembler::emit_i32x4_extmul_high_i16x8_s);
case wasm::kExprI32x4ExtMulHighI16x8U:
return EmitBinOp<kS128, kS128>(
&LiftoffAssembler::emit_i32x4_extmul_high_i16x8_u);
case wasm::kExprI64x2Neg:
return EmitUnOp<kS128, kS128>(&LiftoffAssembler::emit_i64x2_neg);
case wasm::kExprI64x2Shl:
......@@ -2927,6 +2951,18 @@ class LiftoffCompiler {
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i64x2_sub);
case wasm::kExprI64x2Mul:
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i64x2_mul);
case wasm::kExprI64x2ExtMulLowI32x4S:
return EmitBinOp<kS128, kS128>(
&LiftoffAssembler::emit_i64x2_extmul_low_i32x4_s);
case wasm::kExprI64x2ExtMulLowI32x4U:
return EmitBinOp<kS128, kS128>(
&LiftoffAssembler::emit_i64x2_extmul_low_i32x4_u);
case wasm::kExprI64x2ExtMulHighI32x4S:
return EmitBinOp<kS128, kS128>(
&LiftoffAssembler::emit_i64x2_extmul_high_i32x4_s);
case wasm::kExprI64x2ExtMulHighI32x4U:
return EmitBinOp<kS128, kS128>(
&LiftoffAssembler::emit_i64x2_extmul_high_i32x4_u);
case wasm::kExprF32x4Abs:
return EmitUnOp<kS128, kS128>(&LiftoffAssembler::emit_f32x4_abs);
case wasm::kExprF32x4Neg:
......
......@@ -3156,6 +3156,30 @@ void LiftoffAssembler::emit_i16x8_max_u(LiftoffRegister dst,
this, dst, lhs, rhs, base::Optional<CpuFeature>(SSE4_1));
}
void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i16x8.extmul_low_i8x16_s unsupported");
}
void LiftoffAssembler::emit_i16x8_extmul_low_i8x16_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i16x8.extmul_low_i8x16_u unsupported");
}
void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i16x8.extmul_high_i8x16_s unsupported");
}
void LiftoffAssembler::emit_i16x8_extmul_high_i8x16_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i16x8_extmul_high_i8x16_u unsupported");
}
void LiftoffAssembler::emit_i32x4_neg(LiftoffRegister dst,
LiftoffRegister src) {
if (dst.fp() == src.fp()) {
......@@ -3273,6 +3297,30 @@ void LiftoffAssembler::emit_i32x4_dot_i16x8_s(LiftoffRegister dst,
this, dst, lhs, rhs);
}
void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i32x4_extmul_low_i16x8_s unsupported");
}
void LiftoffAssembler::emit_i32x4_extmul_low_i16x8_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i32x4_extmul_low_i16x8_u unsupported");
}
void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i32x4_extmul_high_i16x8_s unsupported");
}
void LiftoffAssembler::emit_i32x4_extmul_high_i16x8_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i32x4_extmul_high_i16x8_u unsupported");
}
void LiftoffAssembler::emit_i64x2_neg(LiftoffRegister dst,
LiftoffRegister src) {
DoubleRegister reg = dst.fp() == src.fp() ? kScratchDoubleReg : dst.fp();
......@@ -3362,6 +3410,30 @@ void LiftoffAssembler::emit_i64x2_mul(LiftoffRegister dst, LiftoffRegister lhs,
Paddq(dst.fp(), tmp2.fp());
}
void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i64x2_extmul_low_i32x4_s unsupported");
}
void LiftoffAssembler::emit_i64x2_extmul_low_i32x4_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i64x2_extmul_low_i32x4_u unsupported");
}
void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_s(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i64x2_extmul_high_i32x4_s unsupported");
}
void LiftoffAssembler::emit_i64x2_extmul_high_i32x4_u(LiftoffRegister dst,
LiftoffRegister src1,
LiftoffRegister src2) {
bailout(kSimd, "i64x2_extmul_high_i32x4_u unsupported");
}
void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst,
LiftoffRegister src) {
if (dst.fp() == src.fp()) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment