Commit 8c52f66f authored by Zhi An Ng, committed by Commit Bot

[wasm-simd][liftoff][x64] Implement i64x2 widen i32x4

Implement these 4 instructions for x64 Liftoff:
- i64x2.widen_low_i32x4_s
- i64x2.widen_high_i32x4_s
- i64x2.widen_low_i32x4_u
- i64x2.widen_high_i32x4_u

We move the codegen for the *high* instructions into macro-assembler to
allow sharing of the optimized code sequence between TurboFan and
Liftoff.

Bug: v8:10972
Change-Id: I900b24f96ee55784220656cb2664283b03c32110
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2621862
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72055}
parent 88d48c53
...@@ -2118,6 +2118,28 @@ void TurboAssembler::I16x8UConvertI8x16High(XMMRegister dst, XMMRegister src) { ...@@ -2118,6 +2118,28 @@ void TurboAssembler::I16x8UConvertI8x16High(XMMRegister dst, XMMRegister src) {
} }
} }
// Sign-extends the two high 32-bit lanes of |src| into the two 64-bit lanes of
// |dst|. |dst| may alias |src|.
void TurboAssembler::I64x2SConvertI32x4High(XMMRegister dst, XMMRegister src) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope avx_scope(this, AVX);
    // Move the high quadword of src into the low quadword of dst, then widen
    // the two low dwords with sign extension.
    vpunpckhqdq(dst, src, src);
    vpmovsxdq(dst, dst);
  } else {
    // pmovsxdq is an SSE4.1 instruction, so it must be emitted under an
    // SSE4_1 feature scope (the code this helper replaced went through the
    // Pmovsxdq macro rather than the raw instruction).
    CpuFeatureScope sse_scope(this, SSE4_1);
    // 0xEE selects dwords {2, 3, 2, 3}: the high half of src lands in the low
    // half of dst, ready to be widened.
    pshufd(dst, src, 0xEE);
    pmovsxdq(dst, dst);
  }
}
// Zero-extends the two high 32-bit lanes of |src| into the two 64-bit lanes of
// |dst|. Clobbers kScratchDoubleReg on the AVX path.
void TurboAssembler::I64x2UConvertI32x4High(XMMRegister dst, XMMRegister src) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope avx_scope(this, AVX);
    // Interleaving the high dwords of src with zeros yields the zero-extended
    // 64-bit lanes directly.
    vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
    vpunpckhdq(dst, src, kScratchDoubleReg);
  } else {
    // pmovzxdq is an SSE4.1 instruction; keep the explicit feature scope that
    // the code-generator version of this sequence used.
    CpuFeatureScope sse_scope(this, SSE4_1);
    // 0xEE selects dwords {2, 3, 2, 3}: the high half of src lands in the low
    // half of dst, ready to be widened.
    pshufd(dst, src, 0xEE);
    pmovzxdq(dst, dst);
  }
}
// 1. Unpack src0, src0 into even-number elements of scratch. // 1. Unpack src0, src0 into even-number elements of scratch.
// 2. Unpack src1, src1 into even-number elements of dst. // 2. Unpack src1, src1 into even-number elements of dst.
// 3. Multiply 1. with 2. // 3. Multiply 1. with 2.
......
...@@ -577,6 +577,8 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { ...@@ -577,6 +577,8 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void I16x8UConvertI8x16High(XMMRegister dst, XMMRegister src); void I16x8UConvertI8x16High(XMMRegister dst, XMMRegister src);
void I32x4SConvertI16x8High(XMMRegister dst, XMMRegister src); void I32x4SConvertI16x8High(XMMRegister dst, XMMRegister src);
void I32x4UConvertI16x8High(XMMRegister dst, XMMRegister src); void I32x4UConvertI16x8High(XMMRegister dst, XMMRegister src);
// Widen the two high 32-bit lanes of src into 64-bit lanes of dst. The S
// variant uses sign-extending moves, the U variant zero-extending ones; the U
// variant uses kScratchDoubleReg as a zero source when AVX is available.
void I64x2SConvertI32x4High(XMMRegister dst, XMMRegister src);
void I64x2UConvertI32x4High(XMMRegister dst, XMMRegister src);
// Requires dst == mask when AVX is not supported. // Requires dst == mask when AVX is not supported.
void S128Select(XMMRegister dst, XMMRegister mask, XMMRegister src1, void S128Select(XMMRegister dst, XMMRegister mask, XMMRegister src1,
......
...@@ -2881,15 +2881,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2881,15 +2881,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; break;
} }
case kX64I64x2SConvertI32x4High: { case kX64I64x2SConvertI32x4High: {
XMMRegister dst = i.OutputSimd128Register(); __ I64x2SConvertI32x4High(i.OutputSimd128Register(),
XMMRegister src = i.InputSimd128Register(0); i.InputSimd128Register(0));
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpunpckhqdq(dst, src, src);
} else {
__ pshufd(dst, src, 0xEE);
}
__ Pmovsxdq(dst, dst);
break; break;
} }
case kX64I64x2UConvertI32x4Low: { case kX64I64x2UConvertI32x4Low: {
...@@ -2897,17 +2890,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2897,17 +2890,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; break;
} }
case kX64I64x2UConvertI32x4High: { case kX64I64x2UConvertI32x4High: {
XMMRegister dst = i.OutputSimd128Register(); __ I64x2UConvertI32x4High(i.OutputSimd128Register(),
XMMRegister src = i.InputSimd128Register(0); i.InputSimd128Register(0));
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
__ vpunpckhdq(dst, src, kScratchDoubleReg);
} else {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
__ pshufd(dst, src, 0xEE);
__ pmovzxdq(dst, dst);
}
break; break;
} }
case kX64I32x4Splat: { case kX64I32x4Splat: {
......
...@@ -2893,6 +2893,26 @@ void LiftoffAssembler::emit_i64x2_bitmask(LiftoffRegister dst, ...@@ -2893,6 +2893,26 @@ void LiftoffAssembler::emit_i64x2_bitmask(LiftoffRegister dst,
bailout(kSimd, "i64x2_bitmask"); bailout(kSimd, "i64x2_bitmask");
} }
// Stub: emits no code for this operation in this backend and instead calls
// bailout() with reason kSimd. |dst| and |src| are unused.
void LiftoffAssembler::emit_i64x2_sconvert_i32x4_low(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "i64x2_sconvert_i32x4_low");
}
// Stub: emits no code for this operation in this backend and instead calls
// bailout() with reason kSimd. |dst| and |src| are unused.
void LiftoffAssembler::emit_i64x2_sconvert_i32x4_high(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "i64x2_sconvert_i32x4_high");
}
// Stub: emits no code for this operation in this backend and instead calls
// bailout() with reason kSimd. |dst| and |src| are unused.
void LiftoffAssembler::emit_i64x2_uconvert_i32x4_low(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "i64x2_uconvert_i32x4_low");
}
// Stub: emits no code for this operation in this backend and instead calls
// bailout() with reason kSimd. |dst| and |src| are unused.
void LiftoffAssembler::emit_i64x2_uconvert_i32x4_high(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "i64x2_uconvert_i32x4_high");
}
void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst, void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
vdup(Neon32, liftoff::GetSimd128Register(dst), src.gp()); vdup(Neon32, liftoff::GetSimd128Register(dst), src.gp());
......
...@@ -2017,6 +2017,26 @@ void LiftoffAssembler::emit_i64x2_bitmask(LiftoffRegister dst, ...@@ -2017,6 +2017,26 @@ void LiftoffAssembler::emit_i64x2_bitmask(LiftoffRegister dst,
I64x2BitMask(dst.gp(), src.fp()); I64x2BitMask(dst.gp(), src.fp());
} }
// Stub: emits no code for this operation in this backend and instead calls
// bailout() with reason kSimd. |dst| and |src| are unused.
void LiftoffAssembler::emit_i64x2_sconvert_i32x4_low(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "i64x2_sconvert_i32x4_low");
}
// Stub: emits no code for this operation in this backend and instead calls
// bailout() with reason kSimd. |dst| and |src| are unused.
void LiftoffAssembler::emit_i64x2_sconvert_i32x4_high(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "i64x2_sconvert_i32x4_high");
}
// Stub: emits no code for this operation in this backend and instead calls
// bailout() with reason kSimd. |dst| and |src| are unused.
void LiftoffAssembler::emit_i64x2_uconvert_i32x4_low(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "i64x2_uconvert_i32x4_low");
}
// Stub: emits no code for this operation in this backend and instead calls
// bailout() with reason kSimd. |dst| and |src| are unused.
void LiftoffAssembler::emit_i64x2_uconvert_i32x4_high(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "i64x2_uconvert_i32x4_high");
}
void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst, void LiftoffAssembler::emit_i32x4_splat(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
Dup(dst.fp().V4S(), src.gp().W()); Dup(dst.fp().V4S(), src.gp().W());
......
...@@ -3903,6 +3903,26 @@ void LiftoffAssembler::emit_i64x2_bitmask(LiftoffRegister dst, ...@@ -3903,6 +3903,26 @@ void LiftoffAssembler::emit_i64x2_bitmask(LiftoffRegister dst,
Movmskpd(dst.gp(), src.fp()); Movmskpd(dst.gp(), src.fp());
} }
// Stub: emits no code for this operation in this backend and instead calls
// bailout() with reason kSimd. |dst| and |src| are unused.
void LiftoffAssembler::emit_i64x2_sconvert_i32x4_low(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "i64x2_sconvert_i32x4_low");
}
// Stub: emits no code for this operation in this backend and instead calls
// bailout() with reason kSimd. |dst| and |src| are unused.
void LiftoffAssembler::emit_i64x2_sconvert_i32x4_high(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "i64x2_sconvert_i32x4_high");
}
// Stub: emits no code for this operation in this backend and instead calls
// bailout() with reason kSimd. |dst| and |src| are unused.
void LiftoffAssembler::emit_i64x2_uconvert_i32x4_low(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "i64x2_uconvert_i32x4_low");
}
// Stub: emits no code for this operation in this backend and instead calls
// bailout() with reason kSimd. |dst| and |src| are unused.
void LiftoffAssembler::emit_i64x2_uconvert_i32x4_high(LiftoffRegister dst,
LiftoffRegister src) {
bailout(kSimd, "i64x2_uconvert_i32x4_high");
}
void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst, void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
if (dst.fp() == src.fp()) { if (dst.fp() == src.fp()) {
......
...@@ -1112,6 +1112,14 @@ class LiftoffAssembler : public TurboAssembler { ...@@ -1112,6 +1112,14 @@ class LiftoffAssembler : public TurboAssembler {
LiftoffRegister src1, LiftoffRegister src1,
LiftoffRegister src2); LiftoffRegister src2);
inline void emit_i64x2_bitmask(LiftoffRegister dst, LiftoffRegister src); inline void emit_i64x2_bitmask(LiftoffRegister dst, LiftoffRegister src);
// Emitters for the i32x4 -> i64x2 conversions (signed/unsigned, low/high).
// LiftoffCompiler dispatches the kExprI64x2{S,U}ConvertI32x4{Low,High}
// opcodes to these as kS128 -> kS128 unary operations.
inline void emit_i64x2_sconvert_i32x4_low(LiftoffRegister dst,
LiftoffRegister src);
inline void emit_i64x2_sconvert_i32x4_high(LiftoffRegister dst,
LiftoffRegister src);
inline void emit_i64x2_uconvert_i32x4_low(LiftoffRegister dst,
LiftoffRegister src);
inline void emit_i64x2_uconvert_i32x4_high(LiftoffRegister dst,
LiftoffRegister src);
inline void emit_f32x4_abs(LiftoffRegister dst, LiftoffRegister src); inline void emit_f32x4_abs(LiftoffRegister dst, LiftoffRegister src);
inline void emit_f32x4_neg(LiftoffRegister dst, LiftoffRegister src); inline void emit_f32x4_neg(LiftoffRegister dst, LiftoffRegister src);
inline void emit_f32x4_sqrt(LiftoffRegister dst, LiftoffRegister src); inline void emit_f32x4_sqrt(LiftoffRegister dst, LiftoffRegister src);
......
...@@ -3012,6 +3012,18 @@ class LiftoffCompiler { ...@@ -3012,6 +3012,18 @@ class LiftoffCompiler {
&LiftoffAssembler::emit_i64x2_extmul_high_i32x4_u); &LiftoffAssembler::emit_i64x2_extmul_high_i32x4_u);
case wasm::kExprI64x2BitMask: case wasm::kExprI64x2BitMask:
return EmitUnOp<kS128, kI32>(&LiftoffAssembler::emit_i64x2_bitmask); return EmitUnOp<kS128, kI32>(&LiftoffAssembler::emit_i64x2_bitmask);
case wasm::kExprI64x2SConvertI32x4Low:
return EmitUnOp<kS128, kS128>(
&LiftoffAssembler::emit_i64x2_sconvert_i32x4_low);
case wasm::kExprI64x2SConvertI32x4High:
return EmitUnOp<kS128, kS128>(
&LiftoffAssembler::emit_i64x2_sconvert_i32x4_high);
case wasm::kExprI64x2UConvertI32x4Low:
return EmitUnOp<kS128, kS128>(
&LiftoffAssembler::emit_i64x2_uconvert_i32x4_low);
case wasm::kExprI64x2UConvertI32x4High:
return EmitUnOp<kS128, kS128>(
&LiftoffAssembler::emit_i64x2_uconvert_i32x4_high);
case wasm::kExprF32x4Abs: case wasm::kExprF32x4Abs:
return EmitUnOp<kS128, kS128>(&LiftoffAssembler::emit_f32x4_abs); return EmitUnOp<kS128, kS128>(&LiftoffAssembler::emit_f32x4_abs);
case wasm::kExprF32x4Neg: case wasm::kExprF32x4Neg:
......
...@@ -3465,6 +3465,26 @@ void LiftoffAssembler::emit_i64x2_bitmask(LiftoffRegister dst, ...@@ -3465,6 +3465,26 @@ void LiftoffAssembler::emit_i64x2_bitmask(LiftoffRegister dst,
Movmskpd(dst.gp(), src.fp()); Movmskpd(dst.gp(), src.fp());
} }
// Emits Pmovsxdq from src's FP/SIMD register into dst's.
// NOTE(review): Pmovsxdq is presumably the macro-assembler wrapper for
// pmovsxdq/vpmovsxdq (sign-extend the two low 32-bit lanes) - confirm.
void LiftoffAssembler::emit_i64x2_sconvert_i32x4_low(LiftoffRegister dst,
LiftoffRegister src) {
Pmovsxdq(dst.fp(), src.fp());
}
// Delegates to TurboAssembler::I64x2SConvertI32x4High, which sign-extends the
// two high 32-bit lanes of src into dst.
void LiftoffAssembler::emit_i64x2_sconvert_i32x4_high(LiftoffRegister dst,
LiftoffRegister src) {
I64x2SConvertI32x4High(dst.fp(), src.fp());
}
// Emits Pmovzxdq from src's FP/SIMD register into dst's.
// NOTE(review): Pmovzxdq is presumably the macro-assembler wrapper for
// pmovzxdq/vpmovzxdq (zero-extend the two low 32-bit lanes) - confirm.
void LiftoffAssembler::emit_i64x2_uconvert_i32x4_low(LiftoffRegister dst,
LiftoffRegister src) {
Pmovzxdq(dst.fp(), src.fp());
}
// Delegates to TurboAssembler::I64x2UConvertI32x4High, which zero-extends the
// two high 32-bit lanes of src into dst (using kScratchDoubleReg under AVX).
void LiftoffAssembler::emit_i64x2_uconvert_i32x4_high(LiftoffRegister dst,
LiftoffRegister src) {
I64x2UConvertI32x4High(dst.fp(), src.fp());
}
void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst, void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst,
LiftoffRegister src) { LiftoffRegister src) {
if (dst.fp() == src.fp()) { if (dst.fp() == src.fp()) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment