Commit e61272a4 authored by Zhi An Ng, committed by Commit Bot

[wasm-simd][liftoff][ia32] Implement i64x2 widen i32x4

Implement these 4 instructions for ia32 Liftoff:
- i64x2.widen_low_i32x4_s
- i64x2.widen_high_i32x4_s
- i64x2.widen_low_i32x4_u
- i64x2.widen_high_i32x4_u
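
For reference, the lane semantics of the four ops, as a minimal scalar
sketch (I32x4/I64x2 and WidenS/WidenU are illustrative stand-ins, not
V8 types):

  #include <cstdint>

  struct I32x4 { int32_t lane[4]; };
  struct I64x2 { int64_t lane[2]; };

  // base 0 selects the low lanes {0,1}; base 2 the high lanes {2,3}.
  I64x2 WidenS(I32x4 v, int base) {  // _s variants: sign-extend
    return {{v.lane[base], v.lane[base + 1]}};
  }
  I64x2 WidenU(I32x4 v, int base) {  // _u variants: zero-extend
    return {{static_cast<uint32_t>(v.lane[base]),
             static_cast<uint32_t>(v.lane[base + 1])}};
  }

e.g. i64x2.widen_high_i32x4_u(v) computes WidenU(v, 2).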

We move the codegen for the *high* instructions into the
macro-assembler so that the optimized code sequence is shared between
TurboFan and Liftoff.

Bug: v8:10972
Change-Id: Ib5c6cbf6d4a39ef298298b75516f5221cb8ec249
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2621863
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72059}
parent 6f521386
@@ -738,6 +738,31 @@ void TurboAssembler::S128Select(XMMRegister dst, XMMRegister mask,
   }
 }
 
+void TurboAssembler::I64x2SConvertI32x4High(XMMRegister dst, XMMRegister src) {
+  if (CpuFeatures::IsSupported(AVX)) {
+    CpuFeatureScope avx_scope(this, AVX);
+    vpunpckhqdq(dst, src, src);
+    vpmovsxdq(dst, dst);
+  } else {
+    CpuFeatureScope sse_scope(this, SSE4_1);
+    pshufd(dst, src, 0xEE);
+    pmovsxdq(dst, dst);
+  }
+}
+
+void TurboAssembler::I64x2UConvertI32x4High(XMMRegister dst, XMMRegister src,
+                                            XMMRegister scratch) {
+  if (CpuFeatures::IsSupported(AVX)) {
+    CpuFeatureScope avx_scope(this, AVX);
+    vpxor(scratch, scratch, scratch);
+    vpunpckhdq(dst, src, scratch);
+  } else {
+    CpuFeatureScope sse_scope(this, SSE4_1);
+    pshufd(dst, src, 0xEE);
+    pmovzxdq(dst, dst);
+  }
+}
+
 void TurboAssembler::ShlPair(Register high, Register low, uint8_t shift) {
   DCHECK_GE(63, shift);
   if (shift >= 32) {
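
In the SSE paths above, pshufd with mask 0xEE selects source lanes
{2,3,2,3}, copying the two high i32 lanes into the low half before the
pmovsxdq/pmovzxdq extension; the unsigned AVX path instead interleaves
the high dwords with a zeroed scratch register, which zero-extends
without a separate pmovzxdq. A scalar model of the shuffle
(PshufdModel is an illustrative name, not V8 code):

  #include <cstdint>

  // Each 2-bit field of imm picks one source lane:
  // 0xEE == 0b11101110 -> dst = {src[2], src[3], src[2], src[3]}.
  void PshufdModel(int32_t dst[4], const int32_t src[4], uint8_t imm) {
    for (int i = 0; i < 4; ++i) dst[i] = src[(imm >> (2 * i)) & 3];
  }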

@@ -625,6 +625,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
   // Requires dst == mask when AVX is not supported.
   void S128Select(XMMRegister dst, XMMRegister mask, XMMRegister src1,
                   XMMRegister src2, XMMRegister scratch);
+  void I64x2SConvertI32x4High(XMMRegister dst, XMMRegister src);
+  void I64x2UConvertI32x4High(XMMRegister dst, XMMRegister src,
+                              XMMRegister scratch);
 
   void Push(Register src) { push(src); }
   void Push(Operand src) { push(src); }

@@ -2273,15 +2273,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kIA32I64x2SConvertI32x4High: {
-      XMMRegister dst = i.OutputSimd128Register();
-      XMMRegister src = i.InputSimd128Register(0);
-      if (CpuFeatures::IsSupported(AVX)) {
-        CpuFeatureScope avx_scope(tasm(), AVX);
-        __ vpunpckhqdq(dst, src, src);
-      } else {
-        __ pshufd(dst, src, 0xEE);
-      }
-      __ Pmovsxdq(dst, dst);
+      __ I64x2SConvertI32x4High(i.OutputSimd128Register(),
+                                i.InputSimd128Register(0));
       break;
     }
     case kIA32I64x2UConvertI32x4Low: {
@@ -2289,17 +2282,8 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
    case kIA32I64x2UConvertI32x4High: {
-      XMMRegister dst = i.OutputSimd128Register();
-      XMMRegister src = i.InputSimd128Register(0);
-      if (CpuFeatures::IsSupported(AVX)) {
-        CpuFeatureScope avx_scope(tasm(), AVX);
-        __ vpxor(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
-        __ vpunpckhdq(dst, src, kScratchDoubleReg);
-      } else {
-        CpuFeatureScope sse_scope(tasm(), SSE4_1);
-        __ pshufd(dst, src, 0xEE);
-        __ pmovzxdq(dst, dst);
-      }
+      __ I64x2UConvertI32x4High(i.OutputSimd128Register(),
+                                i.InputSimd128Register(0), kScratchDoubleReg);
       break;
     }
     case kIA32I8x16SignSelect: {

@@ -3911,22 +3911,22 @@ void LiftoffAssembler::emit_i64x2_bitmask(LiftoffRegister dst,
 void LiftoffAssembler::emit_i64x2_sconvert_i32x4_low(LiftoffRegister dst,
                                                      LiftoffRegister src) {
-  bailout(kSimd, "i64x2_sconvert_i32x4_low");
+  Pmovsxdq(dst.fp(), src.fp());
 }
 
 void LiftoffAssembler::emit_i64x2_sconvert_i32x4_high(LiftoffRegister dst,
                                                       LiftoffRegister src) {
-  bailout(kSimd, "i64x2_sconvert_i32x4_high");
+  I64x2SConvertI32x4High(dst.fp(), src.fp());
 }
 
 void LiftoffAssembler::emit_i64x2_uconvert_i32x4_low(LiftoffRegister dst,
                                                      LiftoffRegister src) {
-  bailout(kSimd, "i64x2_uconvert_i32x4_low");
+  Pmovzxdq(dst.fp(), src.fp());
 }
 
 void LiftoffAssembler::emit_i64x2_uconvert_i32x4_high(LiftoffRegister dst,
                                                       LiftoffRegister src) {
-  bailout(kSimd, "i64x2_uconvert_i32x4_high");
+  I64x2UConvertI32x4High(dst.fp(), src.fp(), liftoff::kScratchDoubleReg);
 }
 
 void LiftoffAssembler::emit_f32x4_abs(LiftoffRegister dst,
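
The *low* variants need no shuffle: pmovsxdq/pmovzxdq already extend
the two low i32 lanes, so Liftoff emits a single instruction. A scalar
model of both (illustrative names, not V8 code):

  #include <cstdint>

  // pmovsxdq: sign-extend the two low i32 lanes of src to i64.
  void PmovsxdqModel(int64_t dst[2], const int32_t src[4]) {
    dst[0] = src[0];  // implicit sign extension
    dst[1] = src[1];
  }

  // pmovzxdq: zero-extend the two low i32 lanes of src to i64.
  void PmovzxdqModel(int64_t dst[2], const int32_t src[4]) {
    dst[0] = static_cast<uint32_t>(src[0]);
    dst[1] = static_cast<uint32_t>(src[1]);
  }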