Commit b57a0d19 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][x64][liftoff] Implement i64x2 signed compares

Extract code sequence into macro-assembler for reuse between Liftoff and
TurboFan.

There is a bit of register-aliasing checking due to the rather strict
requirements for the code sequence depending on the CpuFeatures that are
supported.

Bug: v8:11415
Change-Id: Idbc0ca43475db5650d1747c8a741e9f11b80d8e3
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2698063
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72875}
parent 8229983a
...@@ -2484,6 +2484,63 @@ void TurboAssembler::I32x4TruncSatF64x2UZero(XMMRegister dst, XMMRegister src) { ...@@ -2484,6 +2484,63 @@ void TurboAssembler::I32x4TruncSatF64x2UZero(XMMRegister dst, XMMRegister src) {
} }
} }
// Emits dst = (src0 > src1), compared per signed 64-bit lane. The sequence is
// picked from the supported CPU features, and each one places different
// demands on the registers (enforced by DCHECKs):
//   AVX:      no aliasing requirements.
//   SSE4_2:   dst must be the same register as src0.
//   fallback: dst must differ from src0 and from src1; kScratchDoubleReg is
//             overwritten.
void TurboAssembler::I64x2GtS(XMMRegister dst, XMMRegister src0,
                              XMMRegister src1) {
  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope avx_scope(this, AVX);
    vpcmpgtq(dst, src0, src1);
    return;
  }

  if (CpuFeatures::IsSupported(SSE4_2)) {
    CpuFeatureScope sse_scope(this, SSE4_2);
    // pcmpgtq compares in place, so the left operand has to be in dst already.
    DCHECK_EQ(dst, src0);
    pcmpgtq(dst, src1);
    return;
  }

  // No 64-bit compare instruction: assemble the answer from 32-bit compares
  // and a 64-bit subtraction. Only the upper dword of each lane ends up
  // holding the final verdict; it is broadcast over the lane at the end.
  DCHECK_NE(dst, src0);
  DCHECK_NE(dst, src1);
  const XMMRegister tmp = kScratchDoubleReg;
  movdqa(dst, src1);
  movdqa(tmp, src0);
  psubq(dst, src0);    // dst = src1 - src0 per 64-bit lane.
  pcmpeqd(tmp, src1);  // tmp = (src0 == src1) per dword.
  pand(dst, tmp);      // Keep the difference only where the dwords match.
  movdqa(tmp, src0);
  pcmpgtd(tmp, src1);  // tmp = signed (src0 > src1) per dword.
  por(dst, tmp);
  // 0xF5 selects dwords [1, 1, 3, 3]: copy each lane's upper dword over its
  // lower dword.
  pshufd(dst, dst, 0xF5);
}
// Emits dst = (src0 >= src1), compared per signed 64-bit lane, computed as
// NOT (src1 > src0). kScratchDoubleReg is overwritten on every path (it holds
// the all-ones mask used for the final inversion). Register demands, enforced
// by DCHECKs:
//   AVX:      no aliasing requirements.
//   SSE4_2:   dst must differ from src0 (dst may be src1).
//   fallback: dst must differ from src0 and from src1.
void TurboAssembler::I64x2GeS(XMMRegister dst, XMMRegister src0,
                              XMMRegister src1) {
  const XMMRegister tmp = kScratchDoubleReg;

  if (CpuFeatures::IsSupported(AVX)) {
    CpuFeatureScope avx_scope(this, AVX);
    vpcmpgtq(dst, src1, src0);
    vpcmpeqd(tmp, tmp, tmp);  // tmp = all ones.
    vpxor(dst, dst, tmp);     // Invert: !(src1 > src0).
    return;
  }

  if (CpuFeatures::IsSupported(SSE4_2)) {
    CpuFeatureScope sse_scope(this, SSE4_2);
    // src0 is read after dst has been written, so they must not alias.
    DCHECK_NE(dst, src0);
    if (dst != src1) {
      movdqa(dst, src1);
    }
    pcmpgtq(dst, src0);
    pcmpeqd(tmp, tmp);  // tmp = all ones.
    pxor(dst, tmp);     // Invert: !(src1 > src0).
    return;
  }

  // No 64-bit compare instruction: build (src1 > src0) from 32-bit compares
  // and a 64-bit subtraction, then invert it.
  DCHECK_NE(dst, src0);
  DCHECK_NE(dst, src1);
  movdqa(dst, src0);
  movdqa(tmp, src1);
  psubq(dst, src1);    // dst = src0 - src1 per 64-bit lane.
  pcmpeqd(tmp, src0);  // tmp = (src1 == src0) per dword.
  pand(dst, tmp);      // Keep the difference only where the dwords match.
  movdqa(tmp, src1);
  pcmpgtd(tmp, src0);  // tmp = signed (src1 > src0) per dword.
  por(dst, tmp);
  // 0xF5 selects dwords [1, 1, 3, 3]: copy each lane's upper dword over its
  // lower dword.
  pshufd(dst, dst, 0xF5);
  pcmpeqd(tmp, tmp);  // tmp = all ones.
  pxor(dst, tmp);     // Invert: !(src1 > src0).
}
void TurboAssembler::Abspd(XMMRegister dst) { void TurboAssembler::Abspd(XMMRegister dst) {
Andps(dst, ExternalReferenceAsOperand( Andps(dst, ExternalReferenceAsOperand(
ExternalReference::address_of_double_abs_constant())); ExternalReference::address_of_double_abs_constant()));
......
...@@ -631,6 +631,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { ...@@ -631,6 +631,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
void I32x4TruncSatF64x2SZero(XMMRegister dst, XMMRegister src); void I32x4TruncSatF64x2SZero(XMMRegister dst, XMMRegister src);
void I32x4TruncSatF64x2UZero(XMMRegister dst, XMMRegister src); void I32x4TruncSatF64x2UZero(XMMRegister dst, XMMRegister src);
// Emits dst = (src0 > src1) per signed 64-bit lane. Aliasing requirements
// depend on CPU features: none with AVX; dst == src0 with SSE4_2; otherwise
// dst must differ from both sources and kScratchDoubleReg is overwritten.
void I64x2GtS(XMMRegister dst, XMMRegister src0, XMMRegister src1);
// Emits dst = (src0 >= src1) per signed 64-bit lane. Overwrites
// kScratchDoubleReg. Aliasing requirements depend on CPU features: none with
// AVX; dst != src0 with SSE4_2; otherwise dst must differ from both sources.
void I64x2GeS(XMMRegister dst, XMMRegister src0, XMMRegister src1);
void Abspd(XMMRegister dst); void Abspd(XMMRegister dst);
void Negpd(XMMRegister dst); void Negpd(XMMRegister dst);
......
...@@ -2867,66 +2867,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2867,66 +2867,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; break;
} }
case kX64I64x2GtS: { case kX64I64x2GtS: {
XMMRegister dst = i.OutputSimd128Register(); __ I64x2GtS(i.OutputSimd128Register(), i.InputSimd128Register(0),
XMMRegister src0 = i.InputSimd128Register(0); i.InputSimd128Register(1));
XMMRegister src1 = i.InputSimd128Register(1);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpcmpgtq(dst, src0, src1);
} else if (CpuFeatures::IsSupported(SSE4_2)) {
CpuFeatureScope sse_scope(tasm(), SSE4_2);
DCHECK_EQ(dst, src0);
__ pcmpgtq(dst, src1);
} else {
DCHECK_NE(dst, src0);
DCHECK_NE(dst, src1);
__ movdqa(dst, src1);
__ movdqa(kScratchDoubleReg, src0);
__ psubq(dst, src0);
__ pcmpeqd(kScratchDoubleReg, src1);
__ pand(dst, kScratchDoubleReg);
__ movdqa(kScratchDoubleReg, src0);
__ pcmpgtd(kScratchDoubleReg, src1);
__ por(dst, kScratchDoubleReg);
__ pshufd(dst, dst, 0xF5);
}
break; break;
} }
case kX64I64x2GeS: { case kX64I64x2GeS: {
XMMRegister dst = i.OutputSimd128Register(); __ I64x2GeS(i.OutputSimd128Register(), i.InputSimd128Register(0),
XMMRegister src0 = i.InputSimd128Register(0); i.InputSimd128Register(1));
XMMRegister src1 = i.InputSimd128Register(1);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vpcmpgtq(dst, src1, src0);
__ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
__ vpxor(dst, dst, kScratchDoubleReg);
} else if (CpuFeatures::IsSupported(SSE4_2)) {
CpuFeatureScope sse_scope(tasm(), SSE4_2);
DCHECK_NE(dst, src0);
if (dst != src1) {
__ movdqa(dst, src1);
}
__ pcmpgtq(dst, src0);
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ pxor(dst, kScratchDoubleReg);
} else {
DCHECK_NE(dst, src0);
DCHECK_NE(dst, src1);
__ movdqa(dst, src0);
__ movdqa(kScratchDoubleReg, src1);
__ psubq(dst, src1);
__ pcmpeqd(kScratchDoubleReg, src0);
__ pand(dst, kScratchDoubleReg);
__ movdqa(kScratchDoubleReg, src1);
__ pcmpgtd(kScratchDoubleReg, src0);
__ por(dst, kScratchDoubleReg);
__ pshufd(dst, dst, 0xF5);
__ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
__ pxor(dst, kScratchDoubleReg);
}
break; break;
} }
case kX64I64x2ShrU: { case kX64I64x2ShrU: {
......
...@@ -3751,6 +3751,16 @@ void LiftoffAssembler::emit_i64x2_ne(LiftoffRegister dst, LiftoffRegister lhs, ...@@ -3751,6 +3751,16 @@ void LiftoffAssembler::emit_i64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
bailout(kSimd, "i64x2_ne"); bailout(kSimd, "i64x2_ne");
} }
// i64x2.gt_s has no code sequence in this port: the body only calls
// bailout() with reason kSimd and emits no instructions itself.
void LiftoffAssembler::emit_i64x2_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "i64x2.gt_s");
}
// i64x2.ge_s has no code sequence in this port: the body only calls
// bailout() with reason kSimd and emits no instructions itself.
void LiftoffAssembler::emit_i64x2_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "i64x2.ge_s");
}
void LiftoffAssembler::emit_f32x4_eq(LiftoffRegister dst, LiftoffRegister lhs, void LiftoffAssembler::emit_f32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) { LiftoffRegister rhs) {
vceq(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs), vceq(liftoff::GetSimd128Register(dst), liftoff::GetSimd128Register(lhs),
......
...@@ -2758,6 +2758,16 @@ void LiftoffAssembler::emit_i64x2_ne(LiftoffRegister dst, LiftoffRegister lhs, ...@@ -2758,6 +2758,16 @@ void LiftoffAssembler::emit_i64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
Mvn(dst.fp().V2D(), dst.fp().V2D()); Mvn(dst.fp().V2D(), dst.fp().V2D());
} }
// i64x2.gt_s has no code sequence in this port: the body only calls
// bailout() with reason kSimd and emits no instructions itself.
void LiftoffAssembler::emit_i64x2_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "i64x2.gt_s");
}
// i64x2.ge_s has no code sequence in this port: the body only calls
// bailout() with reason kSimd and emits no instructions itself.
void LiftoffAssembler::emit_i64x2_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "i64x2.ge_s");
}
void LiftoffAssembler::emit_f32x4_eq(LiftoffRegister dst, LiftoffRegister lhs, void LiftoffAssembler::emit_f32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) { LiftoffRegister rhs) {
Fcmeq(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S()); Fcmeq(dst.fp().V4S(), lhs.fp().V4S(), rhs.fp().V4S());
......
...@@ -3134,6 +3134,16 @@ void LiftoffAssembler::emit_i64x2_ne(LiftoffRegister dst, LiftoffRegister lhs, ...@@ -3134,6 +3134,16 @@ void LiftoffAssembler::emit_i64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
Pxor(dst.fp(), liftoff::kScratchDoubleReg); Pxor(dst.fp(), liftoff::kScratchDoubleReg);
} }
// i64x2.gt_s has no code sequence in this port: the body only calls
// bailout() with reason kSimd and emits no instructions itself.
void LiftoffAssembler::emit_i64x2_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "i64x2.gt_s");
}
// i64x2.ge_s has no code sequence in this port: the body only calls
// bailout() with reason kSimd and emits no instructions itself.
void LiftoffAssembler::emit_i64x2_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) {
bailout(kSimd, "i64x2.ge_s");
}
void LiftoffAssembler::emit_f32x4_eq(LiftoffRegister dst, LiftoffRegister lhs, void LiftoffAssembler::emit_f32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) { LiftoffRegister rhs) {
liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpeqps, &Assembler::cmpeqps>( liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpeqps, &Assembler::cmpeqps>(
......
...@@ -988,6 +988,10 @@ class LiftoffAssembler : public TurboAssembler { ...@@ -988,6 +988,10 @@ class LiftoffAssembler : public TurboAssembler {
LiftoffRegister rhs); LiftoffRegister rhs);
inline void emit_i64x2_ne(LiftoffRegister dst, LiftoffRegister lhs, inline void emit_i64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs); LiftoffRegister rhs);
// i64x2 signed greater-than of lhs and rhs into dst. Defined separately by
// each port. NOTE(review): the compiler also routes i64x2 lt_s here with an
// extra `true` template argument, presumably an operand swap - confirm.
inline void emit_i64x2_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
// i64x2 signed greater-or-equal of lhs and rhs into dst. Defined separately by
// each port. NOTE(review): the compiler also routes i64x2 le_s here with an
// extra `true` template argument, presumably an operand swap - confirm.
inline void emit_i64x2_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs);
inline void emit_f32x4_eq(LiftoffRegister dst, LiftoffRegister lhs, inline void emit_f32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs); LiftoffRegister rhs);
inline void emit_f32x4_ne(LiftoffRegister dst, LiftoffRegister lhs, inline void emit_f32x4_ne(LiftoffRegister dst, LiftoffRegister lhs,
......
...@@ -3060,6 +3060,16 @@ class LiftoffCompiler { ...@@ -3060,6 +3060,16 @@ class LiftoffCompiler {
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i32x4_ge_u); return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i32x4_ge_u);
case wasm::kExprI64x2Eq: case wasm::kExprI64x2Eq:
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i64x2_eq); return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i64x2_eq);
case wasm::kExprI64x2LtS:
return EmitBinOp<kS128, kS128, true>(
&LiftoffAssembler::emit_i64x2_gt_s);
case wasm::kExprI64x2GtS:
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i64x2_gt_s);
case wasm::kExprI64x2LeS:
return EmitBinOp<kS128, kS128, true>(
&LiftoffAssembler::emit_i64x2_ge_s);
case wasm::kExprI64x2GeS:
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_i64x2_ge_s);
case wasm::kExprF32x4Eq: case wasm::kExprF32x4Eq:
return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f32x4_eq); return EmitBinOp<kS128, kS128>(&LiftoffAssembler::emit_f32x4_eq);
case wasm::kExprF32x4Ne: case wasm::kExprF32x4Ne:
......
...@@ -2721,6 +2721,57 @@ void LiftoffAssembler::emit_i64x2_ne(LiftoffRegister dst, LiftoffRegister lhs, ...@@ -2721,6 +2721,57 @@ void LiftoffAssembler::emit_i64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
Pxor(dst.fp(), kScratchDoubleReg); Pxor(dst.fp(), kScratchDoubleReg);
} }
// Emits i64x2.gt_s: dst = (lhs > rhs) per signed 64-bit lane, by delegating to
// the macro-assembler's I64x2GtS. That helper places different demands on
// register aliasing depending on the supported CpuFeatures, and dst may be
// the same register as lhs and/or rhs here, so the operands are arranged to
// satisfy the helper before it is called.
void LiftoffAssembler::emit_i64x2_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  // Different register alias requirements depending on CpuFeatures supported:
  if (CpuFeatures::IsSupported(AVX)) {
    // 1. AVX, no requirements.
    I64x2GtS(dst.fp(), lhs.fp(), rhs.fp());
  } else if (CpuFeatures::IsSupported(SSE4_2)) {
    // 2. SSE4_2, dst == lhs.
    if (dst == lhs) {
      I64x2GtS(dst.fp(), lhs.fp(), rhs.fp());
    } else if (dst == rhs) {
      // Copying lhs into dst would overwrite rhs before it is read, and the
      // compare would then see lhs > lhs (always false). Build the result in
      // a scratch register and move it into dst afterwards.
      movdqa(liftoff::kScratchDoubleReg2, lhs.fp());
      I64x2GtS(liftoff::kScratchDoubleReg2, liftoff::kScratchDoubleReg2,
               rhs.fp());
      movdqa(dst.fp(), liftoff::kScratchDoubleReg2);
    } else {
      // dst is unrelated to both inputs, so it can take a copy of lhs.
      movdqa(dst.fp(), lhs.fp());
      I64x2GtS(dst.fp(), dst.fp(), rhs.fp());
    }
  } else {
    // 3. Else, dst != lhs && dst != rhs (lhs == rhs is ok).
    if (dst == lhs || dst == rhs) {
      // macro-assembler uses kScratchDoubleReg, so don't use it.
      I64x2GtS(liftoff::kScratchDoubleReg2, lhs.fp(), rhs.fp());
      movaps(dst.fp(), liftoff::kScratchDoubleReg2);
    } else {
      I64x2GtS(dst.fp(), lhs.fp(), rhs.fp());
    }
  }
}
// Emits i64x2.ge_s: dst = (lhs >= rhs) per signed 64-bit lane, by delegating
// to the macro-assembler's I64x2GeS. Whenever dst aliases an input the helper
// cannot tolerate, the result is produced in liftoff::kScratchDoubleReg2 and
// then moved into dst. kScratchDoubleReg itself is off limits because the
// macro-assembler uses it.
void LiftoffAssembler::emit_i64x2_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
                                       LiftoffRegister rhs) {
  const XMMRegister tmp = liftoff::kScratchDoubleReg2;

  // AVX: the helper has no aliasing requirements.
  if (CpuFeatures::IsSupported(AVX)) {
    I64x2GeS(dst.fp(), lhs.fp(), rhs.fp());
    return;
  }

  // SSE4_2: the helper needs dst != lhs.
  if (CpuFeatures::IsSupported(SSE4_2)) {
    if (dst != lhs) {
      I64x2GeS(dst.fp(), lhs.fp(), rhs.fp());
      return;
    }
    I64x2GeS(tmp, lhs.fp(), rhs.fp());
    movdqa(dst.fp(), tmp);
    return;
  }

  // Otherwise: the helper needs dst != lhs && dst != rhs (lhs == rhs is ok).
  const bool dst_is_input = dst == lhs || dst == rhs;
  if (!dst_is_input) {
    I64x2GeS(dst.fp(), lhs.fp(), rhs.fp());
    return;
  }
  I64x2GeS(tmp, lhs.fp(), rhs.fp());
  movaps(dst.fp(), tmp);
}
void LiftoffAssembler::emit_f32x4_eq(LiftoffRegister dst, LiftoffRegister lhs, void LiftoffAssembler::emit_f32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
LiftoffRegister rhs) { LiftoffRegister rhs) {
liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpeqps, &Assembler::cmpeqps>( liftoff::EmitSimdCommutativeBinOp<&Assembler::vcmpeqps, &Assembler::cmpeqps>(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment