Commit 50292697 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][ia32][liftoff] Implement i64x2 signed compares

Extract code sequence into macro-assembler for reuse between Liftoff and
TurboFan.

Similar to x64, there is a bit of register-aliasing checking because the
code sequence has rather strict requirements that depend on which
CpuFeatures are supported.
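
For context, the fallback path (no SSE4_2 pcmpgtq) builds a 64-bit signed
greater-than out of 32-bit compares. A minimal standalone sketch of that
trick using SSE intrinsics (illustrative only; the helper name
i64x2_gt_s_no_pcmpgtq is not part of this change):

  #include <emmintrin.h>  // SSE2
  #include <pmmintrin.h>  // SSE3, for _mm_movehdup_ps (movshdup)

  // Per 64-bit lane: all-ones if a > b (signed), else all-zeros.
  static __m128i i64x2_gt_s_no_pcmpgtq(__m128i a, __m128i b) {
    // If the high dwords are equal, the sign of (b - a) decides: the
    // subtraction borrows into the high dword exactly when the low
    // dword of a is larger.
    __m128i eq = _mm_cmpeq_epi32(a, b);                     // pcmpeqd
    __m128i res = _mm_and_si128(_mm_sub_epi64(b, a), eq);   // psubq + pand
    // If the high dwords differ, the signed 32-bit compare decides.
    res = _mm_or_si128(res, _mm_cmpgt_epi32(a, b));         // pcmpgtd + por
    // Copy each lane's high dword over its low dword (movshdup).
    return _mm_castps_si128(_mm_movehdup_ps(_mm_castsi128_ps(res)));
  }

i64x2.ge_s is then just the bitwise NOT of i64x2.gt_s with the operands
swapped, which is why the GeS sequences end with an all-ones pcmpeqd
followed by an xor.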

Bug: v8:11415
Change-Id: I3f855da25493941d158383020fbcafee8d18095f
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2698066
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72925}
parent 8136e399
@@ -962,6 +962,65 @@ void TurboAssembler::I32x4TruncSatF64x2UZero(XMMRegister dst, XMMRegister src,
   }
 }
 
+void TurboAssembler::I64x2GtS(XMMRegister dst, XMMRegister src0,
+                              XMMRegister src1, XMMRegister scratch) {
+  if (CpuFeatures::IsSupported(AVX)) {
+    CpuFeatureScope avx_scope(this, AVX);
+    vpcmpgtq(dst, src0, src1);
+  } else if (CpuFeatures::IsSupported(SSE4_2)) {
+    CpuFeatureScope sse_scope(this, SSE4_2);
+    DCHECK_EQ(dst, src0);
+    pcmpgtq(dst, src1);
+  } else {
+    CpuFeatureScope sse_scope(this, SSSE3);
+    DCHECK_NE(dst, src0);
+    DCHECK_NE(dst, src1);
+    movaps(dst, src1);
+    movaps(scratch, src0);
+    psubq(dst, src0);
+    pcmpeqd(scratch, src1);
+    andps(dst, scratch);
+    movaps(scratch, src0);
+    pcmpgtd(scratch, src1);
+    orps(dst, scratch);
+    movshdup(dst, dst);
+  }
+}
+
+void TurboAssembler::I64x2GeS(XMMRegister dst, XMMRegister src0,
+                              XMMRegister src1, XMMRegister scratch) {
+  if (CpuFeatures::IsSupported(AVX)) {
+    CpuFeatureScope avx_scope(this, AVX);
+    vpcmpgtq(dst, src1, src0);
+    vpcmpeqd(scratch, scratch, scratch);
+    vpxor(dst, dst, scratch);
+  } else if (CpuFeatures::IsSupported(SSE4_2)) {
+    CpuFeatureScope sse_scope(this, SSE4_2);
+    DCHECK_NE(dst, src0);
+    if (dst != src1) {
+      movaps(dst, src1);
+    }
+    pcmpgtq(dst, src0);
+    pcmpeqd(scratch, scratch);
+    xorps(dst, scratch);
+  } else {
+    CpuFeatureScope sse_scope(this, SSSE3);
+    DCHECK_NE(dst, src0);
+    DCHECK_NE(dst, src1);
+    movaps(dst, src0);
+    movaps(scratch, src1);
+    psubq(dst, src1);
+    pcmpeqd(scratch, src0);
+    andps(dst, scratch);
+    movaps(scratch, src1);
+    pcmpgtd(scratch, src0);
+    orps(dst, scratch);
+    movshdup(dst, dst);
+    pcmpeqd(scratch, scratch);
+    xorps(dst, scratch);
+  }
+}
+
 void TurboAssembler::ShlPair(Register high, Register low, uint8_t shift) {
   DCHECK_GE(63, shift);
   if (shift >= 32) {
...
@@ -671,6 +671,10 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
                                XMMRegister scratch, Register tmp);
   void I32x4TruncSatF64x2UZero(XMMRegister dst, XMMRegister src,
                                XMMRegister scratch, Register tmp);
+  void I64x2GtS(XMMRegister dst, XMMRegister src0, XMMRegister src1,
+                XMMRegister scratch);
+  void I64x2GeS(XMMRegister dst, XMMRegister src0, XMMRegister src1,
+                XMMRegister scratch);
 
   void Push(Register src) { push(src); }
   void Push(Operand src) { push(src); }
...
@@ -2259,66 +2259,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
       break;
     }
     case kIA32I64x2GtS: {
-      XMMRegister dst = i.OutputSimd128Register();
-      XMMRegister src0 = i.InputSimd128Register(0);
-      XMMRegister src1 = i.InputSimd128Register(1);
-      if (CpuFeatures::IsSupported(AVX)) {
-        CpuFeatureScope avx_scope(tasm(), AVX);
-        __ vpcmpgtq(dst, src0, src1);
-      } else if (CpuFeatures::IsSupported(SSE4_2)) {
-        CpuFeatureScope sse_scope(tasm(), SSE4_2);
-        DCHECK_EQ(dst, src0);
-        __ pcmpgtq(dst, src1);
-      } else {
-        DCHECK_NE(dst, src0);
-        DCHECK_NE(dst, src1);
-        __ movdqa(dst, src1);
-        __ movdqa(kScratchDoubleReg, src0);
-        __ psubq(dst, src0);
-        __ pcmpeqd(kScratchDoubleReg, src1);
-        __ pand(dst, kScratchDoubleReg);
-        __ movdqa(kScratchDoubleReg, src0);
-        __ pcmpgtd(kScratchDoubleReg, src1);
-        __ por(dst, kScratchDoubleReg);
-        __ pshufd(dst, dst, 0xF5);
-      }
+      __ I64x2GtS(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1), kScratchDoubleReg);
       break;
     }
     case kIA32I64x2GeS: {
-      XMMRegister dst = i.OutputSimd128Register();
-      XMMRegister src0 = i.InputSimd128Register(0);
-      XMMRegister src1 = i.InputSimd128Register(1);
-      if (CpuFeatures::IsSupported(AVX)) {
-        CpuFeatureScope avx_scope(tasm(), AVX);
-        __ vpcmpgtq(dst, src1, src0);
-        __ vpcmpeqd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
-        __ vpxor(dst, dst, kScratchDoubleReg);
-      } else if (CpuFeatures::IsSupported(SSE4_2)) {
-        CpuFeatureScope sse_scope(tasm(), SSE4_2);
-        DCHECK_NE(dst, src0);
-        if (dst != src1) {
-          __ movdqa(dst, src1);
-        }
-        __ pcmpgtq(dst, src0);
-        __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
-        __ pxor(dst, kScratchDoubleReg);
-      } else {
-        DCHECK_NE(dst, src0);
-        DCHECK_NE(dst, src1);
-        __ movdqa(dst, src0);
-        __ movdqa(kScratchDoubleReg, src1);
-        __ psubq(dst, src1);
-        __ pcmpeqd(kScratchDoubleReg, src0);
-        __ pand(dst, kScratchDoubleReg);
-        __ movdqa(kScratchDoubleReg, src1);
-        __ pcmpgtd(kScratchDoubleReg, src0);
-        __ por(dst, kScratchDoubleReg);
-        __ pshufd(dst, dst, 0xF5);
-        __ pcmpeqd(kScratchDoubleReg, kScratchDoubleReg);
-        __ pxor(dst, kScratchDoubleReg);
-      }
+      __ I64x2GeS(i.OutputSimd128Register(), i.InputSimd128Register(0),
+                  i.InputSimd128Register(1), kScratchDoubleReg);
       break;
     }
     case kIA32I64x2SConvertI32x4Low: {
...
@@ -3139,12 +3139,57 @@ void LiftoffAssembler::emit_i64x2_ne(LiftoffRegister dst, LiftoffRegister lhs,
 void LiftoffAssembler::emit_i64x2_gt_s(LiftoffRegister dst, LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
-  bailout(kSimd, "i64x2.gt_s");
+  // Different register alias requirements depending on CpuFeatures supported:
+  if (CpuFeatures::IsSupported(AVX)) {
+    // 1. AVX, no requirements.
+    I64x2GtS(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
+  } else if (CpuFeatures::IsSupported(SSE4_2)) {
+    // 2. SSE4_2, dst == lhs.
+    if (dst != lhs) {
+      movdqa(dst.fp(), lhs.fp());
+    }
+    I64x2GtS(dst.fp(), dst.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
+  } else {
+    // 3. Else, dst != lhs && dst != rhs (lhs == rhs is ok).
+    if (dst == lhs || dst == rhs) {
+      LiftoffRegister tmp = GetUnusedRegister(
+          RegClass::kFpReg, LiftoffRegList::ForRegs(lhs, rhs));
+      I64x2GtS(tmp.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
+      movaps(dst.fp(), tmp.fp());
+    } else {
+      I64x2GtS(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
+    }
+  }
 }
 
 void LiftoffAssembler::emit_i64x2_ge_s(LiftoffRegister dst, LiftoffRegister lhs,
                                        LiftoffRegister rhs) {
-  bailout(kSimd, "i64x2.ge_s");
+  // Different register alias requirements depending on CpuFeatures supported:
+  if (CpuFeatures::IsSupported(AVX)) {
+    // 1. AVX, no requirements.
+    I64x2GeS(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
+  } else if (CpuFeatures::IsSupported(SSE4_2)) {
+    // 2. SSE4_2, dst != lhs.
+    if (dst == lhs) {
+      LiftoffRegister tmp = GetUnusedRegister(RegClass::kFpReg, {rhs},
+                                              LiftoffRegList::ForRegs(lhs));
+      // macro-assembler uses kScratchDoubleReg, so don't use it.
+      I64x2GeS(tmp.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
+      movdqa(dst.fp(), tmp.fp());
+    } else {
+      I64x2GeS(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
+    }
+  } else {
+    // 3. Else, dst != lhs && dst != rhs (lhs == rhs is ok).
+    if (dst == lhs || dst == rhs) {
+      LiftoffRegister tmp = GetUnusedRegister(
+          RegClass::kFpReg, LiftoffRegList::ForRegs(lhs, rhs));
+      I64x2GeS(tmp.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
+      movaps(dst.fp(), tmp.fp());
+    } else {
+      I64x2GeS(dst.fp(), lhs.fp(), rhs.fp(), liftoff::kScratchDoubleReg);
+    }
+  }
 }
 
 void LiftoffAssembler::emit_f32x4_eq(LiftoffRegister dst, LiftoffRegister lhs,
...