Commit 306bb635, authored by Ng Zhi An; committed by Commit Bot

[wasm-simd] Implement F64x2ConvertI64x2 for x64

Bug: v8:8460
Change-Id: Icefb90c67af77ac93bd75b4e452ba426232de83a
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1710332
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Cr-Commit-Position: refs/heads/master@{#63627}
parent db3cc4a2
......@@ -4625,6 +4625,14 @@ void Assembler::movups(Operand dst, XMMRegister src) {
emit_sse_operand(src, dst);
}
// Emits MOVLHPS for a register-register operand pair: an optional REX prefix
// for (dst, src), the opcode bytes 0F 16, then the operand encoding for
// (dst, src). Only the XMM-to-XMM form is provided.
void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
// Make sure the buffer has room before any byte is written.
EnsureSpace ensure_space(this);
emit_optional_rex_32(dst, src);
// Opcode bytes: 0F 16.
emit(0x0F);
emit(0x16);
emit_sse_operand(dst, src);
}
void Assembler::sse2_instr(XMMRegister dst, XMMRegister src, byte prefix,
byte escape, byte opcode) {
EnsureSpace ensure_space(this);
......
......@@ -1148,6 +1148,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void cvtdq2ps(XMMRegister dst, XMMRegister src);
void cvtdq2ps(XMMRegister dst, Operand src);
void movlhps(XMMRegister dst, XMMRegister src);
// AVX instruction
void vfmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmasd(0x99, dst, src1, src2);
......
......@@ -1853,6 +1853,10 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsFloat64(node), VisitF64x2ExtractLane(node);
case IrOpcode::kF64x2ReplaceLane:
return MarkAsSimd128(node), VisitF64x2ReplaceLane(node);
case IrOpcode::kF64x2SConvertI64x2:
return MarkAsSimd128(node), VisitF64x2SConvertI64x2(node);
case IrOpcode::kF64x2UConvertI64x2:
return MarkAsSimd128(node), VisitF64x2UConvertI64x2(node);
case IrOpcode::kF64x2Abs:
return MarkAsSimd128(node), VisitF64x2Abs(node);
case IrOpcode::kF64x2Neg:
......@@ -2606,6 +2610,12 @@ void InstructionSelector::VisitWord64AtomicCompareExchange(Node* node) {
// !V8_TARGET_ARCH_MIPS64 && !V8_TARGET_ARCH_S390
#if !V8_TARGET_ARCH_X64
// Placeholder compiled only under !V8_TARGET_ARCH_X64: the signed
// I64x2 -> F64x2 conversion has no instruction selection on those targets.
void InstructionSelector::VisitF64x2SConvertI64x2(Node* node) {
UNIMPLEMENTED();
}
// Placeholder compiled only under !V8_TARGET_ARCH_X64: the unsigned
// I64x2 -> F64x2 conversion has no instruction selection on those targets.
void InstructionSelector::VisitF64x2UConvertI64x2(Node* node) {
UNIMPLEMENTED();
}
#if !V8_TARGET_ARCH_ARM64
void InstructionSelector::VisitF64x2Splat(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2ExtractLane(Node* node) { UNIMPLEMENTED(); }
......
......@@ -2287,6 +2287,40 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
break;
}
case kX64F64x2SConvertI64x2: {
  // Lane-wise signed int64 -> float64 conversion, done in place: the output
  // register is the same as the input register (checked below).
  // pextrq is an SSE4.1 instruction, so enable the feature before emitting
  // it, exactly as the kX64F64x2ExtractLane case does.
  CpuFeatureScope sse_scope(tasm(), SSE4_1);
  XMMRegister dst = i.OutputSimd128Register();
  Register tmp1 = i.TempRegister(0);
  Register tmp2 = i.TempRegister(1);
  DCHECK_EQ(dst, i.InputSimd128Register(0));
  // Move low quadword into tmp1, high quadword into tmp2. Both integer lanes
  // must be saved first because dst is overwritten by the conversions.
  __ movq(tmp1, dst);
  __ pextrq(tmp2, dst, 1);
  // Convert tmp2, then copy from low to high quadword of dst.
  __ cvtqsi2sd(dst, tmp2);
  __ movlhps(dst, dst);
  // Finally convert tmp1. This writes only the low quadword, so the converted
  // high lane placed above is preserved.
  __ cvtqsi2sd(dst, tmp1);
  break;
}
case kX64F64x2UConvertI64x2: {
  // Lane-wise unsigned int64 -> float64 conversion, done in place: the output
  // register is the same as the input register (checked below).
  // pextrq is an SSE4.1 instruction, so enable the feature before emitting
  // it, exactly as the kX64F64x2ExtractLane case does.
  CpuFeatureScope sse_scope(tasm(), SSE4_1);
  XMMRegister dst = i.OutputSimd128Register();
  Register tmp = i.TempRegister(0);
  XMMRegister tmp_xmm = i.TempSimd128Register(1);
  DCHECK_EQ(dst, i.InputSimd128Register(0));
  // Extract high quadword.
  __ pextrq(tmp, dst, 1);
  // We cannot convert directly into dst, as the next call to Cvtqui2sd will
  // zero it out, so be careful to make sure dst is unique to tmp_xmm.
  __ Cvtqui2sd(tmp_xmm, tmp);
  // Extract low quadword and convert. tmp is free for reuse here because the
  // high lane has already been converted into tmp_xmm.
  __ movq(tmp, dst);
  __ Cvtqui2sd(dst, tmp);
  // Move converted high quadword to top of dst.
  __ movlhps(dst, tmp_xmm);
  break;
}
case kX64F64x2ExtractLane: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
__ pextrq(kScratchRegister, i.InputSimd128Register(0), i.InputInt8(1));
......
......@@ -158,6 +158,8 @@ namespace compiler {
V(X64F64x2Splat) \
V(X64F64x2ExtractLane) \
V(X64F64x2ReplaceLane) \
V(X64F64x2SConvertI64x2) \
V(X64F64x2UConvertI64x2) \
V(X64F64x2Abs) \
V(X64F64x2Neg) \
V(X64F64x2Add) \
......
......@@ -127,6 +127,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64F64x2Splat:
case kX64F64x2ExtractLane:
case kX64F64x2ReplaceLane:
case kX64F64x2SConvertI64x2:
case kX64F64x2UConvertI64x2:
case kX64F64x2Abs:
case kX64F64x2Neg:
case kX64F64x2Add:
......
......@@ -2866,6 +2866,21 @@ void InstructionSelector::VisitF64x2Neg(Node* node) {
arraysize(temps), temps);
}
// Selects kX64F64x2SConvertI64x2 for |node|. The result is defined in the same
// register as the first input, and two general-purpose temps are reserved for
// the code generator.
void InstructionSelector::VisitF64x2SConvertI64x2(Node* node) {
  X64OperandGenerator g(this);
  InstructionOperand scratch[] = {g.TempRegister(), g.TempRegister()};
  InstructionOperand result = g.DefineSameAsFirst(node);
  InstructionOperand input = g.UseRegister(node->InputAt(0));
  Emit(kX64F64x2SConvertI64x2, result, input, arraysize(scratch), scratch);
}
// Selects kX64F64x2UConvertI64x2 for |node|. The result is defined in the same
// register as the first input; one general-purpose temp and one SIMD temp are
// reserved for the code generator.
void InstructionSelector::VisitF64x2UConvertI64x2(Node* node) {
  X64OperandGenerator g(this);
  InstructionOperand scratch[] = {g.TempRegister(), g.TempSimd128Register()};
  InstructionOperand result = g.DefineSameAsFirst(node);
  // The input must not share a register with a temp: Cvtqui2sd zeroes the
  // temp, which would otherwise destroy the value being converted.
  InstructionOperand input = g.UseUniqueRegister(node->InputAt(0));
  Emit(kX64F64x2UConvertI64x2, result, input, arraysize(scratch), scratch);
}
void InstructionSelector::VisitF32x4UConvertI32x4(Node* node) {
X64OperandGenerator g(this);
Emit(kX64F32x4UConvertI32x4, g.DefineSameAsFirst(node),
......
......@@ -253,6 +253,8 @@ MachineType AtomicOpType(Operator const* op) {
V(Word32PairShr, Operator::kNoProperties, 3, 0, 2) \
V(Word32PairSar, Operator::kNoProperties, 3, 0, 2) \
V(F64x2Splat, Operator::kNoProperties, 1, 0, 1) \
V(F64x2SConvertI64x2, Operator::kNoProperties, 1, 0, 1) \
V(F64x2UConvertI64x2, Operator::kNoProperties, 1, 0, 1) \
V(F64x2Abs, Operator::kNoProperties, 1, 0, 1) \
V(F64x2Neg, Operator::kNoProperties, 1, 0, 1) \
V(F64x2Add, Operator::kCommutative, 2, 0, 1) \
......
......@@ -475,6 +475,8 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
// SIMD operators.
const Operator* F64x2Splat();
const Operator* F64x2SConvertI64x2();
const Operator* F64x2UConvertI64x2();
const Operator* F64x2Abs();
const Operator* F64x2Neg();
const Operator* F64x2Add();
......
......@@ -745,6 +745,8 @@
#define MACHINE_SIMD_OP_LIST(V) \
V(F64x2Splat) \
V(F64x2SConvertI64x2) \
V(F64x2UConvertI64x2) \
V(F64x2ExtractLane) \
V(F64x2ReplaceLane) \
V(F64x2Abs) \
......
......@@ -4004,6 +4004,12 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
switch (opcode) {
case wasm::kExprF64x2Splat:
return graph()->NewNode(mcgraph()->machine()->F64x2Splat(), inputs[0]);
case wasm::kExprF64x2SConvertI64x2:
return graph()->NewNode(mcgraph()->machine()->F64x2SConvertI64x2(),
inputs[0]);
case wasm::kExprF64x2UConvertI64x2:
return graph()->NewNode(mcgraph()->machine()->F64x2UConvertI64x2(),
inputs[0]);
case wasm::kExprF64x2Abs:
return graph()->NewNode(mcgraph()->machine()->F64x2Abs(), inputs[0]);
case wasm::kExprF64x2Neg:
......
......@@ -2174,6 +2174,12 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
AppendToBuffer("%s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
}
} else if (opcode == 0x16) {
// movlhps xmm1, xmm2
int mod, regop, rm;
get_modrm(*current, &mod, &regop, &rm);
AppendToBuffer("movlhps %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
} else if (opcode == 0x1F) {
// NOP
int mod, regop, rm;
......
......@@ -2475,6 +2475,10 @@ class ThreadImpl {
Push(WasmValue(Simd128(res))); \
return true; \
}
CONVERT_CASE(F64x2SConvertI64x2, int2, i64x2, float2, 2, 0, int64_t,
static_cast<double>(a))
CONVERT_CASE(F64x2UConvertI64x2, int2, i64x2, float2, 2, 0, uint64_t,
static_cast<double>(a))
CONVERT_CASE(F32x4SConvertI32x4, int4, i32x4, float4, 4, 0, int32_t,
static_cast<float>(a))
CONVERT_CASE(F32x4UConvertI32x4, int4, i32x4, float4, 4, 0, uint32_t,
......
......@@ -258,6 +258,7 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_F32x4_OP(Le, "le")
CASE_F32x4_OP(Gt, "gt")
CASE_F32x4_OP(Ge, "ge")
CASE_CONVERT_OP(Convert, F64x2, I64x2, "i64", "convert")
CASE_CONVERT_OP(Convert, F32x4, I32x4, "i32", "convert")
CASE_CONVERT_OP(Convert, I32x4, F32x4, "f32", "convert")
CASE_CONVERT_OP(Convert, I32x4, I16x8Low, "i32", "convert")
......
......@@ -416,6 +416,8 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, const WasmFeatures&);
V(I32x4UConvertF32x4, 0xfdac, s_s) \
V(F32x4SConvertI32x4, 0xfdaf, s_s) \
V(F32x4UConvertI32x4, 0xfdb0, s_s) \
V(F64x2SConvertI64x2, 0xfdb1, s_s) \
V(F64x2UConvertI64x2, 0xfdb2, s_s) \
V(I8x16SConvertI16x8, 0xfdc6, s_ss) \
V(I8x16UConvertI16x8, 0xfdc7, s_ss) \
V(I16x8SConvertI32x4, 0xfdc8, s_ss) \
......
......@@ -592,6 +592,7 @@ TEST(DisasmX64) {
__ movups(xmm5, xmm1);
__ movups(xmm5, Operand(rdx, 4));
__ movups(Operand(rdx, 4), xmm5);
__ movlhps(xmm5, xmm1);
__ pmulld(xmm5, xmm1);
__ pmulld(xmm5, Operand(rdx, 4));
__ pmullw(xmm5, xmm1);
......
......@@ -1272,6 +1272,32 @@ WASM_SIMD_TEST_NO_LOWERING(I64x2MaxU) {
RunI64x2BinOpTest(execution_tier, lower_simd, kExprI64x2MaxU,
UnsignedMaximum);
}
// Checks both I64x2 -> F64x2 conversions against static_cast<double> for every
// int64 test input, comparing each of the two result lanes.
WASM_SIMD_TEST_NO_LOWERING(F64x2ConvertI64x2) {
  WasmRunner<int32_t, int64_t> r(execution_tier, lower_simd);
  // Global 0 receives the signed result, global 1 the unsigned result.
  double* signed_lanes = r.builder().AddGlobal<double>(kWasmS128);
  double* unsigned_lanes = r.builder().AddGlobal<double>(kWasmS128);
  // The function splats its i64 parameter into a local, converts that vector
  // both ways, stores the two results into the globals and returns 1.
  byte param = 0;
  byte splat = r.AllocateLocal(kWasmS128);
  BUILD(r, WASM_SET_LOCAL(splat, WASM_SIMD_I64x2_SPLAT(WASM_GET_LOCAL(param))),
        WASM_SET_GLOBAL(
            0, WASM_SIMD_UNOP(kExprF64x2SConvertI64x2, WASM_GET_LOCAL(splat))),
        WASM_SET_GLOBAL(
            1, WASM_SIMD_UNOP(kExprF64x2UConvertI64x2, WASM_GET_LOCAL(splat))),
        WASM_ONE);

  FOR_INT64_INPUTS(input) {
    r.Call(input);
    // Reference values: the same bits read as signed and as unsigned.
    double want_signed = static_cast<double>(input);
    double want_unsigned = static_cast<double>(static_cast<uint64_t>(input));
    for (int lane = 0; lane < 2; lane++) {
      CHECK_EQ(want_signed, ReadLittleEndianValue<double>(&signed_lanes[lane]));
      CHECK_EQ(want_unsigned,
               ReadLittleEndianValue<double>(&unsigned_lanes[lane]));
    }
  }
}
#endif // V8_TARGET_ARCH_X64
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment