Commit b477d91c authored by Ng Zhi An, committed by Commit Bot

Reland "[wasm-simd] Implement F64x2ConvertI64x2 for x64"

This is a reland of 306bb635

Original change's description:
> [wasm-simd] Implement F64x2ConvertI64x2 for x64
> 
> Bug: v8:8460
> Change-Id: Icefb90c67af77ac93bd75b4e452ba426232de83a
> Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1710332
> Commit-Queue: Zhi An Ng <zhin@chromium.org>
> Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
> Reviewed-by: Bill Budge <bbudge@chromium.org>
> Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
> Cr-Commit-Position: refs/heads/master@{#63627}

Bug: v8:8460
Change-Id: I08d2c88e81ce51d3d1cfdf3d7d6ba34792e34e9e
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1793902
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#64365}
parent a0b95232
......@@ -4669,6 +4669,14 @@ void Assembler::movups(Operand dst, XMMRegister src) {
emit_sse_operand(src, dst);
}
// Emits MOVLHPS (opcode 0F 16 /r): copies the low 64 bits of src into the
// high 64 bits of dst, leaving the low 64 bits of dst unchanged. Used here
// to duplicate/place a converted double into the upper f64x2 lane.
void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
EnsureSpace ensure_space(this);
// No mandatory prefix; emit an optional REX for extended registers.
emit_optional_rex_32(dst, src);
emit(0x0F);
emit(0x16);
emit_sse_operand(dst, src);
}
void Assembler::sse2_instr(XMMRegister dst, XMMRegister src, byte prefix,
byte escape, byte opcode) {
EnsureSpace ensure_space(this);
......
......@@ -1153,6 +1153,8 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
void cvtdq2ps(XMMRegister dst, XMMRegister src);
void cvtdq2ps(XMMRegister dst, Operand src);
void movlhps(XMMRegister dst, XMMRegister src);
// AVX instruction
void vfmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmasd(0x99, dst, src1, src2);
......
......@@ -1855,6 +1855,10 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsFloat64(node), VisitF64x2ExtractLane(node);
case IrOpcode::kF64x2ReplaceLane:
return MarkAsSimd128(node), VisitF64x2ReplaceLane(node);
case IrOpcode::kF64x2SConvertI64x2:
return MarkAsSimd128(node), VisitF64x2SConvertI64x2(node);
case IrOpcode::kF64x2UConvertI64x2:
return MarkAsSimd128(node), VisitF64x2UConvertI64x2(node);
case IrOpcode::kF64x2Abs:
return MarkAsSimd128(node), VisitF64x2Abs(node);
case IrOpcode::kF64x2Neg:
......@@ -2636,6 +2640,12 @@ void InstructionSelector::VisitI64x2ReplaceLaneI32Pair(Node* node) {
#endif // !V8_TARGET_ARCH_IA32
#if !V8_TARGET_ARCH_X64
// Fallback for architectures without a backend lowering for
// f64x2.convert_i64x2_s (guarded by the surrounding #if !V8_TARGET_ARCH_X64);
// only the x64 selector overrides this.
void InstructionSelector::VisitF64x2SConvertI64x2(Node* node) {
UNIMPLEMENTED();
}
// Fallback for architectures without a backend lowering for
// f64x2.convert_i64x2_u; only the x64 selector provides an implementation.
void InstructionSelector::VisitF64x2UConvertI64x2(Node* node) {
UNIMPLEMENTED();
}
#if !V8_TARGET_ARCH_ARM64
#if !V8_TARGET_ARCH_IA32
void InstructionSelector::VisitF64x2Abs(Node* node) { UNIMPLEMENTED(); }
......
......@@ -2286,6 +2286,40 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
break;
}
// f64x2.convert_i64x2_s: convert each signed 64-bit lane to double.
// Strategy: extract both lanes into GPRs, convert the high lane first
// (cvtqsi2sd only writes the low quadword), duplicate it into the high
// half with movlhps, then convert the low lane in place.
case kX64F64x2SConvertI64x2: {
XMMRegister dst = i.OutputSimd128Register();
Register tmp1 = i.TempRegister(0);
Register tmp2 = i.TempRegister(1);
DCHECK_EQ(dst, i.InputSimd128Register(0));
// Move low quadword into tmp1, high quadword into tmp2.
__ movq(tmp1, dst);
__ pextrq(tmp2, dst, 1);
// Convert tmp2, then copy from low to high quadword of dst.
__ cvtqsi2sd(dst, tmp2);
__ movlhps(dst, dst);
// Finally convert tmp1.
__ cvtqsi2sd(dst, tmp1);
break;
}
// f64x2.convert_i64x2_u: convert each unsigned 64-bit lane to double.
// Unlike the signed case, the unsigned conversion (Cvtqui2sd) zeroes its
// destination register, so the high-lane result is staged in a separate
// XMM temp and merged back with movlhps.
case kX64F64x2UConvertI64x2: {
XMMRegister dst = i.OutputSimd128Register();
Register tmp = i.TempRegister(0);
XMMRegister tmp_xmm = i.TempSimd128Register(1);
DCHECK_EQ(dst, i.InputSimd128Register(0));
// Extract high quadword.
__ pextrq(tmp, dst, 1);
// We cannot convert directly into dst, as the next call to Cvtqui2sd will
// zero it out, so be careful to make sure dst is unique to tmp_xmm.
__ Cvtqui2sd(tmp_xmm, tmp);
// Extract low quadword and convert.
__ movq(tmp, dst);
__ Cvtqui2sd(dst, tmp);
// Move converted high quadword to top of dst.
__ movlhps(dst, tmp_xmm);
break;
}
case kX64F64x2ExtractLane: {
CpuFeatureScope sse_scope(tasm(), SSE4_1);
__ pextrq(kScratchRegister, i.InputSimd128Register(0), i.InputInt8(1));
......
......@@ -158,6 +158,8 @@ namespace compiler {
V(X64F64x2Splat) \
V(X64F64x2ExtractLane) \
V(X64F64x2ReplaceLane) \
V(X64F64x2SConvertI64x2) \
V(X64F64x2UConvertI64x2) \
V(X64F64x2Abs) \
V(X64F64x2Neg) \
V(X64F64x2Sqrt) \
......
......@@ -127,6 +127,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64F64x2Splat:
case kX64F64x2ExtractLane:
case kX64F64x2ReplaceLane:
case kX64F64x2SConvertI64x2:
case kX64F64x2UConvertI64x2:
case kX64F64x2Abs:
case kX64F64x2Neg:
case kX64F64x2Sqrt:
......
......@@ -2895,6 +2895,21 @@ void InstructionSelector::VisitF64x2Neg(Node* node) {
arraysize(temps), temps);
}
// x64 lowering of f64x2.convert_i64x2_s. The code generator shuttles the
// two lanes through cvtqsi2sd via two scratch GPRs and rewrites the input
// register in place, hence DefineSameAsFirst.
void InstructionSelector::VisitF64x2SConvertI64x2(Node* node) {
  X64OperandGenerator gen(this);
  InstructionOperand scratch[] = {gen.TempRegister(), gen.TempRegister()};
  InstructionOperand output = gen.DefineSameAsFirst(node);
  InstructionOperand input = gen.UseRegister(node->InputAt(0));
  Emit(kX64F64x2SConvertI64x2, output, input, arraysize(scratch), scratch);
}
// x64 lowering of f64x2.convert_i64x2_u. Requires one scratch GPR plus one
// scratch SIMD register; the input must be a unique register because
// Cvtqui2sd zeroes its destination, so dst may not alias the SIMD temp.
void InstructionSelector::VisitF64x2UConvertI64x2(Node* node) {
  X64OperandGenerator gen(this);
  InstructionOperand scratch[] = {gen.TempRegister(),
                                  gen.TempSimd128Register()};
  InstructionOperand output = gen.DefineSameAsFirst(node);
  InstructionOperand input = gen.UseUniqueRegister(node->InputAt(0));
  Emit(kX64F64x2UConvertI64x2, output, input, arraysize(scratch), scratch);
}
void InstructionSelector::VisitF32x4UConvertI32x4(Node* node) {
X64OperandGenerator g(this);
Emit(kX64F32x4UConvertI32x4, g.DefineSameAsFirst(node),
......
......@@ -254,6 +254,8 @@ MachineType AtomicOpType(Operator const* op) {
V(Word32PairShr, Operator::kNoProperties, 3, 0, 2) \
V(Word32PairSar, Operator::kNoProperties, 3, 0, 2) \
V(F64x2Splat, Operator::kNoProperties, 1, 0, 1) \
V(F64x2SConvertI64x2, Operator::kNoProperties, 1, 0, 1) \
V(F64x2UConvertI64x2, Operator::kNoProperties, 1, 0, 1) \
V(F64x2Abs, Operator::kNoProperties, 1, 0, 1) \
V(F64x2Neg, Operator::kNoProperties, 1, 0, 1) \
V(F64x2Sqrt, Operator::kNoProperties, 1, 0, 1) \
......
......@@ -481,6 +481,8 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
// SIMD operators.
const Operator* F64x2Splat();
const Operator* F64x2SConvertI64x2();
const Operator* F64x2UConvertI64x2();
const Operator* F64x2Abs();
const Operator* F64x2Neg();
const Operator* F64x2Sqrt();
......
......@@ -751,6 +751,8 @@
#define MACHINE_SIMD_OP_LIST(V) \
V(F64x2Splat) \
V(F64x2SConvertI64x2) \
V(F64x2UConvertI64x2) \
V(F64x2ExtractLane) \
V(F64x2ReplaceLane) \
V(F64x2Abs) \
......
......@@ -4012,6 +4012,12 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
switch (opcode) {
case wasm::kExprF64x2Splat:
return graph()->NewNode(mcgraph()->machine()->F64x2Splat(), inputs[0]);
case wasm::kExprF64x2SConvertI64x2:
return graph()->NewNode(mcgraph()->machine()->F64x2SConvertI64x2(),
inputs[0]);
case wasm::kExprF64x2UConvertI64x2:
return graph()->NewNode(mcgraph()->machine()->F64x2UConvertI64x2(),
inputs[0]);
case wasm::kExprF64x2Abs:
return graph()->NewNode(mcgraph()->machine()->F64x2Abs(), inputs[0]);
case wasm::kExprF64x2Neg:
......
......@@ -2186,6 +2186,12 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
AppendToBuffer("%s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
}
} else if (opcode == 0x16) {
// movlhps xmm1, xmm2
int mod, regop, rm;
get_modrm(*current, &mod, &regop, &rm);
AppendToBuffer("movlhps %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
} else if (opcode == 0x1F) {
// NOP
int mod, regop, rm;
......
......@@ -2540,6 +2540,10 @@ class ThreadImpl {
Push(WasmValue(Simd128(res))); \
return true; \
}
CONVERT_CASE(F64x2SConvertI64x2, int2, i64x2, float2, 2, 0, int64_t,
static_cast<double>(a))
CONVERT_CASE(F64x2UConvertI64x2, int2, i64x2, float2, 2, 0, uint64_t,
static_cast<double>(a))
CONVERT_CASE(F32x4SConvertI32x4, int4, i32x4, float4, 4, 0, int32_t,
static_cast<float>(a))
CONVERT_CASE(F32x4UConvertI32x4, int4, i32x4, float4, 4, 0, uint32_t,
......
......@@ -260,6 +260,7 @@ const char* WasmOpcodes::OpcodeName(WasmOpcode opcode) {
CASE_F32x4_OP(Le, "le")
CASE_F32x4_OP(Gt, "gt")
CASE_F32x4_OP(Ge, "ge")
CASE_CONVERT_OP(Convert, F64x2, I64x2, "i64", "convert")
CASE_CONVERT_OP(Convert, F32x4, I32x4, "i32", "convert")
CASE_CONVERT_OP(Convert, I32x4, F32x4, "f32", "convert")
CASE_CONVERT_OP(Convert, I32x4, I16x8Low, "i32", "convert")
......
......@@ -421,6 +421,8 @@ bool IsJSCompatibleSignature(const FunctionSig* sig, const WasmFeatures&);
V(F32x4SConvertI32x4, 0xfdaf, s_s) \
V(F32x4UConvertI32x4, 0xfdb0, s_s) \
V(S8x16Swizzle, 0xfdc0, s_ss) \
V(F64x2SConvertI64x2, 0xfdb1, s_s) \
V(F64x2UConvertI64x2, 0xfdb2, s_s) \
V(I8x16SConvertI16x8, 0xfdc6, s_ss) \
V(I8x16UConvertI16x8, 0xfdc7, s_ss) \
V(I16x8SConvertI32x4, 0xfdc8, s_ss) \
......
......@@ -594,6 +594,7 @@ TEST(DisasmX64) {
__ movups(xmm5, xmm1);
__ movups(xmm5, Operand(rdx, 4));
__ movups(Operand(rdx, 4), xmm5);
__ movlhps(xmm5, xmm1);
__ pmulld(xmm5, xmm1);
__ pmulld(xmm5, Operand(rdx, 4));
__ pmullw(xmm5, xmm1);
......
......@@ -1505,6 +1505,34 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2Qfms) {
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64
#endif // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_IA32
#if V8_TARGET_ARCH_X64
WASM_SIMD_TEST_NO_LOWERING(F64x2ConvertI64x2) {
  WasmRunner<int32_t, int64_t> r(execution_tier, lower_simd);
  // g0 captures the signed conversion result, g1 the unsigned one.
  double* g0 = r.builder().AddGlobal<double>(kWasmS128);
  double* g1 = r.builder().AddGlobal<double>(kWasmS128);
  // The wasm function splats its i64 argument across a v128 local, converts
  // that vector both signed and unsigned, and stores each into a global.
  byte param = 0;
  byte splat = r.AllocateLocal(kWasmS128);
  BUILD(r, WASM_SET_LOCAL(splat, WASM_SIMD_I64x2_SPLAT(WASM_GET_LOCAL(param))),
        WASM_SET_GLOBAL(
            0, WASM_SIMD_UNOP(kExprF64x2SConvertI64x2, WASM_GET_LOCAL(splat))),
        WASM_SET_GLOBAL(
            1, WASM_SIMD_UNOP(kExprF64x2UConvertI64x2, WASM_GET_LOCAL(splat))),
        WASM_ONE);
  FOR_INT64_INPUTS(input) {
    r.Call(input);
    double want_signed = static_cast<double>(input);
    double want_unsigned = static_cast<double>(static_cast<uint64_t>(input));
    for (int lane = 0; lane < 2; lane++) {
      // Both lanes hold the splatted value, so both must convert identically.
      CHECK_EQ(want_signed, ReadLittleEndianValue<double>(&g0[lane]));
      CHECK_EQ(want_unsigned, ReadLittleEndianValue<double>(&g1[lane]));
    }
  }
}
#endif // V8_TARGET_ARCH_X64
WASM_SIMD_TEST(I32x4Splat) {
WasmRunner<int32_t, int32_t> r(execution_tier, lower_simd);
// Set up a global to hold output vector.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment