Commit 3097bf78 authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][x64] Prototype double precision conversion

Prototype these 6 instructions on x64:

- f64x2.convert_low_i32x4_s
- f64x2.convert_low_i32x4_u
- i32x4.trunc_sat_f64x2_s_zero
- i32x4.trunc_sat_f64x2_u_zero
- f32x4.demote_f64x2_zero
- f64x2.promote_low_f32x4

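For reference, a minimal scalar model of the new lane semantics (an
illustrative sketch following the proposal text, not V8 code; all names
are hypothetical):

#include <cstdint>

// Per-lane models; the "_low"/"_zero" variants read only the two low
// input lanes or zero the two high output lanes, respectively.
double ConvertLowS(int32_t x) { return static_cast<double>(x); }   // exact
double ConvertLowU(uint32_t x) { return static_cast<double>(x); }  // exact
double PromoteLow(float x) { return static_cast<double>(x); }      // exact
float DemoteZero(double x) { return static_cast<float>(x); }  // rounds
// trunc_sat_s_zero / trunc_sat_u_zero: saturating truncation with NaN -> 0;
// see the scalar sketches next to the code sequences further down.
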
Some of these code sequences make use of special masks, which we keep in
external references.

Code sequence based on suggestions at:
https://github.com/WebAssembly/simd/pull/383

Bug: v8:11265
Change-Id: Ied67d7b5b6beaaccac7c179ec13504482cb9c915
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2643562
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Reviewed-by: Georg Neis <neis@chromium.org>
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Cr-Commit-Position: refs/heads/master@{#72297}
parent 27771482
......@@ -81,6 +81,30 @@ constexpr struct alignas(16) {
} wasm_i8x16_popcnt_mask = {uint64_t{0x03020201'02010100},
uint64_t{0x04030302'03020201}};
constexpr struct alignas(16) {
uint64_t a;
uint64_t b;
} wasm_f64x2_convert_low_i32x4_u_int_mask = {uint64_t{0x4330000043300000},
uint64_t{0x4330000043300000}};
constexpr struct alignas(16) {
uint64_t a;
uint64_t b;
} wasm_double_2_power_52 = {uint64_t{0x4330000000000000},
uint64_t{0x4330000000000000}};
constexpr struct alignas(16) {
uint64_t a;
uint64_t b;
} wasm_int32_max_as_double = {uint64_t{0x41dfffffffc00000},
uint64_t{0x41dfffffffc00000}};
constexpr struct alignas(16) {
uint64_t a;
uint64_t b;
} wasm_uint32_max_as_double = {uint64_t{0x41efffffffe00000},
uint64_t{0x41efffffffe00000}};
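A quick check of what these bit patterns encode (an illustrative sketch,
not part of the commit; FromBits is a hypothetical helper):

#include <cstdint>
#include <cstring>

// Reinterpret a bit pattern as a double (memcpy avoids aliasing issues).
double FromBits(uint64_t bits) {
  double d;
  std::memcpy(&d, &bits, sizeof d);
  return d;
}
// 0x4330000000000000 is 2^52, the smallest double whose ulp is 1.0, so its
// low 52 significand bits can hold any uint32 exactly:
//   FromBits(0x4330000000000000) == 4503599627370496.0  (2^52)
//   FromBits(0x41dfffffffc00000) == 2147483647.0        (INT32_MAX)
//   FromBits(0x41efffffffe00000) == 4294967295.0        (UINT32_MAX)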
// Implementation of ExternalReference
static ExternalReference::Type BuiltinCallTypeForResultSize(int result_size) {
......@@ -490,6 +514,26 @@ ExternalReference ExternalReference::address_of_wasm_i8x16_popcnt_mask() {
return ExternalReference(reinterpret_cast<Address>(&wasm_i8x16_popcnt_mask));
}
ExternalReference
ExternalReference::address_of_wasm_f64x2_convert_low_i32x4_u_int_mask() {
return ExternalReference(
reinterpret_cast<Address>(&wasm_f64x2_convert_low_i32x4_u_int_mask));
}
ExternalReference ExternalReference::address_of_wasm_double_2_power_52() {
return ExternalReference(reinterpret_cast<Address>(&wasm_double_2_power_52));
}
ExternalReference ExternalReference::address_of_wasm_int32_max_as_double() {
return ExternalReference(
reinterpret_cast<Address>(&wasm_int32_max_as_double));
}
ExternalReference ExternalReference::address_of_wasm_uint32_max_as_double() {
return ExternalReference(
reinterpret_cast<Address>(&wasm_uint32_max_as_double));
}
ExternalReference
ExternalReference::address_of_enable_experimental_regexp_engine() {
return ExternalReference(&FLAG_enable_experimental_regexp_engine);
......
......@@ -234,6 +234,11 @@ class StatsCounter;
V(wasm_memory_init, "wasm::memory_init") \
V(wasm_memory_copy, "wasm::memory_copy") \
V(wasm_memory_fill, "wasm::memory_fill") \
V(address_of_wasm_f64x2_convert_low_i32x4_u_int_mask, \
"wasm_f64x2_convert_low_i32x4_u_int_mask") \
V(address_of_wasm_double_2_power_52, "wasm_double_2_power_52") \
V(address_of_wasm_int32_max_as_double, "wasm_int32_max_as_double") \
V(address_of_wasm_uint32_max_as_double, "wasm_uint32_max_as_double") \
V(write_barrier_marking_from_code_function, "WriteBarrier::MarkingFromCode") \
V(call_enqueue_microtask_function, "MicrotaskQueue::CallEnqueueMicrotask") \
V(call_enter_context_function, "call_enter_context_function") \
......@@ -254,7 +259,6 @@ class StatsCounter;
"ExperimentalRegExp::MatchForCallFromJs") \
EXTERNAL_REFERENCE_LIST_INTL(V) \
EXTERNAL_REFERENCE_LIST_HEAP_SANDBOX(V)
#ifdef V8_INTL_SUPPORT
#define EXTERNAL_REFERENCE_LIST_INTL(V) \
V(intl_convert_one_byte_to_lower, "intl_convert_one_byte_to_lower") \
......
......@@ -162,6 +162,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP(Addss, addss)
AVX_OP(Addsd, addsd)
AVX_OP(Mulsd, mulsd)
AVX_OP(Unpcklps, unpcklps)
AVX_OP(Andps, andps)
AVX_OP(Andnps, andnps)
AVX_OP(Andpd, andpd)
......@@ -184,6 +185,7 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP(Sqrtps, sqrtps)
AVX_OP(Sqrtpd, sqrtpd)
AVX_OP(Cvttps2dq, cvttps2dq)
AVX_OP(Cvttpd2dq, cvttpd2dq)
AVX_OP(Ucomiss, ucomiss)
AVX_OP(Ucomisd, ucomisd)
AVX_OP(Pand, pand)
......@@ -227,6 +229,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
AVX_OP(Maxps, maxps)
AVX_OP(Maxpd, maxpd)
AVX_OP(Cvtdq2ps, cvtdq2ps)
AVX_OP(Cvtdq2pd, cvtdq2pd)
AVX_OP(Cvtpd2ps, cvtpd2ps)
AVX_OP(Cvtps2pd, cvtps2pd)
AVX_OP(Rcpps, rcpps)
AVX_OP(Rsqrtps, rsqrtps)
AVX_OP(Addps, addps)
......
......@@ -10,10 +10,12 @@
V(sqrtps, 0F, 51) \
V(rsqrtps, 0F, 52) \
V(rcpps, 0F, 53) \
V(cvtps2pd, 0F, 5A) \
V(cvtdq2ps, 0F, 5B)
// SSE instructions whose AVX version has three operands.
#define SSE_BINOP_INSTRUCTION_LIST(V) \
V(unpcklps, 0F, 14) \
V(andps, 0F, 54) \
V(andnps, 0F, 55) \
V(orps, 0F, 56) \
......@@ -108,7 +110,9 @@
#define SSE2_UNOP_INSTRUCTION_LIST(V) \
V(ucomisd, 66, 0F, 2E) \
V(sqrtpd, 66, 0F, 51) \
V(cvtps2dq, 66, 0F, 5B)
V(cvtpd2ps, 66, 0F, 5A) \
V(cvtps2dq, 66, 0F, 5B) \
V(cvttpd2dq, 66, 0F, E6)
// SSE2 shift instructions with an immediate operand. The last element is the
// extension to the opcode.
......@@ -131,7 +135,8 @@
V(subsd, F2, 0F, 5C) \
V(minsd, F2, 0F, 5D) \
V(divsd, F2, 0F, 5E) \
V(maxsd, F2, 0F, 5F)
V(maxsd, F2, 0F, 5F) \
V(cvtdq2pd, F3, 0F, E6)
#define SSSE3_INSTRUCTION_LIST(V) \
V(pshufb, 66, 0F, 38, 00) \
......
......@@ -1946,6 +1946,12 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitF64x2Trunc(node);
case IrOpcode::kF64x2NearestInt:
return MarkAsSimd128(node), VisitF64x2NearestInt(node);
case IrOpcode::kF64x2ConvertLowI32x4S:
return MarkAsSimd128(node), VisitF64x2ConvertLowI32x4S(node);
case IrOpcode::kF64x2ConvertLowI32x4U:
return MarkAsSimd128(node), VisitF64x2ConvertLowI32x4U(node);
case IrOpcode::kF64x2PromoteLowF32x4:
return MarkAsSimd128(node), VisitF64x2PromoteLowF32x4(node);
case IrOpcode::kF32x4Splat:
return MarkAsSimd128(node), VisitF32x4Splat(node);
case IrOpcode::kF32x4ExtractLane:
......@@ -2004,6 +2010,8 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitF32x4Trunc(node);
case IrOpcode::kF32x4NearestInt:
return MarkAsSimd128(node), VisitF32x4NearestInt(node);
case IrOpcode::kF32x4DemoteF64x2Zero:
return MarkAsSimd128(node), VisitF32x4DemoteF64x2Zero(node);
case IrOpcode::kI64x2Splat:
return MarkAsSimd128(node), VisitI64x2Splat(node);
case IrOpcode::kI64x2SplatI32Pair:
......@@ -2124,6 +2132,10 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsSimd128(node), VisitI32x4ExtAddPairwiseI16x8S(node);
case IrOpcode::kI32x4ExtAddPairwiseI16x8U:
return MarkAsSimd128(node), VisitI32x4ExtAddPairwiseI16x8U(node);
case IrOpcode::kI32x4TruncSatF64x2SZero:
return MarkAsSimd128(node), VisitI32x4TruncSatF64x2SZero(node);
case IrOpcode::kI32x4TruncSatF64x2UZero:
return MarkAsSimd128(node), VisitI32x4TruncSatF64x2UZero(node);
case IrOpcode::kI16x8Splat:
return MarkAsSimd128(node), VisitI16x8Splat(node);
case IrOpcode::kI16x8ExtractLaneU:
......@@ -2798,6 +2810,27 @@ void InstructionSelector::VisitI64x2SignSelect(Node* node) { UNIMPLEMENTED(); }
#endif // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_IA32 && !V8_TARGET_ARCH_ARM64
// && !V8_TARGET_ARCH_ARM
#if !V8_TARGET_ARCH_X64
void InstructionSelector::VisitF64x2ConvertLowI32x4S(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitF64x2ConvertLowI32x4U(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitF64x2PromoteLowF32x4(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitF32x4DemoteF64x2Zero(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI32x4TruncSatF64x2SZero(Node* node) {
UNIMPLEMENTED();
}
void InstructionSelector::VisitI32x4TruncSatF64x2UZero(Node* node) {
UNIMPLEMENTED();
}
#endif  // !V8_TARGET_ARCH_X64
void InstructionSelector::VisitFinishRegion(Node* node) { EmitIdentity(node); }
void InstructionSelector::VisitParameter(Node* node) {
......
......@@ -7,6 +7,7 @@
#include "src/base/overflowing-math.h"
#include "src/codegen/assembler.h"
#include "src/codegen/cpu-features.h"
#include "src/codegen/external-reference.h"
#include "src/codegen/macro-assembler.h"
#include "src/codegen/optimized-compilation-info.h"
#include "src/codegen/x64/assembler-x64.h"
......@@ -2480,6 +2481,104 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
break;
}
case kX64F64x2ConvertLowI32x4S: {
__ Cvtdq2pd(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kX64F64x2ConvertLowI32x4U: {
XMMRegister dst = i.OutputSimd128Register();
// dst = [ src_low, 0x43300000, src_high, 0x43300000 ];
// 0x43300000'00000000 is a special double whose significand bits
// precisely represent all uint32 values.
__ Unpcklps(
dst, __ ExternalReferenceAsOperand(
ExternalReference::
address_of_wasm_f64x2_convert_low_i32x4_u_int_mask()));
__ Subpd(dst,
__ ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_double_2_power_52()));
break;
}
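A scalar sketch of the trick this sequence implements (illustrative, not
V8 code): Unpcklps interleaves each source uint32 with 0x43300000, forming
the double 2^52 + x in each lane; Subpd then subtracts 2^52, leaving x
exactly.

#include <cstdint>
#include <cstring>

// Scalar model of the Unpcklps + Subpd pair above (little-endian layout).
double Uint32ToDouble(uint32_t x) {
  // Bits of the double 2^52 + x: high word 0x43300000, low word x.
  uint64_t bits = (uint64_t{0x43300000} << 32) | x;
  double d;
  std::memcpy(&d, &bits, sizeof d);
  return d - 4503599627370496.0;  // subtract 2^52; exact for any uint32
}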
case kX64F64x2PromoteLowF32x4: {
__ Cvtps2pd(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kX64F32x4DemoteF64x2Zero: {
__ Cvtpd2ps(i.OutputSimd128Register(), i.InputSimd128Register(0));
break;
}
case kX64I32x4TruncSatF64x2SZero: {
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(tasm(), AVX);
DCHECK_NE(dst, src);
// dst = 0 if src == NaN, else all ones.
__ vcmpeqpd(dst, src, src);
// dst = 0 if src == NaN, else INT32_MAX as double.
__ vandpd(
dst, dst,
__ ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_int32_max_as_double()));
// dst = 0 if src == NaN, src is saturated to INT32_MAX as double.
__ vminpd(dst, src, dst);
// Values > INT32_MAX are already saturated; values < INT32_MIN raise the
// invalid exception, which is masked and returns 0x80000000.
__ vcvttpd2dq(dst, dst);
} else {
DCHECK_EQ(dst, src);
__ Move(kScratchDoubleReg, src);
__ cmpeqpd(kScratchDoubleReg, src);
__ andps(kScratchDoubleReg,
__ ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_int32_max_as_double()));
__ minpd(dst, kScratchDoubleReg);
__ cvttpd2dq(dst, dst);
}
break;
}
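A scalar model of this sequence may help review (illustrative, not V8
code): the cmpeqpd/minpd pair zeroes NaN lanes and clamps from above,
while cvttpd2dq's 0x80000000 "invalid" result happens to equal the
saturated INT32_MIN.

#include <cstdint>
#include <limits>

// Scalar model of the signed saturating truncation implemented above.
int32_t TruncSatF64ToI32(double x) {
  if (x != x) return 0;  // NaN lanes are zeroed by the cmpeqpd mask
  if (x >= 2147483647.0) return std::numeric_limits<int32_t>::max();
  if (x <= -2147483648.0) return std::numeric_limits<int32_t>::min();
  return static_cast<int32_t>(x);  // cvttpd2dq truncates toward zero
}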
case kX64I32x4TruncSatF64x2UZero: {
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputSimd128Register(0);
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope avx_scope(tasm(), AVX);
__ vxorpd(kScratchDoubleReg, kScratchDoubleReg, kScratchDoubleReg);
// Saturate to 0.
__ vmaxpd(dst, src, kScratchDoubleReg);
// Saturate to UINT32_MAX.
__ vminpd(
dst, dst,
__ ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_uint32_max_as_double()));
// Truncate.
__ vroundpd(dst, dst, kRoundToZero);
// Add 2^52 so the low 32 significand bits of each lane hold the uint32
// result.
__ vaddpd(dst, dst,
__ ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_double_2_power_52()));
// Extract low 32 bits of each double's significand, zero top lanes.
// dst = [dst[0], dst[2], 0, 0]
__ vshufps(dst, dst, kScratchDoubleReg, 0x88);
} else {
DCHECK_EQ(dst, src);
__ xorps(kScratchDoubleReg, kScratchDoubleReg);
__ maxpd(dst, kScratchDoubleReg);
__ minpd(
dst,
__ ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_uint32_max_as_double()));
__ roundpd(dst, dst, kRoundToZero);
__ addpd(dst,
__ ExternalReferenceAsOperand(
ExternalReference::address_of_wasm_double_2_power_52()));
__ shufps(dst, kScratchDoubleReg, 0x88);
}
break;
}
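Likewise, a scalar model of the unsigned sequence (illustrative, not V8
code): clamp to [0, UINT32_MAX] (maxpd with zero also zeroes NaN lanes),
truncate toward zero, then add 2^52 so the low 32 significand bits are the
answer; the final shufps keeps those low words and zeroes the high lanes.

#include <cmath>
#include <cstdint>
#include <cstring>

// Scalar model of the unsigned saturating truncation implemented above.
uint32_t TruncSatF64ToU32(double x) {
  if (!(x > 0.0)) return 0;                 // maxpd with 0 (NaN lanes -> 0)
  if (x > 4294967295.0) return 0xFFFFFFFF;  // minpd with uint32_max_as_double
  double d = std::trunc(x) + 4503599627370496.0;  // roundpd, then add 2^52
  uint64_t bits;
  std::memcpy(&bits, &d, sizeof bits);
  return static_cast<uint32_t>(bits);  // low 32 bits of the significand
}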
case kX64F32x4Splat: {
XMMRegister dst = i.OutputSimd128Register();
XMMRegister src = i.InputDoubleRegister(0);
......
......@@ -174,6 +174,9 @@ namespace compiler {
V(X64F64x2Pmin) \
V(X64F64x2Pmax) \
V(X64F64x2Round) \
V(X64F64x2ConvertLowI32x4S) \
V(X64F64x2ConvertLowI32x4U) \
V(X64F64x2PromoteLowF32x4) \
V(X64F32x4Splat) \
V(X64F32x4ExtractLane) \
V(X64F32x4ReplaceLane) \
......@@ -200,6 +203,7 @@ namespace compiler {
V(X64F32x4Pmin) \
V(X64F32x4Pmax) \
V(X64F32x4Round) \
V(X64F32x4DemoteF64x2Zero) \
V(X64I64x2Splat) \
V(X64I64x2ExtractLane) \
V(X64I64x2Neg) \
......@@ -256,6 +260,8 @@ namespace compiler {
V(X64I32x4ExtMulHighI16x8U) \
V(X64I32x4ExtAddPairwiseI16x8S) \
V(X64I32x4ExtAddPairwiseI16x8U) \
V(X64I32x4TruncSatF64x2SZero) \
V(X64I32x4TruncSatF64x2UZero) \
V(X64I16x8Splat) \
V(X64I16x8ExtractLaneS) \
V(X64I16x8SConvertI8x16Low) \
......
......@@ -150,6 +150,9 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64F64x2Pmin:
case kX64F64x2Pmax:
case kX64F64x2Round:
case kX64F64x2ConvertLowI32x4S:
case kX64F64x2ConvertLowI32x4U:
case kX64F64x2PromoteLowF32x4:
case kX64F32x4Splat:
case kX64F32x4ExtractLane:
case kX64F32x4ReplaceLane:
......@@ -176,6 +179,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64F32x4Pmin:
case kX64F32x4Pmax:
case kX64F32x4Round:
case kX64F32x4DemoteF64x2Zero:
case kX64I64x2Splat:
case kX64I64x2ExtractLane:
case kX64I64x2Neg:
......@@ -232,6 +236,8 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kX64I32x4ExtMulHighI16x8U:
case kX64I32x4ExtAddPairwiseI16x8S:
case kX64I32x4ExtAddPairwiseI16x8U:
case kX64I32x4TruncSatF64x2SZero:
case kX64I32x4TruncSatF64x2UZero:
case kX64I16x8Splat:
case kX64I16x8ExtractLaneS:
case kX64I16x8SConvertI8x16Low:
......
......@@ -2929,12 +2929,16 @@ VISIT_ATOMIC_BINOP(Xor)
#define SIMD_UNOP_LIST(V) \
V(F64x2Sqrt) \
V(F64x2ConvertLowI32x4S) \
V(F64x2ConvertLowI32x4U) \
V(F64x2PromoteLowF32x4) \
V(F32x4SConvertI32x4) \
V(F32x4Abs) \
V(F32x4Neg) \
V(F32x4Sqrt) \
V(F32x4RecipApprox) \
V(F32x4RecipSqrtApprox) \
V(F32x4DemoteF64x2Zero) \
V(I64x2Neg) \
V(I64x2BitMask) \
V(I64x2SConvertI32x4Low) \
......@@ -3729,6 +3733,26 @@ void InstructionSelector::VisitI8x16Popcnt(Node* node) {
arraysize(temps), temps);
}
void InstructionSelector::VisitI32x4TruncSatF64x2SZero(Node* node) {
X64OperandGenerator g(this);
if (CpuFeatures::IsSupported(AVX)) {
// Requires dst != src.
Emit(kX64I32x4TruncSatF64x2SZero, g.DefineAsRegister(node),
g.UseUniqueRegister(node->InputAt(0)));
} else {
Emit(kX64I32x4TruncSatF64x2SZero, g.DefineSameAsFirst(node),
g.UseRegister(node->InputAt(0)));
}
}
void InstructionSelector::VisitI32x4TruncSatF64x2UZero(Node* node) {
X64OperandGenerator g(this);
InstructionOperand dst = CpuFeatures::IsSupported(AVX)
? g.DefineAsRegister(node)
: g.DefineSameAsFirst(node);
Emit(kX64I32x4TruncSatF64x2UZero, dst, g.UseRegister(node->InputAt(0)));
}
// static
MachineOperatorBuilder::Flags
InstructionSelector::SupportedMachineOperatorFlags() {
......
......@@ -382,6 +382,9 @@ ShiftKind ShiftKindOf(Operator const* op) {
V(F64x2Floor, Operator::kNoProperties, 1, 0, 1) \
V(F64x2Trunc, Operator::kNoProperties, 1, 0, 1) \
V(F64x2NearestInt, Operator::kNoProperties, 1, 0, 1) \
V(F64x2ConvertLowI32x4S, Operator::kNoProperties, 1, 0, 1) \
V(F64x2ConvertLowI32x4U, Operator::kNoProperties, 1, 0, 1) \
V(F64x2PromoteLowF32x4, Operator::kNoProperties, 1, 0, 1) \
V(F32x4Splat, Operator::kNoProperties, 1, 0, 1) \
V(F32x4SConvertI32x4, Operator::kNoProperties, 1, 0, 1) \
V(F32x4UConvertI32x4, Operator::kNoProperties, 1, 0, 1) \
......@@ -409,6 +412,7 @@ ShiftKind ShiftKindOf(Operator const* op) {
V(F32x4Floor, Operator::kNoProperties, 1, 0, 1) \
V(F32x4Trunc, Operator::kNoProperties, 1, 0, 1) \
V(F32x4NearestInt, Operator::kNoProperties, 1, 0, 1) \
V(F32x4DemoteF64x2Zero, Operator::kNoProperties, 1, 0, 1) \
V(I64x2Splat, Operator::kNoProperties, 1, 0, 1) \
V(I64x2SplatI32Pair, Operator::kNoProperties, 2, 0, 1) \
V(I64x2Neg, Operator::kNoProperties, 1, 0, 1) \
......@@ -464,6 +468,8 @@ ShiftKind ShiftKindOf(Operator const* op) {
V(I32x4SignSelect, Operator::kNoProperties, 3, 0, 1) \
V(I32x4ExtAddPairwiseI16x8S, Operator::kNoProperties, 1, 0, 1) \
V(I32x4ExtAddPairwiseI16x8U, Operator::kNoProperties, 1, 0, 1) \
V(I32x4TruncSatF64x2SZero, Operator::kNoProperties, 1, 0, 1) \
V(I32x4TruncSatF64x2UZero, Operator::kNoProperties, 1, 0, 1) \
V(I16x8Splat, Operator::kNoProperties, 1, 0, 1) \
V(I16x8SConvertI8x16Low, Operator::kNoProperties, 1, 0, 1) \
V(I16x8SConvertI8x16High, Operator::kNoProperties, 1, 0, 1) \
......
......@@ -625,6 +625,9 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* F64x2Floor();
const Operator* F64x2Trunc();
const Operator* F64x2NearestInt();
const Operator* F64x2ConvertLowI32x4S();
const Operator* F64x2ConvertLowI32x4U();
const Operator* F64x2PromoteLowF32x4();
const Operator* F32x4Splat();
const Operator* F32x4ExtractLane(int32_t);
......@@ -655,6 +658,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* F32x4Floor();
const Operator* F32x4Trunc();
const Operator* F32x4NearestInt();
const Operator* F32x4DemoteF64x2Zero();
const Operator* I64x2Splat();
const Operator* I64x2SplatI32Pair();
......@@ -718,6 +722,8 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const Operator* I32x4SignSelect();
const Operator* I32x4ExtAddPairwiseI16x8S();
const Operator* I32x4ExtAddPairwiseI16x8U();
const Operator* I32x4TruncSatF64x2SZero();
const Operator* I32x4TruncSatF64x2UZero();
const Operator* I16x8Splat();
const Operator* I16x8ExtractLaneU(int32_t);
......
......@@ -782,6 +782,9 @@
V(F64x2Floor) \
V(F64x2Trunc) \
V(F64x2NearestInt) \
V(F64x2ConvertLowI32x4S) \
V(F64x2ConvertLowI32x4U) \
V(F64x2PromoteLowF32x4) \
V(F32x4Splat) \
V(F32x4ExtractLane) \
V(F32x4ReplaceLane) \
......@@ -813,6 +816,7 @@
V(F32x4Floor) \
V(F32x4Trunc) \
V(F32x4NearestInt) \
V(F32x4DemoteF64x2Zero) \
V(I64x2Splat) \
V(I64x2SplatI32Pair) \
V(I64x2ExtractLane) \
......@@ -877,6 +881,8 @@
V(I32x4SignSelect) \
V(I32x4ExtAddPairwiseI16x8S) \
V(I32x4ExtAddPairwiseI16x8U) \
V(I32x4TruncSatF64x2SZero) \
V(I32x4TruncSatF64x2UZero) \
V(I16x8Splat) \
V(I16x8ExtractLaneU) \
V(I16x8ExtractLaneS) \
......
......@@ -4693,6 +4693,15 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
return BuildF64x2NearestInt(inputs[0]);
return graph()->NewNode(mcgraph()->machine()->F64x2NearestInt(),
inputs[0]);
case wasm::kExprF64x2ConvertLowI32x4S:
return graph()->NewNode(mcgraph()->machine()->F64x2ConvertLowI32x4S(),
inputs[0]);
case wasm::kExprF64x2ConvertLowI32x4U:
return graph()->NewNode(mcgraph()->machine()->F64x2ConvertLowI32x4U(),
inputs[0]);
case wasm::kExprF64x2PromoteLowF32x4:
return graph()->NewNode(mcgraph()->machine()->F64x2PromoteLowF32x4(),
inputs[0]);
case wasm::kExprF32x4Splat:
return graph()->NewNode(mcgraph()->machine()->F32x4Splat(), inputs[0]);
case wasm::kExprF32x4SConvertI32x4:
......@@ -4787,6 +4796,9 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
return BuildF32x4NearestInt(inputs[0]);
return graph()->NewNode(mcgraph()->machine()->F32x4NearestInt(),
inputs[0]);
case wasm::kExprF32x4DemoteF64x2Zero:
return graph()->NewNode(mcgraph()->machine()->F32x4DemoteF64x2Zero(),
inputs[0]);
case wasm::kExprI64x2Splat:
return graph()->NewNode(mcgraph()->machine()->I64x2Splat(), inputs[0]);
case wasm::kExprI64x2Neg:
......@@ -4954,6 +4966,12 @@ Node* WasmGraphBuilder::SimdOp(wasm::WasmOpcode opcode, Node* const* inputs) {
case wasm::kExprI32x4ExtAddPairwiseI16x8U:
return graph()->NewNode(mcgraph()->machine()->I32x4ExtAddPairwiseI16x8U(),
inputs[0]);
case wasm::kExprI32x4TruncSatF64x2SZero:
return graph()->NewNode(mcgraph()->machine()->I32x4TruncSatF64x2SZero(),
inputs[0]);
case wasm::kExprI32x4TruncSatF64x2UZero:
return graph()->NewNode(mcgraph()->machine()->I32x4TruncSatF64x2UZero(),
inputs[0]);
case wasm::kExprI16x8Splat:
return graph()->NewNode(mcgraph()->machine()->I16x8Splat(), inputs[0]);
case wasm::kExprI16x8SConvertI8x16Low:
......
......@@ -1173,6 +1173,10 @@ int DisassemblerX64::AVXInstruction(byte* data) {
current += PrintRightXMMOperand(current);
AppendToBuffer(",%s", NameOfXMMRegister(regop));
break;
case 0xE6:
AppendToBuffer("vcvtdq2pd %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
break;
default:
UnimplementedInstruction();
}
......@@ -1408,6 +1412,11 @@ int DisassemblerX64::AVXInstruction(byte* data) {
current += PrintRightXMMOperand(current);
AppendToBuffer(",%s", NameOfXMMRegister(regop));
break;
case 0x14:
AppendToBuffer("vunpcklps %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
current += PrintRightXMMOperand(current);
break;
case 0x16:
if (mod == 0b11) {
AppendToBuffer("vmovlhps %s,%s,", NameOfXMMRegister(regop),
......@@ -2071,6 +2080,8 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
AppendToBuffer("cmp%sss %s,%s", cmp_pseudo_op[current[1]],
NameOfXMMRegister(regop), NameOfXMMRegister(rm));
current += 2;
} else if (opcode == 0xE6) {
current += PrintOperands("cvtdq2pd", XMMREG_XMMOPER_OP_ORDER, current);
} else {
UnimplementedInstruction();
}
......@@ -2091,6 +2102,9 @@ int DisassemblerX64::TwoByteOpcodeInstruction(byte* data) {
} else if (opcode == 0x13) {
// movlps m64, xmm1
current += PrintOperands("movlps", XMMOPER_XMMREG_OP_ORDER, current);
} else if (opcode == 0x14) {
// unpcklps xmm1, xmm2/m128
current += PrintOperands("unpcklps", XMMREG_XMMOPER_OP_ORDER, current);
} else if (opcode == 0x16) {
if (mod == 0b11) {
current += PrintOperands("movlhps", XMMREG_XMMOPER_OP_ORDER, current);
......
......@@ -1284,12 +1284,10 @@ WASM_SIMD_TEST(F64x2NearestInt) {
}
// TODO(v8:11265): Prototyping double precision conversions.
#if V8_TARGET_ARCH_X64
template <typename SrcType>
void RunF64x2ConvertLowI32x4Test(TestExecutionTier execution_tier,
LowerSimd lower_simd, WasmOpcode opcode) {
if (TestExecutionTier::kInterpreter != execution_tier) {
return;
}
FLAG_SCOPE(wasm_simd_post_mvp);
WasmRunner<int32_t, SrcType> r(execution_tier, lower_simd);
......@@ -1324,9 +1322,6 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2ConvertLowI32x4U) {
template <typename SrcType>
void RunI32x4TruncSatF64x2Test(TestExecutionTier execution_tier,
LowerSimd lower_simd, WasmOpcode opcode) {
if (TestExecutionTier::kInterpreter != execution_tier) {
return;
}
FLAG_SCOPE(wasm_simd_post_mvp);
WasmRunner<int32_t, double> r(execution_tier, lower_simd);
......@@ -1362,9 +1357,6 @@ WASM_SIMD_TEST_NO_LOWERING(I32x4TruncSatF64x2UZero) {
}
WASM_SIMD_TEST_NO_LOWERING(F32x4DemoteF64x2Zero) {
if (TestExecutionTier::kInterpreter != execution_tier) {
return;
}
FLAG_SCOPE(wasm_simd_post_mvp);
WasmRunner<int32_t, double> r(execution_tier, lower_simd);
......@@ -1390,9 +1382,6 @@ WASM_SIMD_TEST_NO_LOWERING(F32x4DemoteF64x2Zero) {
}
WASM_SIMD_TEST_NO_LOWERING(F64x2PromoteLowF32x4) {
if (TestExecutionTier::kInterpreter != execution_tier) {
return;
}
FLAG_SCOPE(wasm_simd_post_mvp);
WasmRunner<int32_t, float> r(execution_tier, lower_simd);
......@@ -1416,6 +1405,7 @@ WASM_SIMD_TEST_NO_LOWERING(F64x2PromoteLowF32x4) {
}
}
}
#endif // V8_TARGET_ARCH_X64
void RunF64x2BinOpTest(TestExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, DoubleBinOp expected_op) {
......