Commit ef1d6a3b authored by Ng Zhi An, committed by Commit Bot

[wasm-simd][ia32] Prototype f32x4 and f64x2 rounding

Implements f32x4 and f64x2 ceil, floor, trunc, nearestint for ia32.

Bug: v8:10553
Change-Id: I272c20441297b9aebd39bc7ff96870b40e8ac653
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2227257
Commit-Queue: Zhi An Ng <zhin@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Cr-Commit-Position: refs/heads/master@{#68347}
parent 8427296e
...@@ -2269,6 +2269,30 @@ void Assembler::ucomisd(XMMRegister dst, Operand src) {
emit_sse_operand(dst, src);
}
void Assembler::roundps(XMMRegister dst, XMMRegister src, RoundingMode mode) {
DCHECK(IsEnabled(SSE4_1));
EnsureSpace ensure_space(this);
EMIT(0x66);
EMIT(0x0F);
EMIT(0x3A);
EMIT(0x08);
emit_sse_operand(dst, src);
// Mask precision exception.
EMIT(static_cast<byte>(mode) | 0x8);
}
void Assembler::roundpd(XMMRegister dst, XMMRegister src, RoundingMode mode) {
DCHECK(IsEnabled(SSE4_1));
EnsureSpace ensure_space(this);
EMIT(0x66);
EMIT(0x0F);
EMIT(0x3A);
EMIT(0x09);
emit_sse_operand(dst, src);
// Mask precision exception.
EMIT(static_cast<byte>(mode) | 0x8);
}
void Assembler::roundss(XMMRegister dst, XMMRegister src, RoundingMode mode) {
DCHECK(IsEnabled(SSE4_1));
EnsureSpace ensure_space(this);
...@@ -2944,6 +2968,15 @@ void Assembler::vpinsrd(XMMRegister dst, XMMRegister src1, Operand src2,
EMIT(offset);
}
void Assembler::vroundps(XMMRegister dst, XMMRegister src, RoundingMode mode) {
vinstr(0x08, dst, xmm0, Operand(src), k66, k0F3A, kWIG);
EMIT(static_cast<byte>(mode) | 0x8); // Mask precision exception.
}
void Assembler::vroundpd(XMMRegister dst, XMMRegister src, RoundingMode mode) {
vinstr(0x09, dst, xmm0, Operand(src), k66, k0F3A, kWIG);
EMIT(static_cast<byte>(mode) | 0x8); // Mask precision exception.
}
void Assembler::vmovmskps(Register dst, XMMRegister src) {
DCHECK(IsEnabled(AVX));
EnsureSpace ensure_space(this);
......
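The two SSE4.1 emitters above produce the encodings 66 0F 3A 08 /r ib (roundps) and 66 0F 3A 09 /r ib (roundpd), and the AVX forms (vroundps/vroundpd) reuse the same trailing immediate. In that immediate, bits 1:0 select the rounding mode, bit 2 stays clear so the immediate rather than MXCSR.RC controls rounding, and bit 3 masks the precision (inexact) exception, which is what the | 0x8 does. A minimal sketch of that composition, assuming the usual ia32 RoundingMode values:

#include <cstdint>

// Assumed to match V8's ia32 RoundingMode; these are also the Intel RC encodings.
enum RoundingMode : uint8_t {
  kRoundToNearest = 0x0,
  kRoundDown = 0x1,   // toward -infinity (floor)
  kRoundUp = 0x2,     // toward +infinity (ceil)
  kRoundToZero = 0x3  // truncate
};

// Immediate byte emitted after the ModR/M byte: mode in bits 1:0, bit 3 set
// so an inexact result does not raise a precision exception.
constexpr uint8_t RoundImm8(RoundingMode mode) {
  return static_cast<uint8_t>(mode | 0x8);
}

static_assert(RoundImm8(kRoundUp) == 0x0A, "per-lane ceil uses immediate 0x0A");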
...@@ -1068,6 +1068,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
}
void pinsrd(XMMRegister dst, Operand src, uint8_t offset);
void roundps(XMMRegister dst, XMMRegister src, RoundingMode mode);
void roundpd(XMMRegister dst, XMMRegister src, RoundingMode mode);
// AVX instructions
void vfmadd132sd(XMMRegister dst, XMMRegister src1, XMMRegister src2) {
vfmadd132sd(dst, src1, Operand(src2));
...@@ -1413,6 +1416,9 @@ class V8_EXPORT_PRIVATE Assembler : public AssemblerBase {
}
void vpinsrd(XMMRegister dst, XMMRegister src1, Operand src2, uint8_t offset);
void vroundps(XMMRegister dst, XMMRegister src, RoundingMode mode);
void vroundpd(XMMRegister dst, XMMRegister src, RoundingMode mode);
void vcvtdq2ps(XMMRegister dst, XMMRegister src) {
vcvtdq2ps(dst, Operand(src));
}
......
...@@ -597,6 +597,28 @@ void TurboAssembler::Cvttsd2ui(Register dst, Operand src, XMMRegister tmp) {
add(dst, Immediate(0x80000000));
}
void TurboAssembler::Roundps(XMMRegister dst, XMMRegister src,
RoundingMode mode) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vroundps(dst, src, mode);
} else {
CpuFeatureScope scope(this, SSE4_1);
roundps(dst, src, mode);
}
}
void TurboAssembler::Roundpd(XMMRegister dst, XMMRegister src,
RoundingMode mode) {
if (CpuFeatures::IsSupported(AVX)) {
CpuFeatureScope scope(this, AVX);
vroundpd(dst, src, mode);
} else {
CpuFeatureScope scope(this, SSE4_1);
roundpd(dst, src, mode);
}
}
void TurboAssembler::ShlPair(Register high, Register low, uint8_t shift) {
DCHECK_GE(63, shift);
if (shift >= 32) {
......
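These TurboAssembler wrappers let later pipeline stages emit a packed rounding without repeating the CPU-feature check: vroundps/vroundpd under AVX, roundps/roundpd under SSE4.1. A hedged usage sketch (the function and register names here are illustrative, not part of the change):

#include "src/codegen/ia32/macro-assembler-ia32.h"  // V8-internal header, assumed path

// Sketch only: lowers an f32x4.floor-style operation through the new wrapper.
void EmitF32x4Floor(v8::internal::TurboAssembler* tasm,
                    v8::internal::XMMRegister dst,
                    v8::internal::XMMRegister src) {
  tasm->Roundps(dst, src, v8::internal::kRoundDown);  // round each lane toward -infinity
}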
...@@ -536,6 +536,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
}
void Cvttsd2ui(Register dst, Operand src, XMMRegister tmp);
void Roundps(XMMRegister dst, XMMRegister src, RoundingMode mode);
void Roundpd(XMMRegister dst, XMMRegister src, RoundingMode mode);
void Push(Register src) { push(src); }
void Push(Operand src) { push(src); }
void Push(Immediate value);
......
...@@ -2032,6 +2032,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Maxpd(dst, dst, i.InputSimd128Register(1));
break;
}
case kIA32F64x2Round: {
RoundingMode const mode =
static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
__ Roundpd(i.OutputSimd128Register(), i.InputDoubleRegister(0), mode);
break;
}
case kIA32I64x2SplatI32Pair: {
XMMRegister dst = i.OutputSimd128Register();
__ Pinsrd(dst, i.InputRegister(0), 0);
...@@ -2442,6 +2448,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ Maxps(dst, dst, i.InputSimd128Register(1));
break;
}
case kIA32F32x4Round: {
RoundingMode const mode =
static_cast<RoundingMode>(MiscField::decode(instr->opcode()));
__ Roundps(i.OutputSimd128Register(), i.InputDoubleRegister(0), mode);
break;
}
case kIA32I32x4Splat: {
XMMRegister dst = i.OutputSimd128Register();
__ Movd(dst, i.InputOperand(0));
......
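In the two new cases above, the rounding mode travels inside the instruction word: the instruction selector ORs MiscField::encode(mode) into the opcode (see the RRO list changes below) and the code generator recovers it with MiscField::decode. A minimal sketch of that round trip; the field position and the placeholder opcode value are arbitrary choices for illustration, not V8's actual layout:

#include <cstdint>

// Illustration-only stand-in for the compiler's MiscField bit field.
struct MiscFieldSketch {
  static constexpr int kShift = 22;  // assumed position
  static constexpr uint32_t kMask = 0x3FFu << kShift;
  static constexpr uint32_t encode(uint32_t value) { return (value << kShift) & kMask; }
  static constexpr uint32_t decode(uint32_t code) { return (code & kMask) >> kShift; }
};

enum RoundingMode : uint32_t { kRoundToNearest, kRoundDown, kRoundUp, kRoundToZero };

constexpr uint32_t kIA32F32x4Round = 0x1A5;  // placeholder opcode value

// Selector side: pack the mode next to the arch opcode.
constexpr uint32_t op = kIA32F32x4Round | MiscFieldSketch::encode(kRoundUp);

// Generator side: recover it, as in MiscField::decode(instr->opcode()).
static_assert(static_cast<RoundingMode>(MiscFieldSketch::decode(op)) == kRoundUp,
              "rounding mode survives the encode/decode round trip");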
...@@ -136,6 +136,7 @@ namespace compiler {
V(IA32F64x2Le) \
V(IA32F64x2Pmin) \
V(IA32F64x2Pmax) \
V(IA32F64x2Round) \
V(IA32I64x2SplatI32Pair) \
V(IA32I64x2ReplaceLaneI32Pair) \
V(IA32I64x2Neg) \
...@@ -186,6 +187,7 @@ namespace compiler {
V(AVXF32x4Le) \
V(IA32F32x4Pmin) \
V(IA32F32x4Pmax) \
V(IA32F32x4Round) \
V(IA32I32x4Splat) \
V(IA32I32x4ExtractLane) \
V(SSEI32x4ReplaceLane) \
......
...@@ -117,6 +117,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32F64x2Le:
case kIA32F64x2Pmin:
case kIA32F64x2Pmax:
case kIA32F64x2Round:
case kIA32I64x2SplatI32Pair:
case kIA32I64x2ReplaceLaneI32Pair:
case kIA32I64x2Neg:
...@@ -167,6 +168,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kAVXF32x4Le:
case kIA32F32x4Pmin:
case kIA32F32x4Pmax:
case kIA32F32x4Round:
case kIA32I32x4Splat:
case kIA32I32x4ExtractLane:
case kSSEI32x4ReplaceLane:
......
...@@ -941,7 +941,16 @@ void InstructionSelector::VisitWord32Ror(Node* node) {
V(Float64RoundTruncate, kSSEFloat64Round | MiscField::encode(kRoundToZero)) \
V(Float32RoundTiesEven, \
kSSEFloat32Round | MiscField::encode(kRoundToNearest)) \
V(Float64RoundTiesEven, \
kSSEFloat64Round | MiscField::encode(kRoundToNearest)) \
V(F32x4Ceil, kIA32F32x4Round | MiscField::encode(kRoundUp)) \
V(F32x4Floor, kIA32F32x4Round | MiscField::encode(kRoundDown)) \
V(F32x4Trunc, kIA32F32x4Round | MiscField::encode(kRoundToZero)) \
V(F32x4NearestInt, kIA32F32x4Round | MiscField::encode(kRoundToNearest)) \
V(F64x2Ceil, kIA32F64x2Round | MiscField::encode(kRoundUp)) \
V(F64x2Floor, kIA32F64x2Round | MiscField::encode(kRoundDown)) \
V(F64x2Trunc, kIA32F64x2Round | MiscField::encode(kRoundToZero)) \
V(F64x2NearestInt, kIA32F64x2Round | MiscField::encode(kRoundToNearest))
#define RRO_FLOAT_OP_LIST(V) \
V(Float32Add, kAVXFloat32Add, kSSEFloat32Add) \
......
...@@ -2687,7 +2687,8 @@ void InstructionSelector::VisitF64x2Pmax(Node* node) { UNIMPLEMENTED(); }
// && !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_S390X &&
// !V8_TARGET_ARCH_MIPS && !V8_TARGET_ARCH_MIPS64
#if !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_S390X && \
    !V8_TARGET_ARCH_IA32
// TODO(v8:10553) Prototyping floating point rounding instructions.
void InstructionSelector::VisitF64x2Ceil(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF64x2Floor(Node* node) { UNIMPLEMENTED(); }
...@@ -2698,6 +2699,7 @@ void InstructionSelector::VisitF32x4Floor(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4Trunc(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitF32x4NearestInt(Node* node) { UNIMPLEMENTED(); }
#endif  // !V8_TARGET_ARCH_X64 && !V8_TARGET_ARCH_ARM64 && !V8_TARGET_ARCH_S390X
// && !V8_TARGET_ARCH_IA32
#if !V8_TARGET_ARCH_X64
// TODO(v8:10583) Prototype i32x4.dot_i16x8_s
......
...@@ -791,6 +791,18 @@ int DisassemblerIA32::AVXInstruction(byte* data) {
int mod, regop, rm, vvvv = vex_vreg();
get_modrm(*current, &mod, &regop, &rm);
switch (opcode) {
case 0x08:
AppendToBuffer("vroundps %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
AppendToBuffer(",%d", Imm8_U(current));
current++;
break;
case 0x09:
AppendToBuffer("vroundpd %s,", NameOfXMMRegister(regop));
current += PrintRightXMMOperand(current);
AppendToBuffer(",%d", Imm8_U(current));
current++;
break;
case 0x0E:
AppendToBuffer("vpblendw %s,%s,", NameOfXMMRegister(regop),
NameOfXMMRegister(vvvv));
...@@ -2126,7 +2138,23 @@ int DisassemblerIA32::InstructionDecode(v8::internal::Vector<char> out_buffer,
}
} else if (*data == 0x3A) {
data++;
if (*data == 0x08) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
int8_t imm8 = static_cast<int8_t>(data[1]);
AppendToBuffer("roundps %s,%s,%d", NameOfXMMRegister(regop),
NameOfXMMRegister(rm), static_cast<int>(imm8));
data += 2;
} else if (*data == 0x09) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
int8_t imm8 = static_cast<int8_t>(data[1]);
AppendToBuffer("roundpd %s,%s,%d", NameOfXMMRegister(regop),
NameOfXMMRegister(rm), static_cast<int>(imm8));
data += 2;
} else if (*data == 0x0A) {
data++;
int mod, regop, rm;
get_modrm(*data, &mod, &regop, &rm);
......
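For orientation, with these decoder branches the bytes emitted by the new assembler code disassemble along the lines of the following (register names and the immediate depend on the actual operands; the immediate includes the 0x8 precision-mask bit):

roundps xmm1,xmm2,10
vroundpd xmm0,xmm3,9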
...@@ -691,7 +691,8 @@ WASM_SIMD_TEST(F32x4RecipSqrtApprox) {
}
// TODO(v8:10553) Prototyping floating-point rounding instructions.
#if V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X || \
    V8_TARGET_ARCH_IA32
WASM_SIMD_TEST_NO_LOWERING(F32x4Ceil) {
FLAG_SCOPE(wasm_simd_post_mvp);
RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4Ceil, ceilf, true);
...@@ -712,7 +713,8 @@ WASM_SIMD_TEST_NO_LOWERING(F32x4NearestInt) {
RunF32x4UnOpTest(execution_tier, lower_simd, kExprF32x4NearestInt, nearbyintf,
true);
}
#endif  // V8_TARGET_ARCH_X64 || V8_TARGET_ARCH_ARM64 || V8_TARGET_ARCH_S390X ||
// V8_TARGET_ARCH_IA32
void RunF32x4BinOpTest(ExecutionTier execution_tier, LowerSimd lower_simd,
WasmOpcode opcode, FloatBinOp expected_op) {
......
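The tests above check each lane of the SIMD result against a scalar libm call (ceilf, floorf, truncf, nearbyintf, the last in the default round-to-nearest-even mode). A reference-only sketch of that per-lane expectation, not V8 code:

#include <array>
#include <cmath>
#include <cstddef>

// Reference-only: apply a scalar rounding function to every lane, which is
// what RunF32x4UnOpTest compares the f32x4 rounding opcodes against.
std::array<float, 4> F32x4RoundReference(const std::array<float, 4>& v,
                                         float (*round_fn)(float)) {
  std::array<float, 4> out{};
  for (std::size_t i = 0; i < 4; ++i) out[i] = round_fn(v[i]);
  return out;
}

// Example: F32x4RoundReference({1.5f, -2.5f, 0.1f, -0.1f}, ceilf)
// yields {2.0f, -2.0f, 1.0f, -0.0f}.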