Commit 3591de71 authored by Milad Fa, committed by Commit Bot

S390 [wasm-simd]: Implement saturating rounding multiply high

Also fix the simulator to avoid overwriting the dst register
during VectorPack.

Bug: v8:10971
Change-Id: I137e3cf4f73ddfc12c50099d519668858f95ecf3
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2625487
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Fa <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/master@{#72051}
parent bfcb3f00
......@@ -4221,6 +4221,33 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break;
}
#undef EXT_ADD_PAIRWISE
// Helper for kS390_I16x8Q15MulRSatS. Emits the code that computes one half of
// the rounding Q15 multiply into |accumulator|:
//   - |unpack| is the unpack instruction to use (vupl or vuph); it is applied
//     to both inputs, so it selects which half of the input lanes this
//     invocation processes.
//   - Relies on |i|, |tempFPReg1| and |tempFPReg2| being in scope at the
//     expansion site. |tempFPReg2| is added to the product as-is, so it must
//     already hold the rounding constant.
//   - Clobbers |tempFPReg1|: it first holds the unpacked input 0 and is then
//     overwritten with the shift amount 15.
// Emitted sequence: accumulator = (in0 * in1 + tempFPReg2) >> 15.
// NOTE(review): the trailing Condition(1)/Condition(2) operands are presumably
// the element-size selectors (halfword/word), and vesrav is presumably an
// arithmetic right shift - confirm against the S390 assembler.
#define Q15_MUL_ROUND(accumulator, unpack)                                     \
  __ unpack(tempFPReg1, i.InputSimd128Register(0), Condition(0), Condition(0), \
            Condition(1));                                                     \
  __ unpack(accumulator, i.InputSimd128Register(1), Condition(0),              \
            Condition(0), Condition(1));                                       \
  __ vml(accumulator, tempFPReg1, accumulator, Condition(0), Condition(0),     \
         Condition(2));                                                        \
  __ va(accumulator, accumulator, tempFPReg2, Condition(0), Condition(0),      \
        Condition(2));                                                         \
  __ vrepi(tempFPReg1, Operand(15), Condition(2));                             \
  __ vesrav(accumulator, accumulator, tempFPReg1, Condition(0), Condition(0),  \
            Condition(2));
case kS390_I16x8Q15MulRSatS: {
  Simd128Register dst = i.OutputSimd128Register();
  Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0));
  Simd128Register tempFPReg2 = i.ToSimd128Register(instr->TempAt(1));
  // Rounding constant 0x4000 (half of 1 << 15), replicated into tempFPReg2.
  // It is added to each product before the shift right by 15.
  __ vrepi(tempFPReg2, Operand(0x4000), Condition(2));
  // Process the two halves of the inputs separately: the vupl half goes to
  // the scratch register, the vuph half goes to dst. dst is only written by
  // the second expansion, after the first one has already read both inputs.
  Q15_MUL_ROUND(kScratchDoubleReg, vupl)
  Q15_MUL_ROUND(dst, vuph)
  // Pack both intermediate results back into dst with vpks (presumably a
  // saturating pack - confirm). The source operand order depends on the
  // target endianness.
#ifdef V8_TARGET_BIG_ENDIAN
  __ vpks(dst, dst, kScratchDoubleReg, Condition(0), Condition(2));
#else
  __ vpks(dst, kScratchDoubleReg, dst, Condition(0), Condition(2));
#endif
  break;
}
#undef Q15_MUL_ROUND
case kS390_StoreCompressTagged: {
CHECK(!instr->HasOutput());
size_t index = 0;
......
......@@ -337,6 +337,7 @@ namespace compiler {
V(S390_I16x8ExtMulHighI8x16U) \
V(S390_I16x8ExtAddPairwiseI8x16S) \
V(S390_I16x8ExtAddPairwiseI8x16U) \
V(S390_I16x8Q15MulRSatS) \
V(S390_I8x16Splat) \
V(S390_I8x16ExtractLaneU) \
V(S390_I8x16ExtractLaneS) \
......
......@@ -283,6 +283,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kS390_I16x8ExtMulHighI8x16U:
case kS390_I16x8ExtAddPairwiseI8x16S:
case kS390_I16x8ExtAddPairwiseI8x16U:
case kS390_I16x8Q15MulRSatS:
case kS390_I8x16Splat:
case kS390_I8x16ExtractLaneU:
case kS390_I8x16ExtractLaneS:
......
......@@ -2450,6 +2450,7 @@ void InstructionSelector::VisitWord64AtomicStore(Node* node) {
V(I64x2ExtMulHighI32x4S) \
V(I64x2ExtMulLowI32x4U) \
V(I64x2ExtMulHighI32x4U) \
V(I16x8Q15MulRSatS) \
V(I32x4Add) \
V(I32x4AddHoriz) \
V(I32x4Sub) \
......
......@@ -3346,6 +3346,8 @@ void VectorPack(Simulator* sim, int dst, int src1, int src2, bool saturate,
int src = src1;
int count = 0;
S value = 0;
// Setup a temp array to avoid overwriting dst mid loop.
D temps[kSimd128Size / sizeof(D)] = {0};
for (size_t i = 0; i < kSimd128Size / sizeof(D); i++, count++) {
if (count == kSimd128Size / sizeof(S)) {
src = src2;
......@@ -3358,8 +3360,9 @@ void VectorPack(Simulator* sim, int dst, int src1, int src2, bool saturate,
else if (value < min)
value = min;
}
sim->set_simd_register_by_lane<D>(dst, i, value);
temps[i] = value;
}
FOR_EACH_LANE(i, D) { sim->set_simd_register_by_lane<D>(dst, i, temps[i]); }
}
#define CASE(i, S, D, SAT, MAX, MIN) \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment