Commit b4a62038 authored by Milad Fa, committed by V8 LUCI CQ

S390 [liftoff]: Implement simd integer Q-format rounding mul

Change-Id: I0d6258cae0e75b6bd4916cb4c559161f7eac2170
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3465739
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Farazmand <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/main@{#79128}
parent 148d9853
...@@ -6015,6 +6015,29 @@ void TurboAssembler::I32x4DotI16x8S(Simd128Register dst, Simd128Register src1, ...@@ -6015,6 +6015,29 @@ void TurboAssembler::I32x4DotI16x8S(Simd128Register dst, Simd128Register src1,
va(dst, scratch, dst, Condition(0), Condition(0), Condition(2)); va(dst, scratch, dst, Condition(0), Condition(0), Condition(2));
} }
// Emits one half of the Q15 rounding multiply. |widen| (vupl or vuph) unpacks
// halfword elements of |lhs| and |rhs| into word elements; the words are then
// multiplied, |rounding| is added, and every word is shifted right
// arithmetically by 15. The outcome is left in |result|. |tmp| is clobbered:
// it first holds the widened |lhs| and afterwards the shift amount.
// |lhs| is widened before |result| is written, and that order must be kept.
#define Q15_MUL_ROUND(result, lhs, rhs, rounding, tmp, widen)                \
  do {                                                                       \
    widen(tmp, lhs, Condition(0), Condition(0), Condition(1));               \
    widen(result, rhs, Condition(0), Condition(0), Condition(1));            \
    vml(result, tmp, result, Condition(0), Condition(0), Condition(2));      \
    va(result, result, rounding, Condition(0), Condition(0), Condition(2));  \
    vrepi(tmp, Operand(15), Condition(2));                                   \
    vesrav(result, result, tmp, Condition(0), Condition(0), Condition(2));   \
  } while (false)

// i16x8.q15mulr_sat_s: for every halfword lane computes
// (src1 * src2 + 0x4000) >> 15 on word-sized intermediates and packs the two
// word vectors back into |dst| with vpks. All three scratch registers are
// overwritten. src1, src2 and the scratch registers must be pairwise distinct.
void TurboAssembler::I16x8Q15MulRSatS(Simd128Register dst, Simd128Register src1,
                                      Simd128Register src2,
                                      Simd128Register scratch1,
                                      Simd128Register scratch2,
                                      Simd128Register scratch3) {
  DCHECK(!AreAliased(src1, src2, scratch1, scratch2, scratch3));

  // Give each scratch register a name describing its role below.
  const Simd128Register rounding = scratch1;
  const Simd128Register low_part = scratch2;
  const Simd128Register tmp = scratch3;

  // Rounding term (0x4000 == 1 << 14) replicated into every word element.
  vrepi(rounding, Operand(0x4000), Condition(2));

  // The vupl product goes to a scratch register and the vuph product to dst.
  Q15_MUL_ROUND(low_part, src1, src2, rounding, tmp, vupl);
  Q15_MUL_ROUND(dst, src1, src2, rounding, tmp, vuph);

  // Narrow both word vectors back to halfwords; dst supplies the first half
  // of the packed result and low_part the second.
  vpks(dst, dst, low_part, Condition(0), Condition(2));
}
#undef Q15_MUL_ROUND
// Vector LE Load and Transform instructions. // Vector LE Load and Transform instructions.
#ifdef V8_TARGET_BIG_ENDIAN #ifdef V8_TARGET_BIG_ENDIAN
#define IS_BIG_ENDIAN true #define IS_BIG_ENDIAN true
......
...@@ -1155,6 +1155,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase { ...@@ -1155,6 +1155,9 @@ class V8_EXPORT_PRIVATE TurboAssembler : public TurboAssemblerBase {
Simd128Register scratch3); Simd128Register scratch3);
void I32x4DotI16x8S(Simd128Register dst, Simd128Register src1, void I32x4DotI16x8S(Simd128Register dst, Simd128Register src1,
Simd128Register src2, Simd128Register scratch); Simd128Register src2, Simd128Register scratch);
// Q15 rounding multiply of the halfword lanes of src1 and src2 into dst:
// each lane is (src1 * src2 + 0x4000) >> 15, narrowed back to a halfword with
// a saturating pack. scratch1..scratch3 are clobbered. src1, src2 and the
// three scratch registers must all be different registers (DCHECKed).
void I16x8Q15MulRSatS(Simd128Register dst, Simd128Register src1,
Simd128Register src2, Simd128Register scratch1,
Simd128Register scratch2, Simd128Register scratch3);
void S128Select(Simd128Register dst, Simd128Register src1, void S128Select(Simd128Register dst, Simd128Register src1,
Simd128Register src2, Simd128Register mask); Simd128Register src2, Simd128Register mask);
......
...@@ -2975,32 +2975,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2975,32 +2975,13 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
i.InputSimd128Register(1), kScratchDoubleReg); i.InputSimd128Register(1), kScratchDoubleReg);
break; break;
} }
#define Q15_MUL_ROAUND(accumulator, unpack) \
__ unpack(tempFPReg1, src0, Condition(0), Condition(0), Condition(1)); \
__ unpack(accumulator, src1, Condition(0), Condition(0), Condition(1)); \
__ vml(accumulator, tempFPReg1, accumulator, Condition(0), Condition(0), \
Condition(2)); \
__ va(accumulator, accumulator, tempFPReg2, Condition(0), Condition(0), \
Condition(2)); \
__ vrepi(tempFPReg1, Operand(15), Condition(2)); \
__ vesrav(accumulator, accumulator, tempFPReg1, Condition(0), Condition(0), \
Condition(2));
case kS390_I16x8Q15MulRSatS: { case kS390_I16x8Q15MulRSatS: {
Simd128Register dst = i.OutputSimd128Register(); __ I16x8Q15MulRSatS(i.OutputSimd128Register(), i.InputSimd128Register(0),
Simd128Register src0 = i.InputSimd128Register(0); i.InputSimd128Register(1), kScratchDoubleReg,
Simd128Register src1 = i.InputSimd128Register(1); i.ToSimd128Register(instr->TempAt(0)),
Simd128Register tempFPReg1 = i.ToSimd128Register(instr->TempAt(0)); i.ToSimd128Register(instr->TempAt(1)));
Simd128Register tempFPReg2 = i.ToSimd128Register(instr->TempAt(1)); break;
DCHECK_NE(src1, tempFPReg1); }
DCHECK_NE(src0, tempFPReg2);
DCHECK_NE(src1, tempFPReg2);
__ vrepi(tempFPReg2, Operand(0x4000), Condition(2));
Q15_MUL_ROAUND(kScratchDoubleReg, vupl)
Q15_MUL_ROAUND(dst, vuph)
__ vpks(dst, dst, kScratchDoubleReg, Condition(0), Condition(2));
break;
}
#undef Q15_MUL_ROAUND
case kS390_I8x16Popcnt: { case kS390_I8x16Popcnt: {
__ I8x16Popcnt(i.OutputSimd128Register(), i.InputSimd128Register(0)); __ I8x16Popcnt(i.OutputSimd128Register(), i.InputSimd128Register(0));
break; break;
......
...@@ -2639,7 +2639,16 @@ void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst, ...@@ -2639,7 +2639,16 @@ void LiftoffAssembler::emit_i16x8_bitmask(LiftoffRegister dst,
void LiftoffAssembler::emit_i16x8_q15mulr_sat_s(LiftoffRegister dst, void LiftoffAssembler::emit_i16x8_q15mulr_sat_s(LiftoffRegister dst,
LiftoffRegister src1, LiftoffRegister src1,
LiftoffRegister src2) { LiftoffRegister src2) {
bailout(kSimd, "i16x8_q15mulr_sat_s"); Simd128Register s1 = src1.fp();
Simd128Register s2 = src2.fp();
Simd128Register dest = dst.fp();
// Make sure temp registers are unique.
Simd128Register temp1 =
GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(dest, s1, s2)).fp();
Simd128Register temp2 =
GetUnusedRegister(kFpReg, LiftoffRegList::ForRegs(dest, s1, s2, temp1))
.fp();
I16x8Q15MulRSatS(dest, s1, s2, kScratchDoubleReg, temp1, temp2);
} }
void LiftoffAssembler::emit_i8x16_shuffle(LiftoffRegister dst, void LiftoffAssembler::emit_i8x16_shuffle(LiftoffRegister dst,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment