Commit f29078a8 authored by Milad Fa, committed by Commit Bot

s390: [wasm-simd] Fix Vector pack and unpack behaviour.

Due to the lane numbering difference between Intel and IBM machines,
we need to switch the input registers when doing a vector pack.

Change-Id: I40e1fdae308e5dcd67aafab2abf099d4be0bb1a2
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2450832
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Fa <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/master@{#70327}
parent 9edcb196
......@@ -4068,12 +4068,22 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
}
#undef VECTOR_UNPACK
case kS390_I16x8SConvertI32x4:
#ifdef V8_TARGET_BIG_ENDIAN
__ vpks(i.OutputSimd128Register(), i.InputSimd128Register(1),
i.InputSimd128Register(0), Condition(0), Condition(2));
#else
__ vpks(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1), Condition(0), Condition(2));
#endif
break;
case kS390_I8x16SConvertI16x8:
#ifdef V8_TARGET_BIG_ENDIAN
__ vpks(i.OutputSimd128Register(), i.InputSimd128Register(1),
i.InputSimd128Register(0), Condition(0), Condition(1));
#else
__ vpks(i.OutputSimd128Register(), i.InputSimd128Register(0),
i.InputSimd128Register(1), Condition(0), Condition(1));
#endif
break;
#define VECTOR_PACK_UNSIGNED(mode) \
Simd128Register tempFPReg = i.ToSimd128Register(instr->TempAt(0)); \
......@@ -4082,17 +4092,29 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
__ vmx(tempFPReg, i.InputSimd128Register(0), kScratchDoubleReg, \
Condition(0), Condition(0), Condition(mode)); \
__ vmx(kScratchDoubleReg, i.InputSimd128Register(1), kScratchDoubleReg, \
Condition(0), Condition(0), Condition(mode)); \
__ vpkls(i.OutputSimd128Register(), tempFPReg, kScratchDoubleReg, \
Condition(0), Condition(mode));
Condition(0), Condition(0), Condition(mode));
case kS390_I16x8UConvertI32x4: {
// treat inputs as signed, and saturate to unsigned (negative to 0)
VECTOR_PACK_UNSIGNED(2)
#ifdef V8_TARGET_BIG_ENDIAN
__ vpkls(i.OutputSimd128Register(), kScratchDoubleReg, tempFPReg,
Condition(0), Condition(2));
#else
__ vpkls(i.OutputSimd128Register(), tempFPReg, kScratchDoubleReg,
Condition(0), Condition(2));
#endif
break;
}
case kS390_I8x16UConvertI16x8: {
// treat inputs as signed, and saturate to unsigned (negative to 0)
VECTOR_PACK_UNSIGNED(1)
#ifdef V8_TARGET_BIG_ENDIAN
__ vpkls(i.OutputSimd128Register(), kScratchDoubleReg, tempFPReg,
Condition(0), Condition(1));
#else
__ vpkls(i.OutputSimd128Register(), tempFPReg, kScratchDoubleReg,
Condition(0), Condition(1));
#endif
break;
}
#undef VECTOR_PACK_UNSIGNED
......@@ -4115,20 +4137,35 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Condition(mode + 1));
case kS390_I16x8AddSaturateS: {
BINOP_EXTRACT(va, vuph, vupl, 1)
#ifdef V8_TARGET_BIG_ENDIAN
__ vpks(i.OutputSimd128Register(), kScratchDoubleReg, tempFPReg1,
Condition(0), Condition(2));
#else
__ vpks(i.OutputSimd128Register(), tempFPReg1, kScratchDoubleReg,
Condition(0), Condition(2));
#endif
break;
}
case kS390_I16x8SubSaturateS: {
BINOP_EXTRACT(vs, vuph, vupl, 1)
#ifdef V8_TARGET_BIG_ENDIAN
__ vpks(i.OutputSimd128Register(), kScratchDoubleReg, tempFPReg1,
Condition(0), Condition(2));
#else
__ vpks(i.OutputSimd128Register(), tempFPReg1, kScratchDoubleReg,
Condition(0), Condition(2));
#endif
break;
}
case kS390_I16x8AddSaturateU: {
BINOP_EXTRACT(va, vuplh, vupll, 1)
#ifdef V8_TARGET_BIG_ENDIAN
__ vpkls(i.OutputSimd128Register(), kScratchDoubleReg, tempFPReg1,
Condition(0), Condition(2));
#else
__ vpkls(i.OutputSimd128Register(), tempFPReg1, kScratchDoubleReg,
Condition(0), Condition(2));
#endif
break;
}
case kS390_I16x8SubSaturateU: {
......@@ -4140,26 +4177,46 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Condition(0), Condition(2));
__ vmx(tempFPReg1, tempFPReg2, tempFPReg1, Condition(0), Condition(0),
Condition(2));
#ifdef V8_TARGET_BIG_ENDIAN
__ vpkls(i.OutputSimd128Register(), kScratchDoubleReg, tempFPReg1,
Condition(0), Condition(2));
#else
__ vpkls(i.OutputSimd128Register(), tempFPReg1, kScratchDoubleReg,
Condition(0), Condition(2));
#endif
break;
}
case kS390_I8x16AddSaturateS: {
BINOP_EXTRACT(va, vuph, vupl, 0)
#ifdef V8_TARGET_BIG_ENDIAN
__ vpks(i.OutputSimd128Register(), kScratchDoubleReg, tempFPReg1,
Condition(0), Condition(1));
#else
__ vpks(i.OutputSimd128Register(), tempFPReg1, kScratchDoubleReg,
Condition(0), Condition(1));
#endif
break;
}
case kS390_I8x16SubSaturateS: {
BINOP_EXTRACT(vs, vuph, vupl, 0)
#ifdef V8_TARGET_BIG_ENDIAN
__ vpks(i.OutputSimd128Register(), kScratchDoubleReg, tempFPReg1,
Condition(0), Condition(1));
#else
__ vpks(i.OutputSimd128Register(), tempFPReg1, kScratchDoubleReg,
Condition(0), Condition(1));
#endif
break;
}
case kS390_I8x16AddSaturateU: {
BINOP_EXTRACT(va, vuplh, vupll, 0)
#ifdef V8_TARGET_BIG_ENDIAN
__ vpkls(i.OutputSimd128Register(), kScratchDoubleReg, tempFPReg1,
Condition(0), Condition(1));
#else
__ vpkls(i.OutputSimd128Register(), tempFPReg1, kScratchDoubleReg,
Condition(0), Condition(1));
#endif
break;
}
case kS390_I8x16SubSaturateU: {
......@@ -4171,8 +4228,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Condition(0), Condition(1));
__ vmx(tempFPReg1, tempFPReg2, tempFPReg1, Condition(0), Condition(0),
Condition(1));
#ifdef V8_TARGET_BIG_ENDIAN
__ vpkls(i.OutputSimd128Register(), kScratchDoubleReg, tempFPReg1,
Condition(0), Condition(1));
#else
__ vpkls(i.OutputSimd128Register(), tempFPReg1, kScratchDoubleReg,
Condition(0), Condition(1));
#endif
break;
}
#undef BINOP_EXTRACT
......
......@@ -3405,9 +3405,10 @@ EVALUATE(VPKLS) {
// Reads elements of type S from |src| and stores each one, converted to type
// D, into consecutive D-sized slots of |dst|.
// NOTE(review): this text appears to come from a diff view rendered without
// +/- markers, so the two loop headers and the two `value = ...` reads below
// are presumably the before/after forms of the same statements — confirm
// against the real file before relying on it.
template <class S, class D>
void VectorUnpackHigh(void* dst, void* src) {
// Number of D-sized items in kSimd128Size (presumably the SIMD register size
// in bytes — TODO confirm).
constexpr size_t kItemCount = kSimd128Size / sizeof(D);
D value = 0;
for (size_t i = 0; i < kSimd128Size / sizeof(D); i++) {
// Reads S element i, counted from the start of src.
value = *(reinterpret_cast<S*>(src) + i);
for (size_t i = 0; i < kItemCount; i++) {
// Reads S element (i + kItemCount), i.e. skips the first kItemCount S
// elements of src.
value = *(reinterpret_cast<S*>(src) + i + kItemCount);
// The assignment to the D-typed `value` did the S-to-D conversion; copy the
// result into slot i of dst.
memcpy(reinterpret_cast<D*>(dst) + i, &value, sizeof(D));
}
}
......@@ -3462,11 +3463,14 @@ EVALUATE(VUPLH) {
// Reads elements of type S from |src| and stores each one, converted to type
// D, into consecutive D-sized slots of |dst|.
// NOTE(review): this text appears to come from a diff view rendered without
// +/- markers, so the `value`/`count` loop and the `temps`/`kItemCount` loops
// below are presumably the before/after forms of the same function body —
// confirm against the real file before relying on it.
template <class S, class D>
void VectorUnpackLow(void* dst, void* src) {
D value = 0;
size_t count = kSimd128Size / sizeof(D);
for (size_t i = 0; i < count; i++) {
// Reads S element (i + count), i.e. skips the first `count` S elements of src.
value = *(reinterpret_cast<S*>(src) + i + count);
memcpy(reinterpret_cast<D*>(dst) + i, &value, sizeof(D));
// Number of D-sized items in kSimd128Size (presumably the SIMD register size
// in bytes — TODO confirm).
constexpr size_t kItemCount = kSimd128Size / sizeof(D);
// Zero-initialized staging buffer for the converted values.
D temps[kItemCount] = {0};
// Convert every element into temps before anything is stored to dst, so the
// source data is not overwritten mid-read if src and dst are the same
// register.
for (size_t i = 0; i < kItemCount; i++) {
temps[i] = static_cast<D>(*(reinterpret_cast<S*>(src) + i));
}
// All reads from src are done; copy the staged values out to dst.
for (size_t i = 0; i < kItemCount; i++) {
memcpy(reinterpret_cast<D*>(dst) + i, &temps[i], sizeof(D));
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment