Commit feee80b0 authored by Milad Fa, committed by V8 LUCI CQ

PPC/S390 [simd]: Optimize I8x16Shuffle on codegen

Change-Id: I264a06924d8ffcb8cce1febe9bde18280edeaea0
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2896273
Reviewed-by: Junliang Yan <junyan@redhat.com>
Commit-Queue: Milad Fa <mfarazma@redhat.com>
Cr-Commit-Position: refs/heads/master@{#74603}
parent b4942eb3
...@@ -3108,15 +3108,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3108,15 +3108,14 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
break; break;
} }
case kPPC_I8x16Shuffle: { case kPPC_I8x16Shuffle: {
constexpr int lane_width_in_bytes = 8;
Simd128Register dst = i.OutputSimd128Register(), Simd128Register dst = i.OutputSimd128Register(),
src0 = i.InputSimd128Register(0), src0 = i.InputSimd128Register(0),
src1 = i.InputSimd128Register(1); src1 = i.InputSimd128Register(1);
__ mov(r0, Operand(make_uint64(i.InputUint32(3), i.InputUint32(2)))); uint64_t low = make_uint64(i.InputUint32(3), i.InputUint32(2));
__ mov(ip, Operand(make_uint64(i.InputUint32(5), i.InputUint32(4)))); uint64_t high = make_uint64(i.InputUint32(5), i.InputUint32(4));
__ mtvsrd(kScratchSimd128Reg, r0); __ mov(r0, Operand(low));
__ mtvsrd(dst, ip); __ mov(ip, Operand(high));
__ vinsertd(dst, kScratchSimd128Reg, Operand(1 * lane_width_in_bytes)); __ mtvsrdd(dst, ip, r0);
__ vperm(dst, src0, src1, dst); __ vperm(dst, src0, src1, dst);
break; break;
} }
......
...@@ -3512,15 +3512,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3512,15 +3512,12 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Simd128Register dst = i.OutputSimd128Register(), Simd128Register dst = i.OutputSimd128Register(),
src0 = i.InputSimd128Register(0), src0 = i.InputSimd128Register(0),
src1 = i.InputSimd128Register(1); src1 = i.InputSimd128Register(1);
int32_t k8x16_indices[] = {i.InputInt32(2), i.InputInt32(3), uint64_t low = make_uint64(i.InputUint32(3), i.InputUint32(2));
i.InputInt32(4), i.InputInt32(5)}; uint64_t high = make_uint64(i.InputUint32(5), i.InputUint32(4));
// create 2 * 8 byte inputs indicating new indices __ mov(r0, Operand(low));
for (int i = 0, j = 0; i < 2; i++, j = +2) { __ mov(ip, Operand(high));
__ mov(i < 1 ? ip : r0, Operand(k8x16_indices[j])); __ vlvgp(dst, ip, r0);
__ iihf(i < 1 ? ip : r0, Operand(k8x16_indices[j + 1])); __ vperm(dst, src0, src1, dst, Condition(0), Condition(0));
}
__ vlvgp(kScratchDoubleReg, r0, ip);
__ vperm(dst, src0, src1, kScratchDoubleReg, Condition(0), Condition(0));
break; break;
} }
case kS390_I8x16Swizzle: { case kS390_I8x16Swizzle: {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment