Commit 93210c88 authored by Zhao Jiazhong, committed by Commit Bot

[mips][wasm-simd] Bitmask instructions

Port 3406cba8
https://crrev.com/c/2099451

Change-Id: I7217e333f468aa24f25231d24e31c321a2c209b5
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/2224595
Reviewed-by: Clemens Backes <clemensb@chromium.org>
Reviewed-by: Deepti Gandluri <gdeepti@chromium.org>
Commit-Queue: Zhao Jiazhong <zhaojiazhong-hf@loongson.cn>
Cr-Commit-Position: refs/heads/master@{#68162}
parent 6feae531
...@@ -2442,6 +2442,21 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2442,6 +2442,21 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
kSimd128RegZero); kSimd128RegZero);
break; break;
} }
case kMipsI32x4BitMask: {
// Gathers the top bit of each of the four 32-bit lanes of the input vector
// into bits 0..3 of the output GPR (lane k -> bit k). The step comments
// below assume standard MSA semantics for srli/shf/copy_u -- confirm
// against the MSA manual.
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
Register dst = i.OutputRegister();
Simd128Register src = i.InputSimd128Register(0);
// NOTE(review): kSimd128RegZero is used as a scratch here and is left
// non-zero afterwards; presumably every consumer re-zeroes it before
// relying on its value -- confirm.
Simd128Register scratch0 = kSimd128RegZero;
Simd128Register scratch1 = kSimd128ScratchReg;
// Reduce every word to its top bit (each word becomes 0 or 1).
__ srli_w(scratch0, src, 31);
// Per doubleword: move the upper word's bit (bit 32) down to bit 1 and
// merge, so byte 0 of each doubleword holds a 2-bit partial mask.
__ srli_d(scratch1, scratch0, 31);
__ or_v(scratch0, scratch0, scratch1);
// 0x0E presumably selects words {2, 3, 0, 0}, i.e. copies the upper
// doubleword into the lower one -- confirm against the shf.w encoding.
__ shf_w(scratch1, scratch0, 0x0E);
// Position the bits of lanes 2..3 at bits 2..3 and merge with lanes 0..1.
__ slli_d(scratch1, scratch1, 2);
__ or_v(scratch0, scratch0, scratch1);
// Byte 0 now carries the complete 4-bit mask; extract it unsigned.
__ copy_u_b(dst, scratch0, 0);
break;
}
case kMipsI16x8Splat: { case kMipsI16x8Splat: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ fill_h(i.OutputSimd128Register(), i.InputRegister(0)); __ fill_h(i.OutputSimd128Register(), i.InputRegister(0));
...@@ -2609,6 +2624,23 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2609,6 +2624,23 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
kSimd128RegZero); kSimd128RegZero);
break; break;
} }
case kMipsI16x8BitMask: {
// Gathers the top bit of each of the eight 16-bit lanes of the input vector
// into bits 0..7 of the output GPR (lane k -> bit k). The step comments
// below assume standard MSA semantics for srli/shf/copy_u -- confirm
// against the MSA manual.
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
Register dst = i.OutputRegister();
Simd128Register src = i.InputSimd128Register(0);
// NOTE(review): kSimd128RegZero is used as a scratch here and is left
// non-zero afterwards; presumably every consumer re-zeroes it before
// relying on its value -- confirm.
Simd128Register scratch0 = kSimd128RegZero;
Simd128Register scratch1 = kSimd128ScratchReg;
// Reduce every halfword to its top bit (each halfword becomes 0 or 1).
__ srli_h(scratch0, src, 15);
// Per word: move the upper halfword's bit (bit 16) down to bit 1 and merge,
// giving a 2-bit partial mask in the low bits of each word.
__ srli_w(scratch1, scratch0, 15);
__ or_v(scratch0, scratch0, scratch1);
// Per doubleword: move the upper word's 2-bit mask (bits 32..33) down to
// bits 2..3 and merge, giving a 4-bit partial mask per doubleword.
__ srli_d(scratch1, scratch0, 30);
__ or_v(scratch0, scratch0, scratch1);
// 0x0E presumably selects words {2, 3, 0, 0}, i.e. copies the upper
// doubleword into the lower one -- confirm against the shf.w encoding.
__ shf_w(scratch1, scratch0, 0x0E);
// Position the bits of lanes 4..7 at bits 4..7 and merge with lanes 0..3.
__ slli_d(scratch1, scratch1, 4);
__ or_v(scratch0, scratch0, scratch1);
// Byte 0 now carries the complete 8-bit mask; extract it unsigned.
__ copy_u_b(dst, scratch0, 0);
break;
}
case kMipsI8x16Splat: { case kMipsI8x16Splat: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ fill_b(i.OutputSimd128Register(), i.InputRegister(0)); __ fill_b(i.OutputSimd128Register(), i.InputRegister(0));
...@@ -2776,6 +2808,24 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2776,6 +2808,24 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
kSimd128RegZero); kSimd128RegZero);
break; break;
} }
case kMipsI8x16BitMask: {
// Gathers the top bit of each of the sixteen 8-bit lanes of the input
// vector into bits 0..15 of the output GPR (lane k -> bit k). The step
// comments below assume standard MSA semantics for srli/shf/ilvev/copy_u
// -- confirm against the MSA manual.
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
Register dst = i.OutputRegister();
Simd128Register src = i.InputSimd128Register(0);
// NOTE(review): kSimd128RegZero is used as a scratch here and is left
// non-zero afterwards; presumably every consumer re-zeroes it before
// relying on its value -- confirm.
Simd128Register scratch0 = kSimd128RegZero;
Simd128Register scratch1 = kSimd128ScratchReg;
// Reduce every byte to its top bit (each byte becomes 0 or 1).
__ srli_b(scratch0, src, 7);
// Per halfword: move the upper byte's bit (bit 8) down to bit 1 and merge.
__ srli_h(scratch1, scratch0, 7);
__ or_v(scratch0, scratch0, scratch1);
// Per word: move the upper halfword's 2-bit mask (bits 16..17) down to
// bits 2..3 and merge.
__ srli_w(scratch1, scratch0, 14);
__ or_v(scratch0, scratch0, scratch1);
// Per doubleword: move the upper word's 4-bit mask (bits 32..35) down to
// bits 4..7 and merge, so byte 0 of each doubleword holds an 8-bit mask.
__ srli_d(scratch1, scratch0, 28);
__ or_v(scratch0, scratch0, scratch1);
// 0x0E presumably selects words {2, 3, 0, 0}, i.e. copies the upper
// doubleword into the lower one -- confirm against the shf.w encoding.
__ shf_w(scratch1, scratch0, 0x0E);
// Interleave even bytes so that byte 0 keeps the mask of lanes 0..7 and
// byte 1 receives the mask of lanes 8..15 (presumed ilvev.b operand order:
// odd destination bytes come from scratch1 -- confirm).
__ ilvev_b(scratch0, scratch1, scratch0);
// Halfword 0 now carries the complete 16-bit mask; extract it unsigned.
__ copy_u_h(dst, scratch0, 0);
break;
}
case kMipsS128And: { case kMipsS128And: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ and_v(i.OutputSimd128Register(), i.InputSimd128Register(0), __ and_v(i.OutputSimd128Register(), i.InputSimd128Register(0),
......
...@@ -204,6 +204,7 @@ namespace compiler { ...@@ -204,6 +204,7 @@ namespace compiler {
V(MipsI32x4GtU) \ V(MipsI32x4GtU) \
V(MipsI32x4GeU) \ V(MipsI32x4GeU) \
V(MipsI32x4Abs) \ V(MipsI32x4Abs) \
V(MipsI32x4BitMask) \
V(MipsI16x8Splat) \ V(MipsI16x8Splat) \
V(MipsI16x8ExtractLaneU) \ V(MipsI16x8ExtractLaneU) \
V(MipsI16x8ExtractLaneS) \ V(MipsI16x8ExtractLaneS) \
...@@ -232,6 +233,7 @@ namespace compiler { ...@@ -232,6 +233,7 @@ namespace compiler {
V(MipsI16x8GeU) \ V(MipsI16x8GeU) \
V(MipsI16x8RoundingAverageU) \ V(MipsI16x8RoundingAverageU) \
V(MipsI16x8Abs) \ V(MipsI16x8Abs) \
V(MipsI16x8BitMask) \
V(MipsI8x16Splat) \ V(MipsI8x16Splat) \
V(MipsI8x16ExtractLaneU) \ V(MipsI8x16ExtractLaneU) \
V(MipsI8x16ExtractLaneS) \ V(MipsI8x16ExtractLaneS) \
...@@ -259,6 +261,7 @@ namespace compiler { ...@@ -259,6 +261,7 @@ namespace compiler {
V(MipsI8x16GeU) \ V(MipsI8x16GeU) \
V(MipsI8x16RoundingAverageU) \ V(MipsI8x16RoundingAverageU) \
V(MipsI8x16Abs) \ V(MipsI8x16Abs) \
V(MipsI8x16BitMask) \
V(MipsS128And) \ V(MipsS128And) \
V(MipsS128Or) \ V(MipsS128Or) \
V(MipsS128Xor) \ V(MipsS128Xor) \
......
...@@ -138,6 +138,7 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -138,6 +138,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMipsI16x8UConvertI8x16High: case kMipsI16x8UConvertI8x16High:
case kMipsI16x8UConvertI8x16Low: case kMipsI16x8UConvertI8x16Low:
case kMipsI16x8Abs: case kMipsI16x8Abs:
case kMipsI16x8BitMask:
case kMipsI32x4Add: case kMipsI32x4Add:
case kMipsI32x4AddHoriz: case kMipsI32x4AddHoriz:
case kMipsI32x4Eq: case kMipsI32x4Eq:
...@@ -166,6 +167,7 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -166,6 +167,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMipsI32x4UConvertI16x8High: case kMipsI32x4UConvertI16x8High:
case kMipsI32x4UConvertI16x8Low: case kMipsI32x4UConvertI16x8Low:
case kMipsI32x4Abs: case kMipsI32x4Abs:
case kMipsI32x4BitMask:
case kMipsI8x16Add: case kMipsI8x16Add:
case kMipsI8x16AddSaturateS: case kMipsI8x16AddSaturateS:
case kMipsI8x16AddSaturateU: case kMipsI8x16AddSaturateU:
...@@ -195,6 +197,7 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -195,6 +197,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMipsI8x16SubSaturateU: case kMipsI8x16SubSaturateU:
case kMipsI8x16UConvertI16x8: case kMipsI8x16UConvertI16x8:
case kMipsI8x16Abs: case kMipsI8x16Abs:
case kMipsI8x16BitMask:
case kMipsIns: case kMipsIns:
case kMipsLsa: case kMipsLsa:
case kMipsMaddD: case kMipsMaddD:
......
...@@ -2172,6 +2172,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { ...@@ -2172,6 +2172,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I32x4GtU, kMipsI32x4GtU) \ V(I32x4GtU, kMipsI32x4GtU) \
V(I32x4GeU, kMipsI32x4GeU) \ V(I32x4GeU, kMipsI32x4GeU) \
V(I32x4Abs, kMipsI32x4Abs) \ V(I32x4Abs, kMipsI32x4Abs) \
V(I32x4BitMask, kMipsI32x4BitMask) \
V(I16x8Add, kMipsI16x8Add) \ V(I16x8Add, kMipsI16x8Add) \
V(I16x8AddSaturateS, kMipsI16x8AddSaturateS) \ V(I16x8AddSaturateS, kMipsI16x8AddSaturateS) \
V(I16x8AddSaturateU, kMipsI16x8AddSaturateU) \ V(I16x8AddSaturateU, kMipsI16x8AddSaturateU) \
...@@ -2194,6 +2195,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { ...@@ -2194,6 +2195,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I16x8UConvertI32x4, kMipsI16x8UConvertI32x4) \ V(I16x8UConvertI32x4, kMipsI16x8UConvertI32x4) \
V(I16x8RoundingAverageU, kMipsI16x8RoundingAverageU) \ V(I16x8RoundingAverageU, kMipsI16x8RoundingAverageU) \
V(I16x8Abs, kMipsI16x8Abs) \ V(I16x8Abs, kMipsI16x8Abs) \
V(I16x8BitMask, kMipsI16x8BitMask) \
V(I8x16Add, kMipsI8x16Add) \ V(I8x16Add, kMipsI8x16Add) \
V(I8x16AddSaturateS, kMipsI8x16AddSaturateS) \ V(I8x16AddSaturateS, kMipsI8x16AddSaturateS) \
V(I8x16AddSaturateU, kMipsI8x16AddSaturateU) \ V(I8x16AddSaturateU, kMipsI8x16AddSaturateU) \
...@@ -2215,6 +2217,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { ...@@ -2215,6 +2217,7 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I8x16SConvertI16x8, kMipsI8x16SConvertI16x8) \ V(I8x16SConvertI16x8, kMipsI8x16SConvertI16x8) \
V(I8x16UConvertI16x8, kMipsI8x16UConvertI16x8) \ V(I8x16UConvertI16x8, kMipsI8x16UConvertI16x8) \
V(I8x16Abs, kMipsI8x16Abs) \ V(I8x16Abs, kMipsI8x16Abs) \
V(I8x16BitMask, kMipsI8x16BitMask) \
V(S128And, kMipsS128And) \ V(S128And, kMipsS128And) \
V(S128Or, kMipsS128Or) \ V(S128Or, kMipsS128Or) \
V(S128Xor, kMipsS128Xor) \ V(S128Xor, kMipsS128Xor) \
......
...@@ -2634,6 +2634,21 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2634,6 +2634,21 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
kSimd128RegZero); kSimd128RegZero);
break; break;
} }
case kMips64I32x4BitMask: {
// Gathers the top bit of each of the four 32-bit lanes of the input vector
// into bits 0..3 of the output GPR (lane k -> bit k). The step comments
// below assume standard MSA semantics for srli/shf/copy_u -- confirm
// against the MSA manual.
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
Register dst = i.OutputRegister();
Simd128Register src = i.InputSimd128Register(0);
// NOTE(review): kSimd128RegZero is used as a scratch here and is left
// non-zero afterwards; presumably every consumer re-zeroes it before
// relying on its value -- confirm.
Simd128Register scratch0 = kSimd128RegZero;
Simd128Register scratch1 = kSimd128ScratchReg;
// Reduce every word to its top bit (each word becomes 0 or 1).
__ srli_w(scratch0, src, 31);
// Per doubleword: move the upper word's bit (bit 32) down to bit 1 and
// merge, so byte 0 of each doubleword holds a 2-bit partial mask.
__ srli_d(scratch1, scratch0, 31);
__ or_v(scratch0, scratch0, scratch1);
// 0x0E presumably selects words {2, 3, 0, 0}, i.e. copies the upper
// doubleword into the lower one -- confirm against the shf.w encoding.
__ shf_w(scratch1, scratch0, 0x0E);
// Position the bits of lanes 2..3 at bits 2..3 and merge with lanes 0..1.
__ slli_d(scratch1, scratch1, 2);
__ or_v(scratch0, scratch0, scratch1);
// Byte 0 now carries the complete 4-bit mask; extract it unsigned.
__ copy_u_b(dst, scratch0, 0);
break;
}
case kMips64I16x8Splat: { case kMips64I16x8Splat: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ fill_h(i.OutputSimd128Register(), i.InputRegister(0)); __ fill_h(i.OutputSimd128Register(), i.InputRegister(0));
...@@ -2820,6 +2835,23 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -2820,6 +2835,23 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
kSimd128RegZero); kSimd128RegZero);
break; break;
} }
case kMips64I16x8BitMask: {
// Gathers the top bit of each of the eight 16-bit lanes of the input vector
// into bits 0..7 of the output GPR (lane k -> bit k). The step comments
// below assume standard MSA semantics for srli/shf/copy_u -- confirm
// against the MSA manual.
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
Register dst = i.OutputRegister();
Simd128Register src = i.InputSimd128Register(0);
// NOTE(review): kSimd128RegZero is used as a scratch here and is left
// non-zero afterwards; presumably every consumer re-zeroes it before
// relying on its value -- confirm.
Simd128Register scratch0 = kSimd128RegZero;
Simd128Register scratch1 = kSimd128ScratchReg;
// Reduce every halfword to its top bit (each halfword becomes 0 or 1).
__ srli_h(scratch0, src, 15);
// Per word: move the upper halfword's bit (bit 16) down to bit 1 and merge,
// giving a 2-bit partial mask in the low bits of each word.
__ srli_w(scratch1, scratch0, 15);
__ or_v(scratch0, scratch0, scratch1);
// Per doubleword: move the upper word's 2-bit mask (bits 32..33) down to
// bits 2..3 and merge, giving a 4-bit partial mask per doubleword.
__ srli_d(scratch1, scratch0, 30);
__ or_v(scratch0, scratch0, scratch1);
// 0x0E presumably selects words {2, 3, 0, 0}, i.e. copies the upper
// doubleword into the lower one -- confirm against the shf.w encoding.
__ shf_w(scratch1, scratch0, 0x0E);
// Position the bits of lanes 4..7 at bits 4..7 and merge with lanes 0..3.
__ slli_d(scratch1, scratch1, 4);
__ or_v(scratch0, scratch0, scratch1);
// Byte 0 now carries the complete 8-bit mask; extract it unsigned.
__ copy_u_b(dst, scratch0, 0);
break;
}
case kMips64I8x16Splat: { case kMips64I8x16Splat: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ fill_b(i.OutputSimd128Register(), i.InputRegister(0)); __ fill_b(i.OutputSimd128Register(), i.InputRegister(0));
...@@ -3006,6 +3038,24 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction( ...@@ -3006,6 +3038,24 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
kSimd128RegZero); kSimd128RegZero);
break; break;
} }
case kMips64I8x16BitMask: {
// Gathers the top bit of each of the sixteen 8-bit lanes of the input
// vector into bits 0..15 of the output GPR (lane k -> bit k). The step
// comments below assume standard MSA semantics for srli/shf/ilvev/copy_u
// -- confirm against the MSA manual.
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
Register dst = i.OutputRegister();
Simd128Register src = i.InputSimd128Register(0);
// NOTE(review): kSimd128RegZero is used as a scratch here and is left
// non-zero afterwards; presumably every consumer re-zeroes it before
// relying on its value -- confirm.
Simd128Register scratch0 = kSimd128RegZero;
Simd128Register scratch1 = kSimd128ScratchReg;
// Reduce every byte to its top bit (each byte becomes 0 or 1).
__ srli_b(scratch0, src, 7);
// Per halfword: move the upper byte's bit (bit 8) down to bit 1 and merge.
__ srli_h(scratch1, scratch0, 7);
__ or_v(scratch0, scratch0, scratch1);
// Per word: move the upper halfword's 2-bit mask (bits 16..17) down to
// bits 2..3 and merge.
__ srli_w(scratch1, scratch0, 14);
__ or_v(scratch0, scratch0, scratch1);
// Per doubleword: move the upper word's 4-bit mask (bits 32..35) down to
// bits 4..7 and merge, so byte 0 of each doubleword holds an 8-bit mask.
__ srli_d(scratch1, scratch0, 28);
__ or_v(scratch0, scratch0, scratch1);
// 0x0E presumably selects words {2, 3, 0, 0}, i.e. copies the upper
// doubleword into the lower one -- confirm against the shf.w encoding.
__ shf_w(scratch1, scratch0, 0x0E);
// Interleave even bytes so that byte 0 keeps the mask of lanes 0..7 and
// byte 1 receives the mask of lanes 8..15 (presumed ilvev.b operand order:
// odd destination bytes come from scratch1 -- confirm).
__ ilvev_b(scratch0, scratch1, scratch0);
// Halfword 0 now carries the complete 16-bit mask; extract it unsigned.
__ copy_u_h(dst, scratch0, 0);
break;
}
case kMips64S128And: { case kMips64S128And: {
CpuFeatureScope msa_scope(tasm(), MIPS_SIMD); CpuFeatureScope msa_scope(tasm(), MIPS_SIMD);
__ and_v(i.OutputSimd128Register(), i.InputSimd128Register(0), __ and_v(i.OutputSimd128Register(), i.InputSimd128Register(0),
......
...@@ -237,6 +237,7 @@ namespace compiler { ...@@ -237,6 +237,7 @@ namespace compiler {
V(Mips64I32x4GtU) \ V(Mips64I32x4GtU) \
V(Mips64I32x4GeU) \ V(Mips64I32x4GeU) \
V(Mips64I32x4Abs) \ V(Mips64I32x4Abs) \
V(Mips64I32x4BitMask) \
V(Mips64I16x8Splat) \ V(Mips64I16x8Splat) \
V(Mips64I16x8ExtractLaneU) \ V(Mips64I16x8ExtractLaneU) \
V(Mips64I16x8ExtractLaneS) \ V(Mips64I16x8ExtractLaneS) \
...@@ -265,6 +266,7 @@ namespace compiler { ...@@ -265,6 +266,7 @@ namespace compiler {
V(Mips64I16x8GeU) \ V(Mips64I16x8GeU) \
V(Mips64I16x8RoundingAverageU) \ V(Mips64I16x8RoundingAverageU) \
V(Mips64I16x8Abs) \ V(Mips64I16x8Abs) \
V(Mips64I16x8BitMask) \
V(Mips64I8x16Splat) \ V(Mips64I8x16Splat) \
V(Mips64I8x16ExtractLaneU) \ V(Mips64I8x16ExtractLaneU) \
V(Mips64I8x16ExtractLaneS) \ V(Mips64I8x16ExtractLaneS) \
...@@ -292,6 +294,7 @@ namespace compiler { ...@@ -292,6 +294,7 @@ namespace compiler {
V(Mips64I8x16GeU) \ V(Mips64I8x16GeU) \
V(Mips64I8x16RoundingAverageU) \ V(Mips64I8x16RoundingAverageU) \
V(Mips64I8x16Abs) \ V(Mips64I8x16Abs) \
V(Mips64I8x16BitMask) \
V(Mips64S128And) \ V(Mips64S128And) \
V(Mips64S128Or) \ V(Mips64S128Or) \
V(Mips64S128Xor) \ V(Mips64S128Xor) \
......
...@@ -171,6 +171,7 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -171,6 +171,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMips64I16x8UConvertI8x16Low: case kMips64I16x8UConvertI8x16Low:
case kMips64I16x8RoundingAverageU: case kMips64I16x8RoundingAverageU:
case kMips64I16x8Abs: case kMips64I16x8Abs:
case kMips64I16x8BitMask:
case kMips64I32x4Add: case kMips64I32x4Add:
case kMips64I32x4AddHoriz: case kMips64I32x4AddHoriz:
case kMips64I32x4Eq: case kMips64I32x4Eq:
...@@ -199,6 +200,7 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -199,6 +200,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMips64I32x4UConvertI16x8High: case kMips64I32x4UConvertI16x8High:
case kMips64I32x4UConvertI16x8Low: case kMips64I32x4UConvertI16x8Low:
case kMips64I32x4Abs: case kMips64I32x4Abs:
case kMips64I32x4BitMask:
case kMips64I8x16Add: case kMips64I8x16Add:
case kMips64I8x16AddSaturateS: case kMips64I8x16AddSaturateS:
case kMips64I8x16AddSaturateU: case kMips64I8x16AddSaturateU:
...@@ -226,6 +228,7 @@ int InstructionScheduler::GetTargetInstructionFlags( ...@@ -226,6 +228,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kMips64I8x16SubSaturateU: case kMips64I8x16SubSaturateU:
case kMips64I8x16RoundingAverageU: case kMips64I8x16RoundingAverageU:
case kMips64I8x16Abs: case kMips64I8x16Abs:
case kMips64I8x16BitMask:
case kMips64Ins: case kMips64Ins:
case kMips64Lsa: case kMips64Lsa:
case kMips64MaxD: case kMips64MaxD:
......
...@@ -2778,14 +2778,17 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) { ...@@ -2778,14 +2778,17 @@ void InstructionSelector::VisitInt64AbsWithOverflow(Node* node) {
V(I32x4UConvertI16x8Low, kMips64I32x4UConvertI16x8Low) \ V(I32x4UConvertI16x8Low, kMips64I32x4UConvertI16x8Low) \
V(I32x4UConvertI16x8High, kMips64I32x4UConvertI16x8High) \ V(I32x4UConvertI16x8High, kMips64I32x4UConvertI16x8High) \
V(I32x4Abs, kMips64I32x4Abs) \ V(I32x4Abs, kMips64I32x4Abs) \
V(I32x4BitMask, kMips64I32x4BitMask) \
V(I16x8Neg, kMips64I16x8Neg) \ V(I16x8Neg, kMips64I16x8Neg) \
V(I16x8SConvertI8x16Low, kMips64I16x8SConvertI8x16Low) \ V(I16x8SConvertI8x16Low, kMips64I16x8SConvertI8x16Low) \
V(I16x8SConvertI8x16High, kMips64I16x8SConvertI8x16High) \ V(I16x8SConvertI8x16High, kMips64I16x8SConvertI8x16High) \
V(I16x8UConvertI8x16Low, kMips64I16x8UConvertI8x16Low) \ V(I16x8UConvertI8x16Low, kMips64I16x8UConvertI8x16Low) \
V(I16x8UConvertI8x16High, kMips64I16x8UConvertI8x16High) \ V(I16x8UConvertI8x16High, kMips64I16x8UConvertI8x16High) \
V(I16x8Abs, kMips64I16x8Abs) \ V(I16x8Abs, kMips64I16x8Abs) \
V(I16x8BitMask, kMips64I16x8BitMask) \
V(I8x16Neg, kMips64I8x16Neg) \ V(I8x16Neg, kMips64I8x16Neg) \
V(I8x16Abs, kMips64I8x16Abs) \ V(I8x16Abs, kMips64I8x16Abs) \
V(I8x16BitMask, kMips64I8x16BitMask) \
V(S128Not, kMips64S128Not) \ V(S128Not, kMips64S128Not) \
V(V32x4AnyTrue, kMips64V32x4AnyTrue) \ V(V32x4AnyTrue, kMips64V32x4AnyTrue) \
V(V32x4AllTrue, kMips64V32x4AllTrue) \ V(V32x4AllTrue, kMips64V32x4AllTrue) \
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment