Commit 1225709e authored by Milad Farazmand, committed by Commit Bot

[wasm-simd] Adding Simd128ReverseBytes to all supported architectures

WASM only supports Little-endian byte ordering and we need a mechanism to
reverse the ordering efficiently on Big-endian machines.
Up until now this was done using TF graphs within wasm-compiler.
The new approach allows for having more machine level optimizations
by introducing the new "kSimd128ReverseBytes" opcode which gets executed
only on Big-endian machines.

Change-Id: I63c6c3c42ca9ff9d9b2af2d45070a70cf1b3cefc
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/1803494
Reviewed-by: Michael Starzinger <mstarzinger@chromium.org>
Commit-Queue: Milad Farazmand <miladfar@ca.ibm.com>
Cr-Commit-Position: refs/heads/master@{#63875}
parent 6f9b2bd4
...@@ -1120,6 +1120,10 @@ void InstructionSelector::VisitWord32ReverseBytes(Node* node) { ...@@ -1120,6 +1120,10 @@ void InstructionSelector::VisitWord32ReverseBytes(Node* node) {
VisitRR(this, kArmRev, node); VisitRR(this, kArmRev, node);
} }
// Instruction selection for the Simd128ReverseBytes machine operator.
// The operator is only meant to be executed on big-endian machines (to
// convert to/from wasm's little-endian byte order), so this ARM selector
// treats reaching it as a bug.
void InstructionSelector::VisitSimd128ReverseBytes(Node* node) {
UNREACHABLE();
}
void InstructionSelector::VisitWord32Popcnt(Node* node) { UNREACHABLE(); } void InstructionSelector::VisitWord32Popcnt(Node* node) { UNREACHABLE(); }
void InstructionSelector::VisitInt32Add(Node* node) { void InstructionSelector::VisitInt32Add(Node* node) {
......
...@@ -771,6 +771,10 @@ void InstructionSelector::VisitProtectedStore(Node* node) { ...@@ -771,6 +771,10 @@ void InstructionSelector::VisitProtectedStore(Node* node) {
UNIMPLEMENTED(); UNIMPLEMENTED();
} }
// Instruction selection for the Simd128ReverseBytes machine operator.
// The operator is only meant to be executed on big-endian machines (to
// convert to/from wasm's little-endian byte order); this backend treats
// reaching it as a bug.
void InstructionSelector::VisitSimd128ReverseBytes(Node* node) {
UNREACHABLE();
}
// Architecture supports unaligned access, therefore VisitLoad is used instead // Architecture supports unaligned access, therefore VisitLoad is used instead
void InstructionSelector::VisitUnalignedLoad(Node* node) { UNREACHABLE(); } void InstructionSelector::VisitUnalignedLoad(Node* node) { UNREACHABLE(); }
......
...@@ -890,6 +890,10 @@ void InstructionSelector::VisitWord32ReverseBytes(Node* node) { ...@@ -890,6 +890,10 @@ void InstructionSelector::VisitWord32ReverseBytes(Node* node) {
Emit(kIA32Bswap, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0))); Emit(kIA32Bswap, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)));
} }
// Instruction selection for the Simd128ReverseBytes machine operator.
// The operator is only meant to be executed on big-endian machines (to
// convert to/from wasm's little-endian byte order), so this IA32 selector
// treats reaching it as a bug.
void InstructionSelector::VisitSimd128ReverseBytes(Node* node) {
UNREACHABLE();
}
void InstructionSelector::VisitInt32Add(Node* node) { void InstructionSelector::VisitInt32Add(Node* node) {
IA32OperandGenerator g(this); IA32OperandGenerator g(this);
......
...@@ -1439,6 +1439,8 @@ void InstructionSelector::VisitNode(Node* node) { ...@@ -1439,6 +1439,8 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsWord64(node), VisitWord64ReverseBits(node); return MarkAsWord64(node), VisitWord64ReverseBits(node);
case IrOpcode::kWord64ReverseBytes: case IrOpcode::kWord64ReverseBytes:
return MarkAsWord64(node), VisitWord64ReverseBytes(node); return MarkAsWord64(node), VisitWord64ReverseBytes(node);
case IrOpcode::kSimd128ReverseBytes:
return MarkAsSimd128(node), VisitSimd128ReverseBytes(node);
case IrOpcode::kInt64AbsWithOverflow: case IrOpcode::kInt64AbsWithOverflow:
return MarkAsWord64(node), VisitInt64AbsWithOverflow(node); return MarkAsWord64(node), VisitInt64AbsWithOverflow(node);
case IrOpcode::kWord64Equal: case IrOpcode::kWord64Equal:
......
...@@ -780,6 +780,10 @@ void InstructionSelector::VisitWord32ReverseBytes(Node* node) { ...@@ -780,6 +780,10 @@ void InstructionSelector::VisitWord32ReverseBytes(Node* node) {
g.UseRegister(node->InputAt(0))); g.UseRegister(node->InputAt(0)));
} }
// Instruction selection for the Simd128ReverseBytes machine operator.
// The operator is only meant to be executed on big-endian machines (to
// convert to/from wasm's little-endian byte order); this MIPS selector
// treats reaching it as a bug.
// NOTE(review): MIPS can presumably be configured as big-endian -- confirm
// that the node is always lowered (e.g. by SimdScalarLowering) before
// instruction selection on such builds, otherwise this will be hit.
void InstructionSelector::VisitSimd128ReverseBytes(Node* node) {
UNREACHABLE();
}
void InstructionSelector::VisitWord32Ctz(Node* node) { void InstructionSelector::VisitWord32Ctz(Node* node) {
MipsOperandGenerator g(this); MipsOperandGenerator g(this);
Emit(kMipsCtz, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0))); Emit(kMipsCtz, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)));
......
...@@ -822,6 +822,10 @@ void InstructionSelector::VisitWord32ReverseBytes(Node* node) { ...@@ -822,6 +822,10 @@ void InstructionSelector::VisitWord32ReverseBytes(Node* node) {
g.UseRegister(node->InputAt(0))); g.UseRegister(node->InputAt(0)));
} }
// Instruction selection for the Simd128ReverseBytes machine operator.
// The operator is only meant to be executed on big-endian machines (to
// convert to/from wasm's little-endian byte order); this MIPS64 selector
// treats reaching it as a bug.
// NOTE(review): MIPS64 can presumably be configured as big-endian -- confirm
// that the node is always lowered (e.g. by SimdScalarLowering) before
// instruction selection on such builds, otherwise this will be hit.
void InstructionSelector::VisitSimd128ReverseBytes(Node* node) {
UNREACHABLE();
}
void InstructionSelector::VisitWord32Ctz(Node* node) { void InstructionSelector::VisitWord32Ctz(Node* node) {
Mips64OperandGenerator g(this); Mips64OperandGenerator g(this);
Emit(kMips64Ctz, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0))); Emit(kMips64Ctz, g.DefineAsRegister(node), g.UseRegister(node->InputAt(0)));
......
...@@ -926,6 +926,12 @@ void InstructionSelector::VisitWord32ReverseBytes(Node* node) { ...@@ -926,6 +926,12 @@ void InstructionSelector::VisitWord32ReverseBytes(Node* node) {
g.UseRegister(node->InputAt(0))); g.UseRegister(node->InputAt(0)));
} }
// Instruction selection for the Simd128ReverseBytes machine operator, which
// reverses the byte order of a 128-bit SIMD value on big-endian machines.
// Not implemented for PPC yet: reaching this selector hits UNIMPLEMENTED().
void InstructionSelector::VisitSimd128ReverseBytes(Node* node) {
// TODO(miladfar): Implement the ppc selector for reversing SIMD bytes.
// Check if the input node is a Load and, if so, emit a single
// byte-reversing load instead of a load followed by a reverse.
UNIMPLEMENTED();
}
void InstructionSelector::VisitInt32Add(Node* node) { void InstructionSelector::VisitInt32Add(Node* node) {
VisitBinop<Int32BinopMatcher>(this, node, kPPC_Add32, kInt16Imm); VisitBinop<Int32BinopMatcher>(this, node, kPPC_Add32, kInt16Imm);
} }
......
...@@ -1170,6 +1170,12 @@ void InstructionSelector::VisitWord32ReverseBytes(Node* node) { ...@@ -1170,6 +1170,12 @@ void InstructionSelector::VisitWord32ReverseBytes(Node* node) {
g.UseRegister(node->InputAt(0))); g.UseRegister(node->InputAt(0)));
} }
// Instruction selection for the Simd128ReverseBytes machine operator, which
// reverses the byte order of a 128-bit SIMD value on big-endian machines.
// Not implemented for s390 yet: reaching this selector hits UNIMPLEMENTED().
void InstructionSelector::VisitSimd128ReverseBytes(Node* node) {
// TODO(miladfar): Implement the s390 selector for reversing SIMD bytes.
// Check if the input node is a Load and, if so, emit a single
// byte-reversing load instead of a load followed by a reverse.
UNIMPLEMENTED();
}
template <class Matcher, ArchOpcode neg_opcode> template <class Matcher, ArchOpcode neg_opcode>
static inline bool TryMatchNegFromSub(InstructionSelector* selector, static inline bool TryMatchNegFromSub(InstructionSelector* selector,
Node* node) { Node* node) {
......
...@@ -875,6 +875,10 @@ void InstructionSelector::VisitWord32ReverseBytes(Node* node) { ...@@ -875,6 +875,10 @@ void InstructionSelector::VisitWord32ReverseBytes(Node* node) {
Emit(kX64Bswap32, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0))); Emit(kX64Bswap32, g.DefineSameAsFirst(node), g.UseRegister(node->InputAt(0)));
} }
// Instruction selection for the Simd128ReverseBytes machine operator.
// The operator is only meant to be executed on big-endian machines (to
// convert to/from wasm's little-endian byte order), so this X64 selector
// treats reaching it as a bug.
void InstructionSelector::VisitSimd128ReverseBytes(Node* node) {
UNREACHABLE();
}
void InstructionSelector::VisitInt32Add(Node* node) { void InstructionSelector::VisitInt32Add(Node* node) {
X64OperandGenerator g(this); X64OperandGenerator g(this);
......
...@@ -146,6 +146,7 @@ MachineType AtomicOpType(Operator const* op) { ...@@ -146,6 +146,7 @@ MachineType AtomicOpType(Operator const* op) {
V(Word64Clz, Operator::kNoProperties, 1, 0, 1) \ V(Word64Clz, Operator::kNoProperties, 1, 0, 1) \
V(Word32ReverseBytes, Operator::kNoProperties, 1, 0, 1) \ V(Word32ReverseBytes, Operator::kNoProperties, 1, 0, 1) \
V(Word64ReverseBytes, Operator::kNoProperties, 1, 0, 1) \ V(Word64ReverseBytes, Operator::kNoProperties, 1, 0, 1) \
V(Simd128ReverseBytes, Operator::kNoProperties, 1, 0, 1) \
V(BitcastTaggedToWordForTagAndSmiBits, Operator::kNoProperties, 1, 0, 1) \ V(BitcastTaggedToWordForTagAndSmiBits, Operator::kNoProperties, 1, 0, 1) \
V(BitcastWordToTaggedSigned, Operator::kNoProperties, 1, 0, 1) \ V(BitcastWordToTaggedSigned, Operator::kNoProperties, 1, 0, 1) \
V(BitcastWord32ToCompressedSigned, Operator::kNoProperties, 1, 0, 1) \ V(BitcastWord32ToCompressedSigned, Operator::kNoProperties, 1, 0, 1) \
......
...@@ -239,6 +239,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final ...@@ -239,6 +239,7 @@ class V8_EXPORT_PRIVATE MachineOperatorBuilder final
const OptionalOperator Word64ReverseBits(); const OptionalOperator Word64ReverseBits();
const Operator* Word32ReverseBytes(); const Operator* Word32ReverseBytes();
const Operator* Word64ReverseBytes(); const Operator* Word64ReverseBytes();
const Operator* Simd128ReverseBytes();
const OptionalOperator Int32AbsWithOverflow(); const OptionalOperator Int32AbsWithOverflow();
const OptionalOperator Int64AbsWithOverflow(); const OptionalOperator Int64AbsWithOverflow();
......
...@@ -669,6 +669,7 @@ ...@@ -669,6 +669,7 @@
V(Word64Ctz) \ V(Word64Ctz) \
V(Word64ReverseBits) \ V(Word64ReverseBits) \
V(Word64ReverseBytes) \ V(Word64ReverseBytes) \
V(Simd128ReverseBytes) \
V(Int64AbsWithOverflow) \ V(Int64AbsWithOverflow) \
V(BitcastTaggedToWord) \ V(BitcastTaggedToWord) \
V(BitcastTaggedToWordForTagAndSmiBits) \ V(BitcastTaggedToWordForTagAndSmiBits) \
......
...@@ -941,6 +941,28 @@ void SimdScalarLowering::LowerNode(Node* node) { ...@@ -941,6 +941,28 @@ void SimdScalarLowering::LowerNode(Node* node) {
} }
break; break;
} }
case IrOpcode::kSimd128ReverseBytes: {
// Scalar lowering of a full 128-bit byte reversal: each 32-bit lane is
// byte-swapped and the lane order is mirrored, so input lane i ends up in
// output lane (kNumLanes32 - 1 - i).
DCHECK_EQ(1, node->InputCount());
// A Float32x4 input keeps its float replacement type; any other input is
// fetched and replaced as Int32x4.
bool is_float = ReplacementType(node->InputAt(0)) == SimdType::kFloat32x4;
replacements_[node->id()].type =
is_float ? SimdType::kFloat32x4 : SimdType::kInt32x4;
Node** rep = GetReplacementsWithType(
node->InputAt(0),
is_float ? SimdType::kFloat32x4 : SimdType::kInt32x4);
Node* rep_node[kNumLanes32];
for (int i = 0; i < kNumLanes32; ++i) {
// Float lanes are bitcast to int32 before Word32ReverseBytes is applied
// and bitcast back to float32 afterwards; int lanes are used directly.
Node* temp = is_float ? graph()->NewNode(
machine()->BitcastFloat32ToInt32(), rep[i])
: rep[i];
temp = graph()->NewNode(machine()->Word32ReverseBytes(), temp);
// Store into the mirrored lane index to reverse the lane order as well.
rep_node[kNumLanes32 - 1 - i] =
is_float
? graph()->NewNode(machine()->BitcastInt32ToFloat32(), temp)
: temp;
}
ReplaceNode(node, rep_node, kNumLanes32);
break;
}
case IrOpcode::kLoad: case IrOpcode::kLoad:
case IrOpcode::kUnalignedLoad: case IrOpcode::kUnalignedLoad:
case IrOpcode::kProtectedLoad: { case IrOpcode::kProtectedLoad: {
......
...@@ -1700,6 +1700,7 @@ void Verifier::Visitor::Check(Node* node, const AllNodes& all) { ...@@ -1700,6 +1700,7 @@ void Verifier::Visitor::Check(Node* node, const AllNodes& all) {
case IrOpcode::kWord64Ctz: case IrOpcode::kWord64Ctz:
case IrOpcode::kWord64ReverseBits: case IrOpcode::kWord64ReverseBits:
case IrOpcode::kWord64ReverseBytes: case IrOpcode::kWord64ReverseBytes:
case IrOpcode::kSimd128ReverseBytes:
case IrOpcode::kInt64AbsWithOverflow: case IrOpcode::kInt64AbsWithOverflow:
case IrOpcode::kWord64Equal: case IrOpcode::kWord64Equal:
case IrOpcode::kInt32Add: case IrOpcode::kInt32Add:
......
...@@ -1266,27 +1266,9 @@ Node* WasmGraphBuilder::BuildChangeEndiannessStore( ...@@ -1266,27 +1266,9 @@ Node* WasmGraphBuilder::BuildChangeEndiannessStore(
case 8: case 8:
result = graph()->NewNode(m->Word64ReverseBytes(), value); result = graph()->NewNode(m->Word64ReverseBytes(), value);
break; break;
case 16: { case 16:
Node* byte_reversed_lanes[4]; result = graph()->NewNode(m->Simd128ReverseBytes(), value);
for (int lane = 0; lane < 4; lane++) {
byte_reversed_lanes[lane] = graph()->NewNode(
m->Word32ReverseBytes(),
graph()->NewNode(mcgraph()->machine()->I32x4ExtractLane(lane),
value));
}
// This is making a copy of the value.
result =
graph()->NewNode(mcgraph()->machine()->S128And(), value, value);
for (int lane = 0; lane < 4; lane++) {
result =
graph()->NewNode(mcgraph()->machine()->I32x4ReplaceLane(3 - lane),
result, byte_reversed_lanes[lane]);
}
break; break;
}
default: default:
UNREACHABLE(); UNREACHABLE();
break; break;
...@@ -1405,27 +1387,9 @@ Node* WasmGraphBuilder::BuildChangeEndiannessLoad(Node* node, ...@@ -1405,27 +1387,9 @@ Node* WasmGraphBuilder::BuildChangeEndiannessLoad(Node* node,
case 8: case 8:
result = graph()->NewNode(m->Word64ReverseBytes(), value); result = graph()->NewNode(m->Word64ReverseBytes(), value);
break; break;
case 16: { case 16:
Node* byte_reversed_lanes[4]; result = graph()->NewNode(m->Simd128ReverseBytes(), value);
for (int lane = 0; lane < 4; lane++) {
byte_reversed_lanes[lane] = graph()->NewNode(
m->Word32ReverseBytes(),
graph()->NewNode(mcgraph()->machine()->I32x4ExtractLane(lane),
value));
}
// This is making a copy of the value.
result =
graph()->NewNode(mcgraph()->machine()->S128And(), value, value);
for (int lane = 0; lane < 4; lane++) {
result =
graph()->NewNode(mcgraph()->machine()->I32x4ReplaceLane(3 - lane),
result, byte_reversed_lanes[lane]);
}
break; break;
}
default: default:
UNREACHABLE(); UNREACHABLE();
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment