Commit 0761b55d authored by Pierre Langlois, committed by Commit Bot

[cctest] Support testing Simd128 moves and swaps

Extend the code-generator tests to cover AssembleMove and AssembleSwap with
Simd128 registers and stack slots, for targets that support them.

For this to work however, we need support for passing Simd128 stack parameters
in TurboFan which this patch implements for Arm and x86. PPC and S390 both do
not support the Simd128 representation and it appears MIPS and MIPS64's
implementation of AssembleMove and AssembleSwap do not support it either.

As per the design of the tests, the set of values to perform moves on are
represented in a FixedArray of Smis (for kTagged) and HeapNumbers (for kFloat32
and kFloat64). They are converted to raw values for the moves to be performed
on, to be then converted back into a FixedArray. For the kSimd128
representation, we represent values as a FixedArray of 4 Smis, each representing
a lane. They are converted to a raw Simd128 vector using the `I32x4ReplaceLane`
and `I32x4ExtractLane` operations.

Finally, these tests need Simd128 variables mixed with the CodeStubAssembler
which is not a use-case officially supported. And as a result, the `RecordWrite`
stub does not guarantee to preserve Simd128 registers. To get around this, we
have to be careful to skip write barriers when dealing with Simd128 parameters
inside the "teardown" function, and we've had to move all allocations to the
"setup" function.

Thanks to this, we are able to catch bugs such as this one
https://bugs.chromium.org/p/v8/issues/detail?id=6843.

Bug: v8:6848
Change-Id: I8787d6339cdbfcd9356c5e8995925f0b45c562fa
Reviewed-on: https://chromium-review.googlesource.com/728599
Commit-Queue: Pierre Langlois <pierre.langlois@arm.com>
Reviewed-by: Benedikt Meurer <bmeurer@chromium.org>
Reviewed-by: Bill Budge <bbudge@chromium.org>
Cr-Commit-Position: refs/heads/master@{#50326}
parent 78ac6405
......@@ -1345,6 +1345,10 @@ class Assembler : public AssemblerBase {
void pop();
// Push a 128-bit NEON (Q) register onto the stack. Emitted as a single
// VSTM with decrement-before and writeback on sp, storing the Q register's
// two underlying D registers (src.low() .. src.high()).
void vpush(QwNeonRegister src, Condition cond = al) {
vstm(db_w, sp, src.low(), src.high(), cond);
}
// Push a single 64-bit VFP (D) register onto the stack. Uses VSTM with
// decrement-before and writeback on sp over the one-register range
// src .. src, mirroring the QwNeonRegister overload above it in the file.
void vpush(DwVfpRegister src, Condition cond = al) {
vstm(db_w, sp, src, src, cond);
}
......
......@@ -1623,13 +1623,23 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
case kArmPush:
if (instr->InputAt(0)->IsFPRegister()) {
LocationOperand* op = LocationOperand::cast(instr->InputAt(0));
if (op->representation() == MachineRepresentation::kFloat64) {
__ vpush(i.InputDoubleRegister(0));
frame_access_state()->IncreaseSPDelta(kDoubleSize / kPointerSize);
} else {
DCHECK_EQ(MachineRepresentation::kFloat32, op->representation());
__ vpush(i.InputFloatRegister(0));
frame_access_state()->IncreaseSPDelta(1);
switch (op->representation()) {
case MachineRepresentation::kFloat32:
__ vpush(i.InputFloatRegister(0));
frame_access_state()->IncreaseSPDelta(1);
break;
case MachineRepresentation::kFloat64:
__ vpush(i.InputDoubleRegister(0));
frame_access_state()->IncreaseSPDelta(kDoubleSize / kPointerSize);
break;
case MachineRepresentation::kSimd128: {
__ vpush(i.InputSimd128Register(0));
frame_access_state()->IncreaseSPDelta(kSimd128Size / kPointerSize);
break;
}
default:
UNREACHABLE();
break;
}
} else {
__ push(i.InputRegister(0));
......
......@@ -1326,7 +1326,9 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
Register prev = __ StackPointer();
__ SetStackPointer(arch_opcode == kArm64PokeCSP ? csp : jssp);
Operand operand(i.InputInt32(1) * kPointerSize);
if (instr->InputAt(0)->IsFPRegister()) {
if (instr->InputAt(0)->IsSimd128Register()) {
__ Poke(i.InputSimd128Register(0), operand);
} else if (instr->InputAt(0)->IsFPRegister()) {
__ Poke(i.InputFloat64Register(0), operand);
} else {
__ Poke(i.InputOrZeroRegister64(0), operand);
......
......@@ -1687,6 +1687,8 @@ void InstructionSelector::EmitPrepareArguments(
bool always_claim = to_native_stack != from_native_stack;
// `arguments` includes alignment "holes". This means that slots bigger than
// kPointerSize, e.g. Simd128, will span across multiple arguments.
int claim_count = static_cast<int>(arguments->size());
int slot = claim_count - 1;
claim_count = RoundUp(claim_count, 2);
......@@ -1710,8 +1712,12 @@ void InstructionSelector::EmitPrepareArguments(
// Poke the arguments into the stack.
while (slot >= 0) {
Emit(poke, g.NoOutput(), g.UseRegister((*arguments)[slot].node),
g.TempImmediate(slot));
Node* input_node = (*arguments)[slot].node;
// Skip any alignment holes in pushed nodes.
if (input_node != nullptr) {
Emit(poke, g.NoOutput(), g.UseRegister(input_node),
g.TempImmediate(slot));
}
slot--;
// TODO(ahaas): Poke arguments in pairs if two subsequent arguments have the
// same type.
......
......@@ -1568,6 +1568,17 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
frame_access_state()->IncreaseSPDelta(kDoubleSize / kPointerSize);
}
break;
case kIA32PushSimd128:
if (instr->InputAt(0)->IsFPRegister()) {
__ sub(esp, Immediate(kSimd128Size));
__ movups(Operand(esp, 0), i.InputSimd128Register(0));
} else {
__ movups(kScratchDoubleReg, i.InputOperand(0));
__ sub(esp, Immediate(kSimd128Size));
__ movups(Operand(esp, 0), kScratchDoubleReg);
}
frame_access_state()->IncreaseSPDelta(kSimd128Size / kPointerSize);
break;
case kIA32Push:
if (AddressingModeField::decode(instr->opcode()) != kMode_None) {
size_t index = 0;
......
......@@ -110,6 +110,7 @@ namespace compiler {
V(IA32Push) \
V(IA32PushFloat32) \
V(IA32PushFloat64) \
V(IA32PushSimd128) \
V(IA32Poke) \
V(IA32Peek) \
V(IA32StackCheck) \
......
......@@ -269,6 +269,7 @@ int InstructionScheduler::GetTargetInstructionFlags(
case kIA32Push:
case kIA32PushFloat32:
case kIA32PushFloat64:
case kIA32PushSimd128:
case kIA32Poke:
return kHasSideEffect;
......
......@@ -1000,6 +1000,8 @@ void InstructionSelector::EmitPrepareArguments(
Emit(kIA32PushFloat32, g.NoOutput(), value);
} else if (input.location.GetType() == MachineType::Float64()) {
Emit(kIA32PushFloat64, g.NoOutput(), value);
} else if (input.location.GetType() == MachineType::Simd128()) {
Emit(kIA32PushSimd128, g.NoOutput(), value);
} else {
Emit(kIA32Push, g.NoOutput(), value);
}
......
......@@ -265,6 +265,11 @@ class MachineRepresentationInferrer {
MachineRepresentation::kFloat64;
}
break;
case IrOpcode::kI32x4ReplaceLane:
case IrOpcode::kI32x4Splat:
representation_vector_[node->id()] =
MachineRepresentation::kSimd128;
break;
#undef LABEL
default:
break;
......@@ -369,6 +374,14 @@ class MachineRepresentationChecker {
CheckValueInputRepresentationIs(node, 0,
MachineRepresentation::kSimd128);
break;
case IrOpcode::kI32x4ReplaceLane:
CheckValueInputRepresentationIs(node, 0,
MachineRepresentation::kSimd128);
CheckValueInputForInt32Op(node, 1);
break;
case IrOpcode::kI32x4Splat:
CheckValueInputForInt32Op(node, 0);
break;
#define LABEL(opcode) case IrOpcode::k##opcode:
case IrOpcode::kChangeInt32ToTagged:
case IrOpcode::kChangeUint32ToTagged:
......
......@@ -1981,18 +1981,37 @@ CodeGenerator::CodeGenResult CodeGenerator::AssembleArchInstruction(
frame_access_state()->IncreaseSPDelta(1);
unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
kPointerSize);
} else if (instr->InputAt(0)->IsFPRegister()) {
} else if (instr->InputAt(0)->IsFloatRegister() ||
instr->InputAt(0)->IsDoubleRegister()) {
// TODO(titzer): use another machine instruction?
__ subq(rsp, Immediate(kDoubleSize));
frame_access_state()->IncreaseSPDelta(kDoubleSize / kPointerSize);
unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
kDoubleSize);
__ Movsd(Operand(rsp, 0), i.InputDoubleRegister(0));
} else {
} else if (instr->InputAt(0)->IsSimd128Register()) {
// TODO(titzer): use another machine instruction?
__ subq(rsp, Immediate(kSimd128Size));
frame_access_state()->IncreaseSPDelta(kSimd128Size / kPointerSize);
unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
kSimd128Size);
__ Movups(Operand(rsp, 0), i.InputSimd128Register(0));
} else if (instr->InputAt(0)->IsStackSlot() ||
instr->InputAt(0)->IsFloatStackSlot() ||
instr->InputAt(0)->IsDoubleStackSlot()) {
__ pushq(i.InputOperand(0));
frame_access_state()->IncreaseSPDelta(1);
unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
kPointerSize);
} else {
DCHECK(instr->InputAt(0)->IsSimd128StackSlot());
__ Movups(kScratchDoubleReg, i.InputOperand(0));
// TODO(titzer): use another machine instruction?
__ subq(rsp, Immediate(kSimd128Size));
frame_access_state()->IncreaseSPDelta(kSimd128Size / kPointerSize);
unwinding_info_writer_.MaybeIncreaseBaseOffsetAt(__ pc_offset(),
kSimd128Size);
__ Movups(Operand(rsp, 0), kScratchDoubleReg);
}
break;
case kX64Poke: {
......
......@@ -1441,6 +1441,9 @@ void InstructionSelector::EmitPrepareArguments(
// Push any stack arguments.
int effect_level = GetEffectLevel(node);
for (PushParameter input : base::Reversed(*arguments)) {
// Skip any alignment holes in pushed nodes. We may have one in case of a
// Simd128 stack argument.
if (input.node == nullptr) continue;
if (g.CanBeImmediate(input.node)) {
Emit(kX64Push, g.NoOutput(), g.UseImmediate(input.node));
} else if (IsSupported(ATOM) ||
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment