Commit a594ff73 authored by ahaas's avatar ahaas Committed by Commit bot

Implemented the ctz TurboFan operator for x64.

Ctz is currently implemented as an optional operator, and x64 is the only
architecture that supports it so far.

R=titzer@chromium.org

Review URL: https://codereview.chromium.org/1421163005

Cr-Commit-Position: refs/heads/master@{#31912}
parent 9a569ec2
......@@ -936,6 +936,9 @@ void InstructionSelector::VisitWord32Clz(Node* node) {
// Visitor stubs that abort with UNREACHABLE(): instruction selection is not
// expected to reach these operators in this file.
// NOTE(review): presumably because they are optional operators that this
// backend does not advertise as supported -- confirm against
// SupportedMachineOperatorFlags().
void InstructionSelector::VisitWord32Ctz(Node* node) { UNREACHABLE(); }
void InstructionSelector::VisitWord64Ctz(Node* node) { UNREACHABLE(); }
void InstructionSelector::VisitWord32Popcnt(Node* node) { UNREACHABLE(); }
......
......@@ -738,6 +738,8 @@ void InstructionSelector::VisitNode(Node* node) {
return MarkAsWord64(node), VisitWord64Ror(node);
case IrOpcode::kWord64Clz:
return MarkAsWord64(node), VisitWord64Clz(node);
case IrOpcode::kWord64Ctz:
return MarkAsWord64(node), VisitWord64Ctz(node);
case IrOpcode::kWord64Equal:
return VisitWord64Equal(node);
case IrOpcode::kInt32Add:
......@@ -976,6 +978,9 @@ void InstructionSelector::VisitWord64Ror(Node* node) { UNIMPLEMENTED(); }
// Placeholder visitors for 64-bit word operators: each one reports
// UNIMPLEMENTED() instead of selecting any instruction.
// NOTE(review): presumably the fallback used when the target has no 64-bit
// word support -- confirm against the enclosing preprocessor guard.
void InstructionSelector::VisitWord64Clz(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitWord64Ctz(Node* node) { UNIMPLEMENTED(); }
void InstructionSelector::VisitWord64Equal(Node* node) { UNIMPLEMENTED(); }
......
......@@ -170,6 +170,7 @@ CheckedStoreRepresentation CheckedStoreRepresentationOf(Operator const* op) {
#define PURE_OPTIONAL_OP_LIST(V) \
V(Word32Ctz, Operator::kNoProperties, 1, 0, 1) \
V(Word64Ctz, Operator::kNoProperties, 1, 0, 1) \
V(Word32Popcnt, Operator::kNoProperties, 1, 0, 1) \
V(Float32Max, Operator::kNoProperties, 2, 0, 1) \
V(Float32Min, Operator::kNoProperties, 2, 0, 1) \
......
......@@ -119,9 +119,11 @@ class MachineOperatorBuilder final : public ZoneObject {
kWord32ShiftIsSafe = 1u << 9,
kWord32Ctz = 1u << 10,
kWord32Popcnt = 1u << 11,
kWord64Ctz = 1u << 12,
kAllOptionalOps = kFloat32Max | kFloat32Min | kFloat64Max | kFloat64Min |
kFloat64RoundDown | kFloat64RoundTruncate |
kFloat64RoundTiesAway | kWord32Ctz | kWord32Popcnt
kFloat64RoundTiesAway | kWord32Ctz | kWord32Popcnt |
kWord64Ctz
};
typedef base::Flags<Flag, unsigned> Flags;
......@@ -149,6 +151,7 @@ class MachineOperatorBuilder final : public ZoneObject {
const Operator* Word64Sar();
const Operator* Word64Ror();
const Operator* Word64Clz();
const OptionalOperator Word64Ctz();
const Operator* Word64Equal();
const Operator* Int32Add();
......
......@@ -324,6 +324,9 @@ void InstructionSelector::VisitWord32Clz(Node* node) {
// Visitor stubs that abort with UNREACHABLE(): instruction selection is not
// expected to reach these operators in this file.
// NOTE(review): presumably because they are optional operators that this
// backend does not advertise as supported -- confirm against
// SupportedMachineOperatorFlags().
void InstructionSelector::VisitWord32Ctz(Node* node) { UNREACHABLE(); }
void InstructionSelector::VisitWord64Ctz(Node* node) { UNREACHABLE(); }
void InstructionSelector::VisitWord32Popcnt(Node* node) { UNREACHABLE(); }
......
......@@ -243,6 +243,7 @@
V(Word64Sar) \
V(Word64Ror) \
V(Word64Clz) \
V(Word64Ctz) \
V(Int32Add) \
V(Int32AddWithOverflow) \
V(Int32Sub) \
......
......@@ -1958,6 +1958,9 @@ Type* Typer::Visitor::TypeWord64Ror(Node* node) { return Type::Internal(); }
// Typing rules for 64-bit machine word operators: the bit-counting operators
// (Clz, Ctz) are typed as Type::Internal(), while the comparison (Equal) is
// typed as Type::Boolean().
Type* Typer::Visitor::TypeWord64Clz(Node* node) { return Type::Internal(); }
Type* Typer::Visitor::TypeWord64Ctz(Node* node) { return Type::Internal(); }
Type* Typer::Visitor::TypeWord64Equal(Node* node) { return Type::Boolean(); }
......
......@@ -829,6 +829,7 @@ void Verifier::Visitor::Check(Node* node) {
case IrOpcode::kWord64Sar:
case IrOpcode::kWord64Ror:
case IrOpcode::kWord64Clz:
case IrOpcode::kWord64Ctz:
case IrOpcode::kWord64Equal:
case IrOpcode::kInt32Add:
case IrOpcode::kInt32AddWithOverflow:
......
......@@ -782,6 +782,13 @@ void CodeGenerator::AssembleArchInstruction(Instruction* instr) {
__ Lzcntl(i.OutputRegister(), i.InputOperand(0));
}
break;
case kX64Tzcnt:
if (instr->InputAt(0)->IsRegister()) {
__ Tzcntq(i.OutputRegister(), i.InputRegister(0));
} else {
__ Tzcntq(i.OutputRegister(), i.InputOperand(0));
}
break;
case kX64Tzcnt32:
if (instr->InputAt(0)->IsRegister()) {
__ Tzcntl(i.OutputRegister(), i.InputRegister(0));
......
......@@ -48,6 +48,7 @@ namespace compiler {
V(X64Ror32) \
V(X64Lzcnt) \
V(X64Lzcnt32) \
V(X64Tzcnt) \
V(X64Tzcnt32) \
V(X64Popcnt32) \
V(SSEFloat32Cmp) \
......
......@@ -584,6 +584,12 @@ void InstructionSelector::VisitWord32Clz(Node* node) {
}
// Selects the x64 instruction for a Word64Ctz node: emits kX64Tzcnt with the
// node's result defined in a register and its single input passed as a
// generic use operand.
void InstructionSelector::VisitWord64Ctz(Node* node) {
  X64OperandGenerator g(this);
  Node* const value = node->InputAt(0);
  Emit(kX64Tzcnt, g.DefineAsRegister(node), g.Use(value));
}
void InstructionSelector::VisitWord32Ctz(Node* node) {
X64OperandGenerator g(this);
Emit(kX64Tzcnt32, g.DefineAsRegister(node), g.Use(node->InputAt(0)));
......@@ -1606,7 +1612,7 @@ InstructionSelector::SupportedMachineOperatorFlags() {
MachineOperatorBuilder::kFloat64Max |
MachineOperatorBuilder::kFloat64Min |
MachineOperatorBuilder::kWord32ShiftIsSafe |
MachineOperatorBuilder::kWord32Ctz;
MachineOperatorBuilder::kWord32Ctz | MachineOperatorBuilder::kWord64Ctz;
if (CpuFeatures::IsSupported(POPCNT)) {
flags |= MachineOperatorBuilder::kWord32Popcnt;
}
......
......@@ -781,6 +781,24 @@ void Assembler::bsfl(Register dst, const Operand& src) {
}
// Emits the register-to-register form of the 64-bit bsf instruction: a REX
// prefix built from |dst| and |src|, the two opcode bytes 0x0F 0xBC, and then
// the ModR/M byte encoding both registers.
void Assembler::bsfq(Register dst, Register src) {
// NOTE(review): presumably guarantees buffer room for the bytes emitted
// below -- confirm against the EnsureSpace definition.
EnsureSpace ensure_space(this);
emit_rex_64(dst, src);
// Two-byte opcode 0F BC.
emit(0x0F);
emit(0xBC);
emit_modrm(dst, src);
}
// Emits the register-from-operand form of the 64-bit bsf instruction: a REX
// prefix built from |dst| and |src|, the two opcode bytes 0x0F 0xBC, and then
// the operand encoding for |src| with |dst| as the register field.
void Assembler::bsfq(Register dst, const Operand& src) {
// NOTE(review): presumably guarantees buffer room for the bytes emitted
// below -- confirm against the EnsureSpace definition.
EnsureSpace ensure_space(this);
emit_rex_64(dst, src);
// Two-byte opcode 0F BC.
emit(0x0F);
emit(0xBC);
emit_operand(dst, src);
}
void Assembler::call(Label* L) {
positions_recorder()->WriteRecordedPositions();
EnsureSpace ensure_space(this);
......
......@@ -851,6 +851,8 @@ class Assembler : public AssemblerBase {
void bsrq(Register dst, const Operand& src);
void bsrl(Register dst, Register src);
void bsrl(Register dst, const Operand& src);
void bsfq(Register dst, Register src);
void bsfq(Register dst, const Operand& src);
void bsfl(Register dst, Register src);
void bsfl(Register dst, const Operand& src);
......
......@@ -3189,6 +3189,36 @@ void MacroAssembler::Lzcntq(Register dst, const Operand& src) {
}
// Emits code that counts the trailing zero bits of the 64-bit register |src|
// and leaves the count in |dst|. Uses tzcnt when BMI1 is available, otherwise
// a bsf-based sequence that yields 64 for a zero input.
void MacroAssembler::Tzcntq(Register dst, Register src) {
  if (!CpuFeatures::IsSupported(BMI1)) {
    // Fallback without BMI1: bsf(0) is undefined, so the result of tzcnt(0)
    // is defined separately by loading 64 when the source was zero.
    Label done;
    bsfq(dst, src);
    j(not_zero, &done, Label::kNear);
    Set(dst, 64);
    bind(&done);
    return;
  }

  CpuFeatureScope scope(this, BMI1);
  tzcntq(dst, src);
}
// Emits code that counts the trailing zero bits of the 64-bit operand |src|
// and leaves the count in |dst|. Uses tzcnt when BMI1 is available, otherwise
// a bsf-based sequence that yields 64 for a zero input.
void MacroAssembler::Tzcntq(Register dst, const Operand& src) {
  if (!CpuFeatures::IsSupported(BMI1)) {
    // Fallback without BMI1: bsf(0) is undefined, so the result of tzcnt(0)
    // is defined separately by loading 64 when the source was zero.
    Label done;
    bsfq(dst, src);
    j(not_zero, &done, Label::kNear);
    Set(dst, 64);
    bind(&done);
    return;
  }

  CpuFeatureScope scope(this, BMI1);
  tzcntq(dst, src);
}
void MacroAssembler::Tzcntl(Register dst, Register src) {
if (CpuFeatures::IsSupported(BMI1)) {
CpuFeatureScope scope(this, BMI1);
......
......@@ -1016,6 +1016,9 @@ class MacroAssembler: public Assembler {
void Lzcntl(Register dst, Register src);
void Lzcntl(Register dst, const Operand& src);
void Tzcntq(Register dst, Register src);
void Tzcntq(Register dst, const Operand& src);
void Tzcntl(Register dst, Register src);
void Tzcntl(Register dst, const Operand& src);
......
......@@ -208,6 +208,82 @@ TEST(RunWord64Clz) {
CHECK_EQ(63, m.Call(uint64_t(0x0000000000000001)));
CHECK_EQ(64, m.Call(uint64_t(0x0000000000000000)));
}
// Builds a function that applies the Word64Ctz operator to its single 64-bit
// parameter and checks the returned trailing-zero count for every possible
// position of the lowest set bit (0..63), plus the all-zero input.
TEST(RunWord64Ctz) {
RawMachineAssemblerTester<int32_t> m(kMachUint64);
// Word64Ctz is an optional operator; there is nothing to test when the
// machine operator builder reports it as unsupported.
if (!m.machine()->Word64Ctz().IsSupported()) {
return;
}
m.Return(m.AddNode(m.machine()->Word64Ctz().op(), m.Parameter(0)));
// A zero input has no set bit; the expected result is the full width, 64.
CHECK_EQ(64, m.Call(uint64_t(0x0000000000000000)));
// Lowest set bit in the upper 32 bits (positions 63 down to 32). Bits above
// the lowest set bit vary and must not influence the result.
CHECK_EQ(63, m.Call(uint64_t(0x8000000000000000)));
CHECK_EQ(62, m.Call(uint64_t(0x4000000000000000)));
CHECK_EQ(61, m.Call(uint64_t(0x2000000000000000)));
CHECK_EQ(60, m.Call(uint64_t(0x1000000000000000)));
CHECK_EQ(59, m.Call(uint64_t(0xa800000000000000)));
CHECK_EQ(58, m.Call(uint64_t(0xf400000000000000)));
CHECK_EQ(57, m.Call(uint64_t(0x6200000000000000)));
CHECK_EQ(56, m.Call(uint64_t(0x9100000000000000)));
CHECK_EQ(55, m.Call(uint64_t(0xcd80000000000000)));
CHECK_EQ(54, m.Call(uint64_t(0x0940000000000000)));
CHECK_EQ(53, m.Call(uint64_t(0xaf20000000000000)));
CHECK_EQ(52, m.Call(uint64_t(0xac10000000000000)));
CHECK_EQ(51, m.Call(uint64_t(0xe0b8000000000000)));
CHECK_EQ(50, m.Call(uint64_t(0x9ce4000000000000)));
CHECK_EQ(49, m.Call(uint64_t(0xc792000000000000)));
CHECK_EQ(48, m.Call(uint64_t(0xb8f1000000000000)));
CHECK_EQ(47, m.Call(uint64_t(0x3b9f800000000000)));
CHECK_EQ(46, m.Call(uint64_t(0xdb4c400000000000)));
CHECK_EQ(45, m.Call(uint64_t(0xe9a3200000000000)));
CHECK_EQ(44, m.Call(uint64_t(0xfca6100000000000)));
CHECK_EQ(43, m.Call(uint64_t(0x6c8a780000000000)));
CHECK_EQ(42, m.Call(uint64_t(0x8ce5a40000000000)));
CHECK_EQ(41, m.Call(uint64_t(0xcb7d020000000000)));
CHECK_EQ(40, m.Call(uint64_t(0xcb4dc10000000000)));
CHECK_EQ(39, m.Call(uint64_t(0xdfbec58000000000)));
CHECK_EQ(38, m.Call(uint64_t(0x27a9db4000000000)));
CHECK_EQ(37, m.Call(uint64_t(0xde3bcb2000000000)));
CHECK_EQ(36, m.Call(uint64_t(0xd7e8a61000000000)));
CHECK_EQ(35, m.Call(uint64_t(0x9afdbc8800000000)));
CHECK_EQ(34, m.Call(uint64_t(0x9afdbc8400000000)));
CHECK_EQ(33, m.Call(uint64_t(0x9afdbc8200000000)));
CHECK_EQ(32, m.Call(uint64_t(0x9afdbc8100000000)));
// Lowest set bit in the lower 32 bits (positions 31 down to 0); the upper
// 32 bits are zero in these inputs.
CHECK_EQ(31, m.Call(uint64_t(0x0000000080000000)));
CHECK_EQ(30, m.Call(uint64_t(0x0000000040000000)));
CHECK_EQ(29, m.Call(uint64_t(0x0000000020000000)));
CHECK_EQ(28, m.Call(uint64_t(0x0000000010000000)));
CHECK_EQ(27, m.Call(uint64_t(0x00000000a8000000)));
CHECK_EQ(26, m.Call(uint64_t(0x00000000f4000000)));
CHECK_EQ(25, m.Call(uint64_t(0x0000000062000000)));
CHECK_EQ(24, m.Call(uint64_t(0x0000000091000000)));
CHECK_EQ(23, m.Call(uint64_t(0x00000000cd800000)));
CHECK_EQ(22, m.Call(uint64_t(0x0000000009400000)));
CHECK_EQ(21, m.Call(uint64_t(0x00000000af200000)));
CHECK_EQ(20, m.Call(uint64_t(0x00000000ac100000)));
CHECK_EQ(19, m.Call(uint64_t(0x00000000e0b80000)));
CHECK_EQ(18, m.Call(uint64_t(0x000000009ce40000)));
CHECK_EQ(17, m.Call(uint64_t(0x00000000c7920000)));
CHECK_EQ(16, m.Call(uint64_t(0x00000000b8f10000)));
CHECK_EQ(15, m.Call(uint64_t(0x000000003b9f8000)));
CHECK_EQ(14, m.Call(uint64_t(0x00000000db4c4000)));
CHECK_EQ(13, m.Call(uint64_t(0x00000000e9a32000)));
CHECK_EQ(12, m.Call(uint64_t(0x00000000fca61000)));
CHECK_EQ(11, m.Call(uint64_t(0x000000006c8a7800)));
CHECK_EQ(10, m.Call(uint64_t(0x000000008ce5a400)));
CHECK_EQ(9, m.Call(uint64_t(0x00000000cb7d0200)));
CHECK_EQ(8, m.Call(uint64_t(0x00000000cb4dc100)));
CHECK_EQ(7, m.Call(uint64_t(0x00000000dfbec580)));
CHECK_EQ(6, m.Call(uint64_t(0x0000000027a9db40)));
CHECK_EQ(5, m.Call(uint64_t(0x00000000de3bcb20)));
CHECK_EQ(4, m.Call(uint64_t(0x00000000d7e8a610)));
CHECK_EQ(3, m.Call(uint64_t(0x000000009afdbc88)));
CHECK_EQ(2, m.Call(uint64_t(0x000000009afdbc84)));
CHECK_EQ(1, m.Call(uint64_t(0x000000009afdbc82)));
CHECK_EQ(0, m.Call(uint64_t(0x000000009afdbc81)));
}
#endif // V8_TARGET_ARCH_64_BIT
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment