my_reviews.py 11.5 KB
Newer Older
1
#!/usr/bin/env python
2
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 4 5
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

6
"""Get rietveld stats about the reviews you did, or forgot to do.

Example:
  - my_reviews.py -r me@chromium.org -Q  for stats for last quarter.
"""
import datetime
12
import math
13 14 15 16
import optparse
import os
import sys

17
import auth
18 19 20
import rietveld


21
def username(email):
  """Keeps the username of an email address.

  Returns everything before the first '@'. A string that contains no '@' is
  returned unchanged.
  """
  return email.split('@', 1)[0]
24

25

26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64
def to_datetime(string):
  """Parse a UTC timestamp string into a naive datetime object.

  Accepts either a full timestamp such as '2011-07-05 01:26:12.084316' (the
  fractional seconds are dropped) or a bare '2011-07-05' date.

  Raises ValueError when the string matches neither format.
  """
  # Anything after the first '.' is the sub-second part; ignore it.
  whole_seconds = string.partition('.')[0]
  try:
    parsed = datetime.datetime.strptime(whole_seconds, '%Y-%m-%d %H:%M:%S')
  except ValueError:
    # Fall back to a date-only value, parsed from the untouched input.
    parsed = datetime.datetime.strptime(string, '%Y-%m-%d')
  return parsed


def to_time(seconds):
  """Convert a number of seconds into human readable compact string.

  Only the most significant units are shown: days/hours/minutes when the
  duration is at least a minute, seconds otherwise. Fractional seconds are
  truncated. A zero duration is rendered as '00s' rather than an empty string
  so that callers never print a blank value.

  Examples: 59 -> '59s', 60 -> '01m', 3600 -> '01h00m', 90061 -> '1d01h01m',
  -61 -> '-01m'.
  """
  prefix = ''
  if seconds < 0:
    prefix = '-'
    seconds = -seconds
  minutes, seconds = divmod(int(seconds), 60)
  hours, minutes = divmod(minutes, 60)
  days, hours = divmod(hours, 24)
  if days:
    out = '%dd%02dh%02dm' % (days, hours, minutes)
  elif hours:
    out = '%02dh%02dm' % (hours, minutes)
  elif minutes:
    out = '%02dm' % minutes
  else:
    # Seconds are shown only when there is nothing bigger to show.
    out = '%02ds' % seconds
  return prefix + out


class Stats(object):
  """Counters gathered while processing issues, plus derived statistics.

  The plain counters are incremented by the issue-processing functions; the
  percent_* and days attributes are only meaningful after finalize() ran.
  """

  def __init__(self):
    self.total = 0
    self.actually_reviewed = 0
    # One entry per measured review latency, in seconds.
    self.latencies = []
    self.lgtms = 0
    self.multiple_lgtms = 0
    self.drive_by = 0
    self.not_requested = 0
    self.self_review = 0

    # Computed by finalize().
    self.percent_lgtm = 0.
    self.percent_drive_by = 0.
    self.percent_not_requested = 0.
    self.days = 0

  @property
  def average_latency(self):
    """Mean of the recorded latencies, or 0 when there are none."""
    if not self.latencies:
      return 0
    return sum(self.latencies) / float(len(self.latencies))

  @property
  def median_latency(self):
    """Median of the recorded latencies, or 0 when there are none."""
    if not self.latencies:
      return 0
    length = len(self.latencies)
    latencies = sorted(self.latencies)
    # Floor division keeps the index an integer whatever the division mode.
    middle = length // 2
    if (length & 1) == 0:
      return (latencies[middle] + latencies[middle - 1]) / 2.
    else:
      return latencies[middle]

  @property
  def percent_done(self):
    """Percentage of issues actually reviewed, or 0 when no issue was seen."""
    if not self.total:
      return 0
    return self.actually_reviewed * 100. / self.total

  @property
  def review_per_day(self):
    """Issues per day over the covered period, or 0 when days is unset."""
    if not self.days:
      return 0
    return self.total * 1. / self.days

  @property
  def review_done_per_day(self):
    """Reviewed issues per day over the covered period, or 0 when unset."""
    if not self.days:
      return 0
    return self.actually_reviewed * 1. / self.days

  def finalize(self, first_day, last_day):
    """Computes the percentages and the number of days covered.

    Args:
      first_day: date string accepted by to_datetime() for the oldest issue,
          or a false value when there was no issue.
      last_day: date string for the newest issue; must be set if and only if
          first_day is set.
    """
    if self.actually_reviewed:
      assert self.actually_reviewed > 0
      self.percent_lgtm = (self.lgtms * 100. / self.actually_reviewed)
      self.percent_drive_by = (self.drive_by * 100. / self.actually_reviewed)
      self.percent_not_requested = (
          self.not_requested * 100. / self.actually_reviewed)
    assert bool(first_day) == bool(last_day)
    if first_day and last_day:
      assert first_day <= last_day
      # Both ends are inclusive, hence the + 1.
      self.days = (to_datetime(last_day) - to_datetime(first_day)).days + 1
      assert self.days > 0
124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172


def _process_issue_lgtms(issue, reviewer, stats):
  """Calculates LGTMs stats."""
  stats.actually_reviewed += 1
  reviewer_lgtms = len([
    msg for msg in issue['messages']
    if msg['approval'] and msg['sender'] == reviewer])
  if reviewer_lgtms > 1:
    stats.multiple_lgtms += 1
    return ' X '
  if reviewer_lgtms:
    stats.lgtms += 1
    return ' x '
  else:
    return ' o '


def _process_issue_latency(issue, reviewer, stats):
  """Calculates latency for an issue that was actually reviewed."""
  from_owner = [
    msg for msg in issue['messages'] if msg['sender'] == issue['owner_email']
  ]
  if not from_owner:
    # Probably requested by email.
    stats.not_requested += 1
    return '<no rqst sent>'

  first_msg_from_owner = None
  latency = None
  received = False
  for index, msg in enumerate(issue['messages']):
    if not first_msg_from_owner and msg['sender'] == issue['owner_email']:
      first_msg_from_owner = msg
    if index and not received and msg['sender'] == reviewer:
      # Not first email, reviewer never received one, reviewer sent a mesage.
      stats.drive_by += 1
      return '<drive-by>'
    received |= reviewer in msg['recipients']

    if first_msg_from_owner and msg['sender'] == reviewer:
      delta = msg['date'] - first_msg_from_owner['date']
      latency = delta.seconds + delta.days * 24 * 3600
      break

  if latency is None:
    stats.not_requested += 1
    return '<no rqst sent>'
  if latency > 0:
173
    stats.latencies.append(latency)
174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196
  else:
    stats.not_requested += 1
  return to_time(latency)


def _process_issue(issue):
  """Normalizes |issue| in place to simplify the remaining code.

  Email addresses are reduced to usernames, commit-bot is dropped from the
  reviewers, message dates become datetime instances and messages end up
  sorted by date.
  """
  issue['owner_email'] = username(issue['owner_email'])
  reviewers = set()
  for address in issue['reviewers']:
    reviewers.add(username(address))
  # By default, hide commit-bot.
  reviewers.discard('commit-bot')
  issue['reviewers'] = reviewers
  for message in issue['messages']:
    message['sender'] = username(message['sender'])
    message['recipients'] = list(map(username, message['recipients']))
    # Convert all times to datetime instances.
    message['date'] = to_datetime(message['date'])
  issue['messages'].sort(key=lambda message: message['date'])


def print_issue(issue, reviewer, stats):
  """Process an issue and prints stats about it.

  Args:
    issue: issue dict; it is normalized in place by _process_issue().
    reviewer: username (not the full email) of the reviewer of interest.
    stats: Stats instance updated with what was learned from this issue.
  """
  stats.total += 1
  _process_issue(issue)
  if issue['owner_email'] == reviewer:
    stats.self_review += 1
    latency = '<self review>'
    reviewed = ''
  elif any(msg['sender'] == reviewer for msg in issue['messages']):
    reviewed = _process_issue_lgtms(issue, reviewer, stats)
    latency = _process_issue_latency(issue, reviewer, stats)
  else:
    latency = 'N/A'
    reviewed = ''

  # More information is available, print issue.keys() to see them.
  # A single parenthesized argument prints the same with the print statement
  # and with the print function.
  print('%7d %10s %3s %14s %-15s  %s' % (
      issue['issue'],
      issue['created'][:10],
      reviewed,
      latency,
      issue['owner_email'],
      ', '.join(sorted(issue['reviewers']))))


218 219
def print_reviews(
    reviewer, created_after, created_before, instance_url, auth_config):
  """Prints issues |reviewer| received and potentially reviewed.

  One line per issue goes to stdout (via print_issue()); the column header
  and the summary statistics go to stderr.
  """
  remote = rietveld.Rietveld(instance_url, auth_config)

  # The stats we gather. Feel free to send me a CL to get more stats.
  stats = Stats()

  # Column sizes need to match print_issue() output.
  sys.stderr.write(
      'Issue   Creation   Did         Latency Owner           Reviewers\n')

  # See def search() in rietveld.py to see all the filters you can use.
  issues = []
  for issue in remote.search(
      reviewer=reviewer,
      created_after=created_after,
      created_before=created_before,
      with_messages=True):
    issues.append(issue)
    print_issue(issue, username(reviewer), stats)

  # The creation dates of the oldest and newest issues bound the period.
  issues.sort(key=lambda x: x['created'])
  first_day = None
  last_day = None
  if issues:
    first_day = issues[0]['created'][:10]
    last_day = issues[-1]['created'][:10]
  stats.finalize(first_day, last_day)

  sys.stderr.write(
      '%s reviewed %d issues out of %d (%1.1f%%). %d were self-review.\n' %
      (reviewer, stats.actually_reviewed, stats.total, stats.percent_done,
        stats.self_review))
  sys.stderr.write(
      '%4.1f review request/day during %3d days   (%4.1f r/d done).\n' % (
      stats.review_per_day, stats.days, stats.review_done_per_day))
  sys.stderr.write(
      '%4d were drive-bys                       (%5.1f%% of reviews done).\n' % (
        stats.drive_by, stats.percent_drive_by))
  sys.stderr.write(
      '%4d were requested over IM or irc        (%5.1f%% of reviews done).\n' % (
        stats.not_requested, stats.percent_not_requested))
  sys.stderr.write(
      ('%4d issues LGTM\'d                        (%5.1f%% of reviews done),'
       ' gave multiple LGTMs on %d issues.\n') % (
      stats.lgtms, stats.percent_lgtm, stats.multiple_lgtms))
  sys.stderr.write(
      'Average latency from request to first comment is %s.\n' %
      to_time(stats.average_latency))
  sys.stderr.write(
      'Median latency from request to first comment is %s.\n' %
      to_time(stats.median_latency))
271 272


273 274 275
def print_count(
    reviewer, created_after, created_before, instance_url, auth_config):
  """Prints how many issues match |reviewer| within the creation date range."""
  remote = rietveld.Rietveld(instance_url, auth_config)
  # A single parenthesized argument prints the same with the print statement
  # and with the print function.
  print(len(list(remote.search(
      reviewer=reviewer,
      created_after=created_after,
      created_before=created_before,
      keys_only=True))))
281 282


283 284 285 286 287 288
def get_previous_quarter(today):
  """There are four quarters, 01-03, 04-06, 07-09, 10-12.

  If today is in the last month of a quarter, assume it's the current quarter
  that is requested.

  Args:
    today: object with integer .year and .month attributes, e.g. a
        datetime.date.

  Returns:
    A (begin, end) tuple of 'YYYY-MM-01' strings: the first day of the
    selected quarter and the first day of the quarter following it.
  """
  end_year = today.year
  # First month following the selected quarter; always in the range 1..13.
  end_month = today.month - (today.month % 3) + 1
  if end_month > 12:
    end_year += 1
    end_month -= 12
  end = '%d-%02d-01' % (end_year, end_month)
  begin_year = end_year
  begin_month = end_month - 3
  if begin_month <= 0:
    begin_year -= 1
    begin_month += 12
  begin = '%d-%02d-01' % (begin_year, begin_month)
  return begin, end
305 306 307


def main():
  """Parses the command line and prints either the review stats or a count.

  Returns 0 on success; invalid options exit through parser.error().
  """
  # Silence upload.py.
  rietveld.upload.verbosity = 0
  today = datetime.date.today()
  begin, end = get_previous_quarter(today)
  default_email = os.environ.get('EMAIL_ADDRESS')
  if not default_email:
    user = os.environ.get('USER')
    if user:
      default_email = user + '@chromium.org'

  parser = optparse.OptionParser(description=__doc__)
  parser.add_option(
      '--count', action='store_true',
      help='Just count instead of printing individual issues')
  parser.add_option(
      '-r', '--reviewer', metavar='<email>', default=default_email,
      help='Filter on issue reviewer, default=%default')
  parser.add_option(
      '-b', '--begin', metavar='<date>',
      help='Filter issues created after the date')
  parser.add_option(
      '-e', '--end', metavar='<date>',
      help='Filter issues created before the date')
  parser.add_option(
      '-Q', '--last_quarter', action='store_true',
      help='Use last quarter\'s dates, e.g. %s to %s' % (begin, end))
  parser.add_option(
      '-i', '--instance_url', metavar='<host>',
      default='http://codereview.chromium.org',
      help='Host to use, default is %default')
  auth.add_auth_options(parser)
  # Remove description formatting
  parser.format_description = (
      lambda _: parser.description)  # pylint: disable=no-member
  options, args = parser.parse_args()
  auth_config = auth.extract_auth_config_from_options(options)
  if args:
    parser.error('Args unsupported')
  # An empty reviewer (e.g. EMAIL_ADDRESS set to '') is as unusable as None.
  if not options.reviewer:
    parser.error('$EMAIL_ADDRESS and $USER are not set, please use -r')

  sys.stderr.write('Searching for reviews by %s\n' % options.reviewer)
  if options.last_quarter:
    options.begin = begin
    options.end = end
    sys.stderr.write('Using range %s to %s\n' % (
        options.begin, options.end))
  else:
    if options.begin is None or options.end is None:
      parser.error('Please specify either --last_quarter or --begin and --end')

  # Validate dates.
  try:
    to_datetime(options.begin)
    to_datetime(options.end)
  except ValueError as e:
    parser.error('%s: %s - %s' % (e, options.begin, options.end))

  if options.count:
    print_count(
        options.reviewer,
        options.begin,
        options.end,
        options.instance_url,
        auth_config)
  else:
    print_reviews(
        options.reviewer,
        options.begin,
        options.end,
        options.instance_url,
        auth_config)
  return 0


if __name__ == '__main__':
  # Exit with main()'s return code; Ctrl-C gives a short message and status 1
  # instead of a traceback.
  try:
    sys.exit(main())
  except KeyboardInterrupt:
    sys.stderr.write('interrupted\n')
    sys.exit(1)