Commit 1a54c22b authored by dpranke@chromium.org's avatar dpranke@chromium.org

Rework the owner-suggesting algorithm.

It turns out that we were weighting all possible owners equally,
and picking the last one out of the list. Given the way we traversed
owners files, and given that we got rid of the "set noparent"s, this
meant that we were always suggesting Ben for just about everything.

This change implements a much smarter algorithm that attempts to balance
number of reviewers and closeness to the files under review. The unit
tests added show specific examples and explanations for why things are
chosen the way they are.

R=maruel@chromium.org
BUG=76727


Review URL: https://chromiumcodereview.appspot.com/11567052

git-svn-id: svn://svn.chromium.org/chrome/trunk/tools/depot_tools@173784 0039d316-1c4b-4281-b951-d872f2087c98
parent e6ce537f
...@@ -49,6 +49,7 @@ Examples for all of these combinations can be found in tests/owners_unittest.py. ...@@ -49,6 +49,7 @@ Examples for all of these combinations can be found in tests/owners_unittest.py.
""" """
import collections import collections
import random
import re import re
...@@ -252,60 +253,59 @@ class Database(object): ...@@ -252,60 +253,59 @@ class Database(object):
('%s is not a "set" directive, "*", ' ('%s is not a "set" directive, "*", '
'or an email address: "%s"' % (line_type, directive))) 'or an email address: "%s"' % (line_type, directive)))
def _covering_set_of_owners_for(self, files): def _covering_set_of_owners_for(self, files):
# Get the set of directories from the files. dirs_remaining = set(self._enclosing_dir_with_owners(f) for f in files)
dirs = set() all_possible_owners = self._all_possible_owners(dirs_remaining)
for f in files: suggested_owners = set()
dirs.add(self._enclosing_dir_with_owners(f)) while dirs_remaining:
owner = self.lowest_cost_owner(all_possible_owners, dirs_remaining)
suggested_owners.add(owner)
owned_dirs = {} for dirname, _ in all_possible_owners[owner]:
dir_owners = {} dirs_remaining.remove(dirname)
return suggested_owners
def _all_possible_owners(self, dirs):
"""Returns a list of (potential owner, distance-from-dir) tuples; a
distance of 1 is the lowest/closest possible distance (which makes the
subsequent math easier)."""
all_possible_owners = {}
for current_dir in dirs: for current_dir in dirs:
# Get the list of owners for each directory.
current_owners = set()
dirname = current_dir dirname = current_dir
while dirname in self.owners_for: distance = 1
current_owners |= self.owners_for[dirname] while True:
for owner in self.owners_for.get(dirname, []):
all_possible_owners.setdefault(owner, [])
all_possible_owners[owner].append((current_dir, distance))
if self._stop_looking(dirname): if self._stop_looking(dirname):
break break
prev_parent = dirname
dirname = self.os_path.dirname(dirname) dirname = self.os_path.dirname(dirname)
if prev_parent == dirname: distance += 1
break return all_possible_owners
# Map each directory to a list of its owners. @staticmethod
dir_owners[current_dir] = current_owners def lowest_cost_owner(all_possible_owners, dirs):
# We want to minimize both the number of reviewers and the distance
# Add the directory to the list of each owner. # from the files/dirs needing reviews. The "pow(X, 1.75)" below is
for owner in current_owners: # an arbitrarily-selected scaling factor that seems to work well - it
owned_dirs.setdefault(owner, set()).add(current_dir) # will select one reviewer in the parent directory over three reviewers
# in subdirs, but not one reviewer over just two.
final_owners = set() total_costs_by_owner = {}
while dirs: for owner in all_possible_owners:
# Find the owner that has the most directories. total_distance = 0
max_count = 0 num_directories_owned = 0
max_owner = None for dirname, distance in all_possible_owners[owner]:
owner_count = {} if dirname in dirs:
for dirname in dirs: total_distance += distance
for owner in dir_owners[dirname]: num_directories_owned += 1
count = owner_count.get(owner, 0) + 1 if num_directories_owned:
owner_count[owner] = count total_costs_by_owner[owner] = (total_distance /
if count >= max_count: pow(num_directories_owned, 1.75))
max_owner = owner
max_count = count # Return the lowest cost owner. In the case of a tie, pick one randomly.
lowest_cost = min(total_costs_by_owner.itervalues())
# If no more directories have OWNERS, we're done. lowest_cost_owners = filter(
if not max_owner: lambda owner: total_costs_by_owner[owner] == lowest_cost,
break total_costs_by_owner)
return random.Random().choice(lowest_cost_owners)
final_owners.add(max_owner)
# Remove all directories owned by the current owner from the remaining
# list.
for dirname in owned_dirs[max_owner]:
dirs.discard(dirname)
return final_owners
...@@ -58,7 +58,7 @@ def test_repo(): ...@@ -58,7 +58,7 @@ def test_repo():
}) })
class OwnersDatabaseTest(unittest.TestCase): class _BaseTestCase(unittest.TestCase):
def setUp(self): def setUp(self):
self.repo = test_repo() self.repo = test_repo()
self.files = self.repo.files self.files = self.repo.files
...@@ -73,6 +73,8 @@ class OwnersDatabaseTest(unittest.TestCase): ...@@ -73,6 +73,8 @@ class OwnersDatabaseTest(unittest.TestCase):
glob = glob or self.glob glob = glob or self.glob
return owners.Database(root, fopen, os_path, glob) return owners.Database(root, fopen, os_path, glob)
class OwnersDatabaseTest(_BaseTestCase):
def test_constructor(self): def test_constructor(self):
self.assertNotEquals(self.db(), None) self.assertNotEquals(self.db(), None)
...@@ -205,16 +207,41 @@ class OwnersDatabaseTest(unittest.TestCase): ...@@ -205,16 +207,41 @@ class OwnersDatabaseTest(unittest.TestCase):
self.assertRaises(owners.SyntaxErrorInOwnersFile, self.assertRaises(owners.SyntaxErrorInOwnersFile,
self.db().directories_not_covered_by, ['DEPS'], [brett]) self.db().directories_not_covered_by, ['DEPS'], [brett])
def assert_reviewers_for(self, files, expected_reviewers): def assert_syntax_error(self, owners_file_contents):
db = self.db()
self.files['/foo/OWNERS'] = owners_file_contents
self.files['/foo/DEPS'] = ''
try:
db.reviewers_for(['foo/DEPS'])
self.fail() # pragma: no cover
except owners.SyntaxErrorInOwnersFile, e:
self.assertTrue(str(e).startswith('/foo/OWNERS:1'))
def test_syntax_error__unknown_token(self):
self.assert_syntax_error('{}\n')
def test_syntax_error__unknown_set(self):
self.assert_syntax_error('set myfatherisbillgates\n')
def test_syntax_error__bad_email(self):
self.assert_syntax_error('ben\n')
class ReviewersForTest(_BaseTestCase):
def assert_reviewers_for(self, files, *potential_suggested_reviewers):
db = self.db() db = self.db()
self.assertEquals(db.reviewers_for(set(files)), set(expected_reviewers)) suggested_reviewers = db.reviewers_for(set(files))
self.assertTrue(suggested_reviewers in
[set(suggestion) for suggestion in potential_suggested_reviewers])
def test_reviewers_for__basic_functionality(self): def test_reviewers_for__basic_functionality(self):
self.assert_reviewers_for(['chrome/gpu/gpu_channel.h'], self.assert_reviewers_for(['chrome/gpu/gpu_channel.h'],
[brett]) [ken])
def test_reviewers_for__set_noparent_works(self): def test_reviewers_for__set_noparent_works(self):
self.assert_reviewers_for(['content/content.gyp'], [darin]) self.assert_reviewers_for(['content/content.gyp'],
[john],
[darin])
def test_reviewers_for__valid_inputs(self): def test_reviewers_for__valid_inputs(self):
db = self.db() db = self.db()
...@@ -236,15 +263,16 @@ class OwnersDatabaseTest(unittest.TestCase): ...@@ -236,15 +263,16 @@ class OwnersDatabaseTest(unittest.TestCase):
self.assert_reviewers_for([ self.assert_reviewers_for([
'chrome/gpu/gpu_channel.h', 'chrome/gpu/gpu_channel.h',
'content/baz/froboz.h', 'content/baz/froboz.h',
'chrome/renderer/gpu/gpu_channel_host.h'], [brett]) 'chrome/renderer/gpu/gpu_channel_host.h'],
[brett])
def test_reviewers_for__two_owners(self): def test_reviewers_for__two_owners(self):
self.assert_reviewers_for([ self.assert_reviewers_for([
'chrome/gpu/gpu_channel.h', 'chrome/gpu/gpu_channel.h',
'content/content.gyp', 'content/content.gyp',
'content/baz/froboz.h', 'content/baz/froboz.h',
'content/views/pie.h' 'content/views/pie.h'],
], [john, brett]) [ken, john])
def test_reviewers_for__all_files(self): def test_reviewers_for__all_files(self):
self.assert_reviewers_for([ self.assert_reviewers_for([
...@@ -254,32 +282,96 @@ class OwnersDatabaseTest(unittest.TestCase): ...@@ -254,32 +282,96 @@ class OwnersDatabaseTest(unittest.TestCase):
'content/content.gyp', 'content/content.gyp',
'content/bar/foo.cc', 'content/bar/foo.cc',
'content/baz/froboz.h', 'content/baz/froboz.h',
'content/views/pie.h'], [john, brett]) 'content/views/pie.h'],
[peter, ken, john])
def test_reviewers_for__per_file_owners_file(self): def test_reviewers_for__per_file_owners_file(self):
self.files['/content/baz/OWNERS'] = owners_file(lines=[ self.files['/content/baz/OWNERS'] = owners_file(lines=[
'per-file ugly.*=tom@example.com']) 'per-file ugly.*=tom@example.com'])
self.assert_reviewers_for(['content/baz/OWNERS'], [darin]) self.assert_reviewers_for(['content/baz/OWNERS'],
[john],
def assert_syntax_error(self, owners_file_contents): [darin])
db = self.db()
self.files['/foo/OWNERS'] = owners_file_contents
self.files['/foo/DEPS'] = ''
try:
db.reviewers_for(['foo/DEPS'])
self.fail() # pragma: no cover
except owners.SyntaxErrorInOwnersFile, e:
self.assertTrue(str(e).startswith('/foo/OWNERS:1'))
def test_syntax_error__unknown_token(self):
self.assert_syntax_error('{}\n')
def test_syntax_error__unknown_set(self):
self.assert_syntax_error('set myfatherisbillgates\n')
def test_syntax_error__bad_email(self):
self.assert_syntax_error('ben\n')
def test_reviewers_for__per_file(self):
self.files['/content/baz/OWNERS'] = owners_file(lines=[
'per-file ugly.*=tom@example.com'])
self.assert_reviewers_for(['content/baz/ugly.cc'],
[tom])
class LowestCostOwnersTest(_BaseTestCase):
# Keep the data in the test_lowest_cost_owner* methods as consistent with
# test_repo() where possible to minimize confusion.
def check(self, possible_owners, dirs, *possible_lowest_cost_owners):
suggested_owner = owners.Database.lowest_cost_owner(possible_owners, dirs)
self.assertTrue(suggested_owner in possible_lowest_cost_owners)
def test_one_dir_with_owner(self):
# brett is the only immediate owner for stuff in baz; john is also
# an owner, but further removed. We should always get brett.
self.check({brett: [('content/baz', 1)],
john: [('content/baz', 2)]},
['content/baz'],
brett)
# john and darin are owners for content; the suggestion could be either.
def test_one_dir_with_two_owners(self):
self.check({john: [('content', 1)],
darin: [('content', 1)]},
['content'],
john, darin)
def test_one_dir_with_two_owners_in_parent(self):
# As long as the distance is the same, it shouldn't matter (brett isn't
# listed in this case).
self.check({john: [('content/baz', 2)],
darin: [('content/baz', 2)]},
['content/baz'],
john, darin)
def test_two_dirs_two_owners(self):
# If they both match both dirs, they should be treated equally.
self.check({john: [('content/baz', 2), ('content/bar', 2)],
darin: [('content/baz', 2), ('content/bar', 2)]},
['content/baz', 'content/bar'],
john, darin)
# Here brett is better since he's closer for one of the two dirs.
self.check({brett: [('content/baz', 1), ('content/views', 1)],
darin: [('content/baz', 2), ('content/views', 1)]},
['content/baz', 'content/views'],
brett)
def test_hierarchy(self):
# the choices in these tests are more arbitrary value judgements;
# also, here we drift away from test_repo() to cover more cases.
# Here ben isn't picked, even though he can review both; we prefer
# closer reviewers.
self.check({ben: [('chrome/gpu', 2), ('chrome/renderer', 2)],
ken: [('chrome/gpu', 1)],
peter: [('chrome/renderer', 1)]},
['chrome/gpu', 'chrome/renderer'],
ken, peter)
# Here we always pick ben since he can review either dir as well as
# the others but can review both (giving us fewer total reviewers).
self.check({ben: [('chrome/gpu', 1), ('chrome/renderer', 1)],
ken: [('chrome/gpu', 1)],
peter: [('chrome/renderer', 1)]},
['chrome/gpu', 'chrome/renderer'],
ben)
# However, three reviewers is too many, so ben gets this one.
self.check({ben: [('chrome/gpu', 2), ('chrome/renderer', 2),
('chrome/browser', 2)],
ken: [('chrome/gpu', 1)],
peter: [('chrome/renderer', 1)],
brett: [('chrome/browser', 1)]},
['chrome/gpu', 'chrome/renderer',
'chrome/browser'],
ben)
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment