Commit 1a54c22b authored by dpranke@chromium.org's avatar dpranke@chromium.org

Rework the owner-suggesting algorithm.

It turns out that we were weighting all possible owners equally,
and picking the last one out of the list. Given the way we traversed
owners files, and given that we got rid of the "set noparent"s, this
meant that we were always suggesting Ben for just about everything.

This change implements a much smarter algorithm that attempts to balance
number of reviewers and closeness to the files under review. The unit
tests added show specific examples and explanations for why things are
chosen the way they are.

R=maruel@chromium.org
BUG=76727


Review URL: https://chromiumcodereview.appspot.com/11567052

git-svn-id: svn://svn.chromium.org/chrome/trunk/tools/depot_tools@173784 0039d316-1c4b-4281-b951-d872f2087c98
parent e6ce537f
......@@ -49,6 +49,7 @@ Examples for all of these combinations can be found in tests/owners_unittest.py.
"""
import collections
import random
import re
......@@ -252,60 +253,59 @@ class Database(object):
('%s is not a "set" directive, "*", '
'or an email address: "%s"' % (line_type, directive)))
def _covering_set_of_owners_for(self, files):
# Get the set of directories from the files.
dirs = set()
for f in files:
dirs.add(self._enclosing_dir_with_owners(f))
owned_dirs = {}
dir_owners = {}
dirs_remaining = set(self._enclosing_dir_with_owners(f) for f in files)
all_possible_owners = self._all_possible_owners(dirs_remaining)
suggested_owners = set()
while dirs_remaining:
owner = self.lowest_cost_owner(all_possible_owners, dirs_remaining)
suggested_owners.add(owner)
for dirname, _ in all_possible_owners[owner]:
dirs_remaining.remove(dirname)
return suggested_owners
def _all_possible_owners(self, dirs):
"""Returns a list of (potential owner, distance-from-dir) tuples; a
distance of 1 is the lowest/closest possible distance (which makes the
subsequent math easier)."""
all_possible_owners = {}
for current_dir in dirs:
# Get the list of owners for each directory.
current_owners = set()
dirname = current_dir
while dirname in self.owners_for:
current_owners |= self.owners_for[dirname]
distance = 1
while True:
for owner in self.owners_for.get(dirname, []):
all_possible_owners.setdefault(owner, [])
all_possible_owners[owner].append((current_dir, distance))
if self._stop_looking(dirname):
break
prev_parent = dirname
dirname = self.os_path.dirname(dirname)
if prev_parent == dirname:
break
# Map each directory to a list of its owners.
dir_owners[current_dir] = current_owners
# Add the directory to the list of each owner.
for owner in current_owners:
owned_dirs.setdefault(owner, set()).add(current_dir)
final_owners = set()
while dirs:
# Find the owner that has the most directories.
max_count = 0
max_owner = None
owner_count = {}
for dirname in dirs:
for owner in dir_owners[dirname]:
count = owner_count.get(owner, 0) + 1
owner_count[owner] = count
if count >= max_count:
max_owner = owner
max_count = count
# If no more directories have OWNERS, we're done.
if not max_owner:
break
final_owners.add(max_owner)
# Remove all directories owned by the current owner from the remaining
# list.
for dirname in owned_dirs[max_owner]:
dirs.discard(dirname)
return final_owners
distance += 1
return all_possible_owners
@staticmethod
def lowest_cost_owner(all_possible_owners, dirs):
# We want to minimize both the number of reviewers and the distance
# from the files/dirs needing reviews. The "pow(X, 1.75)" below is
# an arbitrarily-selected scaling factor that seems to work well - it
# will select one reviewer in the parent directory over three reviewers
# in subdirs, but not one reviewer over just two.
total_costs_by_owner = {}
for owner in all_possible_owners:
total_distance = 0
num_directories_owned = 0
for dirname, distance in all_possible_owners[owner]:
if dirname in dirs:
total_distance += distance
num_directories_owned += 1
if num_directories_owned:
total_costs_by_owner[owner] = (total_distance /
pow(num_directories_owned, 1.75))
# Return the lowest cost owner. In the case of a tie, pick one randomly.
lowest_cost = min(total_costs_by_owner.itervalues())
lowest_cost_owners = filter(
lambda owner: total_costs_by_owner[owner] == lowest_cost,
total_costs_by_owner)
return random.Random().choice(lowest_cost_owners)
......@@ -58,7 +58,7 @@ def test_repo():
})
class OwnersDatabaseTest(unittest.TestCase):
class _BaseTestCase(unittest.TestCase):
def setUp(self):
self.repo = test_repo()
self.files = self.repo.files
......@@ -73,6 +73,8 @@ class OwnersDatabaseTest(unittest.TestCase):
glob = glob or self.glob
return owners.Database(root, fopen, os_path, glob)
class OwnersDatabaseTest(_BaseTestCase):
def test_constructor(self):
self.assertNotEquals(self.db(), None)
......@@ -205,16 +207,41 @@ class OwnersDatabaseTest(unittest.TestCase):
self.assertRaises(owners.SyntaxErrorInOwnersFile,
self.db().directories_not_covered_by, ['DEPS'], [brett])
def assert_reviewers_for(self, files, expected_reviewers):
def assert_syntax_error(self, owners_file_contents):
db = self.db()
self.files['/foo/OWNERS'] = owners_file_contents
self.files['/foo/DEPS'] = ''
try:
db.reviewers_for(['foo/DEPS'])
self.fail() # pragma: no cover
except owners.SyntaxErrorInOwnersFile, e:
self.assertTrue(str(e).startswith('/foo/OWNERS:1'))
def test_syntax_error__unknown_token(self):
self.assert_syntax_error('{}\n')
def test_syntax_error__unknown_set(self):
self.assert_syntax_error('set myfatherisbillgates\n')
def test_syntax_error__bad_email(self):
self.assert_syntax_error('ben\n')
class ReviewersForTest(_BaseTestCase):
def assert_reviewers_for(self, files, *potential_suggested_reviewers):
db = self.db()
self.assertEquals(db.reviewers_for(set(files)), set(expected_reviewers))
suggested_reviewers = db.reviewers_for(set(files))
self.assertTrue(suggested_reviewers in
[set(suggestion) for suggestion in potential_suggested_reviewers])
def test_reviewers_for__basic_functionality(self):
self.assert_reviewers_for(['chrome/gpu/gpu_channel.h'],
[brett])
[ken])
def test_reviewers_for__set_noparent_works(self):
self.assert_reviewers_for(['content/content.gyp'], [darin])
self.assert_reviewers_for(['content/content.gyp'],
[john],
[darin])
def test_reviewers_for__valid_inputs(self):
db = self.db()
......@@ -236,15 +263,16 @@ class OwnersDatabaseTest(unittest.TestCase):
self.assert_reviewers_for([
'chrome/gpu/gpu_channel.h',
'content/baz/froboz.h',
'chrome/renderer/gpu/gpu_channel_host.h'], [brett])
'chrome/renderer/gpu/gpu_channel_host.h'],
[brett])
def test_reviewers_for__two_owners(self):
self.assert_reviewers_for([
'chrome/gpu/gpu_channel.h',
'content/content.gyp',
'content/baz/froboz.h',
'content/views/pie.h'
], [john, brett])
'content/views/pie.h'],
[ken, john])
def test_reviewers_for__all_files(self):
self.assert_reviewers_for([
......@@ -254,32 +282,96 @@ class OwnersDatabaseTest(unittest.TestCase):
'content/content.gyp',
'content/bar/foo.cc',
'content/baz/froboz.h',
'content/views/pie.h'], [john, brett])
'content/views/pie.h'],
[peter, ken, john])
def test_reviewers_for__per_file_owners_file(self):
self.files['/content/baz/OWNERS'] = owners_file(lines=[
'per-file ugly.*=tom@example.com'])
self.assert_reviewers_for(['content/baz/OWNERS'], [darin])
def assert_syntax_error(self, owners_file_contents):
db = self.db()
self.files['/foo/OWNERS'] = owners_file_contents
self.files['/foo/DEPS'] = ''
try:
db.reviewers_for(['foo/DEPS'])
self.fail() # pragma: no cover
except owners.SyntaxErrorInOwnersFile, e:
self.assertTrue(str(e).startswith('/foo/OWNERS:1'))
def test_syntax_error__unknown_token(self):
self.assert_syntax_error('{}\n')
def test_syntax_error__unknown_set(self):
self.assert_syntax_error('set myfatherisbillgates\n')
def test_syntax_error__bad_email(self):
self.assert_syntax_error('ben\n')
self.assert_reviewers_for(['content/baz/OWNERS'],
[john],
[darin])
def test_reviewers_for__per_file(self):
self.files['/content/baz/OWNERS'] = owners_file(lines=[
'per-file ugly.*=tom@example.com'])
self.assert_reviewers_for(['content/baz/ugly.cc'],
[tom])
class LowestCostOwnersTest(_BaseTestCase):
# Keep the data in the test_lowest_cost_owner* methods as consistent with
# test_repo() where possible to minimize confusion.
def check(self, possible_owners, dirs, *possible_lowest_cost_owners):
suggested_owner = owners.Database.lowest_cost_owner(possible_owners, dirs)
self.assertTrue(suggested_owner in possible_lowest_cost_owners)
def test_one_dir_with_owner(self):
# brett is the only immediate owner for stuff in baz; john is also
# an owner, but further removed. We should always get brett.
self.check({brett: [('content/baz', 1)],
john: [('content/baz', 2)]},
['content/baz'],
brett)
# john and darin are owners for content; the suggestion could be either.
def test_one_dir_with_two_owners(self):
self.check({john: [('content', 1)],
darin: [('content', 1)]},
['content'],
john, darin)
def test_one_dir_with_two_owners_in_parent(self):
# As long as the distance is the same, it shouldn't matter (brett isn't
# listed in this case).
self.check({john: [('content/baz', 2)],
darin: [('content/baz', 2)]},
['content/baz'],
john, darin)
def test_two_dirs_two_owners(self):
# If they both match both dirs, they should be treated equally.
self.check({john: [('content/baz', 2), ('content/bar', 2)],
darin: [('content/baz', 2), ('content/bar', 2)]},
['content/baz', 'content/bar'],
john, darin)
# Here brett is better since he's closer for one of the two dirs.
self.check({brett: [('content/baz', 1), ('content/views', 1)],
darin: [('content/baz', 2), ('content/views', 1)]},
['content/baz', 'content/views'],
brett)
def test_hierarchy(self):
# the choices in these tests are more arbitrary value judgements;
# also, here we drift away from test_repo() to cover more cases.
# Here ben isn't picked, even though he can review both; we prefer
# closer reviewers.
self.check({ben: [('chrome/gpu', 2), ('chrome/renderer', 2)],
ken: [('chrome/gpu', 1)],
peter: [('chrome/renderer', 1)]},
['chrome/gpu', 'chrome/renderer'],
ken, peter)
# Here we always pick ben since he can review either dir as well as
# the others but can review both (giving us fewer total reviewers).
self.check({ben: [('chrome/gpu', 1), ('chrome/renderer', 1)],
ken: [('chrome/gpu', 1)],
peter: [('chrome/renderer', 1)]},
['chrome/gpu', 'chrome/renderer'],
ben)
# However, three reviewers is too many, so ben gets this one.
self.check({ben: [('chrome/gpu', 2), ('chrome/renderer', 2),
('chrome/browser', 2)],
ken: [('chrome/gpu', 1)],
peter: [('chrome/renderer', 1)],
brett: [('chrome/browser', 1)]},
['chrome/gpu', 'chrome/renderer',
'chrome/browser'],
ben)
if __name__ == '__main__':
unittest.main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment