Commit c591a700 authored by dpranke@chromium.org's avatar dpranke@chromium.org

Try #3 to improve the owners suggesting algorithm.

This version handles the case where we need to suggest two reviewers
who have overlapping sets of directories they can review.

R=maruel@chromium.org
BUG=76727


Review URL: https://chromiumcodereview.appspot.com/11639028

git-svn-id: svn://svn.chromium.org/chrome/trunk/tools/depot_tools@174216 0039d316-1c4b-4281-b951-d872f2087c98
parent 18bc90d2
......@@ -49,6 +49,7 @@ Examples for all of these combinations can be found in tests/owners_unittest.py.
"""
import collections
import random
import re
......@@ -252,60 +253,60 @@ class Database(object):
('%s is not a "set" directive, "*", '
'or an email address: "%s"' % (line_type, directive)))
def _covering_set_of_owners_for(self, files):
# Get the set of directories from the files.
dirs = set()
for f in files:
dirs.add(self._enclosing_dir_with_owners(f))
owned_dirs = {}
dir_owners = {}
dirs_remaining = set(self._enclosing_dir_with_owners(f) for f in files)
all_possible_owners = self._all_possible_owners(dirs_remaining)
suggested_owners = set()
while dirs_remaining:
owner = self.lowest_cost_owner(all_possible_owners, dirs_remaining)
suggested_owners.add(owner)
dirs_to_remove = set(el[0] for el in all_possible_owners[owner])
dirs_remaining -= dirs_to_remove
return suggested_owners
def _all_possible_owners(self, dirs):
"""Returns a list of (potential owner, distance-from-dir) tuples; a
distance of 1 is the lowest/closest possible distance (which makes the
subsequent math easier)."""
all_possible_owners = {}
for current_dir in dirs:
# Get the list of owners for each directory.
current_owners = set()
dirname = current_dir
while dirname in self.owners_for:
current_owners |= self.owners_for[dirname]
distance = 1
while True:
for owner in self.owners_for.get(dirname, []):
all_possible_owners.setdefault(owner, [])
# If the same person is in multiple OWNERS files above a given
# directory, only count the closest one.
if not any(current_dir == el[0] for el in all_possible_owners[owner]):
all_possible_owners[owner].append((current_dir, distance))
if self._stop_looking(dirname):
break
prev_parent = dirname
dirname = self.os_path.dirname(dirname)
if prev_parent == dirname:
break
# Map each directory to a list of its owners.
dir_owners[current_dir] = current_owners
# Add the directory to the list of each owner.
for owner in current_owners:
owned_dirs.setdefault(owner, set()).add(current_dir)
final_owners = set()
while dirs:
# Find the owner that has the most directories.
max_count = 0
max_owner = None
owner_count = {}
for dirname in dirs:
for owner in dir_owners[dirname]:
count = owner_count.get(owner, 0) + 1
owner_count[owner] = count
if count >= max_count:
max_owner = owner
max_count = count
# If no more directories have OWNERS, we're done.
if not max_owner:
break
final_owners.add(max_owner)
# Remove all directories owned by the current owner from the remaining
# list.
for dirname in owned_dirs[max_owner]:
dirs.discard(dirname)
return final_owners
distance += 1
return all_possible_owners
@staticmethod
def lowest_cost_owner(all_possible_owners, dirs):
# We want to minimize both the number of reviewers and the distance
# from the files/dirs needing reviews. The "pow(X, 1.75)" below is
# an arbitrarily-selected scaling factor that seems to work well - it
# will select one reviewer in the parent directory over three reviewers
# in subdirs, but not one reviewer over just two.
total_costs_by_owner = {}
for owner in all_possible_owners:
total_distance = 0
num_directories_owned = 0
for dirname, distance in all_possible_owners[owner]:
if dirname in dirs:
total_distance += distance
num_directories_owned += 1
if num_directories_owned:
total_costs_by_owner[owner] = (total_distance /
pow(num_directories_owned, 1.75))
# Return the lowest cost owner. In the case of a tie, pick one randomly.
lowest_cost = min(total_costs_by_owner.itervalues())
lowest_cost_owners = filter(
lambda owner: total_costs_by_owner[owner] == lowest_cost,
total_costs_by_owner)
return random.Random().choice(lowest_cost_owners)
......@@ -40,6 +40,8 @@ def test_repo():
'/OWNERS': owners_file(owners.EVERYONE),
'/base/vlog.h': '',
'/chrome/OWNERS': owners_file(ben, brett),
'/chrome/browser/OWNERS': owners_file(brett),
'/chrome/browser/defaults.h': '',
'/chrome/gpu/OWNERS': owners_file(ken),
'/chrome/gpu/gpu_channel.h': '',
'/chrome/renderer/OWNERS': owners_file(peter),
......@@ -58,7 +60,7 @@ def test_repo():
})
class OwnersDatabaseTest(unittest.TestCase):
class _BaseTestCase(unittest.TestCase):
def setUp(self):
self.repo = test_repo()
self.files = self.repo.files
......@@ -73,6 +75,8 @@ class OwnersDatabaseTest(unittest.TestCase):
glob = glob or self.glob
return owners.Database(root, fopen, os_path, glob)
class OwnersDatabaseTest(_BaseTestCase):
def test_constructor(self):
self.assertNotEquals(self.db(), None)
......@@ -205,16 +209,41 @@ class OwnersDatabaseTest(unittest.TestCase):
self.assertRaises(owners.SyntaxErrorInOwnersFile,
self.db().directories_not_covered_by, ['DEPS'], [brett])
def assert_reviewers_for(self, files, expected_reviewers):
def assert_syntax_error(self, owners_file_contents):
db = self.db()
self.files['/foo/OWNERS'] = owners_file_contents
self.files['/foo/DEPS'] = ''
try:
db.reviewers_for(['foo/DEPS'])
self.fail() # pragma: no cover
except owners.SyntaxErrorInOwnersFile, e:
self.assertTrue(str(e).startswith('/foo/OWNERS:1'))
def test_syntax_error__unknown_token(self):
self.assert_syntax_error('{}\n')
def test_syntax_error__unknown_set(self):
self.assert_syntax_error('set myfatherisbillgates\n')
def test_syntax_error__bad_email(self):
self.assert_syntax_error('ben\n')
class ReviewersForTest(_BaseTestCase):
def assert_reviewers_for(self, files, *potential_suggested_reviewers):
db = self.db()
self.assertEquals(db.reviewers_for(set(files)), set(expected_reviewers))
suggested_reviewers = db.reviewers_for(set(files))
self.assertTrue(suggested_reviewers in
[set(suggestion) for suggestion in potential_suggested_reviewers])
def test_reviewers_for__basic_functionality(self):
self.assert_reviewers_for(['chrome/gpu/gpu_channel.h'],
[brett])
[ken])
def test_reviewers_for__set_noparent_works(self):
self.assert_reviewers_for(['content/content.gyp'], [darin])
self.assert_reviewers_for(['content/content.gyp'],
[john],
[darin])
def test_reviewers_for__valid_inputs(self):
db = self.db()
......@@ -236,15 +265,16 @@ class OwnersDatabaseTest(unittest.TestCase):
self.assert_reviewers_for([
'chrome/gpu/gpu_channel.h',
'content/baz/froboz.h',
'chrome/renderer/gpu/gpu_channel_host.h'], [brett])
'chrome/renderer/gpu/gpu_channel_host.h'],
[brett])
def test_reviewers_for__two_owners(self):
self.assert_reviewers_for([
'chrome/gpu/gpu_channel.h',
'content/content.gyp',
'content/baz/froboz.h',
'content/views/pie.h'
], [john, brett])
'content/views/pie.h'],
[ken, john])
def test_reviewers_for__all_files(self):
self.assert_reviewers_for([
......@@ -254,32 +284,111 @@ class OwnersDatabaseTest(unittest.TestCase):
'content/content.gyp',
'content/bar/foo.cc',
'content/baz/froboz.h',
'content/views/pie.h'], [john, brett])
'content/views/pie.h'],
[peter, ken, john])
def test_reviewers_for__per_file_owners_file(self):
self.files['/content/baz/OWNERS'] = owners_file(lines=[
'per-file ugly.*=tom@example.com'])
self.assert_reviewers_for(['content/baz/OWNERS'], [darin])
def assert_syntax_error(self, owners_file_contents):
db = self.db()
self.files['/foo/OWNERS'] = owners_file_contents
self.files['/foo/DEPS'] = ''
try:
db.reviewers_for(['foo/DEPS'])
self.fail() # pragma: no cover
except owners.SyntaxErrorInOwnersFile, e:
self.assertTrue(str(e).startswith('/foo/OWNERS:1'))
def test_syntax_error__unknown_token(self):
self.assert_syntax_error('{}\n')
def test_syntax_error__unknown_set(self):
self.assert_syntax_error('set myfatherisbillgates\n')
def test_syntax_error__bad_email(self):
self.assert_syntax_error('ben\n')
self.assert_reviewers_for(['content/baz/OWNERS'],
[john],
[darin])
def test_reviewers_for__per_file(self):
self.files['/content/baz/OWNERS'] = owners_file(lines=[
'per-file ugly.*=tom@example.com'])
self.assert_reviewers_for(['content/baz/ugly.cc'],
[tom])
def test_reviewers_for__two_nested_dirs(self):
# The same owner is listed in two directories (one above the other)
self.assert_reviewers_for(['chrome/browser/defaults.h'],
[brett])
# Here, although either ben or brett could review both files,
# someone closer to the gpu_channel_host.h should also be suggested.
# This also tests that we can handle two suggested reviewers
# with overlapping sets of directories properly.
self.files['/chrome/renderer/gpu/OWNERS'] = owners_file(ken)
self.assert_reviewers_for(['chrome/OWNERS',
'chrome/renderer/gpu/gpu_channel_host.h'],
[ben, ken],
[brett, ken])
class LowestCostOwnersTest(_BaseTestCase):
# Keep the data in the test_lowest_cost_owner* methods as consistent with
# test_repo() where possible to minimize confusion.
def check(self, possible_owners, dirs, *possible_lowest_cost_owners):
suggested_owner = owners.Database.lowest_cost_owner(possible_owners, dirs)
self.assertTrue(suggested_owner in possible_lowest_cost_owners)
def test_one_dir_with_owner(self):
# brett is the only immediate owner for stuff in baz; john is also
# an owner, but further removed. We should always get brett.
self.check({brett: [('content/baz', 1)],
john: [('content/baz', 2)]},
['content/baz'],
brett)
# john and darin are owners for content; the suggestion could be either.
def test_one_dir_with_two_owners(self):
self.check({john: [('content', 1)],
darin: [('content', 1)]},
['content'],
john, darin)
def test_one_dir_with_two_owners_in_parent(self):
# As long as the distance is the same, it shouldn't matter (brett isn't
# listed in this case).
self.check({john: [('content/baz', 2)],
darin: [('content/baz', 2)]},
['content/baz'],
john, darin)
def test_two_dirs_two_owners(self):
# If they both match both dirs, they should be treated equally.
self.check({john: [('content/baz', 2), ('content/bar', 2)],
darin: [('content/baz', 2), ('content/bar', 2)]},
['content/baz', 'content/bar'],
john, darin)
# Here brett is better since he's closer for one of the two dirs.
self.check({brett: [('content/baz', 1), ('content/views', 1)],
darin: [('content/baz', 2), ('content/views', 1)]},
['content/baz', 'content/views'],
brett)
def test_hierarchy(self):
# the choices in these tests are more arbitrary value judgements;
# also, here we drift away from test_repo() to cover more cases.
# Here ben isn't picked, even though he can review both; we prefer
# closer reviewers.
self.check({ben: [('chrome/gpu', 2), ('chrome/renderer', 2)],
ken: [('chrome/gpu', 1)],
peter: [('chrome/renderer', 1)]},
['chrome/gpu', 'chrome/renderer'],
ken, peter)
# Here we always pick ben since he can review either dir as well as
# the others but can review both (giving us fewer total reviewers).
self.check({ben: [('chrome/gpu', 1), ('chrome/renderer', 1)],
ken: [('chrome/gpu', 1)],
peter: [('chrome/renderer', 1)]},
['chrome/gpu', 'chrome/renderer'],
ben)
# However, three reviewers is too many, so ben gets this one.
self.check({ben: [('chrome/gpu', 2), ('chrome/renderer', 2),
('chrome/browser', 2)],
ken: [('chrome/gpu', 1)],
peter: [('chrome/renderer', 1)],
brett: [('chrome/browser', 1)]},
['chrome/gpu', 'chrome/renderer',
'chrome/browser'],
ben)
if __name__ == '__main__':
unittest.main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment