Commit 53456aa2 authored by iannucci@chromium.org

Add a git cache for gclient sync operations.

Instead of cloning straight into place, clones are made to a global cache dir,
and then local (using --shared) clones are made from the cache to the final
resting place. This means the 'final' clones are full repos with no shenanigans,
meaning that branches, commits, etc. all work, which should allow the rest of
the gclient ecosystem to work without change as well.

The primary benefit is, of course, reduced network IO, and a much lower cost for
'clobber' operations (assuming we don't clobber the cache). It also means that
a given bot can have a greater number of checkouts, since the entire git history
will only be stored once per machine, instead of once per checkout.

R=dpranke@chromium.org, szager@chromium.org
BUG=

Review URL: https://chromiumcodereview.appspot.com/18328003

git-svn-id: svn://svn.chromium.org/chrome/trunk/tools/depot_tools@210024 0039d316-1c4b-4281-b951-d872f2087c98
parent a41249c2
......@@ -898,6 +898,7 @@ solutions = [
"safesync_url": "%(safesync_url)s",
},
]
cache_dir = %(cache_dir)r
""")
DEFAULT_SNAPSHOT_SOLUTION_TEXT = ("""\
......@@ -950,6 +951,8 @@ solutions = [
else:
self._enforced_os = tuple(set(self._enforced_os).union(target_os))
gclient_scm.GitWrapper.cache_dir = config_dict.get('cache_dir')
if not target_os and config_dict.get('target_os_only', False):
raise gclient_utils.Error('Can\'t use target_os_only if target_os is '
'not specified')
......@@ -1004,13 +1007,14 @@ solutions = [
return client
def SetDefaultConfig(self, solution_name, deps_file, solution_url,
safesync_url, managed=True):
safesync_url, managed=True, cache_dir=None):
self.SetConfig(self.DEFAULT_CLIENT_FILE_TEXT % {
'solution_name': solution_name,
'solution_url': solution_url,
'deps_file': deps_file,
'safesync_url' : safesync_url,
'managed': managed,
'cache_dir': cache_dir,
})
def _SaveEntries(self):
......@@ -1405,6 +1409,10 @@ URL.
'will never sync them)')
parser.add_option('--git-deps', action='store_true',
help='sets the deps file to ".DEPS.git" instead of "DEPS"')
parser.add_option('--cache-dir',
help='(git only) Cache all git repos into this dir and do '
'shared clones from the cache, instead of cloning '
'directly from the remote. (experimental)')
parser.set_defaults(config_filename=None)
(options, args) = parser.parse_args(args)
if options.output_config_file:
......@@ -1432,7 +1440,8 @@ URL.
if len(args) > 1:
safesync_url = args[1]
client.SetDefaultConfig(name, deps_file, base_url, safesync_url,
managed=not options.unmanaged)
managed=not options.unmanaged,
cache_dir=options.cache_dir)
client.SaveConfig()
return 0
......@@ -1720,8 +1729,8 @@ def Parser():
'probably can\'t contain any newlines.')
parser.add_option('--no-nag-max', default=False, action='store_true',
help='If a subprocess runs for too long without generating'
' terminal output, generate warnings, but do not kill'
' the process.')
' terminal output, generate warnings, but do not kill'
' the process.')
# Integrate standard options processing.
old_parser = parser.parse_args
def Parse(args):
......
......@@ -4,11 +4,13 @@
"""Gclient-specific SCM-specific operations."""
import collections
import logging
import os
import posixpath
import re
import sys
import threading
import time
import gclient_utils
......@@ -152,9 +154,51 @@ class SCMWrapper(object):
return getattr(self, command)(options, args, file_list)
class GitFilter(object):
  """A filter_fn implementation for quieting down git output messages.

  Allows a custom function to skip certain lines (predicate), and will throttle
  the output of percentage completed lines to only output every X seconds.
  """

  # Matches git progress lines such as "Receiving objects:  42% (4/10)".
  # Only one- or two-digit percentages match, so a bare "100%" is handled like
  # ordinary (non-progress) output.
  PERCENT_RE = re.compile('.* ([0-9]{1,2})% .*')

  def __init__(self, time_throttle=0, predicate=None):
    """
    Args:
      time_throttle (int): GitFilter will throttle 'noisy' output (such as the
        XX% complete messages) to only be printed at least |time_throttle|
        seconds apart.
      predicate (f(line)): An optional function which is invoked for every line.
        The line will be skipped if predicate(line) returns False.
    """
    # Timestamp of the last printed line; 0 means "nothing to throttle against".
    self.last_time = 0
    self.time_throttle = time_throttle
    self.predicate = predicate

  def __call__(self, line):
    """Prints |line| unless it is filtered out or throttled.

    Args:
      line (str): A single line of git output.
    """
    # git uses an escape sequence to clear the line; elide it.
    # A plain str literal is used instead of unichr(): under Python 2,
    # searching a byte string for a unicode needle triggers an implicit ASCII
    # decode of |line|, which raises UnicodeDecodeError on non-ASCII output.
    esc = line.find('\x1b')
    if esc > -1:
      line = line[:esc]
    if self.predicate and not self.predicate(line):
      return
    now = time.time()
    match = self.PERCENT_RE.match(line)
    if not match:
      # Non-progress lines reset the timer so they are never throttled.
      self.last_time = 0
    if (now - self.last_time) >= self.time_throttle:
      self.last_time = now
      print(line)
class GitWrapper(SCMWrapper):
"""Wrapper for Git"""
cache_dir = None
# If a given cache is used in a solution more than once, prevent multiple
# threads from updating it simultaneously.
cache_locks = collections.defaultdict(threading.Lock)
def __init__(self, url=None, root_dir=None, relpath=None):
"""Removes 'git+' fake prefix from git URL."""
if url.startswith('git+http://') or url.startswith('git+https://'):
......@@ -297,6 +341,8 @@ class GitWrapper(SCMWrapper):
verbose = ['--verbose']
printed_path = True
url = self._CreateOrUpdateCache(url, options)
if revision.startswith('refs/'):
rev_type = "branch"
elif revision.startswith('origin/'):
......@@ -674,6 +720,55 @@ class GitWrapper(SCMWrapper):
base_url = self.url
return base_url[:base_url.rfind('/')] + url
@staticmethod
def _NormalizeGitURL(url):
'''Takes a git url, strips the scheme, and ensures it ends with '.git'.'''
idx = url.find('://')
if idx != -1:
url = url[idx+3:]
if not url.endswith('.git'):
url += '.git'
return url
def _CreateOrUpdateCache(self, url, options):
  """Make a new git mirror or update existing mirror for |url|, and return the
  mirror URI to clone from.

  If no cache-dir is specified, just return |url| unchanged.

  Args:
    url (str): The remote git URL to mirror.
    options: Parsed command-line options. |options.verbose| is read here; the
      object is also forwarded to _Run.

  Returns:
    str: The path of the mirror inside the cache dir, or |url| itself when
      caching is disabled.

  Raises:
    gclient_utils.Error: If an existing mirror's origin URL does not
      normalize to the same value as |url|.
  """
  if not self.cache_dir:
    return url

  # Replace - with -- to avoid ambiguity. / with - to flatten folder structure
  folder = os.path.join(
      self.cache_dir,
      self._NormalizeGitURL(url).replace('-', '--').replace('/', '-'))

  v = ['-v'] if options.verbose else []
  # Drop the per-ref '[up to date]' lines from git's output.
  filter_fn = lambda l: '[up to date]' not in l
  # One lock per mirror folder: if a given cache is used more than once in a
  # solution, only one thread at a time may create or update it.
  with self.cache_locks[folder]:
    gclient_utils.safe_makedirs(self.cache_dir)
    if not os.path.exists(os.path.join(folder, 'config')):
      # No 'config' file means there is no usable mirror here; clear out
      # whatever is at |folder| and clone from scratch.
      gclient_utils.rmtree(folder)
      self._Run(['clone'] + v + ['-c', 'core.deltaBaseCacheLimit=2g',
                                 '--progress', '--mirror', url, folder],
                options, git_filter=True, filter_fn=filter_fn,
                cwd=self.cache_dir)
    else:
      # For now, require that host/path/to/repo.git is identical. We may want
      # to relax this restriction in the future to allow for smarter cache
      # repo update schemes (such as pulling the same repo, but from a
      # different host).
      existing_url = self._Capture(['config', 'remote.origin.url'],
                                   cwd=folder)
      # Raise explicitly rather than assert: asserts are stripped under
      # `python -O` and would let a mismatched mirror be reused silently.
      if self._NormalizeGitURL(existing_url) != self._NormalizeGitURL(url):
        raise gclient_utils.Error(
            'Git cache %s mirrors %s, which does not match requested url %s'
            % (folder, existing_url, url))

      # Would normally use `git remote update`, but it doesn't support
      # --progress, so use fetch instead.
      self._Run(['fetch'] + v + ['--multiple', '--progress', '--all'],
                options, git_filter=True, filter_fn=filter_fn, cwd=folder)
  return folder
def _Clone(self, revision, url, options):
"""Clone a git repository from the given URL.
......@@ -687,6 +782,8 @@ class GitWrapper(SCMWrapper):
# to stdout
print('')
clone_cmd = ['-c', 'core.deltaBaseCacheLimit=2g', 'clone', '--progress']
if self.cache_dir:
clone_cmd.append('--shared')
if revision.startswith('refs/heads/'):
clone_cmd.extend(['-b', revision.replace('refs/heads/', '')])
detach_head = False
......@@ -702,20 +799,9 @@ class GitWrapper(SCMWrapper):
if not os.path.exists(parent_dir):
gclient_utils.safe_makedirs(parent_dir)
percent_re = re.compile('.* ([0-9]{1,2})% .*')
def _GitFilter(line):
# git uses an escape sequence to clear the line; elide it.
esc = line.find(unichr(033))
if esc > -1:
line = line[:esc]
match = percent_re.match(line)
if not match or not int(match.group(1)) % 10:
print '%s' % line
for _ in range(3):
try:
self._Run(clone_cmd, options, cwd=self._root_dir, filter_fn=_GitFilter,
print_stdout=False)
self._Run(clone_cmd, options, cwd=self._root_dir, git_filter=True)
break
except subprocess2.CalledProcessError, e:
# Too bad we don't have access to the actual output yet.
......@@ -900,13 +986,13 @@ class GitWrapper(SCMWrapper):
return None
return branch
def _Capture(self, args):
def _Capture(self, args, cwd=None):
return subprocess2.check_output(
['git'] + args,
stderr=subprocess2.VOID,
nag_timer=self.nag_timer,
nag_max=self.nag_max,
cwd=self.checkout_path).strip()
cwd=cwd or self.checkout_path).strip()
def _UpdateBranchHeads(self, options, fetch=False):
"""Adds, and optionally fetches, "branch-heads" refspecs if requested."""
......@@ -930,11 +1016,16 @@ class GitWrapper(SCMWrapper):
time.sleep(backoff_time)
backoff_time *= 1.3
def _Run(self, args, options, **kwargs):
def _Run(self, args, _options, git_filter=False, **kwargs):
kwargs.setdefault('cwd', self.checkout_path)
kwargs.setdefault('print_stdout', True)
kwargs.setdefault('nag_timer', self.nag_timer)
kwargs.setdefault('nag_max', self.nag_max)
if git_filter:
kwargs['filter_fn'] = GitFilter(kwargs['nag_timer'] / 2,
kwargs.get('filter_fn'))
kwargs.setdefault('print_stdout', False)
else:
kwargs.setdefault('print_stdout', True)
stdout = kwargs.get('stdout', sys.stdout)
stdout.write('\n________ running \'git %s\' in \'%s\'\n' % (
' '.join(args), kwargs['cwd']))
......
......@@ -685,6 +685,7 @@ class BaseGitWrapperTestCase(GCBaseTestCase, StdoutCheck, TestCaseUtils,
self.reset = False
self.nohooks = False
self.upstream = False
self.cache_dir = None
self.merge = False
self.jobs = 1
self.delete_unversioned_trees = False
......@@ -795,6 +796,8 @@ class ManagedGitWrapperTestCase(BaseGitWrapperTestCase):
'GetRevisionDate',
'GetUsableRev',
'RunCommand',
'cache_dir',
'cache_locks',
'cleanup',
'diff',
'nag_max',
......
......@@ -170,7 +170,7 @@ class GClientSmoke(GClientSmokeBase):
"""testHelp: make sure no new command was added."""
result = self.gclient(['help'])
# Roughly, not too short, not too long.
self.assertTrue(1000 < len(result[0]) and len(result[0]) < 2100,
self.assertTrue(1000 < len(result[0]) and len(result[0]) < 2300,
'Too much written to stdout: %d bytes' % len(result[0]))
self.assertEquals(0, len(result[1]))
self.assertEquals(0, result[2])
......@@ -178,7 +178,7 @@ class GClientSmoke(GClientSmokeBase):
def testUnknown(self):
result = self.gclient(['foo'])
# Roughly, not too short, not too long.
self.assertTrue(1000 < len(result[0]) and len(result[0]) < 2100,
self.assertTrue(1000 < len(result[0]) and len(result[0]) < 2300,
'Too much written to stdout: %d bytes' % len(result[0]))
self.assertEquals(0, len(result[1]))
self.assertEquals(0, result[2])
......@@ -214,7 +214,8 @@ class GClientSmoke(GClientSmokeBase):
' },\n'
' "safesync_url": "",\n'
' },\n'
']\n') % self.svn_base)
']\n'
'cache_dir = None\n') % self.svn_base)
test(['config', self.git_base + 'repo_1', '--name', 'src'],
('solutions = [\n'
......@@ -226,7 +227,8 @@ class GClientSmoke(GClientSmokeBase):
' },\n'
' "safesync_url": "",\n'
' },\n'
']\n') % self.git_base)
']\n'
'cache_dir = None\n') % self.git_base)
test(['config', 'foo', 'faa'],
'solutions = [\n'
......@@ -238,7 +240,8 @@ class GClientSmoke(GClientSmokeBase):
' },\n'
' "safesync_url": "faa",\n'
' },\n'
']\n')
']\n'
'cache_dir = None\n')
test(['config', 'foo', '--deps', 'blah'],
'solutions = [\n'
......@@ -250,7 +253,8 @@ class GClientSmoke(GClientSmokeBase):
' },\n'
' "safesync_url": "",\n'
' },\n'
']\n')
']\n'
'cache_dir = None\n')
test(['config', '--spec', '["blah blah"]'], '["blah blah"]')
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment