Commit 5439ea59 authored by primiano@chromium.org

Reland: Add --no-history option to fetch and gclient for shallow clones.

Original CL: https://codereview.chromium.org/440263002/

Many people* have complained on chromium-dev about the long time
required to perform a full fetch over a DSL connection. This seems to be
mostly due to the huge size of Chromium's history (~9 GB). On the other hand,
not everybody is interested in downloading the full git history of
the projects. The size of git packs required to fetch a working HEAD
is one order of magnitude smaller (1.5 GB).
This change makes it possible to perform a shallow fetch (in a way
which is consistent with DEPS, leveraging git templates on clone),
reducing fetch times by 80% for those not interested in the history.

* See:
[chromium-dev] "fetch chromium" keeps hanging/getting stuck on Windows 7
[chromium-dev] Initial checkout with git taking long
[chromium-dev] Trying to get latest source code fails when fetching
[chromium-dev] Gclient sync takes too long

BUG=228996
TBR=iannucci@chromium.org,szager@chromium.org,wtc@chromium.org

Review URL: https://codereview.chromium.org/440273002

git-svn-id: svn://svn.chromium.org/chrome/trunk/tools/depot_tools@287793 0039d316-1c4b-4281-b951-d872f2087c98
parent 69177ea3
...@@ -114,6 +114,8 @@ class GclientGitCheckout(GclientCheckout, GitCheckout): ...@@ -114,6 +114,8 @@ class GclientGitCheckout(GclientCheckout, GitCheckout):
sync_cmd = ['sync'] sync_cmd = ['sync']
if self.options.nohooks: if self.options.nohooks:
sync_cmd.append('--nohooks') sync_cmd.append('--nohooks')
if self.options.no_history:
sync_cmd.append('--no-history')
if self.spec.get('with_branch_heads', False): if self.spec.get('with_branch_heads', False):
sync_cmd.append('--with_branch_heads') sync_cmd.append('--with_branch_heads')
self.run_gclient(*sync_cmd) self.run_gclient(*sync_cmd)
...@@ -207,6 +209,7 @@ Valid options: ...@@ -207,6 +209,7 @@ Valid options:
-h, --help, help Print this message. -h, --help, help Print this message.
--nohooks Don't run hooks after checkout. --nohooks Don't run hooks after checkout.
-n, --dry-run Don't run commands, only print them. -n, --dry-run Don't run commands, only print them.
--no-history Perform shallow clones, don't fetch the full git history.
""" % os.path.basename(sys.argv[0])) """ % os.path.basename(sys.argv[0]))
sys.exit(bool(msg)) sys.exit(bool(msg))
...@@ -220,6 +223,7 @@ def handle_args(argv): ...@@ -220,6 +223,7 @@ def handle_args(argv):
dry_run = False dry_run = False
nohooks = False nohooks = False
no_history = False
while len(argv) >= 2: while len(argv) >= 2:
arg = argv[1] arg = argv[1]
if not arg.startswith('-'): if not arg.startswith('-'):
...@@ -229,6 +233,8 @@ def handle_args(argv): ...@@ -229,6 +233,8 @@ def handle_args(argv):
dry_run = True dry_run = True
elif arg == '--nohooks': elif arg == '--nohooks':
nohooks = True nohooks = True
elif arg == '--no-history':
no_history = True
else: else:
usage('Invalid option %s.' % arg) usage('Invalid option %s.' % arg)
...@@ -241,7 +247,11 @@ def handle_args(argv): ...@@ -241,7 +247,11 @@ def handle_args(argv):
recipe = argv[1] recipe = argv[1]
props = argv[2:] props = argv[2:]
return optparse.Values({'dry_run':dry_run, 'nohooks':nohooks }), recipe, props return (
optparse.Values(
{'dry_run':dry_run, 'nohooks':nohooks, 'no_history': no_history }),
recipe,
props)
def run_recipe_fetch(recipe, props, aliased=False): def run_recipe_fetch(recipe, props, aliased=False):
......
...@@ -1808,6 +1808,9 @@ def CMDsync(parser, args): ...@@ -1808,6 +1808,9 @@ def CMDsync(parser, args):
parser.add_option('--output-json', parser.add_option('--output-json',
help='Output a json document to this path containing ' help='Output a json document to this path containing '
'summary information about the sync.') 'summary information about the sync.')
parser.add_option('--no-history', action='store_true',
help='GIT ONLY - Reduces the size/time of the checkout at '
'the cost of no history. Requires Git 1.9+')
parser.add_option('--shallow', action='store_true', parser.add_option('--shallow', action='store_true',
help='GIT ONLY - Do a shallow clone into the cache dir. ' help='GIT ONLY - Do a shallow clone into the cache dir. '
'Requires Git 1.9+') 'Requires Git 1.9+')
......
...@@ -825,6 +825,25 @@ class GitWrapper(SCMWrapper): ...@@ -825,6 +825,25 @@ class GitWrapper(SCMWrapper):
# create it, so we need to do it manually. # create it, so we need to do it manually.
parent_dir = os.path.dirname(self.checkout_path) parent_dir = os.path.dirname(self.checkout_path)
gclient_utils.safe_makedirs(parent_dir) gclient_utils.safe_makedirs(parent_dir)
template_dir = None
if hasattr(options, 'no_history') and options.no_history:
if gclient_utils.IsGitSha(revision):
# In the case of a subproject, the pinned sha is not necessarily the
# head of the remote branch (so we can't just use --depth=N). Instead,
# we tell git to fetch all the remote objects from SHA..HEAD by means of
# a template git dir which has a 'shallow' file pointing to the sha.
template_dir = tempfile.mkdtemp(
prefix='_gclient_gittmp_%s' % os.path.basename(self.checkout_path),
dir=parent_dir)
self._Run(['init', '--bare', template_dir], options, cwd=self._root_dir)
with open(os.path.join(template_dir, 'shallow'), 'w') as template_file:
template_file.write(revision)
clone_cmd.append('--template=' + template_dir)
else:
# Otherwise, we're just interested in the HEAD. Just use --depth.
clone_cmd.append('--depth=1')
tmp_dir = tempfile.mkdtemp( tmp_dir = tempfile.mkdtemp(
prefix='_gclient_%s_' % os.path.basename(self.checkout_path), prefix='_gclient_%s_' % os.path.basename(self.checkout_path),
dir=parent_dir) dir=parent_dir)
...@@ -841,6 +860,8 @@ class GitWrapper(SCMWrapper): ...@@ -841,6 +860,8 @@ class GitWrapper(SCMWrapper):
if os.listdir(tmp_dir): if os.listdir(tmp_dir):
self.Print('_____ removing non-empty tmp dir %s' % tmp_dir) self.Print('_____ removing non-empty tmp dir %s' % tmp_dir)
gclient_utils.rmtree(tmp_dir) gclient_utils.rmtree(tmp_dir)
if template_dir:
gclient_utils.rmtree(template_dir)
self._UpdateBranchHeads(options, fetch=True) self._UpdateBranchHeads(options, fetch=True)
self._Checkout(options, revision.replace('refs/heads/', ''), quiet=True) self._Checkout(options, revision.replace('refs/heads/', ''), quiet=True)
if self._GetCurrentBranch() is None: if self._GetCurrentBranch() is None:
......
...@@ -84,6 +84,11 @@ def SplitUrlRevision(url): ...@@ -84,6 +84,11 @@ def SplitUrlRevision(url):
return tuple(components) return tuple(components)
def IsGitSha(revision):
  """Returns true if the given string is a valid hex-encoded sha.

  A valid sha here is 6 to 40 hexadecimal characters (either case) and
  nothing else. Abbreviated hashes are therefore accepted, but ref names
  such as 'refs/heads/master' are not.

  Args:
    revision: The revision string to test. None and the empty string are
        treated as "not a sha" rather than raising.

  Returns:
    True if |revision| consists solely of 6-40 hex digits, False otherwise.
  """
  # '\Z' anchors at the very end of the string. '$' would also match just
  # before a trailing newline, letting 'abcdef\n' through as a valid sha.
  return bool(revision) and (
      re.match(r'^[a-fA-F0-9]{6,40}\Z', revision) is not None)
def IsDateRevision(revision): def IsDateRevision(revision):
"""Returns true if the given revision is of the form "{ ... }".""" """Returns true if the given revision is of the form "{ ... }"."""
return bool(revision and re.match(r'^\{.+\}$', str(revision))) return bool(revision and re.match(r'^\{.+\}$', str(revision)))
......
...@@ -784,6 +784,7 @@ class BaseGitWrapperTestCase(GCBaseTestCase, StdoutCheck, TestCaseUtils, ...@@ -784,6 +784,7 @@ class BaseGitWrapperTestCase(GCBaseTestCase, StdoutCheck, TestCaseUtils,
self.force = False self.force = False
self.reset = False self.reset = False
self.nohooks = False self.nohooks = False
self.no_history = False
self.upstream = False self.upstream = False
self.cache_dir = None self.cache_dir = None
self.merge = False self.merge = False
......
...@@ -1179,6 +1179,47 @@ class GClientSmokeGITMutates(GClientSmokeBase): ...@@ -1179,6 +1179,47 @@ class GClientSmokeGITMutates(GClientSmokeBase):
# files. # files.
self.assertEquals(0, len(out)) self.assertEquals(0, len(out))
def testSyncNoHistory(self):
  """Checks that 'gclient sync --no-history' leaves repo_2 shallow.

  A new commit is added to repo_2, and repo_1's DEPS is rewritten to pin
  repo_2 at that commit's hash. After syncing with --no-history, the repo2
  checkout must list exactly that one commit in 'git rev-list HEAD' and
  must contain the file introduced by it.
  """
  if not self.enabled:
    return

  # Add one more commit to repo_2 and re-pin DEPS in repo_1 to its hash.
  deps_before = self.FAKE_REPOS.git_hashes['repo_1'][-1][1]['DEPS']
  pinned_hash_short = self.FAKE_REPOS.git_hashes['repo_2'][1][0][:7]
  extra_commit_files = {
    'last_file': 'file created in last commit',
  }
  # pylint: disable=W0212
  self.FAKE_REPOS._commit_git('repo_2', extra_commit_files)
  tip_hash = self.FAKE_REPOS.git_hashes['repo_2'][-1][0]
  deps_after = deps_before.replace(pinned_hash_short, tip_hash)
  # Guard against the replace() silently being a no-op.
  self.assertNotEqual(deps_after, deps_before)
  repinned_files = {
    'DEPS': deps_after,
    'origin': 'git/repo_1@4\n',
  }
  # pylint: disable=W0212
  self.FAKE_REPOS._commit_git('repo_1', repinned_files)

  config_template = (
"""solutions = [{
"name" : "src",
"url" : "%(git_base)srepo_1",
"deps_file" : "DEPS",
"managed" : True,
}]""")
  spec = config_template % {'git_base': self.git_base}
  self.gclient(['config', '--spec', spec])

  self.gclient(['sync', '--no-history', '--deps', 'mac'])
  checkout_dir = join(self.root_dir, 'src', 'repo2')

  # A shallow checkout exposes a single commit: the one DEPS points to.
  history = subprocess2.check_output(
      ['git', 'rev-list', 'HEAD'], cwd=checkout_dir)
  self.assertEquals(tip_hash, history.strip('\r\n'))

  # The file added by the new commit proves the right revision is checked out.
  marker_path = join(checkout_dir, 'last_file')
  self.assertTrue(os.path.exists(marker_path))
class GClientSmokeBoth(GClientSmokeBase): class GClientSmokeBoth(GClientSmokeBase):
def setUp(self): def setUp(self):
super(GClientSmokeBoth, self).setUp() super(GClientSmokeBoth, self).setUp()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment