#!/usr/bin/env python
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""A git command for managing a local cache of git repositories."""

from __future__ import print_function
import errno
import logging
import optparse
import os
import re
import subprocess
import sys
import tempfile
import threading
import time
import urlparse
import zipfile

from download_from_google_storage import Gsutil
import gclient_utils
import subcommand

# Analogous to gc.autopacklimit git config.
GC_AUTOPACKLIMIT = 50

GIT_CACHE_CORRUPT_MESSAGE = 'WARNING: The Git cache is corrupt.'

try:
  # pylint: disable=undefined-variable
  WinErr = WindowsError
except NameError:
  class WinErr(Exception):
    pass

class LockError(Exception):
  pass

class ClobberNeeded(Exception):
  pass


def exponential_backoff_retry(fn, excs=(Exception,), name=None, count=10,
                              sleep_time=0.25, printerr=None):
  """Executes |fn| up to |count| times, backing off exponentially.

  Args:
    fn (callable): The function to execute. If this raises a handled
        exception, the function will retry with exponential backoff.
    excs (tuple): A tuple of Exception types to handle. If one of these is
        raised by |fn|, a retry will be attempted. If |fn| raises an Exception
        that is not in this list, it will immediately pass through. If |excs|
        is empty, the Exception base class will be used.
    name (str): Optional operation name to print in the retry string.
    count (int): The number of times to try before allowing the exception to
        pass through.
    sleep_time (float): The initial number of seconds to sleep in between
        retries. This will be doubled each retry.
    printerr (callable): Function that will be called with the error string upon
        failures. If None, |logging.warning| will be used.

  Returns: The return value of the successful fn.
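
  Example (an illustrative sketch, mirroring how Mirror.Rename below uses
  this helper):
    exponential_backoff_retry(
        lambda: os.rename(src, dst), excs=(OSError,), name='rename')
    # Sleeps 0.25s, 0.5s, 1.0s, ... between up to 10 attempts.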
  """
  printerr = printerr or logging.warning
  for i in xrange(count):
    try:
      return fn()
    except excs as e:
      if (i+1) >= count:
        raise

      printerr('Retrying %s in %.2f second(s) (%d / %d attempts): %s' % (
          (name or 'operation'), sleep_time, (i+1), count, e))
      time.sleep(sleep_time)
      sleep_time *= 2


class Lockfile(object):
  """Class to represent a cross-platform process-specific lockfile."""

  def __init__(self, path, timeout=0):
    self.path = os.path.abspath(path)
    self.timeout = timeout
    self.lockfile = self.path + ".lock"
    self.pid = os.getpid()

  def _read_pid(self):
    """Read the pid stored in the lockfile.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    try:
      with open(self.lockfile, 'r') as f:
        pid = int(f.readline().strip())
    except (IOError, ValueError):
      pid = None
    return pid

  def _make_lockfile(self):
    """Safely creates a lockfile containing the current pid."""
    open_flags = (os.O_CREAT | os.O_EXCL | os.O_WRONLY)
    fd = os.open(self.lockfile, open_flags, 0o644)
    f = os.fdopen(fd, 'w')
    print(self.pid, file=f)
    f.close()

  def _remove_lockfile(self):
    """Delete the lockfile. Complains (implicitly) if it doesn't exist.

    See gclient_utils.py:rmtree docstring for more explanation on the
    windows case.
    """
    if sys.platform == 'win32':
      lockfile = os.path.normcase(self.lockfile)

      def delete():
        exitcode = subprocess.call(['cmd.exe', '/c',
                                    'del', '/f', '/q', lockfile])
        if exitcode != 0:
          raise LockError('Failed to remove lock: %s' % (lockfile,))
      exponential_backoff_retry(
          delete,
          excs=(LockError,),
          name='del [%s]' % (lockfile,))
    else:
      os.remove(self.lockfile)

  def lock(self):
    """Acquire the lock.

    This will block with a deadline of self.timeout seconds.
    """
    elapsed = 0
    while True:
      try:
        self._make_lockfile()
        return
      except OSError as e:
        if elapsed < self.timeout:
          sleep_time = min(10, self.timeout - elapsed)
          logging.info('Could not create git cache lockfile; '
                       'will retry after sleep(%d).', sleep_time)
          elapsed += sleep_time
          time.sleep(sleep_time)
          continue
        if e.errno == errno.EEXIST:
          raise LockError("%s is already locked" % self.path)
        else:
          raise LockError("Failed to create %s (err %s)" % (self.path, e.errno))

  def unlock(self):
    """Release the lock."""
    try:
      if not self.is_locked():
        raise LockError("%s is not locked" % self.path)
      if not self.i_am_locking():
        raise LockError("%s is locked, but not by me" % self.path)
      self._remove_lockfile()
    except WinErr:
      # Windows is unreliable when it comes to file locking.  YMMV.
      pass

  def break_lock(self):
    """Remove the lock, even if it was created by someone else."""
    try:
      self._remove_lockfile()
      return True
    except OSError as exc:
      if exc.errno == errno.ENOENT:
        return False
      else:
        raise

  def is_locked(self):
    """Test if the file is locked by anyone.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    return os.path.exists(self.lockfile)

  def i_am_locking(self):
    """Test if the file is locked by this process."""
    return self.is_locked() and self.pid == self._read_pid()


class Mirror(object):

  git_exe = 'git.bat' if sys.platform.startswith('win') else 'git'
  gsutil_exe = os.path.join(
    os.path.dirname(os.path.abspath(__file__)), 'gsutil.py')
  cachepath_lock = threading.Lock()

  @staticmethod
  def parse_fetch_spec(spec):
    """Parses and canonicalizes a fetch spec.

    Returns (fetchspec, value_regex), where value_regex can be used
    with 'git config --replace-all'.
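
    Example (illustrative):
      parse_fetch_spec('master') ->
          ('+refs/heads/master:refs/heads/master', r'\+refs/heads/master:.*')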
    """
    parts = spec.split(':', 1)
    src = parts[0].lstrip('+').rstrip('/')
    if not src.startswith('refs/'):
      src = 'refs/heads/%s' % src
    dest = parts[1].rstrip('/') if len(parts) > 1 else src
    regex = r'\+%s:.*' % src.replace('*', r'\*')
    return ('+%s:%s' % (src, dest), regex)

  def __init__(self, url, refs=None, print_func=None):
    self.url = url
    self.fetch_specs = set([self.parse_fetch_spec(ref) for ref in (refs or [])])
    self.basedir = self.UrlToCacheDir(url)
    self.mirror_path = os.path.join(self.GetCachePath(), self.basedir)
    if print_func:
      self.print = self.print_without_file
      self.print_func = print_func
    else:
      self.print = print

  def print_without_file(self, message, **_kwargs):
    self.print_func(message)

  @property
  def bootstrap_bucket(self):
    if 'chrome-internal' in self.url:
      return 'chrome-git-cache'
    else:
      return 'chromium-git-cache'

  @classmethod
  def FromPath(cls, path):
    return cls(cls.CacheDirToUrl(path))

  @staticmethod
  def UrlToCacheDir(url):
    """Convert a git url to a normalized form for the cache dir path."""
    parsed = urlparse.urlparse(url)
    norm_url = parsed.netloc + parsed.path
    if norm_url.endswith('.git'):
      norm_url = norm_url[:-len('.git')]
    return norm_url.replace('-', '--').replace('/', '-').lower()

  @staticmethod
  def CacheDirToUrl(path):
    """Convert a cache dir path to its corresponding url."""
    netpath = re.sub(r'\b-\b', '/', os.path.basename(path)).replace('--', '-')
    return 'https://%s' % netpath

  @classmethod
  def SetCachePath(cls, cachepath):
    with cls.cachepath_lock:
      setattr(cls, 'cachepath', cachepath)

  @classmethod
  def GetCachePath(cls):
    with cls.cachepath_lock:
      if not hasattr(cls, 'cachepath'):
        try:
          cachepath = subprocess.check_output(
              [cls.git_exe, 'config', '--global', 'cache.cachepath']).strip()
        except subprocess.CalledProcessError:
          cachepath = None
        if not cachepath:
          raise RuntimeError(
              'No global cache.cachepath git configuration found.')
        setattr(cls, 'cachepath', cachepath)
      return getattr(cls, 'cachepath')

  def Rename(self, src, dst):
    # This is somehow racy on Windows.
    # Catching OSError because WindowsError isn't portable and
    # pylint complains.
    exponential_backoff_retry(
        lambda: os.rename(src, dst),
        excs=(OSError,),
        name='rename [%s] => [%s]' % (src, dst),
        printerr=self.print)

  def RunGit(self, cmd, **kwargs):
    """Run git in a subprocess."""
    cwd = kwargs.setdefault('cwd', self.mirror_path)
    kwargs.setdefault('print_stdout', False)
    kwargs.setdefault('filter_fn', self.print)
    env = kwargs.get('env') or kwargs.setdefault('env', os.environ.copy())
    env.setdefault('GIT_ASKPASS', 'true')
    env.setdefault('SSH_ASKPASS', 'true')
    self.print('running "git %s" in "%s"' % (' '.join(cmd), cwd))
    gclient_utils.CheckCallAndFilter([self.git_exe] + cmd, **kwargs)

  def config(self, cwd=None):
    if cwd is None:
      cwd = self.mirror_path

    # Don't run git-gc in a daemon.  Bad things can happen if it gets killed.
    try:
      self.RunGit(['config', 'gc.autodetach', '0'], cwd=cwd)
    except subprocess.CalledProcessError:
      # Hard error, need to clobber.
      raise ClobberNeeded()

    # Don't combine pack files into one big pack file.  It's really slow for
    # large repositories, and there's no way to track progress and make sure
    # it's not stuck.
    self.RunGit(['config', 'gc.autopacklimit', '0'], cwd=cwd)

    # Allocate more RAM for caching delta chains, for better performance
    # of "Resolving deltas".
    self.RunGit(['config', 'core.deltaBaseCacheLimit',
                 gclient_utils.DefaultDeltaBaseCacheLimit()], cwd=cwd)

    self.RunGit(['config', 'remote.origin.url', self.url], cwd=cwd)
    self.RunGit(['config', '--replace-all', 'remote.origin.fetch',
                 '+refs/heads/*:refs/heads/*', r'\+refs/heads/\*:.*'], cwd=cwd)
    for spec, value_regex in self.fetch_specs:
      self.RunGit(
          ['config', '--replace-all', 'remote.origin.fetch', spec, value_regex],
          cwd=cwd)

  def bootstrap_repo(self, directory):
    """Bootstrap the repo from Google Storage if possible.

    More aptly named bootstrap_repo_from_cloud_if_possible_else_do_nothing().
    """

    python_fallback = False
    if (sys.platform.startswith('win') and
        not gclient_utils.FindExecutable('7z')):
      python_fallback = True
    elif sys.platform.startswith('darwin'):
      # The OSX version of unzip doesn't support zip64.
      python_fallback = True
    elif not gclient_utils.FindExecutable('unzip'):
      python_fallback = True

    gs_folder = 'gs://%s/%s' % (self.bootstrap_bucket, self.basedir)
    gsutil = Gsutil(self.gsutil_exe, boto_path=None)
    # Get the most recent version of the zipfile.
    _, ls_out, _ = gsutil.check_call('ls', gs_folder)
    ls_out_sorted = sorted(ls_out.splitlines())
    if not ls_out_sorted:
      # This repo is not on Google Storage.
      return False
    latest_checkout = ls_out_sorted[-1]

    # Download zip file to a temporary directory.
    try:
      tempdir = tempfile.mkdtemp(prefix='_cache_tmp', dir=self.GetCachePath())
      self.print('Downloading %s' % latest_checkout)
      code = gsutil.call('cp', latest_checkout, tempdir)
      if code:
        return False
      filename = os.path.join(tempdir, latest_checkout.split('/')[-1])

      # Unpack the file with 7z on Windows, unzip on Linux, or fall back to
      # Python's zipfile module.
      if not python_fallback:
        if sys.platform.startswith('win'):
          cmd = ['7z', 'x', '-o%s' % directory, '-tzip', filename]
        else:
          cmd = ['unzip', filename, '-d', directory]
        retcode = subprocess.call(cmd)
      else:
        try:
          with zipfile.ZipFile(filename, 'r') as f:
            f.printdir()
            f.extractall(directory)
        except Exception as e:
          self.print('Encountered error: %s' % str(e), file=sys.stderr)
          retcode = 1
        else:
          retcode = 0
    finally:
      # Clean up the downloaded zipfile.
      #
      # This is somehow racy on Windows.
      # Catching OSError because WindowsError isn't portable and
      # pylint complains.
      exponential_backoff_retry(
          lambda: gclient_utils.rm_file_or_tree(tempdir),
          excs=(OSError,),
          name='rmtree [%s]' % (tempdir,),
          printerr=self.print)

    if retcode:
      self.print(
          'Extracting bootstrap zipfile %s failed.\n'
          'Resuming normal operations.' % filename)
      return False
    return True

  def exists(self):
    return os.path.isfile(os.path.join(self.mirror_path, 'config'))

  def _preserve_fetchspec(self):
    """Read and preserve remote.origin.fetch from an existing mirror.

    This modifies self.fetch_specs.
    """
    if not self.exists():
      return
    try:
      config_fetchspecs = subprocess.check_output(
          [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
          cwd=self.mirror_path)
      for fetchspec in config_fetchspecs.splitlines():
        self.fetch_specs.add(self.parse_fetch_spec(fetchspec))
    except subprocess.CalledProcessError:
      logging.warn('Tried and failed to preserve remote.origin.fetch from the '
                   'existing cache directory.  You may need to manually edit '
                   '%s and "git cache fetch" again.'
                   % os.path.join(self.mirror_path, 'config'))

  def _ensure_bootstrapped(self, depth, bootstrap, force=False):
    tempdir = None
    pack_dir = os.path.join(self.mirror_path, 'objects', 'pack')
    pack_files = []

    if os.path.isdir(pack_dir):
      pack_files = [f for f in os.listdir(pack_dir) if f.endswith('.pack')]

    should_bootstrap = (force or
                        not self.exists() or
                        len(pack_files) > GC_AUTOPACKLIMIT)
    if should_bootstrap:
      if self.exists():
        # Re-bootstrapping an existing mirror; preserve existing fetch spec.
        self._preserve_fetchspec()
      tempdir = tempfile.mkdtemp(
          prefix='_cache_tmp', suffix=self.basedir, dir=self.GetCachePath())
      bootstrapped = not depth and bootstrap and self.bootstrap_repo(tempdir)
      if bootstrapped:
        # Bootstrap succeeded; delete previous cache, if any.
        gclient_utils.rmtree(self.mirror_path)
      elif not self.exists():
        # Bootstrap failed, no previous cache; start with a bare git dir.
        self.RunGit(['init', '--bare'], cwd=tempdir)
      else:
        # Bootstrap failed, previous cache exists; warn and continue.
        logging.warn(
            'Git cache has a lot of pack files (%d).  Tried to re-bootstrap '
            'but failed.  Continuing with non-optimized repository.'
            % len(pack_files))
        gclient_utils.rmtree(tempdir)
        tempdir = None
    else:
      if depth and os.path.exists(os.path.join(self.mirror_path, 'shallow')):
        logging.warn(
            'Shallow fetch requested, but repo cache already exists.')
    return tempdir

  def _fetch(self, rundir, verbose, depth):
    self.config(rundir)
    v = []
    d = []
    if verbose:
      v = ['-v', '--progress']
    if depth:
      d = ['--depth', str(depth)]
    fetch_cmd = ['fetch'] + v + d + ['origin']
    fetch_specs = subprocess.check_output(
        [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
        cwd=rundir).strip().splitlines()
    for spec in fetch_specs:
      try:
        self.print('Fetching %s' % spec)
        self.RunGit(fetch_cmd + [spec], cwd=rundir, retry=True)
      except subprocess.CalledProcessError:
        if spec == '+refs/heads/*:refs/heads/*':
          raise ClobberNeeded()  # Corrupted cache.
        logging.warn('Fetch of %s failed' % spec)

  def populate(self, depth=None, shallow=False, bootstrap=False,
               verbose=False, ignore_lock=False, lock_timeout=0):
    assert self.GetCachePath()
    if shallow and not depth:
      depth = 10000
    gclient_utils.safe_makedirs(self.GetCachePath())

    lockfile = Lockfile(self.mirror_path, lock_timeout)
    if not ignore_lock:
      lockfile.lock()

    tempdir = None
    try:
      tempdir = self._ensure_bootstrapped(depth, bootstrap)
      rundir = tempdir or self.mirror_path
      self._fetch(rundir, verbose, depth)
    except ClobberNeeded:
      # This is a major failure, we need to clean and force a bootstrap.
      gclient_utils.rmtree(rundir)
      self.print(GIT_CACHE_CORRUPT_MESSAGE)
      tempdir = self._ensure_bootstrapped(depth, bootstrap, force=True)
      assert tempdir
      self._fetch(tempdir or self.mirror_path, verbose, depth)
    finally:
      if tempdir:
        if os.path.exists(self.mirror_path):
          gclient_utils.rmtree(self.mirror_path)
        self.Rename(tempdir, self.mirror_path)
      if not ignore_lock:
        lockfile.unlock()

  def update_bootstrap(self, prune=False):
    # The files are named <git number>.zip
    gen_number = subprocess.check_output(
        [self.git_exe, 'number', 'master'], cwd=self.mirror_path).strip()
    # Run garbage collection to compress the pack files.
    self.RunGit(['gc', '--prune=all'])
    # Creating a temp file and then deleting it ensures we can use this name.
    _, tmp_zipfile = tempfile.mkstemp(suffix='.zip')
    os.remove(tmp_zipfile)
    subprocess.call(['zip', '-r', tmp_zipfile, '.'], cwd=self.mirror_path)
    gsutil = Gsutil(path=self.gsutil_exe, boto_path=None)
    gs_folder = 'gs://%s/%s' % (self.bootstrap_bucket, self.basedir)
    dest_name = '%s/%s.zip' % (gs_folder, gen_number)
    gsutil.call('cp', tmp_zipfile, dest_name)
    os.remove(tmp_zipfile)

    # Remove all other files in the same directory.
    if prune:
      _, ls_out, _ = gsutil.check_call('ls', gs_folder)
      for filename in ls_out.splitlines():
        if filename == dest_name:
          continue
        gsutil.call('rm', filename)

  @staticmethod
  def DeleteTmpPackFiles(path):
    pack_dir = os.path.join(path, 'objects', 'pack')
    if not os.path.isdir(pack_dir):
      return
    pack_files = [f for f in os.listdir(pack_dir) if
                  f.startswith('.tmp-') or f.startswith('tmp_pack_')]
    for f in pack_files:
      f = os.path.join(pack_dir, f)
      try:
        os.remove(f)
        logging.warn('Deleted stale temporary pack file %s' % f)
      except OSError:
        logging.warn('Unable to delete temporary pack file %s' % f)

  @classmethod
  def BreakLocks(cls, path):
    did_unlock = False
    lf = Lockfile(path)
    if lf.break_lock():
      did_unlock = True
    # Look for lock files that might have been left behind by an interrupted
    # git process.
    lf = os.path.join(path, 'config.lock')
    if os.path.exists(lf):
      os.remove(lf)
      did_unlock = True
    cls.DeleteTmpPackFiles(path)
    return did_unlock

  def unlock(self):
    return self.BreakLocks(self.mirror_path)

  @classmethod
  def UnlockAll(cls):
    cachepath = cls.GetCachePath()
    if not cachepath:
      return
    dirlist = os.listdir(cachepath)
    repo_dirs = set([os.path.join(cachepath, path) for path in dirlist
                     if os.path.isdir(os.path.join(cachepath, path))])
    for dirent in dirlist:
      if dirent.startswith('_cache_tmp') or dirent.startswith('tmp'):
        gclient_utils.rm_file_or_tree(os.path.join(cachepath, dirent))
      elif (dirent.endswith('.lock') and
          os.path.isfile(os.path.join(cachepath, dirent))):
        repo_dirs.add(os.path.join(cachepath, dirent[:-5]))

    unlocked_repos = []
    for repo_dir in repo_dirs:
      if cls.BreakLocks(repo_dir):
        unlocked_repos.append(repo_dir)

    return unlocked_repos

@subcommand.usage('[url of repo to check for caching]')
def CMDexists(parser, args):
  """Check to see if there already is a cache of the given repo."""
  _, args = parser.parse_args(args)
  if len(args) != 1:
    parser.error('git cache exists only takes exactly one repo url.')
  url = args[0]
  mirror = Mirror(url)
  if mirror.exists():
    print(mirror.mirror_path)
    return 0
  return 1


@subcommand.usage('[url of repo to create a bootstrap zip file]')
def CMDupdate_bootstrap(parser, args):
  """Create and upload a bootstrap zip file."""
  # Let's just assert we can't do this on Windows.
  if sys.platform.startswith('win'):
    print('Sorry, update bootstrap will not work on Windows.', file=sys.stderr)
    return 1

  parser.add_option('--prune', action='store_true',
                    help='Prune all other cached zipballs of the same repo.')

  # First, we need to ensure the cache is populated.
  populate_args = args[:]
  populate_args.append('--no-bootstrap')
  CMDpopulate(parser, populate_args)

  # Get the repo directory.
  options, args = parser.parse_args(args)
  url = args[0]
  mirror = Mirror(url)
  mirror.update_bootstrap(options.prune)
  return 0


@subcommand.usage('[url of repo to add to or update in cache]')
def CMDpopulate(parser, args):
  """Ensure that the cache has all up-to-date objects for the given repo."""
  parser.add_option('--depth', type='int',
                    help='Only cache DEPTH commits of history')
  parser.add_option('--shallow', '-s', action='store_true',
                    help='Only cache 10000 commits of history')
  parser.add_option('--ref', action='append',
                    help='Specify additional refs to be fetched')
  parser.add_option('--no_bootstrap', '--no-bootstrap',
                    action='store_true',
                    help='Don\'t bootstrap from Google Storage')
  parser.add_option('--ignore_locks', '--ignore-locks',
                    action='store_true',
                    help='Don\'t try to lock repository')

  options, args = parser.parse_args(args)
  if len(args) != 1:
    parser.error('git cache populate only takes exactly one repo url.')
  url = args[0]

  mirror = Mirror(url, refs=options.ref)
  kwargs = {
      'verbose': options.verbose,
      'shallow': options.shallow,
      'bootstrap': not options.no_bootstrap,
      'ignore_lock': options.ignore_locks,
      'lock_timeout': options.timeout,
  }
  if options.depth:
    kwargs['depth'] = options.depth
  mirror.populate(**kwargs)


@subcommand.usage('Fetch new commits into cache and current checkout')
def CMDfetch(parser, args):
  """Update mirror, and fetch in cwd."""
  parser.add_option('--all', action='store_true', help='Fetch all remotes')
  parser.add_option('--no_bootstrap', '--no-bootstrap',
                    action='store_true',
                    help='Don\'t (re)bootstrap from Google Storage')
  options, args = parser.parse_args(args)

  # Figure out which remotes to fetch.  This mimics the behavior of regular
  # 'git fetch'.  Note that in the case of "stacked" or "pipelined" branches,
  # this will NOT try to traverse up the branching structure to find the
  # ultimate remote to update.
  remotes = []
  if options.all:
    assert not args, 'fatal: fetch --all does not take a repository argument'
    remotes = subprocess.check_output([Mirror.git_exe, 'remote']).splitlines()
  elif args:
    remotes = args
  else:
    current_branch = subprocess.check_output(
        [Mirror.git_exe, 'rev-parse', '--abbrev-ref', 'HEAD']).strip()
    if current_branch != 'HEAD':
      upstream = subprocess.check_output(
          [Mirror.git_exe, 'config', 'branch.%s.remote' % current_branch]
      ).strip()
      if upstream and upstream != '.':
        remotes = [upstream]
  if not remotes:
    remotes = ['origin']

  cachepath = Mirror.GetCachePath()
  git_dir = os.path.abspath(subprocess.check_output(
      [Mirror.git_exe, 'rev-parse', '--git-dir']).strip())
  if git_dir.startswith(cachepath):
    mirror = Mirror.FromPath(git_dir)
    mirror.populate(
        bootstrap=not options.no_bootstrap, lock_timeout=options.timeout)
    return 0
  for remote in remotes:
    remote_url = subprocess.check_output(
        [Mirror.git_exe, 'config', 'remote.%s.url' % remote]).strip()
    if remote_url.startswith(cachepath):
      mirror = Mirror.FromPath(remote_url)
      mirror.print = lambda *args: None
      print('Updating git cache...')
      mirror.populate(
          bootstrap=not options.no_bootstrap, lock_timeout=options.timeout)
    subprocess.check_call([Mirror.git_exe, 'fetch', remote])
  return 0


@subcommand.usage('[url of repo to unlock, or -a|--all]')
def CMDunlock(parser, args):
  """Unlock one or all repos if their lock files are still around."""
  parser.add_option('--force', '-f', action='store_true',
                    help='Actually perform the action')
  parser.add_option('--all', '-a', action='store_true',
                    help='Unlock all repository caches')
  options, args = parser.parse_args(args)
  if len(args) > 1 or (len(args) == 0 and not options.all):
    parser.error('git cache unlock takes exactly one repo url, or --all')

  if not options.force:
    cachepath = Mirror.GetCachePath()
    lockfiles = [os.path.join(cachepath, path)
                 for path in os.listdir(cachepath)
                 if path.endswith('.lock')
                 and os.path.isfile(os.path.join(cachepath, path))]
    parser.error('git cache unlock requires -f|--force to do anything. '
                 'Refusing to unlock the following repo caches: '
                 + ', '.join(lockfiles))

  unlocked_repos = []
  if options.all:
    unlocked_repos.extend(Mirror.UnlockAll())
  else:
    m = Mirror(args[0])
    if m.unlock():
      unlocked_repos.append(m.mirror_path)

  if unlocked_repos:
    logging.info('Broke locks on these caches:\n  %s' % '\n  '.join(
        unlocked_repos))


class OptionParser(optparse.OptionParser):
  """Wrapper class for OptionParser to handle global options."""

  def __init__(self, *args, **kwargs):
    optparse.OptionParser.__init__(self, *args, prog='git cache', **kwargs)
    self.add_option('-c', '--cache-dir',
                    help='Path to the directory containing the cache')
    self.add_option('-v', '--verbose', action='count', default=1,
                    help='Increase verbosity (can be passed multiple times)')
    self.add_option('-q', '--quiet', action='store_true',
                    help='Suppress all extraneous output')
    self.add_option('--timeout', type='int', default=0,
                    help='Timeout for acquiring cache lock, in seconds')

  def parse_args(self, args=None, values=None):
    options, args = optparse.OptionParser.parse_args(self, args, values)
    if options.quiet:
      options.verbose = 0

    levels = [logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG]
    logging.basicConfig(level=levels[min(options.verbose, len(levels) - 1)])

    try:
      global_cache_dir = Mirror.GetCachePath()
    except RuntimeError:
      global_cache_dir = None
    if options.cache_dir:
      if global_cache_dir and (
          os.path.abspath(options.cache_dir) !=
          os.path.abspath(global_cache_dir)):
        logging.warn('Overriding globally-configured cache directory.')
      Mirror.SetCachePath(options.cache_dir)

    return options, args


def main(argv):
  dispatcher = subcommand.CommandDispatcher(__name__)
  return dispatcher.execute(OptionParser(), argv)


if __name__ == '__main__':
  try:
    sys.exit(main(sys.argv[1:]))
  except KeyboardInterrupt:
    sys.stderr.write('interrupted\n')
    sys.exit(1)