#!/usr/bin/env python
# Copyright 2014 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""A git command for managing a local cache of git repositories."""

from __future__ import print_function
import errno
import logging
import optparse
import os
import tempfile
import time
import subprocess
import sys
import urlparse
import zipfile

from download_from_google_storage import Gsutil
import gclient_utils
import subcommand

# Alias the Windows-only WindowsError so the rest of this file can catch it
# unconditionally.  On non-Windows platforms (where the name does not exist)
# substitute a dummy exception class that is never actually raised.
try:
  # pylint: disable=E0602
  WinErr = WindowsError
except NameError:
  class WinErr(Exception):
    # Placeholder used on non-Windows platforms; never raised.
    pass

class LockError(Exception):
  """Raised when a cache lockfile cannot be acquired or removed."""
  pass


class Lockfile(object):
  """Class to represent a cross-platform process-specific lockfile.

  Locking "<path>" is done by atomically creating the sibling file
  "<path>.lock" containing the locking process's pid.  Usable as a context
  manager (locks on entry, unlocks on exit).
  """

  def __init__(self, path):
    self.path = os.path.abspath(path)
    self.lockfile = self.path + ".lock"
    self.pid = os.getpid()

  def _read_pid(self):
    """Read the pid stored in the lockfile.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    try:
      with open(self.lockfile, 'r') as f:
        pid = int(f.readline().strip())
    except (IOError, ValueError):
      # Missing/unreadable file and garbage contents both mean "no owner".
      pid = None
    return pid

  def _make_lockfile(self):
    """Safely creates a lockfile containing the current pid."""
    # O_CREAT|O_EXCL makes creation atomic: the open fails with EEXIST if
    # another process already holds the lock.
    open_flags = (os.O_CREAT | os.O_EXCL | os.O_WRONLY)
    fd = os.open(self.lockfile, open_flags, 0o644)
    # Context manager guarantees the fd is closed even if the write fails
    # (the original code leaked the fd on a failed write).
    with os.fdopen(fd, 'w') as f:
      print(self.pid, file=f)

  def _remove_lockfile(self):
    """Delete the lockfile. Complains (implicitly) if it doesn't exist.

    See gclient_utils.py:rmtree docstring for more explanation on the
    windows case.
    """
    if sys.platform == 'win32':
      lockfile = os.path.normcase(self.lockfile)
      # Retry a few times: on Windows the file may be transiently held open
      # by another process.
      for _ in xrange(3):
        exitcode = subprocess.call(['cmd.exe', '/c',
                                    'del', '/f', '/q', lockfile])
        if exitcode == 0:
          return
        time.sleep(3)
      raise LockError('Failed to remove lock: %s' % lockfile)
    else:
      os.remove(self.lockfile)

  def lock(self):
    """Acquire the lock.

    Note: This is a NON-BLOCKING FAIL-FAST operation.
    Do. Or do not. There is no try.

    Raises:
      LockError: if the lock is already held, or cannot be created.
    """
    try:
      self._make_lockfile()
    except OSError as e:
      if e.errno == errno.EEXIST:
        raise LockError("%s is already locked" % self.path)
      else:
        raise LockError("Failed to create %s (err %s)" % (self.path, e.errno))

  def unlock(self):
    """Release the lock.

    Raises:
      LockError: if the lock is not held, or is held by another process.
    """
    if not self.is_locked():
      raise LockError("%s is not locked" % self.path)
    if not self.i_am_locking():
      raise LockError("%s is locked, but not by me" % self.path)
    self._remove_lockfile()

  def break_lock(self):
    """Remove the lock, even if it was created by someone else.

    Returns:
      True if a lockfile was removed, False if none existed.
    """
    try:
      self._remove_lockfile()
      return True
    except OSError as exc:
      if exc.errno == errno.ENOENT:
        # Nothing to break.
        return False
      else:
        raise

  def is_locked(self):
    """Test if the file is locked by anyone.

    Note: This method is potentially racy. By the time it returns the lockfile
    may have been unlocked, removed, or stolen by some other process.
    """
    return os.path.exists(self.lockfile)

  def i_am_locking(self):
    """Test if the file is locked by this process."""
    return self.is_locked() and self.pid == self._read_pid()

  def __enter__(self):
    self.lock()
    return self

  def __exit__(self, *_exc):
    # Windows is unreliable when it comes to file locking.  YMMV.
    try:
      self.unlock()
    except WinErr:
      pass


class Mirror(object):
  """A mirror of one remote git repository inside the local cache directory.

  Mirrors live at GetCachePath()/<normalized url> and are kept up to date
  with `git fetch` (populate).  New mirrors can optionally be seeded from a
  pre-packaged zipfile in Google Storage (bootstrap_repo).
  """

  # Windows needs the .bat shim to locate git.
  git_exe = 'git.bat' if sys.platform.startswith('win') else 'git'
  # gsutil bundled with this checkout.
  gsutil_exe = os.path.join(
    os.path.dirname(os.path.abspath(__file__)),
    'third_party', 'gsutil', 'gsutil')
  # Google Storage bucket holding the bootstrap zipfiles.
  bootstrap_bucket = 'chromium-git-cache'

  def __init__(self, url, refs=None, print_func=None):
    """Args:
      url: URL of the remote repository to mirror.
      refs: optional list of extra refs (beyond refs/heads) to fetch.
      print_func: optional callable used for status output instead of the
          builtin print.
    """
    self.url = url
    self.refs = refs or []
    self.basedir = self.UrlToCacheDir(url)
    self.mirror_path = os.path.join(self.GetCachePath(), self.basedir)
    self.print = print_func or print

  @staticmethod
  def UrlToCacheDir(url):
    """Convert a git url to a normalized form for the cache dir path."""
    parsed = urlparse.urlparse(url)
    norm_url = parsed.netloc + parsed.path
    if norm_url.endswith('.git'):
      norm_url = norm_url[:-len('.git')]
    # Escape '-' as '--' before mapping '/' to '-' so the mapping stays
    # unambiguous.
    return norm_url.replace('-', '--').replace('/', '-').lower()

  @staticmethod
  def FindExecutable(executable):
    """This mimics the "which" utility.

    Returns:
      Absolute path of the first matching executable on PATH (also trying
      .bat/.cmd/.exe suffixes on Windows), or None if not found.
    """
    path_folders = os.environ.get('PATH').split(os.pathsep)

    for path_folder in path_folders:
      target = os.path.join(path_folder, executable)
      # Just in case we have some ~/blah paths.
      target = os.path.abspath(os.path.expanduser(target))
      if os.path.isfile(target) and os.access(target, os.X_OK):
        return target
      if sys.platform.startswith('win'):
        # Windows executables usually carry an extension callers omit.
        for suffix in ('.bat', '.cmd', '.exe'):
          alt_target = target + suffix
          if os.path.isfile(alt_target) and os.access(alt_target, os.X_OK):
            return alt_target
    return None

  @classmethod
  def SetCachePath(cls, cachepath):
    # Explicit override; takes precedence over git's cache.cachepath config.
    setattr(cls, 'cachepath', cachepath)

  @classmethod
  def GetCachePath(cls):
    """Return the cache root directory, lazily read from git's global
    cache.cachepath configuration on first use.

    Raises:
      RuntimeError: if no cache path was set and none is configured in git.
    """
    if not hasattr(cls, 'cachepath'):
      try:
        cachepath = subprocess.check_output(
            [cls.git_exe, 'config', '--global', 'cache.cachepath']).strip()
      except subprocess.CalledProcessError:
        cachepath = None
      if not cachepath:
        raise RuntimeError('No global cache.cachepath git configuration found.')
      # Memoize on the class so git config is only consulted once.
      setattr(cls, 'cachepath', cachepath)
    return getattr(cls, 'cachepath')

  def RunGit(self, cmd, **kwargs):
    """Run git in a subprocess.

    Args:
      cmd: git arguments (without the leading 'git').
      kwargs: forwarded to gclient_utils.CheckCallAndFilter; cwd defaults
          to the mirror directory.
    """
    cwd = kwargs.setdefault('cwd', self.mirror_path)
    kwargs.setdefault('print_stdout', False)
    kwargs.setdefault('filter_fn', self.print)
    env = kwargs.get('env') or kwargs.setdefault('env', os.environ.copy())
    # Point askpass helpers at 'true' so git fails fast instead of
    # prompting for credentials interactively.
    env.setdefault('GIT_ASKPASS', 'true')
    env.setdefault('SSH_ASKPASS', 'true')
    self.print('running "git %s" in "%s"' % (' '.join(cmd), cwd))
    gclient_utils.CheckCallAndFilter([self.git_exe] + cmd, **kwargs)

  def config(self, cwd=None):
    """Write this mirror's git configuration (origin url and fetch specs).

    Args:
      cwd: repository to configure; defaults to the mirror itself.
    """
    if cwd is None:
      cwd = self.mirror_path
    self.RunGit(['config', 'core.deltaBaseCacheLimit',
                 gclient_utils.DefaultDeltaBaseCacheLimit()], cwd=cwd)
    self.RunGit(['config', 'remote.origin.url', self.url], cwd=cwd)
    self.RunGit(['config', '--replace-all', 'remote.origin.fetch',
                 '+refs/heads/*:refs/heads/*'], cwd=cwd)
    for ref in self.refs:
      ref = ref.lstrip('+').rstrip('/')
      if ref.startswith('refs/'):
        # Fully-qualified single ref, e.g. refs/branch-heads/123.
        refspec = '+%s:%s' % (ref, ref)
      else:
        # Shorthand like 'branch-heads' expands to a wildcard namespace.
        refspec = '+refs/%s/*:refs/%s/*' % (ref, ref)
      self.RunGit(['config', '--add', 'remote.origin.fetch', refspec], cwd=cwd)

  def bootstrap_repo(self, directory):
    """Bootstrap the repo from Google Storage if possible.

    Downloads the most recent zipped snapshot of this repo from the
    bootstrap bucket and unpacks it into |directory|.

    Returns:
      True on success; False if the repo is not in Google Storage or the
      download/extraction failed (callers fall back to `git init`).
    """

    # Prefer a native unzip tool; fall back to Python's zipfile module
    # where none is usable.
    python_fallback = False
    if sys.platform.startswith('win') and not self.FindExecutable('7z'):
      python_fallback = True
    elif sys.platform.startswith('darwin'):
      # The OSX version of unzip doesn't support zip64.
      python_fallback = True
    elif not self.FindExecutable('unzip'):
      python_fallback = True

    gs_folder = 'gs://%s/%s' % (self.bootstrap_bucket, self.basedir)
    gsutil = Gsutil(
        self.gsutil_exe, boto_path=os.devnull, bypass_prodaccess=True)
    # Get the most recent version of the zipfile.
    _, ls_out, _ = gsutil.check_call('ls', gs_folder)
    ls_out_sorted = sorted(ls_out.splitlines())
    if not ls_out_sorted:
      # This repo is not on Google Storage.
      return False
    latest_checkout = ls_out_sorted[-1]

    # Download zip file to a temporary directory.
    try:
      tempdir = tempfile.mkdtemp()
      self.print('Downloading %s' % latest_checkout)
      code, out, err = gsutil.check_call('cp', latest_checkout, tempdir)
      if code:
        self.print('%s\n%s' % (out, err))
        return False
      filename = os.path.join(tempdir, latest_checkout.split('/')[-1])

      # Unpack the file with 7z on Windows, unzip on linux, or fallback.
      if not python_fallback:
        if sys.platform.startswith('win'):
          cmd = ['7z', 'x', '-o%s' % directory, '-tzip', filename]
        else:
          cmd = ['unzip', filename, '-d', directory]
        retcode = subprocess.call(cmd)
      else:
        try:
          with zipfile.ZipFile(filename, 'r') as f:
            f.printdir()
            f.extractall(directory)
        except Exception as e:
          self.print('Encountered error: %s' % str(e), file=sys.stderr)
          retcode = 1
        else:
          retcode = 0
    finally:
      # Clean up the downloaded zipfile.
      gclient_utils.rmtree(tempdir)

    if retcode:
      self.print(
          'Extracting bootstrap zipfile %s failed.\n'
          'Resuming normal operations.' % filename)
      return False
    return True

  def exists(self):
    # A populated bare repo always has a 'config' file at top level.
    return os.path.isfile(os.path.join(self.mirror_path, 'config'))

  def populate(self, depth=None, shallow=False, bootstrap=False,
               verbose=False):
    """Ensure the mirror exists and fetch all configured refs.

    Args:
      depth: history depth for a shallow fetch.
      shallow: if True and no depth is given, use a depth of 10000.
      bootstrap: if True, try seeding new mirrors from Google Storage.
      verbose: pass -v/--progress to git fetch.
    """
    if shallow and not depth:
      depth = 10000
    gclient_utils.safe_makedirs(self.GetCachePath())

    v = []
    if verbose:
      v = ['-v', '--progress']

    d = []
    if depth:
      d = ['--depth', str(depth)]


    with Lockfile(self.mirror_path):
      # Setup from scratch if the repo is new or is in a bad state.
      tempdir = None
      if not os.path.exists(os.path.join(self.mirror_path, 'config')):
        gclient_utils.rmtree(self.mirror_path)
        # Build the new mirror in a tempdir and rename it into place at the
        # end, so a failed populate never leaves a half-built mirror behind.
        tempdir = tempfile.mkdtemp(
            suffix=self.basedir, dir=self.GetCachePath())
        bootstrapped = not depth and bootstrap and self.bootstrap_repo(tempdir)
        if not bootstrapped:
          self.RunGit(['init', '--bare'], cwd=tempdir)
      else:
        if depth and os.path.exists(os.path.join(self.mirror_path, 'shallow')):
          logging.warn(
              'Shallow fetch requested, but repo cache already exists.')
        d = []

      rundir = tempdir or self.mirror_path
      self.config(rundir)
      fetch_cmd = ['fetch'] + v + d + ['origin']
      fetch_specs = subprocess.check_output(
          [self.git_exe, 'config', '--get-all', 'remote.origin.fetch'],
          cwd=rundir).strip().splitlines()
      for spec in fetch_specs:
        try:
          self.RunGit(fetch_cmd + [spec], cwd=rundir, retry=True)
        except subprocess.CalledProcessError:
          # Keep going: one failing refspec shouldn't abort the others.
          logging.warn('Fetch of %s failed' % spec)
      if tempdir:
        os.rename(tempdir, self.mirror_path)

  def update_bootstrap(self):
    """Zip the current mirror and upload it to the bootstrap bucket."""
    # The files are named <git number>.zip
    # NOTE(review): relies on a 'git number' subcommand being installed.
    gen_number = subprocess.check_output(
        [self.git_exe, 'number', 'master'], cwd=self.mirror_path).strip()
    self.RunGit(['gc'])  # Run Garbage Collect to compress packfile.
    # Creating a temp file and then deleting it ensures we can use this name.
    _, tmp_zipfile = tempfile.mkstemp(suffix='.zip')
    os.remove(tmp_zipfile)
    subprocess.call(['zip', '-r', tmp_zipfile, '.'], cwd=self.mirror_path)
    gsutil = Gsutil(path=self.gsutil_exe, boto_path=None)
    dest_name = 'gs://%s/%s/%s.zip' % (
        self.bootstrap_bucket, self.basedir, gen_number)
    gsutil.call('cp', tmp_zipfile, dest_name)
    os.remove(tmp_zipfile)

  def unlock(self):
    """Drop this mirror's lockfile and any leftover git config.lock."""
    lf = Lockfile(self.mirror_path)
    config_lock = os.path.join(self.mirror_path, 'config.lock')
    if os.path.exists(config_lock):
      os.remove(config_lock)
    lf.break_lock()
@subcommand.usage('[url of repo to check for caching]')
def CMDexists(parser, args):
  """Check to see if there already is a cache of the given repo."""
  _, args = parser.parse_args(args)
  if len(args) != 1:
    parser.error('git cache exists only takes exactly one repo url.')
  mirror = Mirror(args[0])
  if not mirror.exists():
    return 1
  # Report the cache location on stdout so callers can consume it.
  print(mirror.mirror_path)
  return 0


@subcommand.usage('[url of repo to create a bootstrap zip file]')
def CMDupdate_bootstrap(parser, args):
  """Create and upload a bootstrap zip file for the given repo."""
  # Lets just assert we can't do this on Windows.
  if sys.platform.startswith('win'):
    print('Sorry, update bootstrap will not work on Windows.', file=sys.stderr)
    return 1

  # First, we need to ensure the cache is populated.
  populate_args = args[:]
  populate_args.append('--no_bootstrap')
  CMDpopulate(parser, populate_args)

  # Get the repo directory.
  _, args = parser.parse_args(args)
  # Validate the argument count (consistent with CMDexists/CMDpopulate)
  # instead of letting args[0] raise an opaque IndexError.
  if not len(args) == 1:
    parser.error('git cache update-bootstrap only takes exactly one repo url.')
  url = args[0]
  mirror = Mirror(url)
  mirror.update_bootstrap()
  return 0


@subcommand.usage('[url of repo to add to or update in cache]')
def CMDpopulate(parser, args):
  """Ensure that the cache has all up-to-date objects for the given repo."""
  parser.add_option('--depth', type='int',
                    help='Only cache DEPTH commits of history')
  parser.add_option('--shallow', '-s', action='store_true',
                    help='Only cache 10000 commits of history')
  parser.add_option('--ref', action='append',
                    help='Specify additional refs to be fetched')
  parser.add_option('--no_bootstrap', action='store_true',
                    help="Don't bootstrap from Google Storage")

  options, args = parser.parse_args(args)
  if len(args) != 1:
    parser.error('git cache populate only takes exactly one repo url.')

  mirror = Mirror(args[0], refs=options.ref)
  # Only forward --depth when it was given, so Mirror.populate's own
  # default applies otherwise.
  populate_kwargs = {
      'verbose': options.verbose,
      'shallow': options.shallow,
      'bootstrap': not options.no_bootstrap,
  }
  if options.depth:
    populate_kwargs['depth'] = options.depth
  mirror.populate(**populate_kwargs)


@subcommand.usage('[url of repo to unlock, or -a|--all]')
def CMDunlock(parser, args):
  """Unlock one or all repos if their lock files are still around."""
  parser.add_option('--force', '-f', action='store_true',
                    help='Actually perform the action')
  parser.add_option('--all', '-a', action='store_true',
                    help='Unlock all repository caches')
  options, args = parser.parse_args(args)
  if len(args) > 1 or (len(args) == 0 and not options.all):
    parser.error('git cache unlock takes exactly one repo url, or --all')

  repo_dirs = []
  if not options.all:
    url = args[0]
    repo_dirs.append(Mirror(url).mirror_path)
  else:
    cachepath = Mirror.GetCachePath()
    # Every subdirectory of the cache root is assumed to be a mirror.
    repo_dirs = [os.path.join(cachepath, path)
                 for path in os.listdir(cachepath)
                 if os.path.isdir(os.path.join(cachepath, path))]
    # Also pick up orphaned *.lock files whose mirror directory is gone,
    # deduplicating against the directories found above.  Strip only the
    # trailing '.lock' (str.replace would also mangle a '.lock' occurring
    # in the middle of a name).
    for entry in os.listdir(cachepath):
      if not entry.endswith('.lock'):
        continue
      if not os.path.isfile(os.path.join(cachepath, entry)):
        continue
      repo_dir = os.path.join(cachepath, entry[:-len('.lock')])
      if repo_dir not in repo_dirs:
        repo_dirs.append(repo_dir)
  lockfiles = [repo_dir + '.lock' for repo_dir in repo_dirs
               if os.path.exists(repo_dir + '.lock')]

  if not options.force:
    # The '+' is required here: without it the three adjacent string
    # literals concatenate into a single literal which is then used as the
    # *separator* for str.join, garbling the message.
    parser.error('git cache unlock requires -f|--force to do anything. '
                 'Refusing to unlock the following repo caches: ' +
                 ', '.join(lockfiles))

  unlocked_repos = []
  untouched_repos = []
  for repo_dir in repo_dirs:
    lf = Lockfile(repo_dir)
    config_lock = os.path.join(repo_dir, 'config.lock')
    unlocked = False
    # A leftover git 'config.lock' inside the mirror also counts.
    if os.path.exists(config_lock):
      os.remove(config_lock)
      unlocked = True
    if lf.break_lock():
      unlocked = True

    if unlocked:
      unlocked_repos.append(repo_dir)
    else:
      untouched_repos.append(repo_dir)

  if unlocked_repos:
    logging.info('Broke locks on these caches:\n  %s' % '\n  '.join(
        unlocked_repos))
  if untouched_repos:
    logging.debug('Did not touch these caches:\n %s' % '\n  '.join(
        untouched_repos))


class OptionParser(optparse.OptionParser):
  """Wrapper class for OptionParser to handle global options."""

  def __init__(self, *args, **kwargs):
    optparse.OptionParser.__init__(self, *args, prog='git cache', **kwargs)
    self.add_option('-c', '--cache-dir',
                    help='Path to the directory containing the cache')
    self.add_option('-v', '--verbose', action='count', default=0,
                    help='Increase verbosity (can be passed multiple times)')

  def parse_args(self, args=None, values=None):
    # Let the base class do the actual parsing first.
    options, args = optparse.OptionParser.parse_args(self, args, values)

    try:
      global_cache_dir = Mirror.GetCachePath()
    except RuntimeError:
      # No globally-configured cache path; only a problem if a subcommand
      # actually needs one, so don't fail here.
      global_cache_dir = None
    if options.cache_dir:
      if global_cache_dir:
        local_dir = os.path.abspath(options.cache_dir)
        if local_dir != os.path.abspath(global_cache_dir):
          logging.warn('Overriding globally-configured cache directory.')
      Mirror.SetCachePath(options.cache_dir)

    # Each -v raises the log level one step, capped at DEBUG.
    levels = (logging.WARNING, logging.INFO, logging.DEBUG)
    verbosity = min(options.verbose, len(levels) - 1)
    logging.basicConfig(level=levels[verbosity])

    return options, args


def main(argv):
  """Dispatch argv to the matching CMD* subcommand and return its result."""
  return subcommand.CommandDispatcher(__name__).execute(OptionParser(), argv)


# Script entry point: run the subcommand dispatcher and propagate its return
# value as the process exit code.
if __name__ == '__main__':
  sys.exit(main(sys.argv[1:]))