# Copyright (c) 2010 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""SCM-specific utility classes."""

import cStringIO
import glob
import logging
import os
import re
import shutil
import subprocess
import sys
import tempfile
import time
from xml.etree import ElementTree

import gclient_utils
import subprocess2


def ValidateEmail(email):
  return (re.match(r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9._%-]+\.[a-zA-Z]{2,6}$", email)
          is not None)


def GetCasedPath(path):
  """Elcheapos way to get the real path case on Windows."""
  if sys.platform.startswith('win') and os.path.exists(path):
    # Reconstruct the path.
    path = os.path.abspath(path)
    paths = path.split('\\')
    for i in range(len(paths)):
      if i == 0:
        # Skip drive letter.
        continue
      subpath = '\\'.join(paths[:i+1])
      prev = len('\\'.join(paths[:i]))
      # glob.glob will return the cased path for the last item only. This is why
      # we are calling it in a loop. Extract the data we want and put it back
      # into the list.
      paths[i] = glob.glob(subpath + '*')[0][prev+1:len(subpath)]
    path = '\\'.join(paths)
  return path
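# Illustrative behavior (hypothetical Windows path): if the directory is
# stored on disk as r'C:\src\Foo', GetCasedPath(r'c:\SRC\foo') returns
# r'c:\src\Foo' (the drive letter keeps the caller's casing, the rest takes
# the on-disk casing).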


def GenFakeDiff(filename):
  """Generates a fake diff from a file."""
  file_content = gclient_utils.FileRead(filename, 'rb').splitlines(True)
  filename = filename.replace(os.sep, '/')
  nb_lines = len(file_content)
  # We need to use / since patch on unix will fail otherwise.
  data = cStringIO.StringIO()
  data.write("Index: %s\n" % filename)
  data.write('=' * 67 + '\n')
  # Note: Should we use /dev/null instead?
  data.write("--- %s\n" % filename)
  data.write("+++ %s\n" % filename)
  data.write("@@ -0,0 +1,%d @@\n" % nb_lines)
  # Prepend '+' to every line.
  for line in file_content:
    data.write('+')
    data.write(line)
  result = data.getvalue()
  data.close()
  return result
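# Illustrative output (hypothetical two-line file 'foo/bar.txt'); the 67-char
# '=' separator line is abbreviated here:
#   Index: foo/bar.txt
#   ======...======
#   --- foo/bar.txt
#   +++ foo/bar.txt
#   @@ -0,0 +1,2 @@
#   +line one
#   +line two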


def determine_scm(root):
  """Similar to upload.py's version but much simpler.

  Returns 'svn', 'git' or None.
  """
  if os.path.isdir(os.path.join(root, '.svn')):
    return 'svn'
  elif os.path.isdir(os.path.join(root, '.git')):
    return 'git'
  else:
    try:
      subprocess2.check_output(
          ['git', 'rev-parse', '--show-cdup'],
          stdout=subprocess2.VOID,
          cwd=root)
      return 'git'
    except (OSError, subprocess2.CalledProcessError):
      return None
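# Illustrative usage (hypothetical path):
#   determine_scm('/path/to/checkout')  # -> 'svn', 'git' or None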


class GIT(object):
  @staticmethod
  def Capture(args, **kwargs):
    return gclient_utils.CheckCall(['git'] + args, print_error=False,
        **kwargs)[0]

  @staticmethod
  def CaptureStatus(files, upstream_branch=None):
    """Returns git status.

    @files can be a string (one file) or a list of files.

    Returns an array of (status, file) tuples."""
    if upstream_branch is None:
      upstream_branch = GIT.GetUpstreamBranch(os.getcwd())
      if upstream_branch is None:
        raise gclient_utils.Error('Cannot determine upstream branch')
    command = ['diff', '--name-status', '-r', '%s...' % upstream_branch]
    if not files:
      pass
    elif isinstance(files, basestring):
      command.append(files)
    else:
      command.extend(files)
    status = GIT.Capture(command).rstrip()
    results = []
    if status:
      for statusline in status.splitlines():
        # 3-way merges can cause the status to be 'MMM' instead of 'M'. This
        # can happen when the user has two local branches and diffs between
        # them instead of diffing against the upstream branch.
        m = re.match(r'^(\w+)\t(.+)$', statusline)
        if not m:
          raise gclient_utils.Error(
              'status currently unsupported: %s' % statusline)
        # Only grab the first letter.
        results.append(('%s      ' % m.group(1)[0], m.group(2)))
    return results
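  # Illustrative return value (hypothetical files): a modified and an added
  # file come back roughly as
  #   [('M      ', 'foo/bar.cc'), ('A      ', 'foo/baz.cc')]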

  @staticmethod
  def GetEmail(cwd):
    """Retrieves the user email address if known."""
    # We could look at the svn credentials when the repo has an svn remote,
    # but for now users should simply configure their git settings.
    try:
      return GIT.Capture(['config', 'user.email'], cwd=cwd).strip()
    except gclient_utils.CheckCallError:
      return ''

  @staticmethod
  def ShortBranchName(branch):
    """Converts a name like 'refs/heads/foo' to just 'foo'."""
    return branch.replace('refs/heads/', '')

  @staticmethod
  def GetBranchRef(cwd):
    """Returns the full branch reference, e.g. 'refs/heads/master'."""
    return GIT.Capture(['symbolic-ref', 'HEAD'], cwd=cwd).strip()

  @staticmethod
  def GetBranch(cwd):
    """Returns the short branch name, e.g. 'master'."""
    return GIT.ShortBranchName(GIT.GetBranchRef(cwd))

  @staticmethod
  def IsGitSvn(cwd):
    """Returns true if this repo looks like it's using git-svn."""
    # If you have any "svn-remote.*" config keys, we think you're using svn.
    try:
      GIT.Capture(['config', '--get-regexp', r'^svn-remote\.'], cwd=cwd)
      return True
    except gclient_utils.CheckCallError:
      return False

  @staticmethod
  def MatchSvnGlob(url, base_url, glob_spec, allow_wildcards):
    """Return the corresponding git ref if |base_url| together with |glob_spec|
    matches the full |url|.

    If |allow_wildcards| is true, |glob_spec| can contain wildcards (see below).
    """
    fetch_suburl, as_ref = glob_spec.split(':')
    if allow_wildcards:
      glob_match = re.match('(.+/)?(\*|{[^/]*})(/.+)?', fetch_suburl)
      if glob_match:
        # Parse specs like "branches/*/src:refs/remotes/svn/*" or
        # "branches/{472,597,648}/src:refs/remotes/svn/*".
        branch_re = re.escape(base_url)
        if glob_match.group(1):
          branch_re += '/' + re.escape(glob_match.group(1))
        wildcard = glob_match.group(2)
        if wildcard == '*':
          branch_re += '([^/]*)'
        else:
          # Escape and replace surrounding braces with parentheses and commas
          # with pipe symbols.
          wildcard = re.escape(wildcard)
          wildcard = re.sub('^\\\\{', '(', wildcard)
          wildcard = re.sub('\\\\,', '|', wildcard)
          wildcard = re.sub('\\\\}$', ')', wildcard)
          branch_re += wildcard
        if glob_match.group(3):
          branch_re += re.escape(glob_match.group(3))
        match = re.match(branch_re, url)
        if match:
          return re.sub('\*$', match.group(1), as_ref)

    # Parse specs like "trunk/src:refs/remotes/origin/trunk".
    if fetch_suburl:
      full_url = base_url + '/' + fetch_suburl
    else:
      full_url = base_url
    if full_url == url:
      return as_ref
    return None
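  # Illustrative call (hypothetical URLs): with
  #   url       = 'svn://server/repo/branches/foo/src'
  #   base_url  = 'svn://server/repo'
  #   glob_spec = 'branches/*/src:refs/remotes/branches/*'
  # MatchSvnGlob(url, base_url, glob_spec, True) returns
  # 'refs/remotes/branches/foo'.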

  @staticmethod
  def GetSVNBranch(cwd):
    """Returns the svn branch name if found."""
    # Try to figure out which remote branch we're based on.
    # Strategy:
    # 1) iterate through our branch history and find the svn URL.
    # 2) find the svn-remote that fetches from the URL.

    # regexp matching the git-svn line that contains the URL.
    git_svn_re = re.compile(r'^\s*git-svn-id: (\S+)@', re.MULTILINE)

    # We don't want to go through all of history, so read a line from the
    # pipe at a time.
    # The -100 is an arbitrary limit so we don't search forever.
    cmd = ['git', 'log', '-100', '--pretty=medium']
    proc = gclient_utils.Popen(cmd, stdout=subprocess.PIPE)
    url = None
    for line in proc.stdout:
      match = git_svn_re.match(line)
      if match:
        url = match.group(1)
        proc.stdout.close()  # Cut pipe.
        break

    if url:
      svn_remote_re = re.compile(r'^svn-remote\.([^.]+)\.url (.*)$')
      remotes = GIT.Capture(['config', '--get-regexp',
                            r'^svn-remote\..*\.url'], cwd=cwd).splitlines()
      for remote in remotes:
        match = svn_remote_re.match(remote)
        if match:
          remote = match.group(1)
          base_url = match.group(2)
          try:
            fetch_spec = GIT.Capture(
                ['config', 'svn-remote.%s.fetch' % remote],
                cwd=cwd).strip()
            branch = GIT.MatchSvnGlob(url, base_url, fetch_spec, False)
          except gclient_utils.CheckCallError:
            branch = None
          if branch:
            return branch
          try:
            branch_spec = GIT.Capture(
                ['config', 'svn-remote.%s.branches' % remote],
                cwd=cwd).strip()
            branch = GIT.MatchSvnGlob(url, base_url, branch_spec, True)
          except gclient_utils.CheckCallError:
            branch = None
          if branch:
            return branch
          try:
            tag_spec = GIT.Capture(
                ['config', 'svn-remote.%s.tags' % remote],
                cwd=cwd).strip()
            branch = GIT.MatchSvnGlob(url, base_url, tag_spec, True)
          except gclient_utils.CheckCallError:
            branch = None
          if branch:
            return branch

  @staticmethod
  def FetchUpstreamTuple(cwd):
    """Returns a tuple containing the remote and remote ref,
       e.g. 'origin', 'refs/heads/master'
       Tries to be intelligent and understand git-svn.
    """
    remote = '.'
    branch = GIT.GetBranch(cwd)
    try:
      upstream_branch = GIT.Capture(
          ['config', 'branch.%s.merge' % branch], cwd=cwd).strip()
    except (gclient_utils.Error, subprocess2.CalledProcessError):
      upstream_branch = None
    if upstream_branch:
      try:
        remote = GIT.Capture(
            ['config', 'branch.%s.remote' % branch], cwd=cwd).strip()
      except (gclient_utils.Error, subprocess2.CalledProcessError):
        pass
    else:
      try:
        upstream_branch = GIT.Capture(
            ['config', 'rietveld.upstream-branch'], cwd=cwd).strip()
      except (gclient_utils.Error, subprocess2.CalledProcessError):
        upstream_branch = None
      if upstream_branch:
        try:
          remote = GIT.Capture(
              ['config', 'rietveld.upstream-remote'], cwd=cwd).strip()
        except (gclient_utils.Error, subprocess2.CalledProcessError):
          pass
      else:
        # Fall back on trying a git-svn upstream branch.
        if GIT.IsGitSvn(cwd):
          upstream_branch = GIT.GetSVNBranch(cwd)
        else:
          # Else, try to guess the origin remote.
          remote_branches = GIT.Capture(['branch', '-r'], cwd=cwd).split()
          if 'origin/master' in remote_branches:
            # Fall back on origin/master if it exists.
            remote = 'origin'
            upstream_branch = 'refs/heads/master'
          elif 'origin/trunk' in remote_branches:
            # Fall back on origin/trunk if it exists. Generally a shared
            # git-svn clone.
            remote = 'origin'
            upstream_branch = 'refs/heads/trunk'
          else:
            # Give up.
            remote = None
            upstream_branch = None
    return remote, upstream_branch
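  # Illustrative results (hypothetical configs): a tracking branch typically
  # yields ('origin', 'refs/heads/master'); a branch whose upstream is another
  # local branch yields ('.', 'refs/heads/parent_branch'); if nothing can be
  # figured out, (None, None) is returned.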

  @staticmethod
  def GetUpstreamBranch(cwd):
    """Gets the current branch's upstream branch."""
    remote, upstream_branch = GIT.FetchUpstreamTuple(cwd)
    if remote != '.' and upstream_branch:
      upstream_branch = upstream_branch.replace('heads', 'remotes/' + remote)
    return upstream_branch
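  # Illustrative mapping (hypothetical config): ('origin', 'refs/heads/master')
  # from FetchUpstreamTuple becomes 'refs/remotes/origin/master'; a local
  # upstream (remote '.') is returned unchanged.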

  @staticmethod
  def GenerateDiff(cwd, branch=None, branch_head='HEAD', full_move=False,
                   files=None):
    """Diffs against the upstream branch or optionally another branch.

    full_move means that move or copy operations should completely recreate the
    files, usually so the resulting patch can be applied for a try job."""
    if not branch:
      branch = GIT.GetUpstreamBranch(cwd)
    command = ['diff', '-p', '--no-prefix', '--no-ext-diff',
               branch + "..." + branch_head]
    if not full_move:
      command.append('-C')
    # TODO(maruel): --binary support.
    if files:
      command.append('--')
      command.extend(files)
    diff = GIT.Capture(command, cwd=cwd).splitlines(True)
    for i in range(len(diff)):
      # In the case of added files, replace /dev/null with the path to the
      # file being added.
      if diff[i].startswith('--- /dev/null'):
        diff[i] = '--- %s' % diff[i+1][4:]
    return ''.join(diff)
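  # Roughly equivalent to running (illustrative):
  #   git diff -p --no-prefix --no-ext-diff -C <upstream>...HEAD -- <files>
  # and then rewriting the '--- /dev/null' header of added files.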

  @staticmethod
  def GetDifferentFiles(cwd, branch=None, branch_head='HEAD'):
    """Returns the list of modified files between two branches."""
    if not branch:
      branch = GIT.GetUpstreamBranch(cwd)
    command = ['diff', '--name-only', branch + "..." + branch_head]
    return GIT.Capture(command, cwd=cwd).splitlines(False)

  @staticmethod
  def GetPatchName(cwd):
    """Constructs a name for this patch."""
    short_sha = GIT.Capture(['rev-parse', '--short=4', 'HEAD'], cwd=cwd).strip()
    return "%s#%s" % (GIT.GetBranch(cwd), short_sha)
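  # Illustrative name (hypothetical branch and commit): on branch 'mywork'
  # with short SHA '1a2b', GetPatchName returns 'mywork#1a2b'.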

  @staticmethod
  def GetCheckoutRoot(cwd):
    """Returns the top level directory of a git checkout as an absolute path.
    """
    root = GIT.Capture(['rev-parse', '--show-cdup'], cwd=cwd).strip()
    return os.path.abspath(os.path.join(cwd, root))

  @staticmethod
  def AssertVersion(min_version):
    """Asserts git's version is at least min_version."""
    def only_int(val):
      if val.isdigit():
        return int(val)
      else:
        return 0
    current_version = GIT.Capture(['--version']).split()[-1]
    current_version_list = map(only_int, current_version.split('.'))
    for min_ver in map(int, min_version.split('.')):
      ver = current_version_list.pop(0)
      if ver < min_ver:
        return (False, current_version)
      elif ver > min_ver:
        return (True, current_version)
    return (True, current_version)
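  # Illustrative checks (hypothetical installed git 1.7.4):
  #   GIT.AssertVersion('1.6')  # -> (True, '1.7.4')
  #   GIT.AssertVersion('1.8')  # -> (False, '1.7.4')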


class SVN(object):
  current_version = None

  @staticmethod
  def Capture(args, **kwargs):
    """Always redirect stderr.

    Throws an exception if non-0 is returned."""
    return gclient_utils.CheckCall(['svn'] + args, print_error=False,
        **kwargs)[0]

  @staticmethod
  def RunAndGetFileList(verbose, args, cwd, file_list, stdout=None):
    """Runs svn checkout, update, or status, output to stdout.

    The first item in args must be either "checkout", "update", or "status".

    svn's stdout is parsed to collect a list of files checked out or updated.
    These files are appended to file_list.  svn's stdout is also printed to
    sys.stdout as in Run.

    Args:
      verbose: If True, uses verbose output
      args: A sequence of command line parameters to be passed to svn.
      cwd: The directory where svn is to be run.

    Raises:
      Error: An error occurred while running the svn command.
    """
    stdout = stdout or sys.stdout

    # svn update and svn checkout use the same pattern: the first three columns
    # are for file status, property status, and lock status.  This is followed
    # by two spaces, and then the path to the file.
    update_pattern = '^...  (.*)$'

    # The first three columns of svn status are the same as for svn update and
    # svn checkout.  The next three columns indicate addition-with-history,
    # switch, and remote lock status.  This is followed by one space, and then
    # the path to the file.
    status_pattern = '^...... (.*)$'

    # args[0] must be a supported command.  This will blow up if it's something
    # else, which is good.  Note that the patterns are only effective when
    # these commands are used in their ordinary forms, the patterns are invalid
    # for "svn status --show-updates", for example.
    pattern = {
          'checkout': update_pattern,
          'status':   status_pattern,
          'update':   update_pattern,
        }[args[0]]
    compiled_pattern = re.compile(pattern)
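    # For reference (illustrative svn output): an update line such as
    # 'A    third_party/foo.c' matches update_pattern, so 'third_party/foo.c'
    # is appended to file_list.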
    # Place an upper limit.
    backoff_time = 5
    retries = 0
    while True:
      retries += 1
      previous_list_len = len(file_list)
      failure = []

      def CaptureMatchingLines(line):
        match = compiled_pattern.search(line)
        if match:
          file_list.append(match.group(1))
        if line.startswith('svn: '):
          failure.append(line)

      try:
        gclient_utils.CheckCallAndFilterAndHeader(
            ['svn'] + args,
            cwd=cwd,
            always=verbose,
            filter_fn=CaptureMatchingLines,
            stdout=stdout)
      except (gclient_utils.Error, subprocess2.CalledProcessError):
        def IsKnownFailure():
          for x in failure:
            if (x.startswith('svn: OPTIONS of') or
                x.startswith('svn: PROPFIND of') or
                x.startswith('svn: REPORT of') or
                x.startswith('svn: Unknown hostname') or
                x.startswith('svn: Server sent unexpected return value')):
              return True
          return False

        # Subversion client is really misbehaving with Google Code.
        if args[0] == 'checkout':
          # Ensure at least one file was checked out, otherwise *delete* the
          # directory.
          if len(file_list) == previous_list_len:
            if not IsKnownFailure():
              # No known svn error was found, bail out.
              raise
            # No files were checked out, so make sure the directory is
            # deleted in case it's messed up and try again.
            # Warning: It's bad, it assumes args[2] is the directory
            # argument.
            if os.path.isdir(args[2]):
              gclient_utils.RemoveDirectory(args[2])
          else:
            # Progress was made, convert to update since an aborted checkout
            # is now an update.
            args = ['update'] + args[1:]
        else:
          # It was an update or export.
          # We enforce that some progress has been made or a known failure.
          if len(file_list) == previous_list_len and not IsKnownFailure():
            # No known svn error was found and no progress, bail out.
            raise
        if retries == 10:
          raise
        print "Sleeping %.1f seconds and retrying...." % backoff_time
        time.sleep(backoff_time)
        backoff_time *= 1.3
        continue
      break

  @staticmethod
  def CaptureInfo(cwd):
    """Returns a dictionary from the svn info output for the given file.

    Throws an exception if svn info fails."""
    result = {}
    output = SVN.Capture(['info', '--xml', cwd])
    info = ElementTree.XML(output)
    if info is None:
      return result
    entry = info.find('entry')
    if entry is None:
      return result

    # Use .text when the item is not optional.
    result['Path'] = entry.attrib['path']
    result['Revision'] = int(entry.attrib['revision'])
    result['Node Kind'] = entry.attrib['kind']
    # Differs across versions.
    if result['Node Kind'] == 'dir':
      result['Node Kind'] = 'directory'
    result['URL'] = entry.find('url').text
    repository = entry.find('repository')
    result['Repository Root'] = repository.find('root').text
    result['UUID'] = repository.find('uuid')
    wc_info = entry.find('wc-info')
    if wc_info is not None:
      result['Schedule'] = wc_info.find('schedule').text
      result['Copied From URL'] = wc_info.find('copy-from-url')
      result['Copied From Rev'] = wc_info.find('copy-from-rev')
    else:
      result['Schedule'] = None
      result['Copied From URL'] = None
      result['Copied From Rev'] = None
    for key in result.keys():
      if isinstance(result[key], unicode):
        # Unicode results interfere with the higher layers matching up things
        # in the deps dictionary.
        result[key] = result[key].encode()
      # Automatic conversion of optional parameters.
      result[key] = getattr(result[key], 'text', result[key])
    return result
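  # Illustrative result (hypothetical checkout): CaptureInfo('.') returns
  # something like
  #   {'Path': '.', 'Revision': 12345, 'Node Kind': 'directory',
  #    'URL': 'svn://server/repo/trunk',
  #    'Repository Root': 'svn://server/repo', 'UUID': '...',
  #    'Schedule': 'normal', 'Copied From URL': None, 'Copied From Rev': None}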

  @staticmethod
  def CaptureRevision(cwd):
    """Get the base revision of a SVN repository.

    Returns:
      Int base revision
    """
    return SVN.CaptureInfo(cwd).get('Revision')

  @staticmethod
  def CaptureStatus(files):
    """Returns the svn 1.5 svn status emulated output.

    @files can be a string (one file) or a list of files.

    Returns an array of (status, file) tuples."""
    command = ["status", "--xml"]
    if not files:
      pass
    elif isinstance(files, basestring):
      command.append(files)
    else:
      command.extend(files)

    status_letter = {
      None: ' ',
      '': ' ',
      'added': 'A',
      'conflicted': 'C',
      'deleted': 'D',
      'external': 'X',
      'ignored': 'I',
      'incomplete': '!',
      'merged': 'G',
      'missing': '!',
      'modified': 'M',
      'none': ' ',
      'normal': ' ',
      'obstructed': '~',
      'replaced': 'R',
      'unversioned': '?',
    }
    dom = ElementTree.XML(SVN.Capture(command))
    results = []
    if dom is None:
      return results
    # /status/target/entry/(wc-status|commit|author|date)
    for target in dom.findall('target'):
      for entry in target.findall('entry'):
        file_path = entry.attrib['path']
        wc_status = entry.find('wc-status')
        # Emulate svn 1.5 status output...
        statuses = [' '] * 7
        # Col 0
        xml_item_status = wc_status.attrib['item']
        if xml_item_status in status_letter:
          statuses[0] = status_letter[xml_item_status]
        else:
          raise gclient_utils.Error(
              'Unknown item status "%s"; please implement me!' %
                  xml_item_status)
        # Col 1
        xml_props_status = wc_status.attrib['props']
        if xml_props_status == 'modified':
          statuses[1] = 'M'
        elif xml_props_status == 'conflicted':
          statuses[1] = 'C'
        elif (not xml_props_status or xml_props_status == 'none' or
              xml_props_status == 'normal'):
          pass
        else:
          raise gclient_utils.Error(
              'Unknown props status "%s"; please implement me!' %
                  xml_props_status)
        # Col 2
        if wc_status.attrib.get('wc-locked') == 'true':
          statuses[2] = 'L'
        # Col 3
        if wc_status.attrib.get('copied') == 'true':
          statuses[3] = '+'
        # Col 4
        if wc_status.attrib.get('switched') == 'true':
          statuses[4] = 'S'
        # TODO(maruel): Col 5 and 6
        item = (''.join(statuses), file_path)
        results.append(item)
    return results
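  # Illustrative return value (hypothetical files): a locally modified file
  # and an unversioned one come back roughly as
  #   [('M      ', 'chrome/app/foo.cc'), ('?      ', 'scratch.txt')]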

  @staticmethod
  def IsMoved(filename):
    """Determine if a file has been added through svn mv"""
    return SVN.IsMovedInfo(SVN.CaptureInfo(filename))

  @staticmethod
  def IsMovedInfo(info):
    """Determine if a file has been added through svn mv"""
    return (info.get('Copied From URL') and
            info.get('Copied From Rev') and
            info.get('Schedule') == 'add')

  @staticmethod
  def GetFileProperty(filename, property_name):
    """Returns the value of an SVN property for the given file.

    Args:
      filename: The file to check
      property_name: The name of the SVN property, e.g. "svn:mime-type"

    Returns:
      The value of the property, which will be the empty string if the property
      is not set on the file.  If the file is not under version control, the
      empty string is also returned.
    """
    try:
      return SVN.Capture(['propget', property_name, filename])
    except (gclient_utils.Error, subprocess2.CalledProcessError):
      return ''

  @staticmethod
  def DiffItem(filename, full_move=False, revision=None):
    """Diffs a single file.

    Should be simple, eh? No it isn't.
    Be sure to be in the appropriate directory before calling to have the
    expected relative path.
    full_move means that move or copy operations should completely recreate the
    files, usually so the resulting patch can be applied for a try job."""
    # If the user specified a custom diff command in their svn config file,
    # then it'll be used when we do svn diff, which we don't want to happen
    # since we want the unified diff.  Using --diff-cmd=diff doesn't always
    # work, since they can have another diff executable in their path that
    # gives different line endings.  So we use a bogus temp directory as the
    # config directory, which gets around these problems.
    bogus_dir = tempfile.mkdtemp()
    try:
      # Use "svn info" output instead of os.path.isdir because the latter fails
      # when the file is deleted.
      return SVN._DiffItemInternal(filename, SVN.CaptureInfo(filename),
                                   bogus_dir,
                                   full_move=full_move, revision=revision)
    finally:
      shutil.rmtree(bogus_dir)

  @staticmethod
  def _DiffItemInternal(filename, info, bogus_dir, full_move=False,
                        revision=None):
    """Grabs the diff data."""
    command = ["diff", "--config-dir", bogus_dir, filename]
    if revision:
      command.extend(['--revision', revision])
    data = None
    if SVN.IsMovedInfo(info):
      if full_move:
        if info.get("Node Kind") == "directory":
          # Things become tricky here. It's a directory copy/move. We need to
          # diff all the files inside it.
          # This will put a lot of pressure on the heap. This is why StringIO
          # is used and converted back into a string at the end. The reason to
          # return a string instead of a StringIO is that StringIO.write()
          # doesn't accept a StringIO object. *sigh*.
          for (dirpath, dirnames, filenames) in os.walk(filename):
            # Cleanup all files starting with a '.'.
            for d in dirnames:
              if d.startswith('.'):
                dirnames.remove(d)
            for f in filenames:
              if f.startswith('.'):
                filenames.remove(f)
            for f in filenames:
              if data is None:
                data = cStringIO.StringIO()
              data.write(GenFakeDiff(os.path.join(dirpath, f)))
          if data:
            tmp = data.getvalue()
            data.close()
            data = tmp
        else:
          data = GenFakeDiff(filename)
      else:
        if info.get("Node Kind") != "directory":
          # svn diff on a mv/cp'd file outputs nothing if there was no change.
          data = SVN.Capture(command)
          if not data:
            # We put in an empty Index entry so upload.py knows about them.
            data = "Index: %s\n" % filename.replace(os.sep, '/')
        # Otherwise silently ignore directories.
    else:
      if info.get("Node Kind") != "directory":
        # Normal simple case.
        try:
          data = SVN.Capture(command)
        except gclient_utils.CheckCallError:
          if revision:
            data = GenFakeDiff(filename)
          else:
            raise
      # Otherwise silently ignore directories.
    return data

  @staticmethod
  def GenerateDiff(filenames, root=None, full_move=False, revision=None):
    """Returns a string containing the diff for the given file list.

    The files in the list should either be absolute paths or relative to the
    given root. If no root directory is provided, the repository root will be
    used.
    The diff will always use relative paths.
    """
    assert isinstance(filenames, (list, tuple))
    previous_cwd = os.getcwd()
    root = root or SVN.GetCheckoutRoot(previous_cwd)
    root = os.path.normcase(os.path.join(root, ''))
    def RelativePath(path, root):
      """We must use relative paths."""
      if os.path.normcase(path).startswith(root):
        return path[len(root):]
      return path
    # If the user specified a custom diff command in their svn config file,
    # then it'll be used when we do svn diff, which we don't want to happen
    # since we want the unified diff.  Using --diff-cmd=diff doesn't always
    # work, since they can have another diff executable in their path that
    # gives different line endings.  So we use a bogus temp directory as the
    # config directory, which gets around these problems.
    bogus_dir = tempfile.mkdtemp()
    try:
      os.chdir(root)
      # Cleanup filenames
      filenames = [RelativePath(f, root) for f in filenames]
      # Get information about the modified items (files and directories)
      data = dict([(f, SVN.CaptureInfo(f)) for f in filenames])
      diffs = []
      if full_move:
        # Eliminate modified files inside moved/copied directory.
        for (filename, info) in data.iteritems():
          if SVN.IsMovedInfo(info) and info.get("Node Kind") == "directory":
            # Remove files inside the directory.
            filenames = [f for f in filenames
                         if not f.startswith(filename + os.path.sep)]
        for filename in data.keys():
          if not filename in filenames:
            # Remove filtered out items.
            del data[filename]
      else:
        metaheaders = []
        for (filename, info) in data.iteritems():
          if SVN.IsMovedInfo(info):
            # for now, the most common case is a head copy,
            # so let's just encode that as a straight up cp.
            srcurl = info.get('Copied From URL')
            root = info.get('Repository Root')
            rev = int(info.get('Copied From Rev'))
            assert srcurl.startswith(root)
            src = srcurl[len(root)+1:]
            try:
              srcinfo = SVN.CaptureInfo(srcurl)
            except gclient_utils.CheckCallError, e:
              if not 'Not a valid URL' in e.stderr:
                raise
              # Assume the file was deleted. No idea how to figure out at which
              # revision the file was deleted.
              srcinfo = {'Revision': rev}
            if (srcinfo.get('Revision') != rev and
                SVN.Capture(['diff', '-r', '%d:head' % rev, srcurl])):
              metaheaders.append("#$ svn cp -r %d %s %s "
                                 "### WARNING: note non-trunk copy\n" %
                                 (rev, src, filename))
            else:
              metaheaders.append("#$ cp %s %s\n" % (src,
                                                    filename))

        if metaheaders:
          diffs.append("### BEGIN SVN COPY METADATA\n")
          diffs.extend(metaheaders)
          diffs.append("### END SVN COPY METADATA\n")
      # Now ready to do the actual diff.
      for filename in sorted(data.iterkeys()):
        diffs.append(SVN._DiffItemInternal(filename, data[filename], bogus_dir,
                                           full_move=full_move,
                                           revision=revision))
      # Use StringIO since it can be messy when diffing a directory move with
      # full_move=True.
      buf = cStringIO.StringIO()
      for d in filter(None, diffs):
        buf.write(d)
      result = buf.getvalue()
      buf.close()
      return result
    finally:
      os.chdir(previous_cwd)
      shutil.rmtree(bogus_dir)

  @staticmethod
  def GetEmail(repo_root):
    """Retrieves the svn account which we assume is an email address."""
    try:
      infos = SVN.CaptureInfo(repo_root)
    except (gclient_utils.Error, subprocess2.CalledProcessError):
      return None

    # Should check for uuid but it is incorrectly saved for https creds.
    root = infos['Repository Root']
    realm = root.rsplit('/', 1)[0]
    uuid = infos['UUID']
    if root.startswith('https') or not uuid:
      regexp = re.compile(r'<%s:\d+>.*' % realm)
    else:
      regexp = re.compile(r'<%s:\d+> %s' % (realm, uuid))
    if regexp is None:
      return None
    if sys.platform.startswith('win'):
      if not 'APPDATA' in os.environ:
        return None
      auth_dir = os.path.join(os.environ['APPDATA'], 'Subversion', 'auth',
                              'svn.simple')
    else:
      if not 'HOME' in os.environ:
        return None
      auth_dir = os.path.join(os.environ['HOME'], '.subversion', 'auth',
                              'svn.simple')
    for credfile in os.listdir(auth_dir):
      cred_info = SVN.ReadSimpleAuth(os.path.join(auth_dir, credfile))
      if regexp.match(cred_info.get('svn:realmstring')):
        return cred_info.get('username')

  @staticmethod
  def ReadSimpleAuth(filename):
    f = open(filename, 'r')
    values = {}
    def ReadOneItem(item_type):
      m = re.match(r'%s (\d+)' % item_type, f.readline())
      if not m:
        return None
      data = f.read(int(m.group(1)))
      if f.read(1) != '\n':
        return None
      return data

    while True:
      key = ReadOneItem('K')
      if not key:
        break
      value = ReadOneItem('V')
      if not value:
        break
      values[key] = value
    return values
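  # For reference (illustrative svn.simple cache entry): the file is a series
  # of length-prefixed key/value pairs, e.g.
  #   K 8
  #   username
  #   V 12
  #   joe@chromium
  #   ...
  # which ReadSimpleAuth parses into {'username': 'joe@chromium', ...}.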

  @staticmethod
  def GetCheckoutRoot(directory):
    """Returns the top level directory of the current repository.

    The directory is returned as an absolute path.
    """
    directory = os.path.abspath(directory)
    try:
      info = SVN.CaptureInfo(directory)
      cur_dir_repo_root = info['Repository Root']
      url = info['URL']
    except (gclient_utils.Error, subprocess2.CalledProcessError):
      return None
    while True:
      parent = os.path.dirname(directory)
      try:
        info = SVN.CaptureInfo(parent)
        if (info['Repository Root'] != cur_dir_repo_root or
            info['URL'] != os.path.dirname(url)):
          break
        url = info['URL']
      except (gclient_utils.Error, subprocess2.CalledProcessError):
        break
      directory = parent
    return GetCasedPath(directory)

  @staticmethod
  def AssertVersion(min_version):
    """Asserts svn's version is at least min_version."""
    def only_int(val):
      if val.isdigit():
        return int(val)
      else:
        return 0
    if not SVN.current_version:
      SVN.current_version = SVN.Capture(['--version']).split()[2]
    current_version_list = map(only_int, SVN.current_version.split('.'))
    for min_ver in map(int, min_version.split('.')):
      ver = current_version_list.pop(0)
      if ver < min_ver:
        return (False, SVN.current_version)
      elif ver > min_ver:
        return (True, SVN.current_version)
    return (True, SVN.current_version)

  @staticmethod
  def Revert(repo_root, callback=None, ignore_externals=False):
    """Reverts all svn modifications in repo_root, including properties.

    Deletes any modified files or directory.

    A "svn update --revision BASE" call is required after to revive deleted
    files.
    """
    for file_status in SVN.CaptureStatus(repo_root):
      file_path = os.path.join(repo_root, file_status[1])
      if (ignore_externals and
          file_status[0][0] == 'X' and
          file_status[0][1:].isspace()):
        # Ignore externals.
        logging.info('Ignoring external %s' % file_status[1])
        continue

      if callback:
        callback(file_status)

      if os.path.exists(file_path):
        # svn revert is really stupid. It fails on inconsistent line-endings,
        # on switched directories, etc. So take no chance and delete everything!
        # In theory, it wouldn't be necessary for property-only change but then
        # it'd have to look for switched directories, etc so it's not worth
        # optimizing this use case.
        if os.path.isfile(file_path) or os.path.islink(file_path):
          logging.info('os.remove(%s)' % file_path)
          os.remove(file_path)
        elif os.path.isdir(file_path):
          logging.info('gclient_utils.RemoveDirectory(%s)' % file_path)
          gclient_utils.RemoveDirectory(file_path)
        else:
          logging.critical(
            ('No idea what is %s.\nYou just found a bug in gclient'
              ', please ping maruel@chromium.org ASAP!') % file_path)

      if (file_status[0][0] in ('D', 'A', '!') or
          not file_status[0][1:].isspace()):
        # Added and deleted files require manual intervention and a call to
        # revert, just like property changes do.
        try:
          SVN.Capture(['revert', file_status[1]], cwd=repo_root)
        except gclient_utils.CheckCallError:
          if not os.path.exists(file_path):
            continue
          raise