# coding=utf8 # Copyright (c) 2011 The Chromium Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. """Utility functions to handle patches.""" import posixpath import os import re class UnsupportedPatchFormat(Exception): def __init__(self, filename, status): super(UnsupportedPatchFormat, self).__init__(filename, status) self.filename = filename self.status = status def __str__(self): out = 'Can\'t process patch for file %s.' % self.filename if self.status: out += '\n%s' % self.status return out class FilePatchBase(object): """Defines a single file being modified. '/' is always used instead of os.sep for consistency. """ is_delete = False is_binary = False def __init__(self, filename): self.filename = None self._set_filename(filename) def _set_filename(self, filename): self.filename = filename.replace('\\', '/') # Blacklist a few characters for simplicity. for i in ('%', '$', '..', '\'', '"'): if i in self.filename: self._fail('Can\'t use \'%s\' in filename.' % i) for i in ('/', 'CON', 'COM'): if self.filename.startswith(i): self._fail('Filename can\'t start with \'%s\'.' % i) def get(self): raise NotImplementedError('Nothing to grab') def set_relpath(self, relpath): if not relpath: return relpath = relpath.replace('\\', '/') if relpath[0] == '/': self._fail('Relative path starts with %s' % relpath[0]) self._set_filename(posixpath.join(relpath, self.filename)) def _fail(self, msg): raise UnsupportedPatchFormat(self.filename, msg) class FilePatchDelete(FilePatchBase): """Deletes a file.""" is_delete = True def __init__(self, filename, is_binary): super(FilePatchDelete, self).__init__(filename) self.is_binary = is_binary def get(self): raise NotImplementedError('Nothing to grab') class FilePatchBinary(FilePatchBase): """Content of a new binary file.""" is_binary = True def __init__(self, filename, data, svn_properties): super(FilePatchBinary, self).__init__(filename) self.data = data self.svn_properties = svn_properties or [] def get(self): return self.data class FilePatchDiff(FilePatchBase): """Patch for a single file.""" def __init__(self, filename, diff, svn_properties): super(FilePatchDiff, self).__init__(filename) if not diff: self._fail('File doesn\'t have a diff.') self.diff_header, self.diff_hunks = self._split_header(diff) self.svn_properties = svn_properties or [] self.is_git_diff = self._is_git_diff_header(self.diff_header) self.patchlevel = 0 if self.is_git_diff: self._verify_git_header() assert not svn_properties else: self._verify_svn_header() def get(self): return self.diff_header + self.diff_hunks def set_relpath(self, relpath): old_filename = self.filename super(FilePatchDiff, self).set_relpath(relpath) # Update the header too. self.diff_header = self.diff_header.replace(old_filename, self.filename) def _split_header(self, diff): """Splits a diff in two: the header and the hunks.""" header = [] hunks = diff.splitlines(True) while hunks: header.append(hunks.pop(0)) if header[-1].startswith('--- '): break else: # Some diff may not have a ---/+++ set like a git rename with no change or # a svn diff with only property change. pass if hunks: if not hunks[0].startswith('+++ '): self._fail('Inconsistent header') header.append(hunks.pop(0)) if hunks: if not hunks[0].startswith('@@ '): self._fail('Inconsistent hunk header') # Mangle any \\ in the header to /. header_lines = ('Index:', 'diff', 'copy', 'rename', '+++', '---') basename = os.path.basename(self.filename) for i in xrange(len(header)): if (header[i].split(' ', 1)[0] in header_lines or header[i].endswith(basename)): header[i] = header[i].replace('\\', '/') return ''.join(header), ''.join(hunks) @staticmethod def _is_git_diff_header(diff_header): """Returns True if the diff for a single files was generated with git.""" # Delete: http://codereview.chromium.org/download/issue6368055_22_29.diff # Rename partial change: # http://codereview.chromium.org/download/issue6250123_3013_6010.diff # Rename no change: # http://codereview.chromium.org/download/issue6287022_3001_4010.diff return any(l.startswith('diff --git') for l in diff_header.splitlines()) def mangle(self, string): """Mangle a file path.""" return '/'.join(string.replace('\\', '/').split('/')[self.patchlevel:]) def _verify_git_header(self): """Sanity checks the header. Expects the following format: <garbagge> diff --git (|a/)<filename> (|b/)<filename> <similarity> <filemode changes> <index> <copy|rename from> <copy|rename to> --- <filename> +++ <filename> Everything is optional except the diff --git line. """ lines = self.diff_header.splitlines() # Verify the diff --git line. old = None new = None while lines: match = re.match(r'^diff \-\-git (.*?) (.*)$', lines.pop(0)) if not match: continue old = match.group(1).replace('\\', '/') new = match.group(2).replace('\\', '/') if old.startswith('a/') and new.startswith('b/'): self.patchlevel = 1 old = old[2:] new = new[2:] # The rename is about the new file so the old file can be anything. if new not in (self.filename, 'dev/null'): self._fail('Unexpected git diff output name %s.' % new) if old == 'dev/null' and new == 'dev/null': self._fail('Unexpected /dev/null git diff.') break if not old or not new: self._fail('Unexpected git diff; couldn\'t find git header.') # Handle these: # rename from <> # rename to <> # copy from <> # copy to <> while lines: if lines[0].startswith('--- '): break match = re.match(r'^(rename|copy) from (.+)$', lines.pop(0)) if not match: continue if old != match.group(2): self._fail('Unexpected git diff input name for %s.' % match.group(1)) if not lines: self._fail('Missing git diff output name for %s.' % match.group(1)) match = re.match(r'^(rename|copy) to (.+)$', lines.pop(0)) if not match: self._fail('Missing git diff output name for %s.' % match.group(1)) if new != match.group(2): self._fail('Unexpected git diff output name for %s.' % match.group(1)) # Handle ---/+++ while lines: match = re.match(r'^--- (.*)$', lines.pop(0)) if not match: continue if old != self.mangle(match.group(1)) and match.group(1) != '/dev/null': self._fail('Unexpected git diff: %s != %s.' % (old, match.group(1))) if not lines: self._fail('Missing git diff output name.') match = re.match(r'^\+\+\+ (.*)$', lines.pop(0)) if not match: self._fail('Unexpected git diff: --- not following +++.') if new != self.mangle(match.group(1)) and '/dev/null' != match.group(1): self._fail('Unexpected git diff: %s != %s.' % (new, match.group(1))) assert not lines, '_split_header() is broken' break def _verify_svn_header(self): """Sanity checks the header. A svn diff can contain only property changes, in that case there will be no proper header. To make things worse, this property change header is localized. """ lines = self.diff_header.splitlines() while lines: match = re.match(r'^--- ([^\t]+).*$', lines.pop(0)) if not match: continue # For copy and renames, it's possible that the -- line doesn't match +++, # so don't check match.group(1) to match self.filename or '/dev/null', it # can be anything else. # TODO(maruel): Handle rename/copy explicitly. # if match.group(1) not in (self.filename, '/dev/null'): # self.source_file = match.group(1) if not lines: self._fail('Nothing after header.') match = re.match(r'^\+\+\+ ([^\t]+).*$', lines.pop(0)) if not match: self._fail('Unexpected diff: --- not following +++.') if match.group(1) not in (self.filename, '/dev/null'): self._fail('Unexpected diff: %s.' % match.group(1)) assert not lines, '_split_header() is broken' break else: # Cheap check to make sure the file name is at least mentioned in the # 'diff' header. That the only remaining invariant. if not self.filename in self.diff_header: self._fail('Diff seems corrupted.') class PatchSet(object): """A list of FilePatch* objects.""" def __init__(self, patches): self.patches = patches for p in self.patches: assert isinstance(p, FilePatchBase) def set_relpath(self, relpath): """Used to offset the patch into a subdirectory.""" for patch in self.patches: patch.set_relpath(relpath) def __iter__(self): for patch in self.patches: yield patch @property def filenames(self): return [p.filename for p in self.patches]