Commit 8fab6b66 authored by maruel@chromium.org's avatar maruel@chromium.org

Fix unicode upgrade of patch.

When a diff contains UTF-8 characters, the filename would be stored as a unicode
object instead of a str, even if the buffer was originally sent as a
UTF-8 encoded str. Then, once the diff was reconstructed to be sent to 'patch', a
unicode object would be sent instead of a str, confusing patch.

R=cmp@chromium.org
BUG=
TEST=


Review URL: http://codereview.chromium.org/9387024

git-svn-id: svn://svn.chromium.org/chrome/trunk/tools/depot_tools@122361 0039d316-1c4b-4281-b951-d872f2087c98
parent 98e69458
......@@ -37,6 +37,15 @@ class FilePatchBase(object):
# Set when the file is copied or moved.
self.source_filename = None
@property
def filename_utf8(self):
  """Returns self.filename encoded with the 'utf-8' codec."""
  encoded_name = self.filename.encode('utf-8')
  return encoded_name
@property
def source_filename_utf8(self):
  """Returns self.source_filename encoded as 'utf-8', or None if it is None."""
  # Nothing to encode when no source file name was recorded.
  if self.source_filename is None:
    return None
  return self.source_filename.encode('utf-8')
@staticmethod
def _process_filename(filename):
filename = filename.replace('\\', '/')
......@@ -88,8 +97,8 @@ class FilePatchBase(object):
out += ' '
out += ' '
if self.source_filename:
out += '%s->' % self.source_filename
return out + str(self.filename)
out += '%s->' % self.source_filename_utf8
return out + self.filename_utf8
class FilePatchDelete(FilePatchBase):
......@@ -153,25 +162,27 @@ class FilePatchDiff(FilePatchBase):
# patch is stupid. It patches the source_filename instead so get rid of
# any source_filename reference if needed.
return (
self.diff_header.replace(self.source_filename, self.filename) +
self.diff_header.replace(
self.source_filename_utf8, self.filename_utf8) +
self.diff_hunks)
def set_relpath(self, relpath):
old_filename = self.filename
old_source_filename = self.source_filename or self.filename
old_filename = self.filename_utf8
old_source_filename = self.source_filename_utf8 or self.filename_utf8
super(FilePatchDiff, self).set_relpath(relpath)
# Update the header too.
source_filename = self.source_filename or self.filename
filename = self.filename_utf8
source_filename = self.source_filename_utf8 or self.filename_utf8
lines = self.diff_header.splitlines(True)
for i, line in enumerate(lines):
if line.startswith('diff --git'):
lines[i] = line.replace(
'a/' + old_source_filename, source_filename).replace(
'b/' + old_filename, self.filename)
'b/' + old_filename, filename)
elif re.match(r'^\w+ from .+$', line) or line.startswith('---'):
lines[i] = line.replace(old_source_filename, source_filename)
elif re.match(r'^\w+ to .+$', line) or line.startswith('+++'):
lines[i] = line.replace(old_filename, self.filename)
lines[i] = line.replace(old_filename, filename)
self.diff_header = ''.join(lines)
def _split_header(self, diff):
......@@ -197,7 +208,7 @@ class FilePatchDiff(FilePatchBase):
# Mangle any \\ in the header to /.
header_lines = ('Index:', 'diff', 'copy', 'rename', '+++', '---')
basename = os.path.basename(self.filename)
basename = os.path.basename(self.filename_utf8)
for i in xrange(len(header)):
if (header[i].split(' ', 1)[0] in header_lines or
header[i].endswith(basename)):
......@@ -314,7 +325,7 @@ class FilePatchDiff(FilePatchBase):
new = self.mangle(match.group(2))
# The rename is about the new file so the old file can be anything.
if new not in (self.filename, 'dev/null'):
if new not in (self.filename_utf8, 'dev/null'):
self._fail('Unexpected git diff output name %s.' % new)
if old == 'dev/null' and new == 'dev/null':
self._fail('Unexpected /dev/null git diff.')
......@@ -323,9 +334,9 @@ class FilePatchDiff(FilePatchBase):
if not old or not new:
self._fail('Unexpected git diff; couldn\'t find git header.')
if old not in (self.filename, 'dev/null'):
if old not in (self.filename_utf8, 'dev/null'):
# Copy or rename.
self.source_filename = old
self.source_filename = old.decode('utf-8')
self.is_new = True
last_line = ''
......@@ -337,7 +348,7 @@ class FilePatchDiff(FilePatchBase):
# Cheap check to make sure the file name is at least mentioned in the
# 'diff' header. That the only remaining invariant.
if not self.filename in self.diff_header:
if not self.filename_utf8 in self.diff_header:
self._fail('Diff seems corrupted.')
def _verify_git_header_process_line(self, lines, line, last_line):
......@@ -349,7 +360,7 @@ class FilePatchDiff(FilePatchBase):
http://www.kernel.org/pub/software/scm/git/docs/git-diff.html
"""
match = re.match(r'^(rename|copy) from (.+)$', line)
old = self.source_filename or self.filename
old = self.source_filename_utf8 or self.filename_utf8
if match:
if old != match.group(2):
self._fail('Unexpected git diff input name for line %s.' % line)
......@@ -361,7 +372,7 @@ class FilePatchDiff(FilePatchBase):
match = re.match(r'^(rename|copy) to (.+)$', line)
if match:
if self.filename != match.group(2):
if self.filename_utf8 != match.group(2):
self._fail('Unexpected git diff output name for line %s.' % line)
if not last_line.startswith('%s from ' % match.group(1)):
self._fail(
......@@ -404,7 +415,7 @@ class FilePatchDiff(FilePatchBase):
self._fail('Unexpected git diff: --- not following +++.')
if '/dev/null' == match.group(1):
self.is_delete = True
elif self.filename != self.mangle(match.group(1)):
elif self.filename_utf8 != self.mangle(match.group(1)):
self._fail(
'Unexpected git diff: %s != %s.' % (self.filename, match.group(1)))
if lines:
......@@ -429,7 +440,7 @@ class FilePatchDiff(FilePatchBase):
# Cheap check to make sure the file name is at least mentioned in the
# 'diff' header. That the only remaining invariant.
if not self.filename in self.diff_header:
if not self.filename_utf8 in self.diff_header:
self._fail('Diff seems corrupted.')
def _verify_svn_header_process_line(self, lines, line, last_line):
......@@ -443,9 +454,9 @@ class FilePatchDiff(FilePatchBase):
self._fail('--- and +++ are reversed')
if match.group(1) == '/dev/null':
self.is_new = True
elif self.mangle(match.group(1)) != self.filename:
elif self.mangle(match.group(1)) != self.filename_utf8:
# guess the source filename.
self.source_filename = match.group(1)
self.source_filename = match.group(1).decode('utf-8')
self.is_new = True
if not lines or not lines[0].startswith('+++'):
self._fail('Nothing after header.')
......@@ -457,7 +468,7 @@ class FilePatchDiff(FilePatchBase):
self._fail('Unexpected diff: --- not following +++.')
if match.group(1) == '/dev/null':
self.is_delete = True
elif self.mangle(match.group(1)) != self.filename:
elif self.mangle(match.group(1)) != self.filename_utf8:
self._fail('Unexpected diff: %s.' % match.group(1))
if lines:
self._fail('Crap after +++')
......@@ -479,10 +490,10 @@ class PatchSet(object):
Deletes are last.
"""
if p.source_filename:
return (p.is_delete, p.source_filename, p.filename)
return (p.is_delete, p.source_filename_utf8, p.filename_utf8)
else:
# tuple are always greater than string, abuse that fact.
return (p.is_delete, (p.filename,), p.filename)
return (p.is_delete, (p.filename_utf8,), p.filename_utf8)
self.patches = sorted(patches, key=key)
......
# coding: utf-8
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
......@@ -122,6 +123,18 @@ class RAW(object):
'-{"traces": {"1t_proc": ["2.0", "0.0"], "1t_proc_ref": ["1.0", ...\n'
'+{"traces": {"1t_proc": ["2.0", "0.0"], "1t_proc_ref": ["1.0", ...\n')
# Raw diff whose '---' and '+++' lines name two different files, with non-ASCII
# characters both in the file names and in the hunk. The text is written as
# unicode literals and then encoded to UTF-8.
RENAME_UTF8 = (
u'--- file_à\n'
u'+++ filé_b\n'
u'@@ -3,6 +3,7 @@ bb\n'
u' ccc\n'
u' ddé\n'
u' e\n'
u'+FÔÒ!\n'
u' ff\n'
u' ggg\n'
u' hh\n').encode('utf-8')
class GIT(object):
"""Sample patches generated by git diff."""
......
#!/usr/bin/env python
# coding: utf-8
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
......@@ -275,6 +276,19 @@ class PatchTest(unittest.TestCase):
p, 'file_b', RAW.MINIMAL_RENAME, source_filename='file_a', is_new=True,
nb_hunks=0)
def testUnicodeFilenameGet(self):
  """Checks FilePatchDiff with non-ASCII names, before and after set_relpath."""
  new_name = u'filé_b'
  old_name = u'file_à'
  diff_patch = patch.FilePatchDiff(new_name, RAW.RENAME_UTF8, [])
  self._check_patch(
      diff_patch, new_name, RAW.RENAME_UTF8, source_filename=old_name,
      is_new=True, nb_hunks=1)
  # get() must return a str both before and after the path is relocated.
  self.assertTrue(isinstance(diff_patch.get(False), str))
  diff_patch.set_relpath('foo')
  self.assertTrue(isinstance(diff_patch.get(False), str))
  # Both the unicode names and their UTF-8 encodings carry the 'foo/' prefix.
  relocated_source = u'foo/file_à'
  relocated_dest = u'foo/filé_b'
  self.assertEquals(
      relocated_source.encode('utf-8'), diff_patch.source_filename_utf8)
  self.assertEquals(relocated_source, diff_patch.source_filename)
  self.assertEquals(relocated_dest.encode('utf-8'), diff_patch.filename_utf8)
  self.assertEquals(relocated_dest, diff_patch.filename)
def testGitCopyPartial(self):
p = patch.FilePatchDiff('wtf2', GIT.COPY_PARTIAL, [])
self._check_patch(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment