Commit a148b5ee authored by Matt Giuca's avatar Matt Giuca Committed by Commit Bot

git hyper-blame: Fix tabulation of Unicode characters in author name.

Previously, it counted the number of UTF-8 bytes when spacing out the
table, not the number of code points.

Bug: 808905
Change-Id: Ice5504089e0f7097e108c6dfbbb810620b9dfc94
Reviewed-on: https://chromium-review.googlesource.com/901142
Commit-Queue: Matt Giuca <mgiuca@chromium.org>
Reviewed-by: 's avatarRobbie Iannucci <iannucci@chromium.org>
parent 63560ada
...@@ -94,6 +94,11 @@ def parse_blame(blameoutput): ...@@ -94,6 +94,11 @@ def parse_blame(blameoutput):
yield BlameLine(commit, context, lineno_then, lineno_now, False) yield BlameLine(commit, context, lineno_then, lineno_now, False)
def num_codepoints(s):
"""Gets the length of a UTF-8 byte string, in Unicode codepoints."""
return len(s.decode('utf-8', errors='replace'))
def print_table(table, colsep=' ', rowsep='\n', align=None, out=sys.stdout): def print_table(table, colsep=' ', rowsep='\n', align=None, out=sys.stdout):
"""Print a 2D rectangular array, aligning columns with spaces. """Print a 2D rectangular array, aligning columns with spaces.
...@@ -107,9 +112,10 @@ def print_table(table, colsep=' ', rowsep='\n', align=None, out=sys.stdout): ...@@ -107,9 +112,10 @@ def print_table(table, colsep=' ', rowsep='\n', align=None, out=sys.stdout):
colwidths = None colwidths = None
for row in table: for row in table:
if colwidths is None: if colwidths is None:
colwidths = [len(x) for x in row] colwidths = [num_codepoints(x) for x in row]
else: else:
colwidths = [max(colwidths[i], len(x)) for i, x in enumerate(row)] colwidths = [max(colwidths[i], num_codepoints(x))
for i, x in enumerate(row)]
if align is None: # pragma: no cover if align is None: # pragma: no cover
align = 'l' * len(colwidths) align = 'l' * len(colwidths)
...@@ -117,7 +123,7 @@ def print_table(table, colsep=' ', rowsep='\n', align=None, out=sys.stdout): ...@@ -117,7 +123,7 @@ def print_table(table, colsep=' ', rowsep='\n', align=None, out=sys.stdout):
for row in table: for row in table:
cells = [] cells = []
for i, cell in enumerate(row): for i, cell in enumerate(row):
padding = ' ' * (colwidths[i] - len(cell)) padding = ' ' * (colwidths[i] - num_codepoints(cell))
if align[i] == 'r': if align[i] == 'r':
cell = padding + cell cell = padding + cell
elif i < len(row) - 1: elif i < len(row) - 1:
......
...@@ -20,6 +20,8 @@ from testing_support import git_test_utils ...@@ -20,6 +20,8 @@ from testing_support import git_test_utils
import git_common import git_common
GitRepo = git_test_utils.GitRepo
class GitHyperBlameTestBase(git_test_utils.GitRepoReadOnlyTestBase): class GitHyperBlameTestBase(git_test_utils.GitRepoReadOnlyTestBase):
@classmethod @classmethod
...@@ -36,16 +38,21 @@ class GitHyperBlameTestBase(git_test_utils.GitRepoReadOnlyTestBase): ...@@ -36,16 +38,21 @@ class GitHyperBlameTestBase(git_test_utils.GitRepoReadOnlyTestBase):
revision=revision, out=stdout, err=stderr) revision=revision, out=stdout, err=stderr)
return retval, stdout.getvalue().rstrip().split('\n') return retval, stdout.getvalue().rstrip().split('\n')
def blame_line(self, commit_name, rest, filename=None): def blame_line(self, commit_name, rest, author=None, filename=None):
"""Generate a blame line from a commit. """Generate a blame line from a commit.
Args: Args:
commit_name: The commit's schema name. commit_name: The commit's schema name.
rest: The blame line after the timestamp. e.g., '2) file2 - merged'. rest: The blame line after the timestamp. e.g., '2) file2 - merged'.
author: The author's name. If omitted, reads the name out of the commit.
filename: The filename. If omitted, not shown in the blame line.
""" """
short = self.repo[commit_name][:8] short = self.repo[commit_name][:8]
start = '%s %s' % (short, filename) if filename else short start = '%s %s' % (short, filename) if filename else short
author = self.repo.show_commit(commit_name, format_string='%an %ai') if author is None:
author = self.repo.show_commit(commit_name, format_string='%an %ai')
else:
author += self.repo.show_commit(commit_name, format_string=' %ai')
return '%s (%s %s' % (start, author, rest) return '%s (%s %s' % (start, author, rest)
class GitHyperBlameMainTest(GitHyperBlameTestBase): class GitHyperBlameMainTest(GitHyperBlameTestBase):
...@@ -568,6 +575,65 @@ class GitHyperBlameLineNumberTest(GitHyperBlameTestBase): ...@@ -568,6 +575,65 @@ class GitHyperBlameLineNumberTest(GitHyperBlameTestBase):
self.assertEqual(expected_output, output) self.assertEqual(expected_output, output)
class GitHyperBlameUnicodeTest(GitHyperBlameTestBase):
REPO_SCHEMA = """
A B C
"""
COMMIT_A = {
GitRepo.AUTHOR_NAME: 'ASCII Author',
'file': {'data': 'red\nblue\n'},
}
# Add a line.
COMMIT_B = {
GitRepo.AUTHOR_NAME: u'\u4e2d\u56fd\u4f5c\u8005'.encode('utf-8'),
'file': {'data': 'red\ngreen\nblue\n'},
}
# Modify a line with non-UTF-8 author and file text.
COMMIT_C = {
GitRepo.AUTHOR_NAME: u'Lat\u00edn-1 Author'.encode('latin-1'),
'file': {'data': u'red\ngre\u00e9n\nblue\n'.encode('latin-1')},
}
def testNonASCIIAuthorName(self):
"""Ensures correct tabulation.
Tests the case where there are non-ASCII (UTF-8) characters in the author
name.
Regression test for https://crbug.com/808905.
"""
expected_output = [
self.blame_line('A', '1) red', author='ASCII Author'),
# Expect 8 spaces, to line up with the other name.
self.blame_line('B', '2) green',
author=u'\u4e2d\u56fd\u4f5c\u8005 '.encode('utf-8')),
self.blame_line('A', '3) blue', author='ASCII Author'),
]
retval, output = self.run_hyperblame([], 'file', 'tag_B')
self.assertEqual(0, retval)
self.assertEqual(expected_output, output)
def testNonUTF8Data(self):
"""Ensures correct behaviour even if author or file data is not UTF-8.
There is no guarantee that a file will be UTF-8-encoded, so this is
realistic.
"""
expected_output = [
self.blame_line('A', '1) red', author='ASCII Author '),
# The Author has been re-encoded as UTF-8. The file data is preserved as
# raw byte data.
self.blame_line('C', '2) gre\xe9n', author='Lat\xc3\xadn-1 Author'),
self.blame_line('A', '3) blue', author='ASCII Author '),
]
retval, output = self.run_hyperblame([], 'file', 'tag_C')
self.assertEqual(0, retval)
self.assertEqual(expected_output, output)
if __name__ == '__main__': if __name__ == '__main__':
sys.exit(coverage_utils.covered_main( sys.exit(coverage_utils.covered_main(
os.path.join(DEPOT_TOOLS_ROOT, 'git_hyper_blame.py'))) os.path.join(DEPOT_TOOLS_ROOT, 'git_hyper_blame.py')))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment