Commit 9c062012 authored by Bruce Dawson's avatar Bruce Dawson Committed by Commit Bot

Fix multiple confusingly escaped regex strings

Python (prior to 3.8) treats meaningless string escape sequences as if
they were a slash followed by the character. That is, '\w' == '\\w'.
Python 3.8 rejects this, and it's confusing. This change fixes seven of these
regex strings found in depot_tools (through a regex search, natch). Most of
the fixes don't actually change the value of the strings, and this was
manually verified:

>>> '(/c(/.*/\+)?)?/(\d+)(/(\d+)?/?)?$' == r'(/c(/.*/\+)?)?/(\d+)(/(\d+)?/?)?$'
True
>>> '#\s*OWNERS_STATUS\s+=\s+(.+)$' == r'#\s*OWNERS_STATUS\s+=\s+(.+)$'
True
>>> 'COM\d' == r'COM\d'
True
>>> '^\s+Change-Id:\s*(\S+)$' == r'^\s+Change-Id:\s*(\S+)$'
True
>>> 'ETag:\s+([a-z0-9]{32})' == r'ETag:\s+([a-z0-9]{32})'
True

Two exceptions were the regex expressions in filter_demo_output.py and scm.py.
These were turned into raw strings despite this changing the value of the
string passed to re. This works because re supports the \x, \d, \w, \t, and
other escape sequences needed to make this work.

TL;DR - use raw strings for regex to avoid melting your brain. If bulk changing
regex strings to raw watch out for double-slashes.

Bug: 958138
Change-Id: Ic45264cfc63e8bae9cfcffe2cd88a57c2d3dcdae
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/1590534
Commit-Queue: Bruce Dawson <brucedawson@chromium.org>
Reviewed-by: 's avatarDirk Pranke <dpranke@chromium.org>
parent 0647cd50
......@@ -2467,7 +2467,7 @@ class _GerritChangelistImpl(_ChangelistCodereviewBase):
part = parsed_url.fragment
else:
part = parsed_url.path
match = re.match('(/c(/.*/\+)?)?/(\d+)(/(\d+)?/?)?$', part)
match = re.match(r'(/c(/.*/\+)?)?/(\d+)(/(\d+)?/?)?$', part)
if match:
return _ParsedIssueNumberArgument(
issue=int(match.group(3)),
......
......@@ -66,7 +66,7 @@ def main():
backend = sys.argv[1]
output = sys.stdin.read().rstrip()
callout_re = re.compile('\x1b\[(\d+)c\n')
callout_re = re.compile(r'\x1b\[(\d+)c\n')
callouts = collections.defaultdict(int)
for i, line in enumerate(output.splitlines(True)):
m = callout_re.match(line)
......
......@@ -308,7 +308,7 @@ class Database(object):
line = line.strip()
if line.startswith('#'):
if is_toplevel:
m = re.match('#\s*OWNERS_STATUS\s+=\s+(.+)$', line)
m = re.match(r'#\s*OWNERS_STATUS\s+=\s+(.+)$', line)
if m:
self._status_file = m.group(1).strip()
continue
......
......@@ -60,7 +60,7 @@ class FilePatchBase(object):
if filename == 'CON':
raise UnsupportedPatchFormat(
filename, 'Filename can\'t be \'CON\'.')
if re.match('COM\d', filename):
if re.match(r'COM\d', filename):
raise UnsupportedPatchFormat(
filename, 'Filename can\'t be \'%s\'.' % filename)
return filename
......
......@@ -147,7 +147,7 @@ class GIT(object):
# 3-way merges can cause the status can be 'MMM' instead of 'M'. This
# can happen when the user has 2 local branches and he diffs between
# these 2 branches instead diffing to upstream.
m = re.match('^(\w)+\t(.+)$', statusline)
m = re.match(r'^(\w)+\t(.+)$', statusline)
if not m:
raise gclient_utils.Error(
'status currently unsupported: %s' % statusline)
......
......@@ -77,7 +77,7 @@ class GerritTestCase(unittest.TestCase):
"""
COMMIT_RE = re.compile(r'^commit ([0-9a-fA-F]{40})$')
CHANGEID_RE = re.compile('^\s+Change-Id:\s*(\S+)$')
CHANGEID_RE = re.compile(r'^\s+Change-Id:\s*(\S+)$')
DEVNULL = open(os.devnull, 'w')
TEST_USERNAME = 'test-username'
TEST_EMAIL = 'test-username@test.org'
......
......@@ -77,7 +77,7 @@ def _upload_worker(
if gsutil.check_call('ls', file_url)[0] == 0 and not force:
# File exists, check MD5 hash.
_, out, _ = gsutil.check_call_with_retries('ls', '-L', file_url)
etag_match = re.search('ETag:\s+([a-z0-9]{32})', out)
etag_match = re.search(r'ETag:\s+([a-z0-9]{32})', out)
if etag_match:
remote_md5 = etag_match.group(1)
# Calculate the MD5 checksum to match it to Google Storage's ETag.
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment