Commit ae97943b, authored by Raphael Kubo da Costa and committed by Commit Bot

watchlists: Precompile filepath regular expressions before using them.

Instead of calling re.search() for every entry in WATCHLIST_DEFINITIONS for
every file being processed, create the regular expression objects beforehand
when parsing the watchlist file.

Processing a Chromium commit with 17k files went down from about 25 minutes
to 10 seconds with this change.

Bug: 780055
Change-Id: I6493971b67a7466ce8e1e3b28537018a724bbf47
Reviewed-on: https://chromium-review.googlesource.com/751463
Reviewed-by: Aaron Gable <agable@chromium.org>
Commit-Queue: Raphael Kubo da Costa (rakuco) <raphael.kubo.da.costa@intel.com>
parent 70dea427
...@@ -38,6 +38,7 @@ class Watchlists(object): ...@@ -38,6 +38,7 @@ class Watchlists(object):
_RULES_FILENAME = _RULES _RULES_FILENAME = _RULES
_repo_root = None _repo_root = None
_defns = {} # Definitions _defns = {} # Definitions
_path_regexps = {} # Name -> Regular expression mapping
_watchlists = {} # name to email mapping _watchlists = {} # name to email mapping
def __init__(self, repo_root): def __init__(self, repo_root):
...@@ -88,6 +89,15 @@ class Watchlists(object): ...@@ -88,6 +89,15 @@ class Watchlists(object):
self._defns = defns self._defns = defns
self._watchlists = watchlists self._watchlists = watchlists
# Compile the regular expressions ahead of time to avoid creating them
# on-the-fly multiple times per file.
self._path_regexps = {}
for name, rule in defns.iteritems():
filepath = rule.get('filepath')
if not filepath:
continue
self._path_regexps[name] = re.compile(filepath)
# Verify that all watchlist names are defined # Verify that all watchlist names are defined
for name in watchlists: for name in watchlists:
if name not in defns: if name not in defns:
...@@ -105,13 +115,10 @@ class Watchlists(object): ...@@ -105,13 +115,10 @@ class Watchlists(object):
watchers = set() # A set, to avoid duplicates watchers = set() # A set, to avoid duplicates
for path in paths: for path in paths:
path = path.replace(os.sep, '/') path = path.replace(os.sep, '/')
for name, rule in self._defns.iteritems(): for name, rule in self._path_regexps.iteritems():
if name not in self._watchlists: if name not in self._watchlists:
continue continue
rex_str = rule.get('filepath') if rule.search(path):
if not rex_str:
continue
if re.search(rex_str, path):
map(watchers.add, self._watchlists[name]) map(watchers.add, self._watchlists[name])
return list(watchers) return list(watchers)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment