#!/usr/bin/env python # Copyright (c) 2011 The Chromium Authors. All rights reserved. # Use of this source code is governed by a BSD-style license that can be # found in the LICENSE file. """Generate fake repositories for testing.""" import atexit import datetime import errno import logging import os import pprint import re import socket import sys import tempfile import time # trial_dir must be first for non-system libraries. from testing_support import trial_dir import gclient_utils import scm import subprocess2 def write(path, content): f = open(path, 'wb') f.write(content) f.close() join = os.path.join def read_tree(tree_root): """Returns a dict of all the files in a tree. Defaults to self.root_dir.""" tree = {} for root, dirs, files in os.walk(tree_root): for d in filter(lambda x: x.startswith('.'), dirs): dirs.remove(d) for f in [join(root, f) for f in files if not f.startswith('.')]: filepath = f[len(tree_root) + 1:].replace(os.sep, '/') assert len(filepath), f tree[filepath] = open(join(root, f), 'rU').read() return tree def dict_diff(dict1, dict2): diff = {} for k, v in dict1.iteritems(): if k not in dict2: diff[k] = v elif v != dict2[k]: diff[k] = (v, dict2[k]) for k, v in dict2.iteritems(): if k not in dict1: diff[k] = v return diff def commit_git(repo): """Commits the changes and returns the new hash.""" subprocess2.check_call(['git', 'add', '-A', '-f'], cwd=repo) subprocess2.check_call(['git', 'commit', '-q', '--message', 'foo'], cwd=repo) rev = subprocess2.check_output( ['git', 'show-ref', '--head', 'HEAD'], cwd=repo).split(' ', 1)[0] logging.debug('At revision %s' % rev) return rev def test_port(host, port): s = socket.socket() try: return s.connect_ex((host, port)) == 0 finally: s.close() def find_free_port(host, base_port): """Finds a listening port free to listen to.""" while base_port < (2<<16): if not test_port(host, base_port): return base_port base_port += 1 assert False, 'Having issues finding an available port' def wait_for_port_to_bind(host, port, process): sock = socket.socket() if sys.platform == 'darwin': # On Mac SnowLeopard, if we attempt to connect to the socket # immediately, it fails with EINVAL and never gets a chance to # connect (putting us into a hard spin and then failing). # Linux doesn't need this. time.sleep(0.2) try: start = datetime.datetime.utcnow() maxdelay = datetime.timedelta(seconds=30) while (datetime.datetime.utcnow() - start) < maxdelay: try: sock.connect((host, port)) logging.debug('%d is now bound' % port) return except (socket.error, EnvironmentError): pass logging.debug('%d is still not bound' % port) finally: sock.close() # The process failed to bind. Kill it and dump its ouput. process.kill() logging.error('%s' % process.communicate()[0]) assert False, '%d is still not bound' % port def wait_for_port_to_free(host, port): start = datetime.datetime.utcnow() maxdelay = datetime.timedelta(seconds=30) while (datetime.datetime.utcnow() - start) < maxdelay: try: sock = socket.socket() sock.connect((host, port)) logging.debug('%d was bound, waiting to free' % port) except (socket.error, EnvironmentError): logging.debug('%d now free' % port) return finally: sock.close() assert False, '%d is still bound' % port class FakeReposBase(object): """Generate git repositories to test gclient functionality. Many DEPS functionalities need to be tested: Var, File, From, deps_os, hooks, use_relative_paths. And types of dependencies: Relative urls, Full urls, git. populateGit() needs to be implemented by the subclass. """ # Hostname NB_GIT_REPOS = 1 USERS = [ ('user1@example.com', 'foo'), ('user2@example.com', 'bar'), ] def __init__(self, host=None): self.trial = trial_dir.TrialDir('repos') self.host = host or '127.0.0.1' # Format is { repo: [ None, (hash, tree), (hash, tree), ... ], ... } # so reference looks like self.git_hashes[repo][rev][0] for hash and # self.git_hashes[repo][rev][1] for it's tree snapshot. # It is 1-based too. self.git_hashes = {} self.gitdaemon = None self.git_pid_file = None self.git_root = None self.git_dirty = False self.git_port = None self.git_base = None @property def root_dir(self): return self.trial.root_dir def set_up(self): """All late initialization comes here.""" self.cleanup_dirt() if not self.root_dir: try: # self.root_dir is not set before this call. self.trial.set_up() self.git_root = join(self.root_dir, 'git') finally: # Registers cleanup. atexit.register(self.tear_down) def cleanup_dirt(self): """For each dirty repository, destroy it.""" if self.git_dirty: if not self.tear_down_git(): logging.error('Using both leaking checkout and git dirty checkout') def tear_down(self): """Kills the servers and delete the directories.""" self.tear_down_git() # This deletes the directories. self.trial.tear_down() self.trial = None def tear_down_git(self): if self.gitdaemon: logging.debug('Killing git-daemon pid %s' % self.gitdaemon.pid) self.gitdaemon.kill() self.gitdaemon = None if self.git_pid_file: pid = int(self.git_pid_file.read()) self.git_pid_file.close() logging.debug('Killing git daemon pid %s' % pid) try: subprocess2.kill_pid(pid) except OSError as e: if e.errno != errno.ESRCH: # no such process raise self.git_pid_file = None wait_for_port_to_free(self.host, self.git_port) self.git_port = None self.git_base = None if not self.trial.SHOULD_LEAK: logging.debug('Removing %s' % self.git_root) gclient_utils.rmtree(self.git_root) else: return False return True @staticmethod def _genTree(root, tree_dict): """For a dictionary of file contents, generate a filesystem.""" if not os.path.isdir(root): os.makedirs(root) for (k, v) in tree_dict.iteritems(): k_os = k.replace('/', os.sep) k_arr = k_os.split(os.sep) if len(k_arr) > 1: p = os.sep.join([root] + k_arr[:-1]) if not os.path.isdir(p): os.makedirs(p) if v is None: os.remove(join(root, k)) else: write(join(root, k), v) def set_up_git(self): """Creates git repositories and start the servers.""" self.set_up() if self.gitdaemon: return True assert self.git_pid_file == None try: subprocess2.check_output(['git', '--version']) except (OSError, subprocess2.CalledProcessError): return False for repo in ['repo_%d' % r for r in range(1, self.NB_GIT_REPOS + 1)]: subprocess2.check_call(['git', 'init', '-q', join(self.git_root, repo)]) self.git_hashes[repo] = [None] self.git_port = find_free_port(self.host, 20000) self.git_base = 'git://%s:%d/git/' % (self.host, self.git_port) # Start the daemon. self.git_pid_file = tempfile.NamedTemporaryFile() cmd = ['git', 'daemon', '--export-all', '--reuseaddr', '--base-path=' + self.root_dir, '--pid-file=' + self.git_pid_file.name, '--port=%d' % self.git_port] if self.host == '127.0.0.1': cmd.append('--listen=' + self.host) self.check_port_is_free(self.git_port) self.gitdaemon = subprocess2.Popen( cmd, cwd=self.root_dir, stdout=subprocess2.PIPE, stderr=subprocess2.PIPE) wait_for_port_to_bind(self.host, self.git_port, self.gitdaemon) self.populateGit() self.git_dirty = False return True def _commit_git(self, repo, tree): repo_root = join(self.git_root, repo) self._genTree(repo_root, tree) commit_hash = commit_git(repo_root) if self.git_hashes[repo][-1]: new_tree = self.git_hashes[repo][-1][1].copy() new_tree.update(tree) else: new_tree = tree.copy() self.git_hashes[repo].append((commit_hash, new_tree)) def check_port_is_free(self, port): sock = socket.socket() try: sock.connect((self.host, port)) # It worked, throw. assert False, '%d shouldn\'t be bound' % port except (socket.error, EnvironmentError): pass finally: sock.close() def populateGit(self): raise NotImplementedError() class FakeRepos(FakeReposBase): """Implements populateGit().""" NB_GIT_REPOS = 5 def populateGit(self): # Testing: # - dependency disappear # - dependency renamed # - versioned and unversioned reference # - relative and full reference # - deps_os # - var # - hooks # - From # TODO(maruel): # - File: File is hard to test here because it's SVN-only. It's # implementation should probably be replaced to use urllib instead. # - $matching_files # - use_relative_paths self._commit_git('repo_3', { 'origin': 'git/repo_3@1\n', }) self._commit_git('repo_3', { 'origin': 'git/repo_3@2\n', }) self._commit_git('repo_1', { 'DEPS': """ vars = { 'DummyVariable': 'repo', } deps = { 'src/repo2': '%(git_base)srepo_2', 'src/repo2/repo3': '/' + Var('DummyVariable') + '_3@%(hash3)s', } deps_os = { 'mac': { 'src/repo4': '/repo_4', }, }""" % { 'git_base': self.git_base, # See self.__init__() for the format. Grab's the hash of the first # commit in repo_2. Only keep the first 7 character because of: # TODO(maruel): http://crosbug.com/3591 We need to strip the hash.. # duh. 'hash3': self.git_hashes['repo_3'][1][0][:7] }, 'origin': 'git/repo_1@1\n', }) self._commit_git('repo_2', { 'origin': 'git/repo_2@1\n', 'DEPS': """ deps = { 'foo/bar': '/repo_3', } """, }) self._commit_git('repo_2', { 'origin': 'git/repo_2@2\n', }) self._commit_git('repo_4', { 'origin': 'git/repo_4@1\n', }) self._commit_git('repo_4', { 'origin': 'git/repo_4@2\n', }) self._commit_git('repo_1', { 'DEPS': """ deps = { 'src/repo2': '%(git_base)srepo_2@%(hash)s', #'src/repo2/repo_renamed': '/repo_3', 'src/repo2/repo_renamed': From('src/repo2', 'foo/bar'), } # I think this is wrong to have the hooks run from the base of the gclient # checkout. It's maybe a bit too late to change that behavior. hooks = [ { 'pattern': '.', 'action': ['python', '-c', 'open(\\'src/git_hooked1\\', \\'w\\').write(\\'git_hooked1\\')'], }, { # Should not be run. 'pattern': 'nonexistent', 'action': ['python', '-c', 'open(\\'src/git_hooked2\\', \\'w\\').write(\\'git_hooked2\\')'], }, ] """ % { 'git_base': self.git_base, # See self.__init__() for the format. Grab's the hash of the first # commit in repo_2. Only keep the first 7 character because of: # TODO(maruel): http://crosbug.com/3591 We need to strip the hash.. duh. 'hash': self.git_hashes['repo_2'][1][0][:7] }, 'origin': 'git/repo_1@2\n', }) self._commit_git('repo_5', {'origin': 'git/repo_5@1\n'}) self._commit_git('repo_5', { 'DEPS': """ deps = { 'src/repo1': '%(git_base)srepo_1@%(hash1)s', 'src/repo2': '%(git_base)srepo_2@%(hash2)s', } # Hooks to run after a project is processed but before its dependencies are # processed. pre_deps_hooks = [ { 'action': ['python', '-c', 'print "pre-deps hook"; open(\\'src/git_pre_deps_hooked\\', \\'w\\').write(\\'git_pre_deps_hooked\\')'], } ] """ % { 'git_base': self.git_base, 'hash1': self.git_hashes['repo_1'][2][0][:7], 'hash2': self.git_hashes['repo_2'][1][0][:7], }, 'origin': 'git/repo_5@2\n', }) self._commit_git('repo_5', { 'DEPS': """ deps = { 'src/repo1': '%(git_base)srepo_1@%(hash1)s', 'src/repo2': '%(git_base)srepo_2@%(hash2)s', } # Hooks to run after a project is processed but before its dependencies are # processed. pre_deps_hooks = [ { 'action': ['python', '-c', 'print "pre-deps hook"; open(\\'src/git_pre_deps_hooked\\', \\'w\\').write(\\'git_pre_deps_hooked\\')'], }, { 'action': ['python', '-c', 'import sys; sys.exit(1)'], } ] """ % { 'git_base': self.git_base, 'hash1': self.git_hashes['repo_1'][2][0][:7], 'hash2': self.git_hashes['repo_2'][1][0][:7], }, 'origin': 'git/repo_5@3\n', }) class FakeRepoSkiaDEPS(FakeReposBase): """Simulates the Skia DEPS transition in Chrome.""" NB_GIT_REPOS = 5 DEPS_git_pre = """deps = { 'src/third_party/skia/gyp': '%(git_base)srepo_3', 'src/third_party/skia/include': '%(git_base)srepo_4', 'src/third_party/skia/src': '%(git_base)srepo_5', }""" DEPS_post = """deps = { 'src/third_party/skia': '%(git_base)srepo_1', }""" def populateGit(self): # Skia repo. self._commit_git('repo_1', { 'skia_base_file': 'root-level file.', 'gyp/gyp_file': 'file in the gyp directory', 'include/include_file': 'file in the include directory', 'src/src_file': 'file in the src directory', }) self._commit_git('repo_3', { # skia/gyp 'gyp_file': 'file in the gyp directory', }) self._commit_git('repo_4', { # skia/include 'include_file': 'file in the include directory', }) self._commit_git('repo_5', { # skia/src 'src_file': 'file in the src directory', }) # Chrome repo. self._commit_git('repo_2', { 'DEPS': self.DEPS_git_pre % {'git_base': self.git_base}, 'myfile': 'src/trunk/src@1' }) self._commit_git('repo_2', { 'DEPS': self.DEPS_post % {'git_base': self.git_base}, 'myfile': 'src/trunk/src@2' }) class FakeRepoBlinkDEPS(FakeReposBase): """Simulates the Blink DEPS transition in Chrome.""" NB_GIT_REPOS = 2 DEPS_pre = 'deps = {"src/third_party/WebKit": "%(git_base)srepo_2",}' DEPS_post = 'deps = {}' def populateGit(self): # Blink repo. self._commit_git('repo_2', { 'OWNERS': 'OWNERS-pre', 'Source/exists_always': '_ignored_', 'Source/exists_before_but_not_after': '_ignored_', }) # Chrome repo. self._commit_git('repo_1', { 'DEPS': self.DEPS_pre % {'git_base': self.git_base}, 'myfile': 'myfile@1', '.gitignore': '/third_party/WebKit', }) self._commit_git('repo_1', { 'DEPS': self.DEPS_post % {'git_base': self.git_base}, 'myfile': 'myfile@2', '.gitignore': '', 'third_party/WebKit/OWNERS': 'OWNERS-post', 'third_party/WebKit/Source/exists_always': '_ignored_', 'third_party/WebKit/Source/exists_after_but_not_before': '_ignored', }) def populateSvn(self): raise NotImplementedError() class FakeReposTestBase(trial_dir.TestCase): """This is vaguely inspired by twisted.""" # Static FakeRepos instances. Lazy loaded. CACHED_FAKE_REPOS = {} # Override if necessary. FAKE_REPOS_CLASS = FakeRepos def setUp(self): super(FakeReposTestBase, self).setUp() if not self.FAKE_REPOS_CLASS in self.CACHED_FAKE_REPOS: self.CACHED_FAKE_REPOS[self.FAKE_REPOS_CLASS] = self.FAKE_REPOS_CLASS() self.FAKE_REPOS = self.CACHED_FAKE_REPOS[self.FAKE_REPOS_CLASS] # No need to call self.FAKE_REPOS.setUp(), it will be called by the child # class. # Do not define tearDown(), since super's version does the right thing and # self.FAKE_REPOS is kept across tests. @property def git_base(self): """Shortcut.""" return self.FAKE_REPOS.git_base def checkString(self, expected, result, msg=None): """Prints the diffs to ease debugging.""" if expected != result: # Strip the begining while expected and result and expected[0] == result[0]: expected = expected[1:] result = result[1:] # The exception trace makes it hard to read so dump it too. if '\n' in result: print result self.assertEquals(expected, result, msg) def check(self, expected, results): """Checks stdout, stderr, returncode.""" self.checkString(expected[0], results[0]) self.checkString(expected[1], results[1]) self.assertEquals(expected[2], results[2]) def assertTree(self, tree, tree_root=None): """Diff the checkout tree with a dict.""" if not tree_root: tree_root = self.root_dir actual = read_tree(tree_root) diff = dict_diff(tree, actual) if diff: logging.debug('Actual %s\n%s' % (tree_root, pprint.pformat(actual))) logging.debug('Expected\n%s' % pprint.pformat(tree)) logging.debug('Diff\n%s' % pprint.pformat(diff)) self.assertEquals(diff, {}) def mangle_git_tree(self, *args): """Creates a 'virtual directory snapshot' to compare with the actual result on disk.""" result = {} for item, new_root in args: repo, rev = item.split('@', 1) tree = self.gittree(repo, rev) for k, v in tree.iteritems(): result[join(new_root, k)] = v return result def githash(self, repo, rev): """Sort-hand: Returns the hash for a git 'revision'.""" return self.FAKE_REPOS.git_hashes[repo][int(rev)][0] def gittree(self, repo, rev): """Sort-hand: returns the directory tree for a git 'revision'.""" return self.FAKE_REPOS.git_hashes[repo][int(rev)][1] def main(argv): fake = FakeRepos() print 'Using %s' % fake.root_dir try: fake.set_up_git() print('Fake setup, press enter to quit or Ctrl-C to keep the checkouts.') sys.stdin.readline() except KeyboardInterrupt: trial_dir.TrialDir.SHOULD_LEAK.leak = True return 0 if __name__ == '__main__': sys.exit(main(sys.argv))