# coding=utf8
# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Collection of subprocess wrapper functions.

In theory you shouldn't need anything else in subprocess, or this module failed.
"""

10
import codecs
11
import errno
12
import io
13 14
import logging
import os
15 16 17 18 19 20

try:
  import Queue
except ImportError:  # For Py3 compatibility
  import queue as Queue

21 22 23 24 25
import subprocess
import sys
import time
import threading

26 27
# Cache the string-escape codec to ensure subprocess can find it later.
# See crbug.com/912292#c2 for context.
if sys.version_info.major == 2:
  codecs.lookup('string-escape')


# Constants forwarded from subprocess.
PIPE = subprocess.PIPE
STDOUT = subprocess.STDOUT
# Sends stdout or stderr to os.devnull.
VOID = object()
# Error code when a process was killed because it timed out.
TIMED_OUT = -2001

# Globals.
# Set to True if you somehow need to disable this hack.
SUBPROCESS_CLEANUP_HACKED = False


class CalledProcessError(subprocess.CalledProcessError):
  """Augment the standard exception with more data.

  In addition to the standard returncode/cmd/output attributes, instances
  carry:
    stdout: same object as |output|, kept for backward compatibility.
    stderr: captured stderr, or None.
    cwd: working directory the command was run in, or None.
  """
  def __init__(self, returncode, cmd, cwd, stdout, stderr):
    super(CalledProcessError, self).__init__(returncode, cmd, output=stdout)
    self.stdout = self.output  # for backward compatibility.
    self.stderr = stderr
    self.cwd = cwd

  @staticmethod
  def _as_text(data):
    """Decodes a bytes payload so it can be joined into the message.

    On Python 2 bytes is str, so the value is returned untouched.
    """
    if isinstance(data, bytes) and not isinstance(data, str):
      return data.decode('utf-8', 'replace')
    return data

  def __str__(self):
    cmd = self.cmd
    # Only join real argument sequences; joining a command given as a single
    # string would insert a space between every character.
    if isinstance(cmd, (list, tuple)):
      cmd = ' '.join(cmd)
    out = 'Command %r returned non-zero exit status %s' % (
        cmd, self.returncode)
    if self.cwd:
      out += ' in ' + self.cwd
    # Empty or missing stdout/stderr are dropped from the message.
    return '\n'.join(filter(
        None, (out, self._as_text(self.stdout), self._as_text(self.stderr))))


60 61 62 63
class CygwinRebaseError(CalledProcessError):
  """Occurs when cygwin's fork() emulation fails due to rebased dll.

  Adds no behavior of its own; it only gives this failure a distinct type.
  """


64 65 66 67 68 69 70
## Utility functions


def kill_pid(pid):
  """Kills a process by its process id.

  Returns the result of os.kill() (None), or None without doing anything when
  the signal module cannot be imported. Errors raised by os.kill() itself
  propagate to the caller.
  """
  try:
    # Unable to import 'module'
    # pylint: disable=no-member,F0401
    import signal
  except ImportError:
    return None
  # signal.SIGKILL is not defined on every platform (e.g. Windows); fall back
  # to SIGTERM instead of failing with AttributeError.
  return os.kill(pid, getattr(signal, 'SIGKILL', signal.SIGTERM))


def kill_win(process):
  """Kills a process with its windows handle.

  Has no effect on other platforms: when win32process cannot be imported the
  function returns None without touching |process|.
  """
  try:
    # Unable to import 'module'
    # pylint: disable=import-error
    import win32process
  except ImportError:
    return None
  # Access to a protected member _handle of a client class
  # pylint: disable=protected-access
  return win32process.TerminateProcess(process._handle, -1)


def add_kill():
  """Installs a kill() method on subprocess.Popen when it lacks one.

  Only relevant for python <2.6; when Popen already has kill() nothing is
  changed.
  """
  if not hasattr(subprocess.Popen, 'kill'):
    if sys.platform == 'win32':
      subprocess.Popen.kill = kill_win
    else:
      subprocess.Popen.kill = lambda proc: kill_pid(proc.pid)


105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124
def hack_subprocess():
  """subprocess functions may throw exceptions when used in multiple threads.

  Replaces subprocess._cleanup with a no-op the first time it is called while
  more than one thread is alive, and records that in
  SUBPROCESS_CLEANUP_HACKED so the patch is applied at most once.

  See http://bugs.python.org/issue1731717 for more information.
  """
  global SUBPROCESS_CLEANUP_HACKED
  # Only hack if there is ever multiple threads.
  # There is no point to leak with only one thread.
  # active_count() replaces the deprecated activeCount() alias.
  if SUBPROCESS_CLEANUP_HACKED or threading.active_count() == 1:
    return
  # pylint: disable=protected-access
  subprocess._cleanup = lambda: None
  SUBPROCESS_CLEANUP_HACKED = True


def get_english_env(env):
  """Forces LANG and/or LANGUAGE to be English.

  Forces encoding to utf-8 for subprocesses.

  Args:
    env: mapping of environment variables, or None to use os.environ. The
        mapping passed in is never modified.

  Returns:
    None if it is unnecessary (on win32, or when both variables are unset or
    already start with 'en'); otherwise a copy of the environment with the
    non-English variables set to 'en_US.UTF-8'.
  """
  if sys.platform == 'win32':
    return None
  # Only None means "inherit"; an explicit empty mapping must stay empty.
  if env is None:
    env = os.environ

  def is_english(name):
    # An unset variable counts as English.
    return env.get(name, 'en').startswith('en')

  # Test if it is necessary at all.
  if is_english('LANG') and is_english('LANGUAGE'):
    return None

  # Requires modifications.
  env = env.copy()
  for name in ('LANG', 'LANGUAGE'):
    if not is_english(name):
      env[name] = 'en_US.UTF-8'
  return env


145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
class NagTimer(object):
  """
  Fires a callback whenever a full check period elapses with no event.

  A typical event is terminal output arriving from a subprocess; a typical
  callback prints a warning to stderr saying that the subprocess seems hung.

  The callback receives the number of seconds elapsed since the last recorded
  event.
  """
  def __init__(self, interval, cb):
    self.interval = interval
    self.cb = cb
    self.timer = threading.Timer(self.interval, self.fn)
    self.last_output = 0
    self.previous_last_output = 0

  def start(self):
    """Records the current time as the last event and starts the timer."""
    started = time.time()
    self.last_output = started
    self.previous_last_output = started
    self.timer.start()

  def event(self):
    """Records that an event happened just now."""
    self.last_output = time.time()

  def fn(self):
    """Timer body: nags if nothing happened, then schedules the next check."""
    fired_at = time.time()
    # No event was recorded since the previous check.
    if self.previous_last_output == self.last_output:
      self.cb(fired_at - self.previous_last_output)
    # Use 0.1 fudge factor, just in case
    #   (self.last_output - fired_at) is very close to zero.
    remaining = (self.last_output - fired_at - 0.1) % self.interval
    self.previous_last_output = self.last_output
    self.timer = threading.Timer(remaining + 0.1, self.fn)
    self.timer.start()

  def cancel(self):
    """Cancels the currently scheduled check."""
    self.timer.cancel()


181
class Popen(subprocess.Popen):
  """Wraps subprocess.Popen() with various workarounds.

  - Forces English output since it's easier to parse the stdout if it is always
    in English.
  - Sets shell=True on windows by default. You can override this by forcing
    shell parameter to a value.
  - Adds support for VOID to not buffer when not needed.
  - Adds self.start property.

  Note: Popen() can throw OSError when cwd or args[0] doesn't exist. Translate
  exceptions generated by cygwin when it fails trying to emulate fork().
  """
  # subprocess.Popen.__init__() is not threadsafe; there is a race between
  # creating the exec-error pipe for the child and setting it to CLOEXEC during
  # which another thread can fork and cause the pipe to be inherited by its
  # descendents, which will cause the current Popen to hang until all those
  # descendents exit. Protect this with a lock so that only one fork/exec can
  # happen at a time.
  popen_lock = threading.Lock()

  def __init__(self, args, **kwargs):
    """Starts the child process.

    Args:
      args: command as a string or as a list/tuple of strings.
      **kwargs: forwarded to subprocess.Popen(). stdin may be VOID; stdout and
          stderr may be VOID, os.devnull or a callable that receives the
          output data.

    Raises:
      CalledProcessError: args is neither a string nor a list/tuple.
      CygwinRebaseError: fork() emulation failed with EAGAIN on cygwin.
      OSError: the process could not be started.
    """
    # Make sure we hack subprocess if necessary.
    hack_subprocess()
    add_kill()

    env = get_english_env(kwargs.get('env'))
    if env:
      kwargs['env'] = env
    if kwargs.get('shell') is None:
      # *Sigh*:  Windows needs shell=True, or else it won't search %PATH% for
      # the executable, but shell=True makes subprocess on Linux fail when it's
      # called with a list because it only tries to execute the first item in
      # the list.
      kwargs['shell'] = bool(sys.platform=='win32')

    # pylint: disable=undefined-variable
    args_is_string = isinstance(args, str) or (
        sys.version_info.major == 2 and isinstance(args, unicode))
    if args_is_string:
      tmp_str = args
    elif isinstance(args, (list, tuple)):
      tmp_str = ' '.join(args)
    else:
      raise CalledProcessError(None, args, kwargs.get('cwd'), None, None)
    if kwargs.get('cwd', None):
      tmp_str += ';  cwd=%s' % kwargs['cwd']
    logging.debug(tmp_str)

    self.stdout_cb = None
    self.stderr_cb = None
    self.stdin_is_void = False
    self.stdout_is_void = False
    self.stderr_is_void = False
    self.cmd_str = tmp_str

    # os.devnull handles opened on behalf of VOID. The child gets its own
    # copy, so they are closed here once the process has been spawned.
    void_files = []

    if kwargs.get('stdin') is VOID:
      kwargs['stdin'] = open(os.devnull, 'r')
      void_files.append(kwargs['stdin'])
      self.stdin_is_void = True

    for stream in ('stdout', 'stderr'):
      if kwargs.get(stream) in (VOID, os.devnull):
        kwargs[stream] = open(os.devnull, 'w')
        void_files.append(kwargs[stream])
        setattr(self, stream + '_is_void', True)
      if callable(kwargs.get(stream)):
        # Callbacks are fed by the tee threads; the child writes to a pipe.
        setattr(self, stream + '_cb', kwargs[stream])
        kwargs[stream] = PIPE

    self.start = time.time()
    self.timeout = None
    self.nag_timer = None
    self.nag_max = None
    self.shell = kwargs.get('shell', None)
    # Silence pylint on MacOSX
    self.returncode = None

    try:
      with self.popen_lock:
        super(Popen, self).__init__(args, **kwargs)
    except OSError as e:
      if e.errno == errno.EAGAIN and sys.platform == 'cygwin':
        # Convert fork() emulation failure into a CygwinRebaseError().
        raise CygwinRebaseError(
            e.errno,
            args,
            kwargs.get('cwd'),
            None,
            'Visit '
            'http://code.google.com/p/chromium/wiki/CygwinDllRemappingFailure '
            'to learn how to fix this error; you need to rebase your cygwin '
            'dlls')
      # Popen() can throw OSError when cwd or args[0] doesn't exist.
      # For a string command args[0] would only be its first character.
      executable = args if args_is_string else args[0]
      raise OSError('Execution failed with error: %s.\n'
                    'Check that %s or %s exist and have execution permission.'
                    % (str(e), kwargs.get('cwd'), executable))
    finally:
      for void_file in void_files:
        void_file.close()

  @staticmethod
  def _join_chunks(chunks, pipe):
    """Concatenates the chunks read from |pipe| into a single value.

    The separator is text when |pipe| is a text-mode stream and bytes
    otherwise, so Python 3 byte chunks can be joined. On Python 2 bytes is
    str, so the result is a plain str as before.
    """
    empty = u'' if isinstance(pipe, io.TextIOBase) else b''
    return empty.join(chunks)

  def _tee_threads(self, input):  # pylint: disable=redefined-builtin
    """Does I/O for a process's pipes using threads.

    It's the simplest and slowest implementation. Expect very slow behavior.

    If there is a callback and it doesn't keep up with the calls, the timeout
    effectiveness will be delayed accordingly.
    """
    # Queue of either of <threadname> when done or (<threadname>, data).  In
    # theory we would like to limit to ~64kb items to not cause large memory
    # usage when the callback blocks. It is not done because it slows down
    # processing on OSX10.6 by a factor of 2x, making it even slower than
    # Windows!  Revisit this decision if it becomes a problem, e.g. crash
    # because of memory exhaustion.
    queue = Queue.Queue()
    done = threading.Event()
    # Rebound below when a nag timer is requested; the reader threads look it
    # up through the closure each time they receive data.
    nag = None

    def write_stdin():
      try:
        stdin_data = input
        # io.BytesIO() only accepts bytes; text input is sent as utf-8.
        if isinstance(stdin_data, type(u'')):
          stdin_data = stdin_data.encode('utf-8')
        stdin_io = io.BytesIO(stdin_data)
        while True:
          data = stdin_io.read(1024)
          if data:
            self.stdin.write(data)
          else:
            self.stdin.close()
            break
      finally:
        queue.put('stdin')

    def _queue_pipe_read(pipe, name):
      """Queues characters read from a pipe into a queue."""
      try:
        while True:
          data = pipe.read(1)
          if not data:
            break
          if nag:
            nag.event()
          queue.put((name, data))
      finally:
        queue.put(name)

    def timeout_fn():
      try:
        done.wait(self.timeout)
      finally:
        queue.put('timeout')

    def wait_fn():
      try:
        self.wait()
      finally:
        queue.put('wait')

    # Starts up to 5 threads:
    # Wait for the process to quit
    # Read stdout
    # Read stderr
    # Write stdin
    # Timeout
    threads = {
        'wait': threading.Thread(target=wait_fn),
    }
    if self.timeout is not None:
      threads['timeout'] = threading.Thread(target=timeout_fn)
    if self.stdout_cb:
      threads['stdout'] = threading.Thread(
          target=_queue_pipe_read, args=(self.stdout, 'stdout'))
    if self.stderr_cb:
      threads['stderr'] = threading.Thread(
          target=_queue_pipe_read, args=(self.stderr, 'stderr'))
    if input:
      threads['stdin'] = threading.Thread(target=write_stdin)
    elif self.stdin:
      # Pipe but no input, make sure it's closed.
      self.stdin.close()
    # dict.values() works on both Python 2 and 3, unlike itervalues().
    for t in threads.values():
      t.start()

    if self.nag_timer:
      def _nag_cb(elapsed):
        logging.warning('  No output for %.0f seconds from command:' % elapsed)
        logging.warning('    %s' % self.cmd_str)
        if (self.nag_max and
            int('%.0f' % (elapsed / self.nag_timer)) >= self.nag_max):
          queue.put('timeout')
          done.set()  # Must do this so that timeout thread stops waiting.
      nag = NagTimer(self.nag_timer, _nag_cb)
      nag.start()

    timed_out = False
    try:
      # This thread needs to be optimized for speed.
      while threads:
        item = queue.get()
        # Items are either (<threadname>, data) tuples or a bare <threadname>.
        if item[0] == 'stdout':
          self.stdout_cb(item[1])
        elif item[0] == 'stderr':
          self.stderr_cb(item[1])
        else:
          # A thread terminated.
          if item in threads:
            threads[item].join()
            del threads[item]
          if item == 'wait':
            # Terminate the timeout thread if necessary.
            done.set()
          elif item == 'timeout' and not timed_out and self.poll() is None:
            logging.debug('Timed out after %.0fs: killing' % (
                time.time() - self.start))
            self.kill()
            timed_out = True
    finally:
      # Stop the threads.
      done.set()
      if nag:
        nag.cancel()
      if 'wait' in threads:
        # Accelerate things, otherwise it would hang until the child process is
        # done.
        logging.debug('Killing child because of an exception')
        self.kill()
      # Join threads.
      for thread in threads.values():
        thread.join()
      if timed_out:
        self.returncode = TIMED_OUT

  # pylint: disable=arguments-differ,W0622
  def communicate(self, input=None, timeout=None, nag_timer=None,
                  nag_max=None):
    """Adds timeout and callbacks support.

    Returns (stdout, stderr) like subprocess.Popen().communicate().

    - The process will be killed after |timeout| seconds and returncode set to
      TIMED_OUT.
    - If the subprocess runs for |nag_timer| seconds without producing terminal
      output, print a warning to stderr.
    - If |nag_max| is set, the process is killed like on a timeout once it has
      been silent for about |nag_max| * |nag_timer| seconds.

    Raises TypeError when |timeout| is used together with shell=True.
    """
    self.timeout = timeout
    self.nag_timer = nag_timer
    self.nag_max = nag_max
    if (not self.timeout and not self.nag_timer and
        not self.stdout_cb and not self.stderr_cb):
      return super(Popen, self).communicate(input)

    if self.timeout and self.shell:
      raise TypeError(
          'Using timeout and shell simultaneously will cause a process leak '
          'since the shell will be killed instead of the child process.')

    stdout = None
    stderr = None
    # Convert to a lambda to workaround python's deadlock.
    # http://docs.python.org/library/subprocess.html#subprocess.Popen.wait
    # When the pipe fills up, it would deadlock this process.
    if self.stdout and not self.stdout_cb and not self.stdout_is_void:
      stdout = []
      self.stdout_cb = stdout.append
    if self.stderr and not self.stderr_cb and not self.stderr_is_void:
      stderr = []
      self.stderr_cb = stderr.append
    self._tee_threads(input)
    if stdout is not None:
      stdout = self._join_chunks(stdout, self.stdout)
    if stderr is not None:
      stderr = self._join_chunks(stderr, self.stderr)
    return (stdout, stderr)

447

448
def communicate(args, timeout=None, nag_timer=None, nag_max=None, **kwargs):
  """Wraps subprocess.Popen().communicate() and add timeout support.

  Returns ((stdout, stderr), returncode).

  - The process will be killed after |timeout| seconds and returncode set to
    TIMED_OUT.
  - If the subprocess runs for |nag_timer| seconds without producing terminal
    output, print a warning to stderr.
  - |nag_max| is forwarded to Popen.communicate() together with |nag_timer|.
  - Automatically passes stdin content as input so do not specify stdin=PIPE.
  """
  stdin = kwargs.pop('stdin', None)
  if stdin is not None:
    # pylint: disable=undefined-variable
    if isinstance(stdin, (bytes, str)) or (sys.version_info.major == 2 and
                                           isinstance(stdin, unicode)):
      # When stdin is passed as an argument, use it as the actual input data and
      # set the Popen() parameter accordingly.
      kwargs['stdin'] = PIPE
    else:
      kwargs['stdin'] = stdin
      stdin = None

  proc = Popen(args, **kwargs)
  # Empty input data is passed as None, i.e. nothing to write.
  out = proc.communicate(stdin or None, timeout, nag_timer, nag_max)
  return out, proc.returncode
475 476


477 478 479 480
def call(args, **kwargs):
  """Emulates subprocess.call().

  stdout=PIPE and stderr=PIPE are automatically replaced with VOID: the
  captured data could never reach the caller, since only the return code is
  returned and no code path raises subprocess2.CalledProcessError.
  """
  for stream in ('stdout', 'stderr'):
    if kwargs.get(stream) == PIPE:
      kwargs[stream] = VOID
  _, returncode = communicate(args, **kwargs)
  return returncode


491
def check_call_out(args, **kwargs):
  """Improved version of subprocess.check_call().

  Returns (stdout, stderr), unlike subprocess.check_call().

  Raises CalledProcessError, carrying the captured stdout and stderr, when the
  return code is non-zero.
  """
  out, returncode = communicate(args, **kwargs)
  if returncode:
    raise CalledProcessError(
        returncode, args, kwargs.get('cwd'), out[0], out[1])
  return out


503 504 505 506 507 508
def check_call(args, **kwargs):
  """Emulate subprocess.check_call().

  Delegates to check_call_out(), discards the captured output and returns 0.
  """
  _ = check_call_out(args, **kwargs)
  return 0


509 510 511
def capture(args, **kwargs):
  """Captures stdout of a process call and returns it.

  Returns stdout.

  - Discards returncode.
  - Blocks stdin by default if not specified since no output will be visible.
  """
  kwargs.setdefault('stdin', VOID)

  # Like check_output, deny the caller from using stdout arg.
  return communicate(args, stdout=PIPE, **kwargs)[0][0]
521 522 523


def check_output(args, **kwargs):
  """Emulates subprocess.check_output().

  Captures stdout of a process call and returns stdout only.

  - Throws if return code is not 0.
  - Works even prior to python 2.7.
  - Blocks stdin by default if not specified since no output will be visible.
  - As per doc, "The stdout argument is not allowed as it is used internally."

  Raises ValueError when a stdout argument is passed.
  """
  kwargs.setdefault('stdin', VOID)
  if 'stdout' in kwargs:
    raise ValueError('stdout argument not allowed, it would be overridden.')
  return check_call_out(args, stdout=PIPE, **kwargs)[0]