Commit b37760d2 authored by Michael Achenbach, committed by V8 LUCI CQ

[test] Add logging to test runner

This adds Python logging to several code locations that previously
just printed. The set of converted locations isn't complete yet. The
changed code locations should help with investigating hanging test runs.

The default level is WARNING for running tests locally, and INFO
when called from bots that pass the --swarming parameter.

Bug: v8:13113
Change-Id: If3a336703e7c346a5c718f2359b1a80e37e1ca6d
Reviewed-on: https://chromium-review.googlesource.com/c/v8/v8/+/3876183
Commit-Queue: Michael Achenbach <machenbach@chromium.org>
Reviewed-by: Alexander Schulze <alexschulze@chromium.org>
Cr-Commit-Position: refs/heads/main@{#83006}
parent 519c430b
...@@ -7,6 +7,7 @@ from functools import reduce ...@@ -7,6 +7,7 @@ from functools import reduce
from os.path import dirname as up from os.path import dirname as up
import json import json
import logging
import multiprocessing import multiprocessing
import optparse import optparse
import os import os
...@@ -116,6 +117,14 @@ TRY_RELEASE_MODE = ModeConfig( ...@@ -116,6 +117,14 @@ TRY_RELEASE_MODE = ModeConfig(
status_mode="debug", status_mode="debug",
) )
# Set up logging. No need to log a date in timestamps as we can get that from
# test run start times.
logging.basicConfig(
format='%(asctime)s %(message)s',
datefmt='%H:%M:%S',
level=logging.WARNING,
)
class TestRunnerError(Exception): class TestRunnerError(Exception):
pass pass
...@@ -142,6 +151,8 @@ class BaseTestRunner(object): ...@@ -142,6 +151,8 @@ class BaseTestRunner(object):
self.options, args = self._parse_args(parser, sys_args) self.options, args = self._parse_args(parser, sys_args)
self.infra_staging = self.options.infra_staging self.infra_staging = self.options.infra_staging
if self.options.swarming: if self.options.swarming:
logging.getLogger().setLevel(logging.INFO)
# Swarming doesn't print how isolated commands are called. Lets make # Swarming doesn't print how isolated commands are called. Lets make
# this less cryptic by printing it ourselves. # this less cryptic by printing it ourselves.
print(' '.join(sys.argv)) print(' '.join(sys.argv))
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
# found in the LICENSE file. # found in the LICENSE file.
from contextlib import contextmanager from contextlib import contextmanager
import logging
import os import os
import re import re
import signal import signal
...@@ -145,13 +146,10 @@ class BaseCommand(object): ...@@ -145,13 +146,10 @@ class BaseCommand(object):
started_as = self.to_string(relative=True) started_as = self.to_string(relative=True)
process_text = 'process %d started as:\n %s\n' % (process.pid, started_as) process_text = 'process %d started as:\n %s\n' % (process.pid, started_as)
try: try:
print('Attempting to kill ' + process_text) logging.warning('Attempting to kill %s', process_text)
sys.stdout.flush()
self._kill_process(process) self._kill_process(process)
except OSError as e: except OSError:
print(e) logging.exception('Unruly %s', process_text)
print('Unruly ' + process_text)
sys.stdout.flush()
def __str__(self): def __str__(self):
return self.to_string() return self.to_string()
...@@ -216,11 +214,10 @@ def taskkill_windows(process, verbose=False, force=True): ...@@ -216,11 +214,10 @@ def taskkill_windows(process, verbose=False, force=True):
) )
stdout, stderr = tk.communicate() stdout, stderr = tk.communicate()
if verbose: if verbose:
print('Taskkill results for %d' % process.pid) logging.info('Taskkill results for %d', process.pid)
print(stdout) logging.info(stdout.decode('utf-8', errors='ignore'))
print(stderr) logging.info(stderr.decode('utf-8', errors='ignore'))
print('Return code: %d' % tk.returncode) logging.info('Return code: %d', tk.returncode)
sys.stdout.flush()
class WindowsCommand(BaseCommand): class WindowsCommand(BaseCommand):
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
# found in the LICENSE file. # found in the LICENSE file.
import collections import collections
import logging
import os import os
import signal import signal
import traceback import traceback
...@@ -76,8 +77,7 @@ def Worker(fn, work_queue, done_queue, ...@@ -76,8 +77,7 @@ def Worker(fn, work_queue, done_queue,
# SIGINT, SIGTERM or internal hard timeout. # SIGINT, SIGTERM or internal hard timeout.
break break
except Exception as e: except Exception as e:
traceback.print_exc() logging.exception('Unhandled error during worker execution.')
print(">>> EXCEPTION: %s" % e)
done_queue.put(ExceptionResult(e)) done_queue.put(ExceptionResult(e))
# When we reach here on normal tear down, all items have been pulled from # When we reach here on normal tear down, all items have been pulled from
# the done_queue before and this should have no effect. On fast abort, it's # the done_queue before and this should have no effect. On fast abort, it's
...@@ -245,9 +245,8 @@ class DefaultExecutionPool(ContextPool): ...@@ -245,9 +245,8 @@ class DefaultExecutionPool(ContextPool):
self.advance(gen) self.advance(gen)
except KeyboardInterrupt: except KeyboardInterrupt:
assert False, 'Unreachable' assert False, 'Unreachable'
except Exception as e: except Exception:
traceback.print_exc() logging.exception('Unhandled error during pool execution.')
print(">>> EXCEPTION: %s" % e)
finally: finally:
self._terminate() self._terminate()
...@@ -321,12 +320,17 @@ class DefaultExecutionPool(ContextPool): ...@@ -321,12 +320,17 @@ class DefaultExecutionPool(ContextPool):
self.notify("Draining queues") self.notify("Draining queues")
try: try:
while True: self.work_queue.get(False) while True: self.work_queue.get(False)
except: except Empty:
pass pass
except:
logging.exception('Error draining work queue.')
try: try:
while True: self.done_queue.get(False) while True: self.done_queue.get(False)
except: except Empty:
pass pass
except:
logging.exception('Error draining done queue.')
self.notify("Pool terminated")
def _get_result_from_queue(self): def _get_result_from_queue(self):
"""Attempts to get the next result from the queue. """Attempts to get the next result from the queue.
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
import datetime import datetime
import json import json
import logging
import platform import platform
import sys import sys
import time import time
...@@ -140,13 +141,13 @@ class VerboseProgressIndicator(SimpleProgressIndicator): ...@@ -140,13 +141,13 @@ class VerboseProgressIndicator(SimpleProgressIndicator):
# TODO(machenbach): Remove this platform specific hack and implement a proper # TODO(machenbach): Remove this platform specific hack and implement a proper
# feedback channel from the workers, providing which tests are currently run. # feedback channel from the workers, providing which tests are currently run.
def _print_processes(self): def _log_processes(self):
procs = self.context.list_processes() procs = self.context.list_processes()
if procs: if procs:
self._print('List of processes:') logging.info('List of processes:')
for pid, cmd in self.context.list_processes(): for pid, cmd in self.context.list_processes():
# Show command with pid, but other process info cut off. # Show command with pid, but other process info cut off.
self._print('pid: %d cmd: %s' % (pid, cmd)) logging.info('pid: %d cmd: %s', pid, cmd)
def _ensure_delay(self, delay): def _ensure_delay(self, delay):
return time.time() - self._last_printed_time > delay return time.time() - self._last_printed_time > delay
...@@ -156,11 +157,11 @@ class VerboseProgressIndicator(SimpleProgressIndicator): ...@@ -156,11 +157,11 @@ class VerboseProgressIndicator(SimpleProgressIndicator):
# Print something every 30 seconds to not get killed by an output # Print something every 30 seconds to not get killed by an output
# timeout. # timeout.
self._print('Still working...') self._print('Still working...')
self._print_processes() self._log_processes()
def on_event(self, event): def on_event(self, event):
self._print(event) logging.info(event)
self._print_processes() self._log_processes()
class CIProgressIndicator(VerboseProgressIndicator): class CIProgressIndicator(VerboseProgressIndicator):
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
# Use of this source code is governed by a BSD-style license that can be # Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file. # found in the LICENSE file.
import logging
import signal import signal
from . import base from . import base
...@@ -16,12 +17,12 @@ class SignalProc(base.TestProcObserver): ...@@ -16,12 +17,12 @@ class SignalProc(base.TestProcObserver):
signal.signal(signal.SIGTERM, self._on_sigterm) signal.signal(signal.SIGTERM, self._on_sigterm)
def _on_ctrlc(self, _signum, _stack_frame): def _on_ctrlc(self, _signum, _stack_frame):
print('>>> Ctrl-C detected, early abort...') logging.warning('Ctrl-C detected, early abort...')
self.exit_code = utils.EXIT_CODE_INTERRUPTED self.exit_code = utils.EXIT_CODE_INTERRUPTED
self.stop() self.stop()
def _on_sigterm(self, _signum, _stack_frame): def _on_sigterm(self, _signum, _stack_frame):
print('>>> SIGTERM received, early abort...') logging.warning('SIGTERM received, early abort...')
self.exit_code = utils.EXIT_CODE_TERMINATED self.exit_code = utils.EXIT_CODE_TERMINATED
self.stop() self.stop()
......
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
# Use of this source code is governed by a BSD-style license that can be # Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file. # found in the LICENSE file.
import logging
import time import time
from . import base from . import base
...@@ -31,5 +32,5 @@ class TimeoutProc(base.TestProcObserver): ...@@ -31,5 +32,5 @@ class TimeoutProc(base.TestProcObserver):
def __on_event(self): def __on_event(self):
if not self.is_stopped: if not self.is_stopped:
if time.time() - self._start > self._duration_sec: if time.time() - self._start > self._duration_sec:
print('>>> Total timeout reached.') logging.info('Total timeout reached.')
self.stop() self.stop()
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
# found in the LICENSE file. # found in the LICENSE file.
import heapq import heapq
import logging
import os import os
import platform import platform
import signal import signal
...@@ -29,10 +30,11 @@ def list_processes_linux(): ...@@ -29,10 +30,11 @@ def list_processes_linux():
] ]
# Filter strange process with name as out dir. # Filter strange process with name as out dir.
return [p for p in processes if p[1] != OUT_DIR] return [p for p in processes if p[1] != OUT_DIR]
except Exception as e: except subprocess.CalledProcessError as e:
# TODO(https://crbug.com/v8/13101): Remove after investigation. # Return code 1 means no processes found.
print('Fetching process list failed.') if e.returncode != 1:
print(e) # TODO(https://crbug.com/v8/13101): Remove after investigation.
logging.exception('Fetching process list failed.')
return [] return []
...@@ -45,10 +47,10 @@ def kill_processes_linux(): ...@@ -45,10 +47,10 @@ def kill_processes_linux():
return return
for pid, cmd in list_processes_linux(): for pid, cmd in list_processes_linux():
try: try:
print('Attempting to kill %d - %s' % (pid, cmd)) logging.warning('Attempting to kill %d - %s', pid, cmd)
os.kill(pid, signal.SIGKILL) os.kill(pid, signal.SIGKILL)
except: except:
pass logging.exception('Failed to kill process')
class FixedSizeTopList(): class FixedSizeTopList():
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment