Commit 6ecb4734 authored by Josip Sokcevic's avatar Josip Sokcevic Committed by LUCI CQ

Recover from aborted tar.gz extraction

If download from google storage gets terminated while extracting .tar.gz
archive, it won't be extracted on the next run which results in invalid
workspace. This patches places a flag file if extraction wasn't
completed. Such flag is checked on next execution and forces re-download
if found.

This does not fix currently broken workspaces.

R=dpranke@google.com, ehmladonado@chromium.org

Bug: 1189273
Change-Id: I4047d98c193493d9a0fe20a46043e1f8376e5c7e
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/tools/depot_tools/+/2774085Reviewed-by: 's avatarDirk Pranke <dpranke@google.com>
Commit-Queue: Josip Sokcevic <sokcevic@google.com>
parent e58ece5f
......@@ -254,9 +254,24 @@ def _downloader_worker_thread(thread_num, q, force, base_url,
continue
extract_dir = output_filename[:-len('.tar.gz')]
if os.path.exists(output_filename) and not force:
if not extract or os.path.exists(extract_dir):
if get_sha1(output_filename) == input_sha1_sum:
continue
skip = get_sha1(output_filename) == input_sha1_sum
if extract:
# Additional condition for extract:
# 1) extract_dir must exist
# 2) .tmp flag file mustn't exist
if not os.path.exists(extract_dir):
out_q.put('%d> Extract dir %s does not exist, re-downloading...' %
(thread_num, extract_dir))
skip = False
# .tmp file is created just before extraction and removed just after
# extraction. If such file exists, it means the process was terminated
# mid-extraction and therefore needs to be extracted again.
elif os.path.exists(extract_dir + '.tmp'):
out_q.put('%d> Detected tmp flag file for %s, '
're-downloading...' % (thread_num, output_filename))
skip = False
if skip:
continue
# Check if file exists.
file_url = '%s/%s' % (base_url, input_sha1_sum)
(code, _, err) = gsutil.check_call('ls', file_url)
......@@ -336,7 +351,9 @@ def _downloader_worker_thread(thread_num, q, force, base_url,
out_q.put('%d> Extracting %d entries from %s to %s' %
(thread_num, len(tar.getmembers()),output_filename,
extract_dir))
tar.extractall(path=dirname)
with open(extract_dir + '.tmp', 'a'):
tar.extractall(path=dirname)
os.remove(extract_dir + '.tmp')
# Set executable bit.
if sys.platform == 'cygwin':
# Under cygwin, mark all files as executable. The executable flag in
......
......@@ -295,6 +295,8 @@ class DownloadTests(unittest.TestCase):
shutil.rmtree(output_dirname)
sha1_hash = download_from_google_storage.get_sha1(output_filename)
input_filename = '%s/%s' % (self.base_url, sha1_hash)
# Initial download
self.queue.put((sha1_hash, output_filename))
self.queue.put((None, None))
stdout_queue = queue.Queue()
......@@ -322,6 +324,64 @@ class DownloadTests(unittest.TestCase):
self.assertTrue(os.path.exists(output_dirname))
self.assertTrue(os.path.exists(extracted_filename))
# Test noop download
self.queue.put((sha1_hash, output_filename))
self.queue.put((None, None))
stdout_queue = queue.Queue()
download_from_google_storage._downloader_worker_thread(0,
self.queue,
False,
self.base_url,
self.gsutil,
stdout_queue,
self.ret_codes,
True,
True,
delete=False)
self.assertEqual(list(stdout_queue.queue), [])
self.assertEqual(self.gsutil.history, expected_calls)
self.assertEqual(list(self.ret_codes.queue), [])
self.assertTrue(os.path.exists(output_dirname))
self.assertTrue(os.path.exists(extracted_filename))
# With dirty flag file, previous extraction wasn't complete
with open(os.path.join(self.base_path, 'subfolder.tmp'), 'a'):
pass
self.queue.put((sha1_hash, output_filename))
self.queue.put((None, None))
stdout_queue = queue.Queue()
download_from_google_storage._downloader_worker_thread(0,
self.queue,
False,
self.base_url,
self.gsutil,
stdout_queue,
self.ret_codes,
True,
True,
delete=False)
expected_calls += [('check_call', ('ls', input_filename)),
('check_call', ('cp', input_filename, output_filename))]
if sys.platform != 'win32':
expected_calls.append(
('check_call', ('stat', 'gs://sometesturl/%s' % sha1_hash)))
expected_output = [
'0> Detected tmp flag file for %s, re-downloading...' %
(output_filename),
'0> Downloading %s@%s...' % (output_filename, sha1_hash),
'0> Removed %s...' % (output_dirname),
'0> Extracting 3 entries from %s to %s' %
(output_filename, output_dirname),
]
expected_ret_codes = []
self.assertEqual(list(stdout_queue.queue), expected_output)
self.assertEqual(self.gsutil.history, expected_calls)
self.assertEqual(list(self.ret_codes.queue), expected_ret_codes)
self.assertTrue(os.path.exists(output_dirname))
self.assertTrue(os.path.exists(extracted_filename))
def test_download_worker_skips_not_found_file(self):
sha1_hash = '7871c8e24da15bad8b0be2c36edc9dc77e37727f'
input_filename = '%s/%s' % (self.base_url, sha1_hash)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment