Skip to content

Commit 8a68acb

Browse files
mfschwartzdhermes
authored andcommitted
Cherry-pick of 7c770c8
Done via: $ git cherry-pick 7c770c8 Catch checksum validation failure from download, delete corrupt file (#4133)
1 parent 329b14c commit 8a68acb

3 files changed

Lines changed: 83 additions & 3 deletions

File tree

storage/google/cloud/storage/blob.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -478,8 +478,13 @@ def download_to_filename(self, filename, client=None):
478478
479479
:raises: :class:`google.cloud.exceptions.NotFound`
480480
"""
481-
with open(filename, 'wb') as file_obj:
482-
self.download_to_file(file_obj, client=client)
481+
try:
482+
with open(filename, 'wb') as file_obj:
483+
self.download_to_file(file_obj, client=client)
484+
except resumable_media.DataCorruption as exc:
485+
# Delete the corrupt downloaded file.
486+
os.remove(filename)
487+
raise
483488

484489
updated = self.updated
485490
if updated is not None:

storage/setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@
5353
REQUIREMENTS = [
5454
'google-cloud-core >= 0.27.0, < 0.28dev',
5555
'google-auth >= 1.0.0',
56-
'google-resumable-media >= 0.2.3',
56+
'google-resumable-media >= 0.3.0',
5757
'requests >= 2.18.0',
5858
]
5959

storage/tests/unit/test_blob.py

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,13 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15+
import base64
1516
import datetime
17+
import hashlib
1618
import io
1719
import json
1820
import os
21+
import tempfile
1922
import unittest
2023

2124
import mock
@@ -607,6 +610,78 @@ def test_download_to_filename(self):
607610
def test_download_to_filename_wo_updated(self):
608611
self._download_to_filename_helper()
609612

613+
def test_download_to_filename_corrupted(self):
614+
from google.resumable_media import DataCorruption
615+
from google.resumable_media.requests.download import _CHECKSUM_MISMATCH
616+
617+
blob_name = 'blob-name'
618+
transport = mock.Mock(spec=['request'])
619+
empty_hash = base64.b64encode(
620+
hashlib.md5(b'').digest()).decode(u'utf-8')
621+
headers = {'x-goog-hash': 'md5=' + empty_hash}
622+
response = mock.MagicMock(
623+
headers=headers,
624+
status_code=http_client.OK,
625+
spec=[
626+
'__enter__',
627+
'__exit__',
628+
'headers',
629+
'iter_content',
630+
'status_code',
631+
],
632+
)
633+
# i.e. context manager returns ``self``.
634+
response.__enter__.return_value = response
635+
response.__exit__.return_value = None
636+
chunks = (b'noms1', b'coooookies2')
637+
response.iter_content.return_value = iter(chunks)
638+
639+
transport.request.return_value = response
640+
# Create a fake client/bucket and use them in the Blob() constructor.
641+
client = mock.Mock(_http=transport, spec=['_http'])
642+
bucket = mock.Mock(
643+
client=client,
644+
user_project=None,
645+
spec=['client', 'user_project'],
646+
)
647+
media_link = 'http://example.com/media/'
648+
properties = {'mediaLink': media_link}
649+
blob = self._make_one(blob_name, bucket=bucket, properties=properties)
650+
# Make sure the download is **not** chunked.
651+
self.assertIsNone(blob.chunk_size)
652+
653+
# Make sure the hash will be wrong.
654+
content = b''.join(chunks)
655+
expected_hash = base64.b64encode(
656+
hashlib.md5(content).digest()).decode(u'utf-8')
657+
self.assertNotEqual(empty_hash, expected_hash)
658+
659+
# Try to download into a temporary file (don't use
660+
# `_NamedTemporaryFile` it will try to remove after the file is
661+
# already removed)
662+
filehandle, filename = tempfile.mkstemp()
663+
os.close(filehandle)
664+
with self.assertRaises(DataCorruption) as exc_info:
665+
blob.download_to_filename(filename)
666+
667+
msg = _CHECKSUM_MISMATCH.format(media_link, empty_hash, expected_hash)
668+
self.assertEqual(exc_info.exception.args, (msg,))
669+
# Make sure the file was cleaned up.
670+
self.assertFalse(os.path.exists(filename))
671+
672+
# Check the mocks.
673+
response.__enter__.assert_called_once_with()
674+
response.__exit__.assert_called_once_with(None, None, None)
675+
response.iter_content.assert_called_once_with(
676+
chunk_size=8192, decode_unicode=False)
677+
transport.request.assert_called_once_with(
678+
'GET',
679+
media_link,
680+
data=None,
681+
headers={'accept-encoding': 'gzip'},
682+
stream=True,
683+
)
684+
610685
def test_download_to_filename_w_key(self):
611686
import os
612687
import time

0 commit comments

Comments
 (0)