diff --git a/google/cloud/storage/_media/requests/download.py b/google/cloud/storage/_media/requests/download.py index 2c1b9392c..6222148b3 100644 --- a/google/cloud/storage/_media/requests/download.py +++ b/google/cloud/storage/_media/requests/download.py @@ -43,6 +43,12 @@ Please restart the download. """ +_RESPONSE_HEADERS_INFO = """\ +The X-Goog-Stored-Content-Length is {}. The X-Goog-Stored-Content-Encoding is {}. +The download request read {} bytes of data. +If the download was incomplete, please check the network connection and restart the download. +""" + class Download(_request_helpers.RequestsMixin, _download.Download): """Helper to manage downloading a resource from a Google API. @@ -141,13 +147,30 @@ def _write_to_stream(self, response): ): actual_checksum = _helpers.prepare_checksum_digest(checksum_object.digest()) if actual_checksum != expected_checksum: - msg = _CHECKSUM_MISMATCH.format( - self.media_url, - expected_checksum, - actual_checksum, - checksum_type=self.checksum.upper(), + headers = self._get_headers(response) + x_goog_encoding = headers.get("x-goog-stored-content-encoding") + x_goog_length = headers.get("x-goog-stored-content-length") + content_length_msg = _RESPONSE_HEADERS_INFO.format( + x_goog_length, x_goog_encoding, self._bytes_downloaded ) - raise DataCorruption(response, msg) + if ( + x_goog_length + and self._bytes_downloaded < int(x_goog_length) + and x_goog_encoding != "gzip" + ): + # The library will attempt to trigger a retry by raising a ConnectionError, if + # (a) bytes_downloaded is less than response header x-goog-stored-content-length, and + # (b) the object is not gzip-compressed when stored in Cloud Storage. + raise ConnectionError(content_length_msg) + else: + msg = _CHECKSUM_MISMATCH.format( + self.media_url, + expected_checksum, + actual_checksum, + checksum_type=self.checksum.upper(), + ) + msg += content_length_msg + raise DataCorruption(response, msg) def consume( self, @@ -339,13 +362,31 @@ def _write_to_stream(self, response): actual_checksum = _helpers.prepare_checksum_digest(checksum_object.digest()) if actual_checksum != expected_checksum: - msg = _CHECKSUM_MISMATCH.format( - self.media_url, - expected_checksum, - actual_checksum, - checksum_type=self.checksum.upper(), + headers = self._get_headers(response) + x_goog_encoding = headers.get("x-goog-stored-content-encoding") + x_goog_length = headers.get("x-goog-stored-content-length") + content_length_msg = _RESPONSE_HEADERS_INFO.format( + x_goog_length, x_goog_encoding, self._bytes_downloaded ) - raise DataCorruption(response, msg) + if ( + x_goog_length + and self._bytes_downloaded < int(x_goog_length) + and x_goog_encoding != "gzip" + ): + # The library will attempt to trigger a retry by raising a ConnectionError, if + # (a) bytes_downloaded is less than response header x-goog-stored-content-length, and + # (b) the object is not gzip-compressed when stored in Cloud Storage. + raise ConnectionError(content_length_msg) + else: + msg = _CHECKSUM_MISMATCH.format( + self.media_url, + expected_checksum, + actual_checksum, + checksum_type=self.checksum.upper(), + ) + msg += content_length_msg + raise DataCorruption(response, msg) + def consume( self, diff --git a/tests/resumable_media/system/requests/test_download.py b/tests/resumable_media/system/requests/test_download.py index 15fe7d2c0..04c7246f6 100644 --- a/tests/resumable_media/system/requests/test_download.py +++ b/tests/resumable_media/system/requests/test_download.py @@ -463,7 +463,7 @@ def test_corrupt_download(self, add_files, corrupting_transport, checksum): info[checksum], checksum_type=checksum.upper(), ) - assert exc_info.value.args == (msg,) + assert msg in exc_info.value.args[0] def test_corrupt_download_no_check(self, add_files, corrupting_transport): for info in ALL_FILES: diff --git a/tests/resumable_media/unit/requests/test_download.py b/tests/resumable_media/unit/requests/test_download.py index 3da234a29..568d3238c 100644 --- a/tests/resumable_media/unit/requests/test_download.py +++ b/tests/resumable_media/unit/requests/test_download.py @@ -124,7 +124,11 @@ def test__write_to_stream_with_hash_check_fail(self, checksum): msg = download_mod._CHECKSUM_MISMATCH.format( EXAMPLE_URL, bad_checksum, good_checksum, checksum_type=checksum.upper() ) - assert error.args[0] == msg + assert msg in error.args[0] + assert ( + f"The download request read {download._bytes_downloaded} bytes of data." + in error.args[0] + ) # Check mocks. response.__enter__.assert_called_once_with() @@ -186,6 +190,29 @@ def test__write_to_stream_with_invalid_checksum_type(self): error = exc_info.value assert error.args[0] == "checksum must be ``'md5'``, ``'crc32c'`` or ``None``" + @pytest.mark.parametrize("checksum", ["md5", "crc32c"]) + def test__write_to_stream_incomplete_read(self, checksum): + stream = io.BytesIO() + download = download_mod.Download(EXAMPLE_URL, stream=stream, checksum=checksum) + + chunk1 = b"first chunk" + mock_full_content_length = len(chunk1) + 123 + headers = {"x-goog-stored-content-length": mock_full_content_length} + bad_checksum = "d3JvbmcgbiBtYWRlIHVwIQ==" + header_value = "crc32c={bad},md5={bad}".format(bad=bad_checksum) + headers[_helpers._HASH_HEADER] = header_value + response = _mock_response(chunks=[chunk1], headers=headers) + + with pytest.raises(ConnectionError) as exc_info: + download._write_to_stream(response) + + assert not download.finished + error = exc_info.value + assert ( + f"The download request read {download._bytes_downloaded} bytes of data." + in error.args[0] + ) + def _consume_helper( self, stream=None, @@ -304,7 +331,11 @@ def test_consume_with_stream_hash_check_fail(self, checksum): msg = download_mod._CHECKSUM_MISMATCH.format( EXAMPLE_URL, bad_checksum, good_checksum, checksum_type=checksum.upper() ) - assert error.args[0] == msg + assert msg in error.args[0] + assert ( + f"The download request read {download._bytes_downloaded} bytes of data." + in error.args[0] + ) # Check mocks. transport.request.assert_called_once_with( @@ -599,7 +630,11 @@ def test__write_to_stream_with_hash_check_fail(self, checksum): msg = download_mod._CHECKSUM_MISMATCH.format( EXAMPLE_URL, bad_checksum, good_checksum, checksum_type=checksum.upper() ) - assert error.args[0] == msg + assert msg in error.args[0] + assert ( + f"The download request read {download._bytes_downloaded} bytes of data." + in error.args[0] + ) # Check mocks. response.__enter__.assert_called_once_with() @@ -632,6 +667,31 @@ def test__write_to_stream_with_invalid_checksum_type(self): error = exc_info.value assert error.args[0] == "checksum must be ``'md5'``, ``'crc32c'`` or ``None``" + @pytest.mark.parametrize("checksum", ["md5", "crc32c"]) + def test__write_to_stream_incomplete_read(self, checksum): + stream = io.BytesIO() + download = download_mod.RawDownload( + EXAMPLE_URL, stream=stream, checksum=checksum + ) + + chunk1 = b"first chunk" + mock_full_content_length = len(chunk1) + 123 + headers = {"x-goog-stored-content-length": mock_full_content_length} + bad_checksum = "d3JvbmcgbiBtYWRlIHVwIQ==" + header_value = "crc32c={bad},md5={bad}".format(bad=bad_checksum) + headers[_helpers._HASH_HEADER] = header_value + response = _mock_raw_response(chunks=[chunk1], headers=headers) + + with pytest.raises(ConnectionError) as exc_info: + download._write_to_stream(response) + + assert not download.finished + error = exc_info.value + assert ( + f"The download request read {download._bytes_downloaded} bytes of data." + in error.args[0] + ) + def _consume_helper( self, stream=None, @@ -754,7 +814,11 @@ def test_consume_with_stream_hash_check_fail(self, checksum): msg = download_mod._CHECKSUM_MISMATCH.format( EXAMPLE_URL, bad_checksum, good_checksum, checksum_type=checksum.upper() ) - assert error.args[0] == msg + assert msg in error.args[0] + assert ( + f"The download request read {download._bytes_downloaded} bytes of data." + in error.args[0] + ) # Check mocks. transport.request.assert_called_once_with(