Skip to content

Commit 4e28107

Browse files
committed
Use generic_open for unified compressed/uncompressed file handling
Replaced the custom logic for handling externally zipped NetCDF files with Satpy's generic_open helper, which transparently supports both compressed and uncompressed inputs. This simplifies the reader implementation and removes compression-detection code. Because generic_open uses a context manager and closes the file as soon as the reader exits the `with` block, the dataset must now be fully loaded immediately. The reader therefore calls `.compute()` on the xarray dataset to ensure all data is available after the context closes.
1 parent fa7c775 commit 4e28107

File tree

2 files changed

+60
-98
lines changed

2 files changed

+60
-98
lines changed

satpy/readers/hsaf_nc.py

Lines changed: 16 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -24,32 +24,30 @@
2424
2525
Currently, this reader supports the following products:
2626
* **H60/H60B** – Blended GEO/IR and LEO/MW instantaneous precipitation over
27-
the full Meteosat (0°) disk.
27+
the full Meteosat (0°) disk.
2828
* **H63** – Blended GEO/IR and LEO/MW instantaneous precipitation over the
2929
Meteosat IODC (Indian Ocean) disk.
3030
* **H90** – Accumulated Precipitation at ground by blended MW and IR IODC
3131
over the Meteosat IODC (Indian Ocean) disk.
3232
3333
Notes:
34-
- Externally compressed files with the ``.gz`` suffix are automatically
35-
uncompressed to the same directory and deleted on reader close.
36-
- The reader relies on area definitions provided in the accompanying YAML
37-
configuration file.
38-
- Supports ``rain_rate`` and ``q_ind`` datasets.
39-
- Output coverage and resolution correspond to the MSG-SEVIRI grid.
34+
* Externally compressed files (.gz) and uncompressed files are handled
35+
transparently via Satpy's ``generic_open``, without creating temporary
36+
decompressed files on disk.
37+
* The reader uses area definitions specified in the associated YAML
38+
configuration file to determine projection, coverage, and resolution.
39+
* Provides access to the ``rain_rate`` and ``q_ind`` datasets (including
40+
accumulated variants where available).
4041
"""
4142

4243
import datetime as dt
43-
import gzip
4444
import logging
45-
import shutil
46-
import tempfile
47-
from pathlib import Path
4845

4946
import xarray as xr
5047

5148
from satpy.area import get_area_def
5249
from satpy.readers.core.file_handlers import BaseFileHandler
50+
from satpy.readers.core.utils import generic_open
5351
from satpy.utils import get_legacy_chunk_size
5452

5553
LOG = logging.getLogger(__name__)
@@ -62,58 +60,6 @@
6260

6361
CHUNK_SIZE = get_legacy_chunk_size()
6462

65-
def gunzip(source, destination):
66-
"""Unzips an externally compressed HSAF file."""
67-
with gzip.open(source) as s:
68-
with open(destination, "wb") as d:
69-
shutil.copyfileobj(s, d, 10 * 1024 * 1024)
70-
71-
def create_named_empty_file(source):
72-
"""Create an empty file."""
73-
# Use delete=False for cross-platform safety
74-
tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".nc")
75-
tmp.close() # we only needed the filename
76-
LOG.debug(f"Created temporary file {Path(tmp.name)} from {source}")
77-
return Path(tmp.name)
78-
79-
80-
class HSAFFileWrapper:
81-
"""Wrapper for an H SAF NetCDF file for handling external compression if an external gzip layer exists."""
82-
83-
def __init__(self, filename):
84-
"""Opens the nc file and stores the nc data."""
85-
self.filename = filename
86-
self._tmp_file = None
87-
self._compressed = Path(self.filename).suffix == ".gz"
88-
89-
if self._compressed:
90-
self._tmp_file = create_named_empty_file(self.filename)
91-
gunzip(self.filename, self._tmp_file)
92-
self.filename = str(self._tmp_file)
93-
94-
# Open dataset
95-
chunks = CHUNK_SIZE
96-
LOG.debug(f"Opening HSAF file {self.filename}")
97-
self.nc_data = xr.open_dataset(self.filename, decode_cf=True, chunks=chunks)
98-
99-
100-
def close(self):
101-
"""Close the nc file and clean up temp file if needed."""
102-
if self.nc_data is not None:
103-
self.nc_data.close()
104-
self.nc_data = None
105-
LOG.debug("Closed NetCDF dataset")
106-
107-
if self._compressed and self._tmp_file is not None:
108-
try:
109-
self._tmp_file.unlink(missing_ok=True)
110-
LOG.debug(f"Deleted temporary file {self._tmp_file}")
111-
except Exception as e:
112-
LOG.warning(f"Failed to delete temp file {self._tmp_file}: {e}")
113-
finally:
114-
self._tmp_file = None
115-
116-
11763
class HSAFNCFileHandler(BaseFileHandler):
11864
"""Handle H SAF NetCDF files, with optional .gz external compression.
11965
@@ -129,22 +75,19 @@ class HSAFNCFileHandler(BaseFileHandler):
12975
def __init__(self, filename, filename_info, filetype_info):
13076
"""Create a wrapper to open the nc file and store the nc data."""
13177
super().__init__(filename, filename_info, filetype_info)
132-
self._wrapper = HSAFFileWrapper(filename)
13378
self._area_name = None
13479
self._lon_0 = None
135-
136-
def close(self):
137-
"""Clean up by closing the dataset."""
138-
self._wrapper.close()
80+
with generic_open(filename, mode="rb", compression="infer") as fp:
81+
self._nc_data = xr.open_dataset(fp, chunks="auto", engine="h5netcdf").compute()
13982

14083
def _get_global_attributes(self):
14184
"""Create a dictionary of global attributes to be added to all datasets."""
14285
attributes = {
14386
"filename": self.filename,
14487
"start_time": self.start_time,
14588
"end_time": self.end_time,
146-
"spacecraft_name": platform_translate.get(self._wrapper.nc_data.attrs["satellite_identifier"], "N/A"),
147-
"platform_name": platform_translate.get(self._wrapper.nc_data.attrs["satellite_identifier"], "N/A"),
89+
"spacecraft_name": platform_translate.get(self._nc_data.attrs["satellite_identifier"], "N/A"),
90+
"platform_name": platform_translate.get(self._nc_data.attrs["satellite_identifier"], "N/A"),
14891
}
14992

15093
return attributes
@@ -163,8 +106,8 @@ def get_dataset(self, dataset_id, dataset_info):
163106
"""Get a dataset from the file."""
164107
# Get the variable
165108
var_name = dataset_info.get("file_key", dataset_id["name"])
166-
LOG.debug(f"Getting dataset {var_name} from file {self._wrapper.filename}")
167-
data = self._wrapper.nc_data[var_name]
109+
LOG.debug(f"Getting dataset {var_name} from file {self.filename}")
110+
data = self._nc_data[var_name]
168111

169112
data = self._standarize_dims(data)
170113

@@ -181,7 +124,7 @@ def get_dataset(self, dataset_id, dataset_info):
181124

182125
data.attrs["resolution"] = self.filetype_info["resolution"]
183126
self._area_name = self.filetype_info["area"]
184-
self._lon_0 = float(self._wrapper.nc_data.attrs["sub_satellite_longitude"].rstrip("f"))
127+
self._lon_0 = float(self._nc_data.attrs["sub_satellite_longitude"].rstrip("f"))
185128

186129
# Add global attributes which are shared across datasets
187130
data.attrs.update(self._get_global_attributes())

satpy/tests/reader_tests/test_hsaf_nc.py

Lines changed: 44 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
"""Tests for the H-SAF NC reader."""
22
import datetime as dt
3+
import io
34
import os
5+
from collections import namedtuple
46
from unittest import mock
57

68
import numpy as np
@@ -35,6 +37,12 @@
3537
}
3638
}
3739

40+
# Avoid too many arguments for test_load_datasets
41+
LoadDatasetsParams = namedtuple(
42+
"LoadDatasetsParams",
43+
["file_type", "loadable_ids", "unit", "resolution", "area_name"]
44+
)
45+
3846
# constants for fake test data
3947
DEFAULT_SHAPE = (5, 5)
4048
rng = np.random.default_rng()
@@ -78,48 +86,55 @@ def setup_method(self):
7886
)
7987
def test_reader_creation(self, file_type, expected_loadables):
8088
"""Test that the reader can create file handlers."""
81-
with mock.patch("satpy.readers.hsaf_nc.xr.open_dataset") as od:
82-
od.side_effect = fake_hsaf_dataset
89+
with mock.patch("satpy.readers.hsaf_nc.generic_open") as mock_generic_open, \
90+
mock.patch("satpy.readers.hsaf_nc.xr.open_dataset") as mock_open_dataset:
91+
92+
mock_generic_open.return_value.__enter__.return_value = io.BytesIO(b"fake data")
93+
mock_open_dataset.side_effect = fake_hsaf_dataset
8394

8495
loadables = file_type["reader"].select_files_from_pathnames([file_type["fake_file"]])
85-
file_type["reader"].create_filehandlers(loadables)
96+
file_handlers = file_type["reader"].create_filehandlers(loadables)
8697

87-
assert len(loadables) == expected_loadables
98+
assert len(file_handlers) == expected_loadables
8899
assert file_type["reader"].file_handlers, "No file handlers created"
89100

90101
@pytest.mark.parametrize(
91-
("file_type", "loadable_ids", "unit", "resolution", "area_name"),
102+
"params",
92103
[
93-
(FILE_PARAMS[FILE_TYPE_H60], ["rr", "qind"], "mm/h", 3000, "msg_seviri_fes_3km"),
94-
(FILE_PARAMS[FILE_TYPE_H63], ["rr", "qind"], "mm/h", 3000, "msg_seviri_iodc_3km"),
95-
(FILE_PARAMS[FILE_TYPE_H90], ["acc_rr", "qind"], "mm", 3000, "msg_seviri_iodc_3km"),
104+
LoadDatasetsParams(FILE_PARAMS[FILE_TYPE_H60], ["rr", "qind"], "mm/h", 3000, "msg_seviri_fes_3km"),
105+
LoadDatasetsParams(FILE_PARAMS[FILE_TYPE_H63], ["rr", "qind"], "mm/h", 3000, "msg_seviri_iodc_3km"),
106+
LoadDatasetsParams(FILE_PARAMS[FILE_TYPE_H90], ["acc_rr", "qind"], "mm", 3000, "msg_seviri_iodc_3km"),
96107
],
97108
)
98-
def test_load_datasets(self, file_type, loadable_ids, unit, resolution, area_name):
109+
def test_load_datasets(self, params):
99110
"""Test that datasets can be loaded correctly."""
100-
with mock.patch("satpy.readers.hsaf_nc.xr.open_dataset") as od:
101-
od.side_effect = fake_hsaf_dataset
102-
loadables = file_type["reader"].select_files_from_pathnames([file_type["fake_file"]])
103-
file_type["reader"].create_filehandlers(loadables)
111+
with mock.patch("satpy.readers.hsaf_nc.generic_open") as mock_generic_open, \
112+
mock.patch("satpy.readers.hsaf_nc.xr.open_dataset") as mock_open_dataset:
104113

105-
datasets = file_type["reader"].load(loadable_ids)
114+
mock_generic_open.return_value.__enter__.return_value = io.BytesIO(b"fake data")
115+
mock_open_dataset.side_effect = fake_hsaf_dataset
116+
117+
loadables = params.file_type["reader"].select_files_from_pathnames([params.file_type["fake_file"]])
118+
params.file_type["reader"].create_filehandlers(loadables)
119+
120+
datasets = params.file_type["reader"].load(params.loadable_ids)
106121
dataset_names = {d["name"] for d in datasets.keys()}
107-
assert dataset_names == set(loadable_ids)
122+
assert dataset_names == set(params.loadable_ids)
108123

109124
# check array shapes and types
110-
assert datasets[loadable_ids[0]].shape == DEFAULT_SHAPE
111-
assert datasets[loadable_ids[1]].shape == DEFAULT_SHAPE
112-
assert np.issubdtype(datasets[loadable_ids[0]].dtype, np.floating)
113-
assert np.issubdtype(datasets[loadable_ids[1]].dtype, np.integer)
125+
assert datasets[params.loadable_ids[0]].shape == DEFAULT_SHAPE
126+
assert datasets[params.loadable_ids[1]].shape == DEFAULT_SHAPE
127+
assert np.issubdtype(datasets[params.loadable_ids[0]].dtype, np.floating)
128+
assert np.issubdtype(datasets[params.loadable_ids[1]].dtype, np.integer)
114129

115-
data = datasets[loadable_ids[0]]
130+
data = datasets[params.loadable_ids[0]]
116131
assert data.attrs["spacecraft_name"] == "Meteosat-8"
117132
assert data.attrs["platform_name"] == "Meteosat-8"
118-
assert data.attrs["units"] == unit
119-
assert data.attrs["resolution"] == resolution
133+
assert data.attrs["units"] == params.unit
134+
assert data.attrs["resolution"] == params.resolution
120135
assert data.attrs["start_time"] == dt.datetime(2025, 11, 5, 0, 0)
121136
assert data.attrs["end_time"] == dt.datetime(2025, 11, 5, 0, 15)
122-
assert data.attrs["area"].area_id == area_name
137+
assert data.attrs["area"].area_id == params.area_name
123138
assert data.dims == ("y", "x")
124139

125140

@@ -152,8 +167,12 @@ def test_real_hsaf_file(self, file_type, loadable_ids):
152167

153168
def test_get_area_def(self):
154169
"""Test that the loaded dataset has a AreaDefinition and overwrite of lon_0 of the area works correctly."""
155-
with mock.patch("satpy.readers.hsaf_nc.xr.open_dataset") as od:
156-
od.side_effect = fake_hsaf_dataset
170+
with mock.patch("satpy.readers.hsaf_nc.generic_open") as mock_generic_open, \
171+
mock.patch("satpy.readers.hsaf_nc.xr.open_dataset") as mock_open_dataset:
172+
173+
mock_generic_open.return_value.__enter__.return_value = io.BytesIO(b"fake data")
174+
mock_open_dataset.side_effect = fake_hsaf_dataset
175+
157176
file_type = FILE_PARAMS[FILE_TYPE_H63]
158177
loadables = file_type["reader"].select_files_from_pathnames([file_type["fake_file"]])
159178
file_type["reader"].create_filehandlers(loadables)

0 commit comments

Comments
 (0)