Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
287 changes: 207 additions & 80 deletions nlmod/gwf/recharge.py

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion nlmod/read/jarkus.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,7 @@ def get_dataset_jarkus(extent, kind="jarkus", return_tiles=False, time=-1):
f"no time={time} in {kind}-tile with extent {extent_tile}"
)
tiles = tiles_left
z_dataset = xr.combine_by_coords(tiles, combine_attrs="drop")
z_dataset = xr.combine_by_coords(tiles, combine_attrs="drop", data_vars="all")
# drop 'lat' and 'lon' as these will create problems when resampling the data
z_dataset = z_dataset.drop_vars(["lat", "lon"])
return z_dataset
Expand Down
130 changes: 100 additions & 30 deletions nlmod/read/knmi.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,14 @@


@cache.cache_netcdf(coords_3d=True, coords_time=True)
def get_recharge(ds, oc_knmi=None, method="linear", most_common_station=False):
def get_recharge(
ds,
oc_knmi=None,
method="linear",
most_common_station=False,
add_stn_dimensions=False,
to_model_time=True,
):
"""Add recharge to model dataset from KNMI data.

Add recharge to the model dataset with knmi data by following these steps:
Expand Down Expand Up @@ -47,7 +54,7 @@ def get_recharge(ds, oc_knmi=None, method="linear", most_common_station=False):
method : str, optional
If 'linear', calculate recharge by subtracting evaporation from precipitation.
If 'separate', add precipitation as 'recharge' and evaporation as 'evaporation'.
The default is 'linear'.
Method is only used when `add_stn_dimensions=False`. The default is 'linear'.
most_common_station : bool, optional
When True, only download data from the station that is most common in the model
area. The default is False
Expand All @@ -66,12 +73,24 @@ def get_recharge(ds, oc_knmi=None, method="linear", most_common_station=False):
)

return discretize_knmi(
ds, oc_knmi, method=method, most_common_station=most_common_station
ds,
oc_knmi,
method=method,
most_common_station=most_common_station,
add_stn_dimensions=add_stn_dimensions,
to_model_time=to_model_time,
)


@cache.cache_netcdf(coords_3d=True, coords_time=True)
def discretize_knmi(ds, oc_knmi, method="linear", most_common_station=True):
def discretize_knmi(
ds,
oc_knmi,
method="linear",
most_common_station=True,
add_stn_dimensions=False,
to_model_time=True,
):
"""discretize knmi data to model grid

Create a dataset with recharge (and evaporation) data by following these steps:
Expand All @@ -98,10 +117,22 @@ def discretize_knmi(ds, oc_knmi, method="linear", most_common_station=True):
method : str, optional
If 'linear', calculate recharge by subtracting evaporation from precipitation.
If 'separate', add precipitation as 'recharge' and evaporation as 'evaporation'.
The default is 'linear'.
Method is only used when `add_stn_dimensions=False`. The default is 'linear'.
most_common_station : bool, optional
When True, only use data from the station that is most common in the model
area. The default is True
add_stn_dimensions : bool, optional
When True, add the dimension `time` to the variable `recharge` (and `evaporation`
when `method='seperate'`). When True, add dimension `stn_RD` and `stn_EV24` to
the variable `recharge` and `evaporation`, and add variables "recharge_stn" and
"evaporation_stn" that specify for every grid cell which KNMI-stations are used.
When `add_stn_dimensions` is False, specify recharge (and evaporation when
`method='seperate'`) for every gridcell. The default is False.
to_model_time : bool, optional
When True, resample the recharge and evaporation to the dimension `time` in ds.
When False, save the original times of the KNMI-data in variables `time_RD` and
`time_EV24`. `to_model_time=False` is only supported for
`add_stn_dimensions=True`. The default is True.

Returns
-------
Expand Down Expand Up @@ -136,7 +167,10 @@ def discretize_knmi(ds, oc_knmi, method="linear", most_common_station=True):
)

# get recharge data array
ds_out = util.get_ds_empty(ds, keep_coords=("time", "y", "x"))
keep_coords = ("y", "x")
if to_model_time:
keep_coords = ("time",) + keep_coords
ds_out = util.get_ds_empty(ds, keep_coords=keep_coords)
ds_out.attrs["gridtype"] = ds.gridtype

if is_structured(ds):
Expand All @@ -145,22 +179,58 @@ def discretize_knmi(ds, oc_knmi, method="linear", most_common_station=True):
dims = ("icell2d",)
else:
raise ValueError("gridtype should be structured or vertex")

if add_stn_dimensions:
nodata = -999
shape = [len(ds_out[dim]) for dim in dims]
variables = {"recharge": "stn_RD", "evaporation": "stn_EV24"}
for var in variables:
stn_var = variables[var]
ds_out[f"{var}_stn"] = dims, np.full(shape, nodata)
values = [int(x.split("_")[-1]) for x in locations[stn_var]]
if is_structured(ds):
ds_out[f"{var}_stn"].data[locations.row, locations.col] = values
elif is_vertex(ds):
ds_out[f"{var}_stn"].data[locations.index] = values
ds_out[f"{var}_stn"].attrs["nodata"] = nodata

stn_un = locations[stn_var].unique()
column = stn_var.split("_")[-1]
df = pd.DataFrame([x[column] for x in oc_knmi.loc[stn_un, "obs"]]).T
df.columns = [int(x.split("_")[-1]) for x in stn_un]
df.columns.name = stn_var
if to_model_time:
df = df.resample("D").nearest()
df.index.name = "time"
else:
df.index.name = f"time_{column}"
ds_out[var] = df
if to_model_time:
# make sure all attributes of ds.time are in ds_out.time
ds_out["time"] = ds.time
return ds_out

if not to_model_time:
raise (
NotImplementedError(
"`to_model_time=False` not implemented for `add_stn_dimensions=False`"
)
)
dims = ("time",) + dims
shape = [len(ds_out[dim]) for dim in dims]

if method in ["linear"]:
ds_out["recharge"] = dims, np.zeros(shape)

# find unique combination of precipitation and evaporation station
unique_combinations = locations.drop_duplicates(["stn_rd", "stn_ev24"])[
["stn_rd", "stn_ev24"]
unique_combinations = locations.drop_duplicates(["stn_RD", "stn_EV24"])[
["stn_RD", "stn_EV24"]
].values
if unique_combinations.shape[1] > 2:
# bug fix for pandas 2.1 where three columns are returned
unique_combinations = unique_combinations[:, :2]
for stn_rd, stn_ev24 in unique_combinations:
# get locations with the same prec and evap station
mask = (locations["stn_rd"] == stn_rd) & (locations["stn_ev24"] == stn_ev24)
mask = (locations["stn_RD"] == stn_rd) & (locations["stn_EV24"] == stn_ev24)
loc_sel = locations.loc[mask]

# calculate recharge time series
Expand All @@ -173,15 +243,15 @@ def discretize_knmi(ds, oc_knmi, method="linear", most_common_station=True):

elif method == "separate":
ds_out["recharge"] = dims, np.zeros(shape)
for stn in locations["stn_rd"].unique():
for stn in locations["stn_RD"].unique():
ts = oc_knmi.loc[stn, "obs"]["RD"].resample("D").nearest()
loc_sel = locations.loc[(locations["stn_rd"] == stn)]
loc_sel = locations.loc[(locations["stn_RD"] == stn)]
_add_ts_to_ds(ts, loc_sel, "recharge", ds_out)

ds_out["evaporation"] = dims, np.zeros(shape)
for stn in locations["stn_ev24"].unique():
for stn in locations["stn_EV24"].unique():
ts = oc_knmi.loc[stn, "obs"]["EV24"].resample("D").nearest()
loc_sel = locations.loc[(locations["stn_ev24"] == stn)]
loc_sel = locations.loc[(locations["stn_EV24"] == stn)]
_add_ts_to_ds(ts, loc_sel, "evaporation", ds_out)
else:
raise (ValueError(f"Unknown method: {method}"))
Expand Down Expand Up @@ -398,8 +468,8 @@ def get_locations(ds, oc_knmi=None, most_common_station=False):
Returns
-------
locations : pd.DataFrame
each row contains a location (x and y) and the relevant precipitation (stn_rd)
and evaporation (stn_ev24) stations.
each row contains a location (x and y) and the relevant precipitation (stn_RD)
and evaporation (stn_EV24) stations.
"""
# get locations
if is_structured(ds):
Expand All @@ -410,30 +480,30 @@ def get_locations(ds, oc_knmi=None, most_common_station=False):
raise ValueError("gridtype should be structured or vertex")

if oc_knmi is not None:
locations["stn_rd"] = hpd_knmi.get_nearest_station_df(
locations["stn_RD"] = hpd_knmi.get_nearest_station_df(
locations, stations=oc_knmi.loc[oc_knmi["meteo_var"] == "RD"]
)
locations["stn_ev24"] = hpd_knmi.get_nearest_station_df(
locations["stn_EV24"] = hpd_knmi.get_nearest_station_df(
locations, stations=oc_knmi.loc[oc_knmi["meteo_var"] == "EV24"]
)
else:
locations["stn_rd"] = hpd_knmi.get_nearest_station_df(locations, meteo_var="RD")
locations["stn_ev24"] = hpd_knmi.get_nearest_station_df(
locations["stn_RD"] = hpd_knmi.get_nearest_station_df(locations, meteo_var="RD")
locations["stn_EV24"] = hpd_knmi.get_nearest_station_df(
locations, meteo_var="EV24"
)

if most_common_station:
if is_structured(ds):
# set the most common station to all locations
locations["stn_rd"] = locations["stn_rd"].value_counts().idxmax()
locations["stn_ev24"] = locations["stn_ev24"].value_counts().idxmax()
locations["stn_RD"] = locations["stn_RD"].value_counts().idxmax()
locations["stn_EV24"] = locations["stn_EV24"].value_counts().idxmax()
else:
# set the station with the largest area to all locations
if "area" not in ds:
ds["area"] = get_area(ds)
locations["area"] = ds["area"].loc[locations.index]
locations["stn_rd"] = locations.groupby("stn_rd").sum()["area"].idxmax()
locations["stn_ev24"] = locations.groupby("stn_ev24").sum()["area"].idxmax()
locations["stn_RD"] = locations.groupby("stn_RD").sum()["area"].idxmax()
locations["stn_EV24"] = locations.groupby("stn_EV24").sum()["area"].idxmax()

return locations

Expand All @@ -444,8 +514,8 @@ def _download_knmi_at_locations(locations, start=None, end=None):
Parameters
----------
locations : pd.DataFrame
each row contains a location (x and y) and the relevant precipitation (stn_rd)
and evaporation (stn_ev24) stations.
each row contains a location (x and y) and the relevant precipitation (stn_RD)
and evaporation (stn_EV24) stations.
start : str or datetime, optional
start date of measurements that you want, The default is '2010'.
end : str or datetime, optional
Expand All @@ -456,8 +526,8 @@ def _download_knmi_at_locations(locations, start=None, end=None):
oc_knmi
hpd.ObsCollection
"""
stns_rd = locations["stn_rd"].unique()
stns_ev24 = locations["stn_ev24"].unique()
stns_rd = locations["stn_RD"].unique()
stns_ev24 = locations["stn_EV24"].unique()

# get knmi data stations closest to any grid cell
olist = []
Expand Down Expand Up @@ -491,8 +561,8 @@ def get_knmi_at_locations(locations, ds=None, start=None, end=None):
Parameters
----------
locations : pd.DataFrame
each row contains a location (x and y) and the relevant precipitation (stn_rd)
and evaporation (stn_ev24) stations.
each row contains a location (x and y) and the relevant precipitation (stn_RD)
and evaporation (stn_EV24) stations.
ds : xr.DataSet or None, optional
dataset containing relevant time information. If None provide start and end.
start : str or datetime, optional
Expand Down
2 changes: 1 addition & 1 deletion nlmod/read/knmi_data_platform.py
Original file line number Diff line number Diff line change
Expand Up @@ -319,4 +319,4 @@ def read_dataset(
else:
raise ValueError(f"Can't read/handle file {file}")

return xr.concat(data, dim="time")
return xr.concat(data, dim="time", data_vars="all")
3 changes: 2 additions & 1 deletion nlmod/read/nhi.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,8 @@ def add_buisdrainage(
cond_method,
depth_method,
)
return ds.update(ds_out)
ds.update(ds_out)
return ds


def discretize_buisdrainage(
Expand Down
4 changes: 1 addition & 3 deletions nlmod/read/rws.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,8 +174,6 @@ def get_northsea(ds, gdf=None, da_name="northsea"):
Dataset with a single DataArray, this DataArray is 1 at sea and 0
everywhere else. Grid dimensions according to ds.
"""
if gdf is None:
gdf = get_gdf_surface_water(ds=ds)

warnings.warn(
"'get_northsea' is deprecated and will be removed in a future version. "
Expand All @@ -187,7 +185,7 @@ def get_northsea(ds, gdf=None, da_name="northsea"):


@cache.cache_netcdf(coords_2d=True)
def discretize_northsea(ds, gdf, da_name="northsea"):
def discretize_northsea(ds, gdf=None, da_name="northsea"):
"""Get Dataset which is 1 at the northsea and 0 everywhere else. Sea is defined by
rws surface water shapefile.

Expand Down
6 changes: 3 additions & 3 deletions tests/test_004_northsea.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def test_get_bathymetry_nosea():
def test_add_bathymetry_to_top_bot_kh_kv_seamodel():
# model with sea
ds = test_001_model.get_ds_from_cache("basic_sea_model")
ds.update(nlmod.read.rws.get_northsea(ds))
ds.update(nlmod.read.jarkus.get_bathymetry(ds, datavar_sea="northsea"))

ds.update(nlmod.read.rws.discretize_northsea(ds))
bathymetry = nlmod.read.jarkus.get_bathymetry(ds, datavar_sea="northsea")
ds.update(bathymetry.drop_vars("time"))
ds = nlmod.dims.add_bathymetry_to_layer_model(ds)
63 changes: 62 additions & 1 deletion tests/test_005_external_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,71 @@ def test_get_recharge_not_available():
ds = nlmod.get_ds([100000, 110000, 420000, 430000])
time = [pd.Timestamp.now().normalize()]
ds = nlmod.time.set_ds_time(ds, start=time[0] - pd.Timedelta(days=21), time=time)
with pytest.raises(KeyError):
with pytest.raises(ValueError):
ds.update(nlmod.read.knmi.get_recharge(ds))


def test_get_recharge_add_stn_dimensions():
ds = nlmod.get_ds(
[100000, 110000, 420000, 430000],
model_ws=os.path.join("models", "test_get_recharge_add_stn_dimensions"),
model_name="test",
)
# set the top left cell to incactive, to test this functionality as well
ds["active_domain"] = ds["area"] > 0
ds["active_domain"].data[0, 0] = False
time = pd.date_range("2024", "2025")
ds = nlmod.time.set_ds_time(ds, start="2023", time=time)
ds.update(nlmod.read.knmi.get_recharge(ds, add_stn_dimensions=True))

# create simulation
sim = nlmod.sim.sim(ds)
_ = nlmod.sim.tdis(ds, sim)
gwf = nlmod.gwf.gwf(ds, sim)
_ = nlmod.sim.ims(sim)
_ = nlmod.gwf.dis(ds, gwf)
_ = nlmod.gwf.npf(ds, gwf)
_ = nlmod.gwf.ic(ds, gwf, starting_head=1.0)
_ = nlmod.gwf.rch(ds, gwf)
_ = nlmod.gwf.evt(ds, gwf)

# do not run, as this takes a lot of time
# _ = nlmod.gwf.drn(ds, gwf, elev='top', cond='area')
# nlmod.sim.write_and_run(sim, ds, write_ds=False)

spd = gwf.rch.stress_period_data.data
assert len(spd) == 1
assert len(spd[0]) == 10000 - 1 # one inactive cell
assert spd[0]["recharge"].dtype == object

spd = gwf.evt.stress_period_data.data
assert len(spd) == 1
assert len(spd[0]) == 10000 - 1 # one inactive cell
assert spd[0]["rate"].dtype == object


def test_add_recharge_as_float():
ds = nlmod.get_ds(
[100000, 110000, 420000, 430000],
model_ws=os.path.join("models", "test_add_recharge_as_float"),
model_name="test",
)
time = pd.date_range("2024", "2025")
ds = nlmod.time.set_ds_time(ds, start="2023", time=time)

sim = nlmod.sim.sim(ds)
_ = nlmod.sim.tdis(ds, sim)
gwf = nlmod.gwf.gwf(ds, sim)
_ = nlmod.sim.ims(sim)
_ = nlmod.gwf.dis(ds, gwf)
_ = nlmod.gwf.rch(ds, gwf, recharge=0.1)

spd = gwf.rch.stress_period_data.data
assert len(spd) == 1
assert len(spd[0]) == 10000
assert (spd[0]["recharge"] == 0.1).all()


def test_ahn_within_extent():
extent = [104000.0, 105000.0, 494000.0, 494600.0]
da = nlmod.read.ahn.download_latest_ahn_from_wcs(extent)
Expand Down
Loading