gwmod · rubencalje · Nov 19, 2025 · Oct 28, 2025 · Oct 28, 2025 · Oct 29, 2025
diff --git a/nlmod/gwf/recharge.py b/nlmod/gwf/recharge.py
diff --git a/nlmod/read/jarkus.py b/nlmod/read/jarkus.py
@@ -275,7 +275,7 @@ def get_dataset_jarkus(extent, kind="jarkus", return_tiles=False, time=-1):
                         f"no time={time} in {kind}-tile with extent {extent_tile}"
                     )
             tiles = tiles_left
-    z_dataset = xr.combine_by_coords(tiles, combine_attrs="drop")
+    z_dataset = xr.combine_by_coords(tiles, combine_attrs="drop", data_vars="all")
     # drop 'lat' and 'lon' as these will create problems when resampling the data
     z_dataset = z_dataset.drop_vars(["lat", "lon"])
     return z_dataset

diff --git a/nlmod/read/knmi.py b/nlmod/read/knmi.py
@@ -18,7 +18,14 @@
 
 
 @cache.cache_netcdf(coords_3d=True, coords_time=True)
-def get_recharge(ds, oc_knmi=None, method="linear", most_common_station=False):
+def get_recharge(
+    ds,
+    oc_knmi=None,
+    method="linear",
+    most_common_station=False,
+    add_stn_dimensions=False,
+    to_model_time=True,
+):
     """Add recharge to model dataset from KNMI data.
 
     Add recharge to the model dataset with knmi data by following these steps:
@@ -47,7 +54,7 @@ def get_recharge(ds, oc_knmi=None, method="linear", most_common_station=False):
     method : str, optional
         If 'linear', calculate recharge by subtracting evaporation from precipitation.
         If 'separate', add precipitation as 'recharge' and evaporation as 'evaporation'.
-        The default is 'linear'.
+        Method is only used when `add_stn_dimensions=False`. The default is 'linear'.
     most_common_station : bool, optional
         When True, only download data from the station that is most common in the model
         area. The default is False
@@ -66,12 +73,24 @@ def get_recharge(ds, oc_knmi=None, method="linear", most_common_station=False):
         )
 
     return discretize_knmi(
-        ds, oc_knmi, method=method, most_common_station=most_common_station
+        ds,
+        oc_knmi,
+        method=method,
+        most_common_station=most_common_station,
+        add_stn_dimensions=add_stn_dimensions,
+        to_model_time=to_model_time,
     )
 
 
 @cache.cache_netcdf(coords_3d=True, coords_time=True)
-def discretize_knmi(ds, oc_knmi, method="linear", most_common_station=True):
+def discretize_knmi(
+    ds,
+    oc_knmi,
+    method="linear",
+    most_common_station=True,
+    add_stn_dimensions=False,
+    to_model_time=True,
+):
     """discretize knmi data to model grid
 
     Create a dataset with recharge (and evaporation) data by following these steps:
@@ -98,10 +117,22 @@ def discretize_knmi(ds, oc_knmi, method="linear", most_common_station=True):
     method : str, optional
         If 'linear', calculate recharge by subtracting evaporation from precipitation.
         If 'separate', add precipitation as 'recharge' and evaporation as 'evaporation'.
-        The default is 'linear'.
+        Method is only used when `add_stn_dimensions=False`. The default is 'linear'.
     most_common_station : bool, optional
         When True, only use data from the station that is most common in the model
         area. The default is True
+    add_stn_dimensions : bool, optional
+        When True, add the dimension `time` to the variable `recharge` (and `evaporation`
+        when `method='seperate'`). When True, add dimension `stn_RD` and `stn_EV24` to
+        the variable `recharge` and `evaporation`, and add variables "recharge_stn" and
+        "evaporation_stn" that specify for every grid cell which KNMI-stations are used.
+        When `add_stn_dimensions` is False, specify recharge (and evaporation when
+        `method='seperate'`) for every gridcell. The default is False.
+    to_model_time : bool, optional
+        When True, resample the recharge and evaporation to the dimension `time` in ds.
+        When False, save the original times of the KNMI-data in variables `time_RD` and
+        `time_EV24`. `to_model_time=False` is only supported for
+        `add_stn_dimensions=True`. The default is True.
 
     Returns
     -------
@@ -136,7 +167,10 @@ def discretize_knmi(ds, oc_knmi, method="linear", most_common_station=True):
     )
 
     # get recharge data array
-    ds_out = util.get_ds_empty(ds, keep_coords=("time", "y", "x"))
+    keep_coords = ("y", "x")
+    if to_model_time:
+        keep_coords = ("time",) + keep_coords
+    ds_out = util.get_ds_empty(ds, keep_coords=keep_coords)
     ds_out.attrs["gridtype"] = ds.gridtype
 
     if is_structured(ds):
@@ -145,22 +179,58 @@ def discretize_knmi(ds, oc_knmi, method="linear", most_common_station=True):
         dims = ("icell2d",)
     else:
         raise ValueError("gridtype should be structured or vertex")
+
+    if add_stn_dimensions:
+        nodata = -999
+        shape = [len(ds_out[dim]) for dim in dims]
+        variables = {"recharge": "stn_RD", "evaporation": "stn_EV24"}
+        for var in variables:
+            stn_var = variables[var]
+            ds_out[f"{var}_stn"] = dims, np.full(shape, nodata)
+            values = [int(x.split("_")[-1]) for x in locations[stn_var]]
+            if is_structured(ds):
+                ds_out[f"{var}_stn"].data[locations.row, locations.col] = values
+            elif is_vertex(ds):
+                ds_out[f"{var}_stn"].data[locations.index] = values
+            ds_out[f"{var}_stn"].attrs["nodata"] = nodata
+
+            stn_un = locations[stn_var].unique()
+            column = stn_var.split("_")[-1]
+            df = pd.DataFrame([x[column] for x in oc_knmi.loc[stn_un, "obs"]]).T
+            df.columns = [int(x.split("_")[-1]) for x in stn_un]
+            df.columns.name = stn_var
+            if to_model_time:
+                df = df.resample("D").nearest()
+                df.index.name = "time"
+            else:
+                df.index.name = f"time_{column}"
+            ds_out[var] = df
+        if to_model_time:
+            # make sure all attributes of ds.time are in ds_out.time
+            ds_out["time"] = ds.time
+        return ds_out
+
+    if not to_model_time:
+        raise (
+            NotImplementedError(
+                "`to_model_time=False` not implemented for `add_stn_dimensions=False`"
+            )
+        )
     dims = ("time",) + dims
     shape = [len(ds_out[dim]) for dim in dims]
-
     if method in ["linear"]:
         ds_out["recharge"] = dims, np.zeros(shape)
 
         # find unique combination of precipitation and evaporation station
-        unique_combinations = locations.drop_duplicates(["stn_rd", "stn_ev24"])[
-            ["stn_rd", "stn_ev24"]
+        unique_combinations = locations.drop_duplicates(["stn_RD", "stn_EV24"])[
+            ["stn_RD", "stn_EV24"]
         ].values
         if unique_combinations.shape[1] > 2:
             # bug fix for pandas 2.1 where three columns are returned
             unique_combinations = unique_combinations[:, :2]
         for stn_rd, stn_ev24 in unique_combinations:
             # get locations with the same prec and evap station
-            mask = (locations["stn_rd"] == stn_rd) & (locations["stn_ev24"] == stn_ev24)
+            mask = (locations["stn_RD"] == stn_rd) & (locations["stn_EV24"] == stn_ev24)
             loc_sel = locations.loc[mask]
 
             # calculate recharge time series
@@ -173,15 +243,15 @@ def discretize_knmi(ds, oc_knmi, method="linear", most_common_station=True):
 
     elif method == "separate":
         ds_out["recharge"] = dims, np.zeros(shape)
-        for stn in locations["stn_rd"].unique():
+        for stn in locations["stn_RD"].unique():
             ts = oc_knmi.loc[stn, "obs"]["RD"].resample("D").nearest()
-            loc_sel = locations.loc[(locations["stn_rd"] == stn)]
+            loc_sel = locations.loc[(locations["stn_RD"] == stn)]
             _add_ts_to_ds(ts, loc_sel, "recharge", ds_out)
 
         ds_out["evaporation"] = dims, np.zeros(shape)
-        for stn in locations["stn_ev24"].unique():
+        for stn in locations["stn_EV24"].unique():
             ts = oc_knmi.loc[stn, "obs"]["EV24"].resample("D").nearest()
-            loc_sel = locations.loc[(locations["stn_ev24"] == stn)]
+            loc_sel = locations.loc[(locations["stn_EV24"] == stn)]
             _add_ts_to_ds(ts, loc_sel, "evaporation", ds_out)
     else:
         raise (ValueError(f"Unknown method: {method}"))
@@ -398,8 +468,8 @@ def get_locations(ds, oc_knmi=None, most_common_station=False):
     Returns
     -------
     locations : pd.DataFrame
-        each row contains a location (x and y) and the relevant precipitation (stn_rd)
-        and evaporation (stn_ev24) stations.
+        each row contains a location (x and y) and the relevant precipitation (stn_RD)
+        and evaporation (stn_EV24) stations.
     """
     # get locations
     if is_structured(ds):
@@ -410,30 +480,30 @@ def get_locations(ds, oc_knmi=None, most_common_station=False):
         raise ValueError("gridtype should be structured or vertex")
 
     if oc_knmi is not None:
-        locations["stn_rd"] = hpd_knmi.get_nearest_station_df(
+        locations["stn_RD"] = hpd_knmi.get_nearest_station_df(
             locations, stations=oc_knmi.loc[oc_knmi["meteo_var"] == "RD"]
         )
-        locations["stn_ev24"] = hpd_knmi.get_nearest_station_df(
+        locations["stn_EV24"] = hpd_knmi.get_nearest_station_df(
             locations, stations=oc_knmi.loc[oc_knmi["meteo_var"] == "EV24"]
         )
     else:
-        locations["stn_rd"] = hpd_knmi.get_nearest_station_df(locations, meteo_var="RD")
-        locations["stn_ev24"] = hpd_knmi.get_nearest_station_df(
+        locations["stn_RD"] = hpd_knmi.get_nearest_station_df(locations, meteo_var="RD")
+        locations["stn_EV24"] = hpd_knmi.get_nearest_station_df(
             locations, meteo_var="EV24"
         )
 
     if most_common_station:
         if is_structured(ds):
             # set the most common station to all locations
-            locations["stn_rd"] = locations["stn_rd"].value_counts().idxmax()
-            locations["stn_ev24"] = locations["stn_ev24"].value_counts().idxmax()
+            locations["stn_RD"] = locations["stn_RD"].value_counts().idxmax()
+            locations["stn_EV24"] = locations["stn_EV24"].value_counts().idxmax()
         else:
             # set the station with the largest area to all locations
             if "area" not in ds:
                 ds["area"] = get_area(ds)
             locations["area"] = ds["area"].loc[locations.index]
-            locations["stn_rd"] = locations.groupby("stn_rd").sum()["area"].idxmax()
-            locations["stn_ev24"] = locations.groupby("stn_ev24").sum()["area"].idxmax()
+            locations["stn_RD"] = locations.groupby("stn_RD").sum()["area"].idxmax()
+            locations["stn_EV24"] = locations.groupby("stn_EV24").sum()["area"].idxmax()
 
     return locations
 
@@ -444,8 +514,8 @@ def _download_knmi_at_locations(locations, start=None, end=None):
     Parameters
     ----------
     locations : pd.DataFrame
-        each row contains a location (x and y) and the relevant precipitation (stn_rd)
-        and evaporation (stn_ev24) stations.
+        each row contains a location (x and y) and the relevant precipitation (stn_RD)
+        and evaporation (stn_EV24) stations.
     start : str or datetime, optional
         start date of measurements that you want, The default is '2010'.
     end :  str or datetime, optional
@@ -456,8 +526,8 @@ def _download_knmi_at_locations(locations, start=None, end=None):
     oc_knmi
         hpd.ObsCollection
     """
-    stns_rd = locations["stn_rd"].unique()
-    stns_ev24 = locations["stn_ev24"].unique()
+    stns_rd = locations["stn_RD"].unique()
+    stns_ev24 = locations["stn_EV24"].unique()
 
     # get knmi data stations closest to any grid cell
     olist = []
@@ -491,8 +561,8 @@ def get_knmi_at_locations(locations, ds=None, start=None, end=None):
     Parameters
     ----------
     locations : pd.DataFrame
-        each row contains a location (x and y) and the relevant precipitation (stn_rd)
-        and evaporation (stn_ev24) stations.
+        each row contains a location (x and y) and the relevant precipitation (stn_RD)
+        and evaporation (stn_EV24) stations.
     ds : xr.DataSet or None, optional
         dataset containing relevant time information. If None provide start and end.
     start : str or datetime, optional

diff --git a/nlmod/read/knmi_data_platform.py b/nlmod/read/knmi_data_platform.py
@@ -319,4 +319,4 @@ def read_dataset(
         else:
             raise ValueError(f"Can't read/handle file {file}")
 
-    return xr.concat(data, dim="time")
+    return xr.concat(data, dim="time", data_vars="all")
diff --git a/nlmod/read/nhi.py b/nlmod/read/nhi.py
@@ -136,7 +136,8 @@ def add_buisdrainage(
         cond_method,
         depth_method,
     )
-    return ds.update(ds_out)
+    ds.update(ds_out)
+    return ds
 
 
 def discretize_buisdrainage(

diff --git a/nlmod/read/rws.py b/nlmod/read/rws.py
@@ -174,8 +174,6 @@ def get_northsea(ds, gdf=None, da_name="northsea"):
         Dataset with a single DataArray, this DataArray is 1 at sea and 0
         everywhere else. Grid dimensions according to ds.
     """
-    if gdf is None:
-        gdf = get_gdf_surface_water(ds=ds)
 
     warnings.warn(
         "'get_northsea' is deprecated and will be removed in a future version. "
@@ -187,7 +185,7 @@ def get_northsea(ds, gdf=None, da_name="northsea"):
 
 
 @cache.cache_netcdf(coords_2d=True)
-def discretize_northsea(ds, gdf, da_name="northsea"):
+def discretize_northsea(ds, gdf=None, da_name="northsea"):
     """Get Dataset which is 1 at the northsea and 0 everywhere else. Sea is defined by
     rws surface water shapefile.
 

diff --git a/tests/test_004_northsea.py b/tests/test_004_northsea.py
@@ -72,7 +72,7 @@ def test_get_bathymetry_nosea():
 def test_add_bathymetry_to_top_bot_kh_kv_seamodel():
     # model with sea
     ds = test_001_model.get_ds_from_cache("basic_sea_model")
-    ds.update(nlmod.read.rws.get_northsea(ds))
-    ds.update(nlmod.read.jarkus.get_bathymetry(ds, datavar_sea="northsea"))
-
+    ds.update(nlmod.read.rws.discretize_northsea(ds))
+    bathymetry = nlmod.read.jarkus.get_bathymetry(ds, datavar_sea="northsea")
+    ds.update(bathymetry.drop_vars("time"))
     ds = nlmod.dims.add_bathymetry_to_layer_model(ds)
diff --git a/tests/test_005_external_data.py b/tests/test_005_external_data.py
@@ -43,10 +43,71 @@ def test_get_recharge_not_available():
     ds = nlmod.get_ds([100000, 110000, 420000, 430000])
     time = [pd.Timestamp.now().normalize()]
     ds = nlmod.time.set_ds_time(ds, start=time[0] - pd.Timedelta(days=21), time=time)
-    with pytest.raises(KeyError):
+    with pytest.raises(ValueError):
         ds.update(nlmod.read.knmi.get_recharge(ds))
 
 
+def test_get_recharge_add_stn_dimensions():
+    ds = nlmod.get_ds(
+        [100000, 110000, 420000, 430000],
+        model_ws=os.path.join("models", "test_get_recharge_add_stn_dimensions"),
+        model_name="test",
+    )
+    # set the top left cell to incactive, to test this functionality as well
+    ds["active_domain"] = ds["area"] > 0
+    ds["active_domain"].data[0, 0] = False
+    time = pd.date_range("2024", "2025")
+    ds = nlmod.time.set_ds_time(ds, start="2023", time=time)
+    ds.update(nlmod.read.knmi.get_recharge(ds, add_stn_dimensions=True))
+
+    # create simulation
+    sim = nlmod.sim.sim(ds)
+    _ = nlmod.sim.tdis(ds, sim)
+    gwf = nlmod.gwf.gwf(ds, sim)
+    _ = nlmod.sim.ims(sim)
+    _ = nlmod.gwf.dis(ds, gwf)
+    _ = nlmod.gwf.npf(ds, gwf)
+    _ = nlmod.gwf.ic(ds, gwf, starting_head=1.0)
+    _ = nlmod.gwf.rch(ds, gwf)
+    _ = nlmod.gwf.evt(ds, gwf)
+
+    # do not run, as this takes a lot of time
+    # _ = nlmod.gwf.drn(ds, gwf, elev='top', cond='area')
+    # nlmod.sim.write_and_run(sim, ds, write_ds=False)
+
+    spd = gwf.rch.stress_period_data.data
+    assert len(spd) == 1
+    assert len(spd[0]) == 10000 - 1  # one inactive cell
+    assert spd[0]["recharge"].dtype == object
+
+    spd = gwf.evt.stress_period_data.data
+    assert len(spd) == 1
+    assert len(spd[0]) == 10000 - 1  # one inactive cell
+    assert spd[0]["rate"].dtype == object
+
+
+def test_add_recharge_as_float():
+    ds = nlmod.get_ds(
+        [100000, 110000, 420000, 430000],
+        model_ws=os.path.join("models", "test_add_recharge_as_float"),
+        model_name="test",
+    )
+    time = pd.date_range("2024", "2025")
+    ds = nlmod.time.set_ds_time(ds, start="2023", time=time)
+
+    sim = nlmod.sim.sim(ds)
+    _ = nlmod.sim.tdis(ds, sim)
+    gwf = nlmod.gwf.gwf(ds, sim)
+    _ = nlmod.sim.ims(sim)
+    _ = nlmod.gwf.dis(ds, gwf)
+    _ = nlmod.gwf.rch(ds, gwf, recharge=0.1)
+
+    spd = gwf.rch.stress_period_data.data
+    assert len(spd) == 1
+    assert len(spd[0]) == 10000
+    assert (spd[0]["recharge"] == 0.1).all()
+
+
 def test_ahn_within_extent():
     extent = [104000.0, 105000.0, 494000.0, 494600.0]
     da = nlmod.read.ahn.download_latest_ahn_from_wcs(extent)