Skip to content

Commit 978446c

Browse files
OnnoEbbensmartinvonkrubencaljedbrakenhoff
authored
Download bofek (#392)
* Download bofek2020 * add downloaded bofek data to gitignore * codacy * black formatting of bofek.py * add download progress bar uncomment test, but skip using pytest because of slowness --------- Co-authored-by: Martin Vonk <vonk.mart@gmail.com> Co-authored-by: Ruben Caljé <r.calje@artesia-water.nl> Co-authored-by: Davíd Brakenhoff <d.brakenhoff@artesia-water.nl>
1 parent a0300fe commit 978446c

File tree

6 files changed

+66
-28
lines changed

6 files changed

+66
-28
lines changed

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ cython_debug/
145145
nlmod/bin/*
146146
!nlmod/bin/
147147
!nlmod/bin/mp7_2_002_provisional
148-
nlmod/data/GIS
148+
nlmod/data/bofek/*
149149

150150
tests/data/*
151151
!tests/data/**/

nlmod/data/bofek/.gitkeep

Whitespace-only changes.

nlmod/gwf/recharge.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import logging
2+
import numbers
23

34
import flopy
45
import numpy as np
@@ -370,7 +371,7 @@ def ds_to_uzf(
370371

371372
# generate packagedata
372373
surfdep = _get_value_from_ds_datavar(ds, "surfdep", surfdep, return_da=True)
373-
vks = _get_value_from_ds_datavar(ds, "vk", vks, return_da=True)
374+
vks = _get_value_from_ds_datavar(ds, "vks", vks, return_da=True)
374375
thtr = _get_value_from_ds_datavar(ds, "thtr", thtr, return_da=True)
375376
thts = _get_value_from_ds_datavar(ds, "thts", thts, return_da=True)
376377
thti = _get_value_from_ds_datavar(ds, "thti", thti, return_da=True)
@@ -496,7 +497,9 @@ def ds_to_uzf(
496497
# account for surfdep, as this decreases the height of the top of the upper cell
497498
# otherwise modflow may return an error
498499
thickness = calculate_thickness(ds)
499-
thickness = [thickness[x] - landflag[x] * surfdep / 2 for x in cellids_obs]
500+
if isinstance(surfdep, numbers.Number):
501+
surfdep = xr.ones_like(thickness) * surfdep
502+
thickness = [thickness[x] - landflag[x] * surfdep[x] / 2 for x in cellids_obs]
500503

501504
# create observations list
502505
obsdepths = []

nlmod/read/bofek.py

Lines changed: 53 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,24 @@
1-
import requests
1+
import logging
2+
import shutil
23
import zipfile
3-
import io
4-
import geopandas as gpd
54
from pathlib import Path
6-
from .. import NLMOD_DATADIR, cache, dims, util
5+
6+
import geopandas as gpd
7+
import requests
8+
from tqdm.auto import tqdm
9+
10+
from nlmod import NLMOD_DATADIR, cache, dims, util
11+
12+
logger = logging.getLogger(__name__)
713

814

915
@cache.cache_pickle
10-
def get_gdf_bofek(ds=None, extent=None, timeout=120):
11-
"""get geodataframe of bofek 2020 wihtin the extent of the model. It does so by
12-
downloading a zip file (> 100 MB) and extracting the relevant geodatabase. Therefore
13-
the function can be slow, ~35 seconds depending on your internet connection.
16+
def get_gdf_bofek(ds=None, extent=None, timeout=3600):
17+
"""Get geodataframe of bofek 2020 wihtin the extent of the model.
18+
19+
It does so by downloading a zip file (> 100 MB) and extracting the relevant
20+
geodatabase. Therefore the function can be slow, ~35 seconds depending on your
21+
internet connection.
1422
1523
Parameters
1624
----------
@@ -19,32 +27,49 @@ def get_gdf_bofek(ds=None, extent=None, timeout=120):
1927
extent : list, tuple or np.array, optional
2028
extent xmin, xmax, ymin, ymax. Only used if ds is None. The default is None.
2129
timeout : int, optional
22-
timeout time of request in seconds. Default is 120.
30+
timeout time of request in seconds. Default is 3600.
2331
2432
Returns
2533
-------
2634
gdf_bofek : GeoDataframe
2735
Bofek2020 geodataframe with a column 'BOFEK2020' containing the bofek cluster
2836
codes
2937
"""
30-
3138
import py7zr
3239

3340
if extent is None and ds is not None:
3441
extent = dims.get_extent(ds)
3542

3643
# set paths
3744
tmpdir = Path(NLMOD_DATADIR)
45+
3846
fname_7z = tmpdir / "BOFEK2020_GIS.7z"
3947
fname_bofek = tmpdir / "GIS" / "BOFEK2020_bestanden" / "BOFEK2020.gdb"
48+
fname_bofek_geojson = tmpdir / "bofek" / "BOFEK2020.geojson"
4049
bofek_zip_url = "https://www.wur.nl/nl/show/bofek-2020-gis-1.htm"
4150

42-
if not fname_bofek.exists():
51+
if not fname_bofek_geojson.exists():
4352
# download zip
44-
r = requests.get(bofek_zip_url, timeout=timeout)
45-
with zipfile.ZipFile(io.BytesIO(r.content)) as z:
53+
logger.info("Downloading BOFEK2020 GIS data (~35 seconds)")
54+
r = requests.get(bofek_zip_url, timeout=timeout, stream=True)
55+
56+
# show download progress
57+
total_size = int(r.headers.get("content-length", 0))
58+
block_size = 1024
59+
with tqdm(
60+
total=total_size, unit="B", unit_scale=True, desc="Downloading BOFEK"
61+
) as progress_bar:
62+
with open(tmpdir / "bofek.zip", "wb") as file:
63+
for data in r.iter_content(block_size):
64+
progress_bar.update(len(data))
65+
file.write(data)
66+
67+
# unpack zips
68+
logger.debug("Extracting zipped BOFEK2020 GIS data")
69+
with zipfile.ZipFile(tmpdir / "bofek.zip") as z:
4670
# extract 7z
4771
z.extractall(tmpdir)
72+
4873
with py7zr.SevenZipFile(fname_7z, mode="r") as z:
4974
z.extract(
5075
targets=["GIS/BOFEK2020_bestanden/BOFEK2020.gdb"],
@@ -53,10 +78,23 @@ def get_gdf_bofek(ds=None, extent=None, timeout=120):
5378
)
5479

5580
# clean up
81+
logger.debug("Remove zip files")
82+
Path(tmpdir / "bofek.zip").unlink()
5683
Path(fname_7z).unlink()
5784

58-
# read geodatabase
59-
gdf_bofek = gpd.read_file(fname_bofek)
85+
# read geodatabase
86+
logger.debug("convert geodatabase to geojson")
87+
gdf_bofek = gpd.read_file(fname_bofek)
88+
89+
# save to geojson
90+
gdf_bofek.to_file(fname_bofek_geojson, driver="GeoJSON")
91+
92+
# remove geodatabase
93+
shutil.rmtree(fname_bofek)
94+
95+
# read geojson
96+
logger.debug(f"read bofek2020 geojson from {fname_bofek_geojson}")
97+
gdf_bofek = gpd.read_file(fname_bofek_geojson)
6098

6199
if extent is not None:
62100
# slice to extent

tests/test_002_regis_geotop.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,12 @@
33
import nlmod
44

55

6-
# @pytest.mark.skip(reason="too slow")
76
def test_get_regis(extent=[98600.0, 99000.0, 489400.0, 489700.0]):
87
regis_ds = nlmod.read.regis.get_regis(extent)
98

109
assert regis_ds.sizes["layer"] == 20
1110

1211

13-
# @pytest.mark.skip(reason="too slow")
1412
def test_get_regis_botm_layer_BEk1(
1513
extent=[98700.0, 99000.0, 489500.0, 489700.0],
1614
botm_layer="MSc",
@@ -44,7 +42,6 @@ def test_get_geotop(extent=[98600.0, 99000.0, 489400.0, 489700.0]):
4442
nlmod.plot.geotop_lithok_on_map(geotop_ds, z=-20.2, ax=ax)
4543

4644

47-
# @pytest.mark.skip(reason="too slow")
4845
def test_get_regis_geotop(extent=[98600.0, 99000.0, 489400.0, 489700.0]):
4946
regis_geotop_ds = nlmod.read.regis.get_combined_layer_models(
5047
extent, use_regis=True, use_geotop=True
@@ -53,7 +50,6 @@ def test_get_regis_geotop(extent=[98600.0, 99000.0, 489400.0, 489700.0]):
5350
assert regis_geotop_ds.sizes["layer"] == 24
5451

5552

56-
# @pytest.mark.skip(reason="too slow")
5753
def test_get_regis_geotop_keep_all_layers(
5854
extent=[98600.0, 99000.0, 489400.0, 489700.0],
5955
):

tests/test_005_external_data.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -134,11 +134,12 @@ def test_get_brp():
134134

135135

136136
# disable because slow (~35 seconds depending on internet connection)
137-
# def test_get_bofek():
138-
# # model with sea
139-
# ds = test_001_model.get_ds_from_cache("basic_sea_model")
137+
@pytest.mark.skip(reason="slow")
138+
def test_get_bofek():
139+
# model with sea
140+
ds = test_001_model.get_ds_from_cache("basic_sea_model")
140141

141-
# # add knmi recharge to the model dataset
142-
# gdf_bofek = nlmod.read.bofek.get_gdf_bofek(ds)
142+
# add knmi recharge to the model dataset
143+
gdf_bofek = nlmod.read.bofek.get_gdf_bofek(ds)
143144

144-
# assert not gdf_bofek.empty, "Bofek geodataframe is empty"
145+
assert not gdf_bofek.empty, "Bofek geodataframe is empty"

0 commit comments

Comments
 (0)