From 4d75a492589d2faff9e5e560bf2e58f0c7fcee48 Mon Sep 17 00:00:00 2001 From: OnnoEbbens Date: Mon, 16 Dec 2024 10:54:52 +0100 Subject: [PATCH 1/5] Download bofek2020 --- nlmod/data/bofek/.gitkeep | 0 nlmod/gwf/recharge.py | 7 +++++-- nlmod/read/bofek.py | 25 ++++++++++++++++++++++--- 3 files changed, 27 insertions(+), 5 deletions(-) create mode 100644 nlmod/data/bofek/.gitkeep diff --git a/nlmod/data/bofek/.gitkeep b/nlmod/data/bofek/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/nlmod/gwf/recharge.py b/nlmod/gwf/recharge.py index 7f54708b..ba85167a 100644 --- a/nlmod/gwf/recharge.py +++ b/nlmod/gwf/recharge.py @@ -1,4 +1,5 @@ import logging +import numbers import flopy import numpy as np @@ -370,7 +371,7 @@ def ds_to_uzf( # generate packagedata surfdep = _get_value_from_ds_datavar(ds, "surfdep", surfdep, return_da=True) - vks = _get_value_from_ds_datavar(ds, "vk", vks, return_da=True) + vks = _get_value_from_ds_datavar(ds, "vks", vks, return_da=True) thtr = _get_value_from_ds_datavar(ds, "thtr", thtr, return_da=True) thts = _get_value_from_ds_datavar(ds, "thts", thts, return_da=True) thti = _get_value_from_ds_datavar(ds, "thti", thti, return_da=True) @@ -496,7 +497,9 @@ def ds_to_uzf( # account for surfdep, as this decreases the height of the top of the upper cell # otherwise modflow may return an error thickness = calculate_thickness(ds) - thickness = [thickness[x] - landflag[x] * surfdep / 2 for x in cellids_obs] + if isinstance(surfdep, numbers.Number): + surfdep = xr.ones_like(thickness) * surfdep + thickness = [thickness[x] - landflag[x] * surfdep[x] / 2 for x in cellids_obs] # create observations list obsdepths = [] diff --git a/nlmod/read/bofek.py b/nlmod/read/bofek.py index 76273bb4..95d59541 100644 --- a/nlmod/read/bofek.py +++ b/nlmod/read/bofek.py @@ -2,6 +2,10 @@ import geopandas as gpd from pathlib import Path from nlmod import NLMOD_DATADIR, cache, dims, util +import shutil +import logging + +logger = logging.getLogger(__name__) @cache.cache_pickle @@ -33,23 +37,38 @@ def get_gdf_bofek(ds=None, extent=None): tmpdir = Path(NLMOD_DATADIR) fname_7z = tmpdir / 'BOFEK2020_GIS.7z' fname_bofek = tmpdir / 'GIS' / 'BOFEK2020_bestanden' / 'BOFEK2020.gdb' + fname_bofek_geojson = tmpdir / 'bofek' / 'BOFEK2020.geojson' bofek_zip_url = 'https://www.wur.nl/nl/show/bofek-2020-gis-1.htm' - if not fname_bofek.exists(): + if not fname_bofek_geojson.exists(): # download zip + logger.info('Downloading BOFEK2020 GIS data (~35 seconds)') r = requests.get(bofek_zip_url) z = zipfile.ZipFile(io.BytesIO(r.content)) # extract 7z + logger.debug('Extracting zipped BOFEK2020 GIS data') z.extractall(tmpdir) with py7zr.SevenZipFile(fname_7z, mode='r') as z: z.extract(targets=['GIS/BOFEK2020_bestanden/BOFEK2020.gdb'], path=tmpdir, recursive=True) # clean up + logger.debug('Remove zip files') Path(fname_7z).unlink() - # read geodatabase - gdf_bofek = gpd.read_file(fname_bofek) + # read geodatabase + logger.debug('convert geodatabase to geojson') + gdf_bofek = gpd.read_file(fname_bofek) + + # save to geojson + gdf_bofek.to_file(fname_bofek_geojson, driver='GeoJSON') + + # remove geodatabase + shutil.rmtree(fname_bofek) + + # read geojson + logger.debug(f'read bofek2020 geojson from {fname_bofek_geojson}') + gdf_bofek = gpd.read_file(fname_bofek_geojson) # slice to extent gdf_bofek = util.gdf_within_extent(gdf_bofek, extent) From e3164a2ffe110a9fd922762418830684a0856c9d Mon Sep 17 00:00:00 2001 From: OnnoEbbens Date: Mon, 16 Dec 2024 11:00:18 +0100 Subject: [PATCH 2/5] add downloaded bofek data to gitignore --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 3d4dd62d..5edf9e93 100644 --- a/.gitignore +++ b/.gitignore @@ -145,7 +145,7 @@ cython_debug/ nlmod/bin/* !nlmod/bin/ !nlmod/bin/mp7_2_002_provisional -nlmod/data/GIS +nlmod/data/bofek/* tests/data/* !tests/data/**/ From 36d0418fc936b1a4ef2e5f21e58625288a8c5c5a Mon Sep 17 00:00:00 2001 From: OnnoEbbens Date: Mon, 16 Dec 2024 11:22:57 +0100 Subject: [PATCH 3/5] codacy --- nlmod/read/bofek.py | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/nlmod/read/bofek.py b/nlmod/read/bofek.py index 95d59541..65b06eb1 100644 --- a/nlmod/read/bofek.py +++ b/nlmod/read/bofek.py @@ -1,18 +1,24 @@ -import requests, zipfile, io -import geopandas as gpd +import io +import logging +import shutil +import zipfile from pathlib import Path + +import geopandas as gpd +import requests + from nlmod import NLMOD_DATADIR, cache, dims, util -import shutil -import logging logger = logging.getLogger(__name__) @cache.cache_pickle def get_gdf_bofek(ds=None, extent=None): - """get geodataframe of bofek 2020 wihtin the extent of the model. It does so by - downloading a zip file (> 100 MB) and extracting the relevant geodatabase. Therefore - the function can be slow, ~35 seconds depending on your internet connection. + """Get geodataframe of bofek 2020 wihtin the extent of the model. + + It does so by downloading a zip file (> 100 MB) and extracting the relevant + geodatabase. Therefore the function can be slow, ~35 seconds depending on your + internet connection. Parameters ---------- @@ -27,7 +33,6 @@ def get_gdf_bofek(ds=None, extent=None): Bofek2020 geodataframe with a column 'BOFEK2020' containing the bofek cluster codes """ - import py7zr if extent is None and ds is not None: @@ -43,14 +48,16 @@ def get_gdf_bofek(ds=None, extent=None): if not fname_bofek_geojson.exists(): # download zip logger.info('Downloading BOFEK2020 GIS data (~35 seconds)') - r = requests.get(bofek_zip_url) + r = requests.get(bofek_zip_url, timeout=3600) z = zipfile.ZipFile(io.BytesIO(r.content)) # extract 7z logger.debug('Extracting zipped BOFEK2020 GIS data') z.extractall(tmpdir) with py7zr.SevenZipFile(fname_7z, mode='r') as z: - z.extract(targets=['GIS/BOFEK2020_bestanden/BOFEK2020.gdb'], path=tmpdir, recursive=True) + z.extract(targets=['GIS/BOFEK2020_bestanden/BOFEK2020.gdb'], + path=tmpdir, + recursive=True) # clean up logger.debug('Remove zip files') From 689230d4b876f007cfb33292f2a570b803d17b26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ruben=20Calj=C3=A9?= Date: Wed, 15 Jan 2025 21:08:22 +0100 Subject: [PATCH 4/5] black formatting of bofek.py --- nlmod/read/bofek.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/nlmod/read/bofek.py b/nlmod/read/bofek.py index 13569241..173762ce 100644 --- a/nlmod/read/bofek.py +++ b/nlmod/read/bofek.py @@ -14,7 +14,7 @@ @cache.cache_pickle def get_gdf_bofek(ds=None, extent=None, timeout=3600): """Get geodataframe of bofek 2020 wihtin the extent of the model. - + It does so by downloading a zip file (> 100 MB) and extracting the relevant geodatabase. Therefore the function can be slow, ~35 seconds depending on your internet connection. @@ -42,21 +42,21 @@ def get_gdf_bofek(ds=None, extent=None, timeout=3600): # set paths tmpdir = Path(NLMOD_DATADIR) - fname_7z = tmpdir / 'BOFEK2020_GIS.7z' - fname_bofek = tmpdir / 'GIS' / 'BOFEK2020_bestanden' / 'BOFEK2020.gdb' - fname_bofek_geojson = tmpdir / 'bofek' / 'BOFEK2020.geojson' - bofek_zip_url = 'https://www.wur.nl/nl/show/bofek-2020-gis-1.htm' + fname_7z = tmpdir / "BOFEK2020_GIS.7z" + fname_bofek = tmpdir / "GIS" / "BOFEK2020_bestanden" / "BOFEK2020.gdb" + fname_bofek_geojson = tmpdir / "bofek" / "BOFEK2020.geojson" + bofek_zip_url = "https://www.wur.nl/nl/show/bofek-2020-gis-1.htm" if not fname_bofek_geojson.exists(): # download zip - logger.info('Downloading BOFEK2020 GIS data (~35 seconds)') + logger.info("Downloading BOFEK2020 GIS data (~35 seconds)") r = requests.get(bofek_zip_url, timeout=timeout) - - logger.debug('Extracting zipped BOFEK2020 GIS data') + + logger.debug("Extracting zipped BOFEK2020 GIS data") with zipfile.ZipFile(io.BytesIO(r.content)) as z: # extract 7z z.extractall(tmpdir) - + with py7zr.SevenZipFile(fname_7z, mode="r") as z: z.extract( targets=["GIS/BOFEK2020_bestanden/BOFEK2020.gdb"], @@ -65,21 +65,21 @@ def get_gdf_bofek(ds=None, extent=None, timeout=3600): ) # clean up - logger.debug('Remove zip files') + logger.debug("Remove zip files") Path(fname_7z).unlink() # read geodatabase - logger.debug('convert geodatabase to geojson') + logger.debug("convert geodatabase to geojson") gdf_bofek = gpd.read_file(fname_bofek) # save to geojson - gdf_bofek.to_file(fname_bofek_geojson, driver='GeoJSON') + gdf_bofek.to_file(fname_bofek_geojson, driver="GeoJSON") # remove geodatabase shutil.rmtree(fname_bofek) # read geojson - logger.debug(f'read bofek2020 geojson from {fname_bofek_geojson}') + logger.debug(f"read bofek2020 geojson from {fname_bofek_geojson}") gdf_bofek = gpd.read_file(fname_bofek_geojson) if extent is not None: From 73aeb0b6a7d05ad78a5c1300577c2770f7d59602 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dav=C3=ADd=20Brakenhoff?= Date: Thu, 16 Jan 2025 10:41:40 +0100 Subject: [PATCH 5/5] add download progress bar uncomment test, but skip using pytest because of slowness --- nlmod/read/bofek.py | 26 ++++++++++++++++++++------ tests/test_002_regis_geotop.py | 4 ---- tests/test_005_external_data.py | 13 +++++++------ 3 files changed, 27 insertions(+), 16 deletions(-) diff --git a/nlmod/read/bofek.py b/nlmod/read/bofek.py index 173762ce..df2fffdb 100644 --- a/nlmod/read/bofek.py +++ b/nlmod/read/bofek.py @@ -1,12 +1,13 @@ -import io import logging import shutil import zipfile from pathlib import Path -import requests + import geopandas as gpd -from .. import NLMOD_DATADIR, cache, dims, util +import requests +from tqdm.auto import tqdm +from nlmod import NLMOD_DATADIR, cache, dims, util logger = logging.getLogger(__name__) @@ -50,10 +51,22 @@ def get_gdf_bofek(ds=None, extent=None, timeout=3600): if not fname_bofek_geojson.exists(): # download zip logger.info("Downloading BOFEK2020 GIS data (~35 seconds)") - r = requests.get(bofek_zip_url, timeout=timeout) - + r = requests.get(bofek_zip_url, timeout=timeout, stream=True) + + # show download progress + total_size = int(r.headers.get("content-length", 0)) + block_size = 1024 + with tqdm( + total=total_size, unit="B", unit_scale=True, desc="Downloading BOFEK" + ) as progress_bar: + with open(tmpdir / "bofek.zip", "wb") as file: + for data in r.iter_content(block_size): + progress_bar.update(len(data)) + file.write(data) + + # unpack zips logger.debug("Extracting zipped BOFEK2020 GIS data") - with zipfile.ZipFile(io.BytesIO(r.content)) as z: + with zipfile.ZipFile(tmpdir / "bofek.zip") as z: # extract 7z z.extractall(tmpdir) @@ -66,6 +79,7 @@ def get_gdf_bofek(ds=None, extent=None, timeout=3600): # clean up logger.debug("Remove zip files") + Path(tmpdir / "bofek.zip").unlink() Path(fname_7z).unlink() # read geodatabase diff --git a/tests/test_002_regis_geotop.py b/tests/test_002_regis_geotop.py index dced58ba..94b9f7f7 100644 --- a/tests/test_002_regis_geotop.py +++ b/tests/test_002_regis_geotop.py @@ -3,14 +3,12 @@ import nlmod -# @pytest.mark.skip(reason="too slow") def test_get_regis(extent=[98600.0, 99000.0, 489400.0, 489700.0]): regis_ds = nlmod.read.regis.get_regis(extent) assert regis_ds.sizes["layer"] == 20 -# @pytest.mark.skip(reason="too slow") def test_get_regis_botm_layer_BEk1( extent=[98700.0, 99000.0, 489500.0, 489700.0], botm_layer="MSc", @@ -32,7 +30,6 @@ def test_get_geotop(extent=[98600.0, 99000.0, 489400.0, 489700.0]): nlmod.plot.geotop_lithok_on_map(geotop_ds, z=-20.2, ax=ax) -# @pytest.mark.skip(reason="too slow") def test_get_regis_geotop(extent=[98600.0, 99000.0, 489400.0, 489700.0]): regis_geotop_ds = nlmod.read.regis.get_combined_layer_models( extent, use_regis=True, use_geotop=True @@ -41,7 +38,6 @@ def test_get_regis_geotop(extent=[98600.0, 99000.0, 489400.0, 489700.0]): assert regis_geotop_ds.sizes["layer"] == 24 -# @pytest.mark.skip(reason="too slow") def test_get_regis_geotop_keep_all_layers( extent=[98600.0, 99000.0, 489400.0, 489700.0], ): diff --git a/tests/test_005_external_data.py b/tests/test_005_external_data.py index 5bf2f1e9..f7b8c9d3 100644 --- a/tests/test_005_external_data.py +++ b/tests/test_005_external_data.py @@ -134,11 +134,12 @@ def test_get_brp(): # disable because slow (~35 seconds depending on internet connection) -# def test_get_bofek(): -# # model with sea -# ds = test_001_model.get_ds_from_cache("basic_sea_model") +@pytest.mark.skip(reason="slow") +def test_get_bofek(): + # model with sea + ds = test_001_model.get_ds_from_cache("basic_sea_model") -# # add knmi recharge to the model dataset -# gdf_bofek = nlmod.read.bofek.get_gdf_bofek(ds) + # add knmi recharge to the model dataset + gdf_bofek = nlmod.read.bofek.get_gdf_bofek(ds) -# assert not gdf_bofek.empty, "Bofek geodataframe is empty" + assert not gdf_bofek.empty, "Bofek geodataframe is empty"