From 9b11e131d6306cafc90c75613a17ff5e532050f0 Mon Sep 17 00:00:00 2001 From: Onno Ebbens Date: Wed, 12 Feb 2025 12:05:39 +0100 Subject: [PATCH 01/17] add brt to nlmod.read --- nlmod/read/__init__.py | 1 + nlmod/read/brt.py | 303 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 304 insertions(+) create mode 100644 nlmod/read/brt.py diff --git a/nlmod/read/__init__.py b/nlmod/read/__init__.py index 4f273018..6e4f55f6 100644 --- a/nlmod/read/__init__.py +++ b/nlmod/read/__init__.py @@ -3,6 +3,7 @@ administrative, ahn, bgt, + brt, bofek, bro, brp, diff --git a/nlmod/read/brt.py b/nlmod/read/brt.py new file mode 100644 index 00000000..cd83d53f --- /dev/null +++ b/nlmod/read/brt.py @@ -0,0 +1,303 @@ +import json +import logging +import requests + +import geopandas as gpd +import numpy as np +import pandas as pd + +from io import BytesIO +from shapely.geometry import mapping, shape, Polygon +from zipfile import ZipFile + +from ..util import extent_to_polygon + +logger = logging.getLogger(__name__) + +def get_brt( + extent, + layer="waterdeel_lijn", + crs=28992, + pagesize=1000 +): + """ + Get geometries within an extent or polygon from the Basis Registratie + Topografie (BRT). Some useful links: + https://geoforum.nl/t/pdok-lanceert-de-brt-top10nl-in-ogc-api-s-als-demo/9821/5 + https://api.pdok.nl/brt/top10nl/ogc/v1/api + + Parameters + ---------- + extent : list or tuple of int or float + The extent (xmin, xmax, ymin, ymax) for which shapes are requested. + layer : string, optional + The layer for which shapes are requested. The default is 'waterdeel_lijn'. + crs : int, optional + The coordinate reference system. The default is 28992. + pagesize : int, optional + The number of features that is reqeusted per page. The default is 1000. + + Returns + ------- + gdf : GeoPandas GeoDataFrame or requests.models.Response + A GeoDataFrame containing all geometries and properties. + + """ + + if isinstance(layer, (list, tuple, np.ndarray)): + # recursively call this function for all layers + gdfs = [get_brt(extent=extent, layer=l, crs=crs, pagesize=pagesize) for l in layer] + return pd.concat(gdfs) + + api_url = "https://api.pdok.nl" + url = f"{api_url}/brt/top10nl/ogc/v1/collections/{layer}/items?" + + params = {'bbox':f'{extent[0]},{extent[2]},{extent[1]},{extent[3]}', + 'f':'json', + 'limit':pagesize} + + if crs==28992: + params['crs'] = "http://www.opengis.net/def/crs/EPSG/0/28992" + params['bbox-crs'] = "http://www.opengis.net/def/crs/EPSG/0/28992" + elif crs!=4326: + raise ValueError('invalid crs, please choose between 28992 (RD) or 4326 (WGS84)') + + response = requests.get(url, params=params) + response.raise_for_status() + + gdf = gpd.GeoDataFrame.from_features(response.json()['features']) + + return gdf + + + +def get_brt_v2( + extent, + layer="waterdeel", + cut_by_extent=True, + make_valid=False, + fname=None, + geometry=None, + remove_expired=True, + add_bronhouder_names=True, + timeout=1200, +): + """Get geometries within an extent or polygon from the Basis Registratie + Topografie (BRT). Useful links: + https://api.pdok.nl/brt/top10nl/download/v1_0/ui + + Parameters + ---------- + extent : list or tuple of length 4 or shapely Polygon + The extent (xmin, xmax, ymin, ymax) or polygon for which shapes are + requested. + layer : string or list of strings, optional + The layer(s) for which shapes are requested. When layer is "all", all layers are + requested. The default is "waterdeel". + cut_by_extent : bool, optional + Only return the intersection with the extent if True. The default is + True + make_valid : bool, optional + Make geometries valid by appying a buffer of 0 m when True. The default is + False. + fname : string, optional + Save the zipfile that is received by the request to file. The default + is None, which does not save anything to file. + geometry: string, optional + When geometry is specified, this attribute is used as the geometry of the + resulting GeoDataFrame. Some layers have multiple geometry-attributes. An + example is the layer 'pand', where each buidling (polygon) also contains a + Point-geometry for the label. When geometry is None, the last attribute starting + with the word "geometrie" is used as the geometry. The default is None. + remove_expired: bool, optional + Remove expired items (that contain a value for 'eindRegistratie') when True. The + default is True. + add_bronhouder_names: bool, optional + Add bronhouder in a column called 'bronhouder_name. names when True. The default + is True. + timeout: int optional + The amount of time in seconds to wait for the server to send data before giving + up. The default is 1200 (20 minutes). + + Returns + ------- + gdf : GeoPandas GeoDataFrame or dict of GeoPandas GeoDataFrame + A GeoDataFrame (when only one layer is requested) or a dict of GeoDataFrames + containing all geometries and properties. + """ + + if layer == "all": + layer = get_brt_layers() + if isinstance(layer, str): + layer = [layer] + + api_url = "https://api.pdok.nl" + url = f"{api_url}/brt/top10nl/download/v1_0/full/custom" + body = {"format": "gml", "featuretypes": layer} + + if isinstance(extent, Polygon): + polygon = extent + else: + polygon = extent_to_polygon(extent) + + body["geofilter"] = polygon.wkt + + headers = {"content-type": "application/json"} + + response = requests.post( + url, headers=headers, data=json.dumps(body), timeout=timeout + ) # 20 minutes + + # check api-status, if completed, download + if response.status_code in range(200, 300): + running = True + href = response.json()["_links"]["status"]["href"] + url = f"{api_url}{href}" + + while running: + response = requests.get(url, timeout=timeout) + if response.status_code in range(200, 300): + status = response.json()["status"] + if status == "COMPLETED": + running = False + else: + time.sleep(2) + else: + running = False + else: + msg = f"Download of brt-data failed: {response.text}" + raise (Exception(msg)) + + href = response.json()["_links"]["download"]["href"] + response = requests.get(f"{api_url}{href}", timeout=timeout) + + if fname is not None: + with open(fname, "wb") as file: + file.write(response.content) + + raise NotImplementedError('this part is not yet implemented please use "read_brt"') + + zipfile = BytesIO(response.content) + gdf = read_brt_zipfile( + zipfile, + geometry=geometry, + cut_by_extent=cut_by_extent, + make_valid=make_valid, + extent=polygon, + remove_expired=remove_expired, + add_bronhouder_names=add_bronhouder_names, + ) + + if len(layer) == 1: + gdf = gdf[layer[0]] + + return + + +def get_brt_layers(timeout=1200): + """ + Get the layers in the Basis Registratie Topografie (BRT) + + Parameters + ---------- + timeout: int optional + The amount of time in seconds to wait for the server to send data before giving + up. The default is 1200 (20 minutes). + + Returns + ------- + list + A list with the layer names. + + """ + url = "https://api.pdok.nl/brt/top10nl/ogc/v1/collections/waterdeel_lijn" + resp = requests.get(url, timeout=timeout) + data = resp.json() + return [x["featuretype"] for x in data["timeliness"]] + + +def read_brt_zipfile( + fname, + geometry=None, + files=None, + cut_by_extent=True, + make_valid=False, + extent=None, + remove_expired=True, + add_bronhouder_names=True, +): + """Read data from a zipfile that was downloaded using get_bgt(). + + Parameters + ---------- + fname : string + The filename of the zip-file containing the BGT-data. + geometry : str, optional + DESCRIPTION. The default is None. + files : string of list of strings, optional + The files to read from the zipfile. Read all files when files is None. The + default is None. + cut_by_extent : bool, optional + Cut the geoemetries by the supplied extent. When no extent is supplied, + cut_by_extent is set to False. The default is True. + make_valid : bool, optional + Make geometries valid by appying a buffer of 0 m when True. THe defaults is + False. + extent : list or tuple of length 4 or shapely Polygon + The extent (xmin, xmax, ymin, ymax) or polygon by which the geometries are + clipped. Only used when cut_by_extent is True. The defult is None. + remove_expired: bool, optional + Remove expired items (that contain a value for 'eindRegistratie') when True. The + default is True. + add_bronhouder_names: bool, optional + Add bronhouder in a column called 'bronhouder_name. names when True. The default + is True. + + Returns + ------- + gdf : dict of GeoPandas GeoDataFrame + A dict of GeoDataFrames containing all geometries and properties. + """ + zf = ZipFile(fname) + gdf = {} + if files is None: + files = zf.namelist() + elif isinstance(files, str): + files = [files] + if extent is None: + cut_by_extent = False + else: + if isinstance(extent, Polygon): + polygon = extent + else: + polygon = extent_to_polygon(extent) + for file in files: + key = file[4:-4] + gdf[key] = read_brt_gml(zf.open(file), geometry=geometry) + + if remove_expired and gdf[key] is not None and "eindRegistratie" in gdf[key]: + # remove double features + # by removing features with an eindRegistratie + gdf[key] = gdf[key][gdf[key]["eindRegistratie"].isna()] + + if make_valid and isinstance(gdf[key], gpd.GeoDataFrame): + gdf[key].geometry = gdf[key].geometry.buffer(0.0) + + if cut_by_extent and isinstance(gdf[key], gpd.GeoDataFrame): + gdf[key].geometry = gdf[key].intersection(polygon) + gdf[key] = gdf[key][~gdf[key].is_empty] + + if add_bronhouder_names: + bgt_bronhouder_names = get_bronhouder_names() + for gdf_layer in gdf.values(): + if gdf_layer is None or "bronhouder" not in gdf_layer.columns: + continue + gdf_layer["bronhouder_name"] = gdf_layer["bronhouder"].map( + bgt_bronhouder_names + ) + + return gdf + + +endpoint = ["https://api.pdok.nl/brt/top10nl/download/v1_0/full/custom", + "https://api.pdok.nl/brt/top10nl/ogc/v1"] From 0eab0af8b458095efb3ab3b53e9e7c89b02ba8b3 Mon Sep 17 00:00:00 2001 From: Onno Ebbens Date: Wed, 12 Feb 2025 12:06:19 +0100 Subject: [PATCH 02/17] remove read function similar to bgt --- nlmod/read/brt.py | 235 +--------------------------------------------- 1 file changed, 1 insertion(+), 234 deletions(-) diff --git a/nlmod/read/brt.py b/nlmod/read/brt.py index cd83d53f..df445846 100644 --- a/nlmod/read/brt.py +++ b/nlmod/read/brt.py @@ -67,237 +67,4 @@ def get_brt( gdf = gpd.GeoDataFrame.from_features(response.json()['features']) - return gdf - - - -def get_brt_v2( - extent, - layer="waterdeel", - cut_by_extent=True, - make_valid=False, - fname=None, - geometry=None, - remove_expired=True, - add_bronhouder_names=True, - timeout=1200, -): - """Get geometries within an extent or polygon from the Basis Registratie - Topografie (BRT). Useful links: - https://api.pdok.nl/brt/top10nl/download/v1_0/ui - - Parameters - ---------- - extent : list or tuple of length 4 or shapely Polygon - The extent (xmin, xmax, ymin, ymax) or polygon for which shapes are - requested. - layer : string or list of strings, optional - The layer(s) for which shapes are requested. When layer is "all", all layers are - requested. The default is "waterdeel". - cut_by_extent : bool, optional - Only return the intersection with the extent if True. The default is - True - make_valid : bool, optional - Make geometries valid by appying a buffer of 0 m when True. The default is - False. - fname : string, optional - Save the zipfile that is received by the request to file. The default - is None, which does not save anything to file. - geometry: string, optional - When geometry is specified, this attribute is used as the geometry of the - resulting GeoDataFrame. Some layers have multiple geometry-attributes. An - example is the layer 'pand', where each buidling (polygon) also contains a - Point-geometry for the label. When geometry is None, the last attribute starting - with the word "geometrie" is used as the geometry. The default is None. - remove_expired: bool, optional - Remove expired items (that contain a value for 'eindRegistratie') when True. The - default is True. - add_bronhouder_names: bool, optional - Add bronhouder in a column called 'bronhouder_name. names when True. The default - is True. - timeout: int optional - The amount of time in seconds to wait for the server to send data before giving - up. The default is 1200 (20 minutes). - - Returns - ------- - gdf : GeoPandas GeoDataFrame or dict of GeoPandas GeoDataFrame - A GeoDataFrame (when only one layer is requested) or a dict of GeoDataFrames - containing all geometries and properties. - """ - - if layer == "all": - layer = get_brt_layers() - if isinstance(layer, str): - layer = [layer] - - api_url = "https://api.pdok.nl" - url = f"{api_url}/brt/top10nl/download/v1_0/full/custom" - body = {"format": "gml", "featuretypes": layer} - - if isinstance(extent, Polygon): - polygon = extent - else: - polygon = extent_to_polygon(extent) - - body["geofilter"] = polygon.wkt - - headers = {"content-type": "application/json"} - - response = requests.post( - url, headers=headers, data=json.dumps(body), timeout=timeout - ) # 20 minutes - - # check api-status, if completed, download - if response.status_code in range(200, 300): - running = True - href = response.json()["_links"]["status"]["href"] - url = f"{api_url}{href}" - - while running: - response = requests.get(url, timeout=timeout) - if response.status_code in range(200, 300): - status = response.json()["status"] - if status == "COMPLETED": - running = False - else: - time.sleep(2) - else: - running = False - else: - msg = f"Download of brt-data failed: {response.text}" - raise (Exception(msg)) - - href = response.json()["_links"]["download"]["href"] - response = requests.get(f"{api_url}{href}", timeout=timeout) - - if fname is not None: - with open(fname, "wb") as file: - file.write(response.content) - - raise NotImplementedError('this part is not yet implemented please use "read_brt"') - - zipfile = BytesIO(response.content) - gdf = read_brt_zipfile( - zipfile, - geometry=geometry, - cut_by_extent=cut_by_extent, - make_valid=make_valid, - extent=polygon, - remove_expired=remove_expired, - add_bronhouder_names=add_bronhouder_names, - ) - - if len(layer) == 1: - gdf = gdf[layer[0]] - - return - - -def get_brt_layers(timeout=1200): - """ - Get the layers in the Basis Registratie Topografie (BRT) - - Parameters - ---------- - timeout: int optional - The amount of time in seconds to wait for the server to send data before giving - up. The default is 1200 (20 minutes). - - Returns - ------- - list - A list with the layer names. - - """ - url = "https://api.pdok.nl/brt/top10nl/ogc/v1/collections/waterdeel_lijn" - resp = requests.get(url, timeout=timeout) - data = resp.json() - return [x["featuretype"] for x in data["timeliness"]] - - -def read_brt_zipfile( - fname, - geometry=None, - files=None, - cut_by_extent=True, - make_valid=False, - extent=None, - remove_expired=True, - add_bronhouder_names=True, -): - """Read data from a zipfile that was downloaded using get_bgt(). - - Parameters - ---------- - fname : string - The filename of the zip-file containing the BGT-data. - geometry : str, optional - DESCRIPTION. The default is None. - files : string of list of strings, optional - The files to read from the zipfile. Read all files when files is None. The - default is None. - cut_by_extent : bool, optional - Cut the geoemetries by the supplied extent. When no extent is supplied, - cut_by_extent is set to False. The default is True. - make_valid : bool, optional - Make geometries valid by appying a buffer of 0 m when True. THe defaults is - False. - extent : list or tuple of length 4 or shapely Polygon - The extent (xmin, xmax, ymin, ymax) or polygon by which the geometries are - clipped. Only used when cut_by_extent is True. The defult is None. - remove_expired: bool, optional - Remove expired items (that contain a value for 'eindRegistratie') when True. The - default is True. - add_bronhouder_names: bool, optional - Add bronhouder in a column called 'bronhouder_name. names when True. The default - is True. - - Returns - ------- - gdf : dict of GeoPandas GeoDataFrame - A dict of GeoDataFrames containing all geometries and properties. - """ - zf = ZipFile(fname) - gdf = {} - if files is None: - files = zf.namelist() - elif isinstance(files, str): - files = [files] - if extent is None: - cut_by_extent = False - else: - if isinstance(extent, Polygon): - polygon = extent - else: - polygon = extent_to_polygon(extent) - for file in files: - key = file[4:-4] - gdf[key] = read_brt_gml(zf.open(file), geometry=geometry) - - if remove_expired and gdf[key] is not None and "eindRegistratie" in gdf[key]: - # remove double features - # by removing features with an eindRegistratie - gdf[key] = gdf[key][gdf[key]["eindRegistratie"].isna()] - - if make_valid and isinstance(gdf[key], gpd.GeoDataFrame): - gdf[key].geometry = gdf[key].geometry.buffer(0.0) - - if cut_by_extent and isinstance(gdf[key], gpd.GeoDataFrame): - gdf[key].geometry = gdf[key].intersection(polygon) - gdf[key] = gdf[key][~gdf[key].is_empty] - - if add_bronhouder_names: - bgt_bronhouder_names = get_bronhouder_names() - for gdf_layer in gdf.values(): - if gdf_layer is None or "bronhouder" not in gdf_layer.columns: - continue - gdf_layer["bronhouder_name"] = gdf_layer["bronhouder"].map( - bgt_bronhouder_names - ) - - return gdf - - -endpoint = ["https://api.pdok.nl/brt/top10nl/download/v1_0/full/custom", - "https://api.pdok.nl/brt/top10nl/ogc/v1"] + return gdf \ No newline at end of file From cc302d6d8a210a96bd1d11665293a4bddc31abe0 Mon Sep 17 00:00:00 2001 From: Onno Ebbens Date: Wed, 12 Feb 2025 12:06:32 +0100 Subject: [PATCH 03/17] fix f string in bgt --- nlmod/read/bgt.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nlmod/read/bgt.py b/nlmod/read/bgt.py index 56d00be2..290fef41 100644 --- a/nlmod/read/bgt.py +++ b/nlmod/read/bgt.py @@ -108,7 +108,7 @@ def get_bgt( else: running = False else: - msg = "Download of bgt-data failed: {response.text}" + msg = f"Download of bgt-data failed: {response.text}" raise (Exception(msg)) href = response.json()["_links"]["download"]["href"] From f418bc2d74791810a6cd848498fb1ba11a465b91 Mon Sep 17 00:00:00 2001 From: Onno Ebbens Date: Wed, 12 Feb 2025 12:19:52 +0100 Subject: [PATCH 04/17] minor improvements --- nlmod/read/brt.py | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/nlmod/read/brt.py b/nlmod/read/brt.py index df445846..11b7e845 100644 --- a/nlmod/read/brt.py +++ b/nlmod/read/brt.py @@ -18,11 +18,12 @@ def get_brt( extent, layer="waterdeel_lijn", crs=28992, - pagesize=1000 + limit=1000, + apif='json' ): """ - Get geometries within an extent or polygon from the Basis Registratie - Topografie (BRT). Some useful links: + Get geometries within an extent from the Basis Registratie Topografie (BRT). + Some useful links: https://geoforum.nl/t/pdok-lanceert-de-brt-top10nl-in-ogc-api-s-als-demo/9821/5 https://api.pdok.nl/brt/top10nl/ogc/v1/api @@ -34,27 +35,29 @@ def get_brt( The layer for which shapes are requested. The default is 'waterdeel_lijn'. crs : int, optional The coordinate reference system. The default is 28992. - pagesize : int, optional - The number of features that is reqeusted per page. The default is 1000. + limit : int, optional + The maximum number of features that is requested. The default is 1000. + apif : str, optional + The output format of the api. The default is 'json'. Returns ------- - gdf : GeoPandas GeoDataFrame or requests.models.Response + gdf : GeoPandas GeoDataFrame A GeoDataFrame containing all geometries and properties. """ if isinstance(layer, (list, tuple, np.ndarray)): # recursively call this function for all layers - gdfs = [get_brt(extent=extent, layer=l, crs=crs, pagesize=pagesize) for l in layer] + gdfs = [get_brt(extent=extent, layer=l, crs=crs, limit=limit, apif=apif) for l in layer] return pd.concat(gdfs) api_url = "https://api.pdok.nl" url = f"{api_url}/brt/top10nl/ogc/v1/collections/{layer}/items?" params = {'bbox':f'{extent[0]},{extent[2]},{extent[1]},{extent[3]}', - 'f':'json', - 'limit':pagesize} + 'f':apif, + 'limit':limit} if crs==28992: params['crs'] = "http://www.opengis.net/def/crs/EPSG/0/28992" @@ -67,4 +70,8 @@ def get_brt( gdf = gpd.GeoDataFrame.from_features(response.json()['features']) + if gdf.shape[0] == limit: + msg = f'the number of features in your extent is probably higher than {limit}, consider increasing the "limit" argument in "get_brt"' + logger.warning(msg) + return gdf \ No newline at end of file From d10d145b832335240d8e273001c55467b8d62fc2 Mon Sep 17 00:00:00 2001 From: Onno Ebbens Date: Wed, 12 Feb 2025 12:20:28 +0100 Subject: [PATCH 05/17] add test --- tests/test_030_brt.py | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 tests/test_030_brt.py diff --git a/tests/test_030_brt.py b/tests/test_030_brt.py new file mode 100644 index 00000000..a949b1a2 --- /dev/null +++ b/tests/test_030_brt.py @@ -0,0 +1,5 @@ +import nlmod + +def test_brt(): + extent = [119900, 120000, 440000, 440100] + brt = nlmod.read.brt.get_brt_v2(extent) \ No newline at end of file From 8fc947f1d5382e08d7ddfb80a6267a171ddda815 Mon Sep 17 00:00:00 2001 From: Onno Ebbens Date: Wed, 12 Feb 2025 12:24:57 +0100 Subject: [PATCH 06/17] Update test --- tests/test_030_brt.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_030_brt.py b/tests/test_030_brt.py index a949b1a2..3fc8400f 100644 --- a/tests/test_030_brt.py +++ b/tests/test_030_brt.py @@ -1,5 +1,8 @@ import nlmod +import geopandas as gpd def test_brt(): extent = [119900, 120000, 440000, 440100] - brt = nlmod.read.brt.get_brt_v2(extent) \ No newline at end of file + brt = nlmod.read.brt.get_brt(extent) + + assert isinstance(brt, gpd.GeoDataFrame) \ No newline at end of file From d09cbe6126284afd46b0f655d60310e0bc8d626a Mon Sep 17 00:00:00 2001 From: Onno Ebbens Date: Wed, 12 Feb 2025 12:35:58 +0100 Subject: [PATCH 07/17] remove obsolete dependencies --- nlmod/read/brt.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/nlmod/read/brt.py b/nlmod/read/brt.py index 11b7e845..4d0b319d 100644 --- a/nlmod/read/brt.py +++ b/nlmod/read/brt.py @@ -1,4 +1,3 @@ -import json import logging import requests @@ -6,12 +5,6 @@ import numpy as np import pandas as pd -from io import BytesIO -from shapely.geometry import mapping, shape, Polygon -from zipfile import ZipFile - -from ..util import extent_to_polygon - logger = logging.getLogger(__name__) def get_brt( From 71512c0b0520fe745866f1393bbb8e067cd9d443 Mon Sep 17 00:00:00 2001 From: Onno Ebbens Date: Wed, 12 Feb 2025 12:48:11 +0100 Subject: [PATCH 08/17] ruff --- nlmod/read/brt.py | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/nlmod/read/brt.py b/nlmod/read/brt.py index 4d0b319d..eccd4f45 100644 --- a/nlmod/read/brt.py +++ b/nlmod/read/brt.py @@ -1,19 +1,14 @@ import logging -import requests import geopandas as gpd import numpy as np import pandas as pd +import requests logger = logging.getLogger(__name__) -def get_brt( - extent, - layer="waterdeel_lijn", - crs=28992, - limit=1000, - apif='json' -): + +def get_brt(extent, layer="waterdeel_lijn", crs=28992, limit=1000, apif="json"): """ Get geometries within an extent from the Basis Registratie Topografie (BRT). Some useful links: @@ -39,32 +34,38 @@ def get_brt( A GeoDataFrame containing all geometries and properties. """ - if isinstance(layer, (list, tuple, np.ndarray)): # recursively call this function for all layers - gdfs = [get_brt(extent=extent, layer=l, crs=crs, limit=limit, apif=apif) for l in layer] + gdfs = [ + get_brt(extent=extent, layer=l, crs=crs, limit=limit, apif=apif) + for l in layer + ] return pd.concat(gdfs) api_url = "https://api.pdok.nl" url = f"{api_url}/brt/top10nl/ogc/v1/collections/{layer}/items?" - params = {'bbox':f'{extent[0]},{extent[2]},{extent[1]},{extent[3]}', - 'f':apif, - 'limit':limit} + params = { + "bbox": f"{extent[0]},{extent[2]},{extent[1]},{extent[3]}", + "f": apif, + "limit": limit, + } - if crs==28992: - params['crs'] = "http://www.opengis.net/def/crs/EPSG/0/28992" - params['bbox-crs'] = "http://www.opengis.net/def/crs/EPSG/0/28992" - elif crs!=4326: - raise ValueError('invalid crs, please choose between 28992 (RD) or 4326 (WGS84)') + if crs == 28992: + params["crs"] = "http://www.opengis.net/def/crs/EPSG/0/28992" + params["bbox-crs"] = "http://www.opengis.net/def/crs/EPSG/0/28992" + elif crs != 4326: + raise ValueError( + "invalid crs, please choose between 28992 (RD) or 4326 (WGS84)" + ) response = requests.get(url, params=params) response.raise_for_status() - gdf = gpd.GeoDataFrame.from_features(response.json()['features']) + gdf = gpd.GeoDataFrame.from_features(response.json()["features"]) if gdf.shape[0] == limit: msg = f'the number of features in your extent is probably higher than {limit}, consider increasing the "limit" argument in "get_brt"' logger.warning(msg) - return gdf \ No newline at end of file + return gdf From 8ca87f5b2aa2781ce3f476c6d9888edff54b585d Mon Sep 17 00:00:00 2001 From: Onno Ebbens Date: Wed, 12 Feb 2025 12:54:46 +0100 Subject: [PATCH 09/17] codacy --- nlmod/read/brt.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/nlmod/read/brt.py b/nlmod/read/brt.py index eccd4f45..45f4341e 100644 --- a/nlmod/read/brt.py +++ b/nlmod/read/brt.py @@ -8,7 +8,7 @@ logger = logging.getLogger(__name__) -def get_brt(extent, layer="waterdeel_lijn", crs=28992, limit=1000, apif="json"): +def get_brt(extent, layer="waterdeel_lijn", crs=28992, limit=1000, apif="json", timeout=1200): """ Get geometries within an extent from the Basis Registratie Topografie (BRT). Some useful links: @@ -27,6 +27,9 @@ def get_brt(extent, layer="waterdeel_lijn", crs=28992, limit=1000, apif="json"): The maximum number of features that is requested. The default is 1000. apif : str, optional The output format of the api. The default is 'json'. + timeout: int optional + The amount of time in seconds to wait for the server to send data before giving + up. The default is 1200 (20 minutes). Returns ------- @@ -37,8 +40,8 @@ def get_brt(extent, layer="waterdeel_lijn", crs=28992, limit=1000, apif="json"): if isinstance(layer, (list, tuple, np.ndarray)): # recursively call this function for all layers gdfs = [ - get_brt(extent=extent, layer=l, crs=crs, limit=limit, apif=apif) - for l in layer + get_brt(extent=extent, layer=lay, crs=crs, limit=limit, apif=apif, timeout=timeout) + for lay in layer ] return pd.concat(gdfs) @@ -59,7 +62,7 @@ def get_brt(extent, layer="waterdeel_lijn", crs=28992, limit=1000, apif="json"): "invalid crs, please choose between 28992 (RD) or 4326 (WGS84)" ) - response = requests.get(url, params=params) + response = requests.get(url, params=params, timeout=timeout) response.raise_for_status() gdf = gpd.GeoDataFrame.from_features(response.json()["features"]) From 865fb17d70edc491918352ef83bc25e47bb83ef0 Mon Sep 17 00:00:00 2001 From: Onno Ebbens Date: Fri, 14 Feb 2025 09:18:23 +0100 Subject: [PATCH 10/17] different way of retrieving BRT, similar to BGT --- nlmod/read/brt.py | 278 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 278 insertions(+) diff --git a/nlmod/read/brt.py b/nlmod/read/brt.py index 45f4341e..4fcfd0cc 100644 --- a/nlmod/read/brt.py +++ b/nlmod/read/brt.py @@ -1,10 +1,19 @@ +import json import logging +import time import geopandas as gpd import numpy as np import pandas as pd import requests +from io import BytesIO +from shapely.geometry import mapping, shape, Polygon, LineString +from xml.etree import ElementTree +from zipfile import ZipFile + +from ..util import extent_to_polygon + logger = logging.getLogger(__name__) @@ -72,3 +81,272 @@ def get_brt(extent, layer="waterdeel_lijn", crs=28992, limit=1000, apif="json", logger.warning(msg) return gdf + + + +def get_brt_v2( + extent, + layer="waterdeel", + cut_by_extent=True, + make_valid=False, + fname=None, + remove_expired=True, + add_bronhouder_names=True, + timeout=1200, +): + """Get geometries within an extent or polygon from the Basis Registratie + Topografie (BRT). Useful links: + https://api.pdok.nl/brt/top10nl/download/v1_0/ui + Parameters + ---------- + extent : list or tuple of length 4 or shapely Polygon + The extent (xmin, xmax, ymin, ymax) or polygon for which shapes are + requested. + layer : string or list of strings, optional + The layer(s) for which shapes are requested. When layer is "all", all layers are + requested. The default is "waterdeel". + cut_by_extent : bool, optional + Only return the intersection with the extent if True. The default is + True + make_valid : bool, optional + Make geometries valid by appying a buffer of 0 m when True. The default is + False. + fname : string, optional + Save the zipfile that is received by the request to file. The default + is None, which does not save anything to file. + remove_expired: bool, optional + Remove expired items (that contain a value for 'eindRegistratie') when True. The + default is True. + add_bronhouder_names: bool, optional + Add bronhouder in a column called 'bronhouder_name. names when True. The default + is True. + timeout: int optional + The amount of time in seconds to wait for the server to send data before giving + up. The default is 1200 (20 minutes). + Returns + ------- + gdf : GeoPandas GeoDataFrame or dict of GeoPandas GeoDataFrame + A GeoDataFrame (when only one layer is requested) or a dict of GeoDataFrames + containing all geometries and properties. + """ + + if layer == "all": + layer = get_brt_layers() + if isinstance(layer, str): + layer = [layer] + + api_url = "https://api.pdok.nl" + url = f"{api_url}/brt/top10nl/download/v1_0/full/custom" + body = {"format": "gml", "featuretypes": layer} + + if isinstance(extent, Polygon): + polygon = extent + else: + polygon = extent_to_polygon(extent) + + body["geofilter"] = polygon.wkt + + headers = {"content-type": "application/json"} + + response = requests.post( + url, headers=headers, data=json.dumps(body), timeout=timeout + ) # 20 minutes + + # check api-status, if completed, download + if response.status_code in range(200, 300): + running = True + href = response.json()["_links"]["status"]["href"] + url = f"{api_url}{href}" + + while running: + response = requests.get(url, timeout=timeout) + if response.status_code in range(200, 300): + status = response.json()["status"] + if status == "COMPLETED": + running = False + else: + time.sleep(2) + else: + running = False + else: + msg = f"Download of brt-data failed: {response.text}" + raise (Exception(msg)) + + href = response.json()["_links"]["download"]["href"] + response = requests.get(f"{api_url}{href}", timeout=timeout) + + if fname is not None: + with open(fname, "wb") as file: + file.write(response.content) + + # raise NotImplementedError('this part is not yet implemented please use "read_brt"') + + zipfile = BytesIO(response.content) + gdf_dic = read_brt_zipfile( + zipfile, + cut_by_extent=cut_by_extent, + make_valid=make_valid, + extent=polygon, + remove_expired=remove_expired, + add_bronhouder_names=add_bronhouder_names, + ) + + if len(layer) == 1: + gdf_dic = gdf_dic[layer[0]] + + return gdf_dic + + +def get_brt_layers(timeout=1200): + """ + Get the layers in the Basis Registratie Topografie (BRT) + Parameters + ---------- + timeout: int optional + The amount of time in seconds to wait for the server to send data before giving + up. The default is 1200 (20 minutes). + Returns + ------- + list + A list with the layer names. + """ + url = "https://api.pdok.nl/brt/top10nl/ogc/v1/collections/waterdeel_lijn" + resp = requests.get(url, timeout=timeout) + data = resp.json() + return [x["featuretype"] for x in data["timeliness"]] + + +def read_brt_zipfile( + fname, + files=None, + cut_by_extent=True, + make_valid=False, + extent=None, + remove_expired=True, + add_bronhouder_names=True, +): + """Read data from a zipfile that was downloaded using get_bgt(). + Parameters + ---------- + fname : string + The filename of the zip-file containing the BGT-data. + files : string of list of strings, optional + The files to read from the zipfile. Read all files when files is None. The + default is None. + cut_by_extent : bool, optional + Cut the geoemetries by the supplied extent. When no extent is supplied, + cut_by_extent is set to False. The default is True. + make_valid : bool, optional + Make geometries valid by appying a buffer of 0 m when True. THe defaults is + False. + extent : list or tuple of length 4 or shapely Polygon + The extent (xmin, xmax, ymin, ymax) or polygon by which the geometries are + clipped. Only used when cut_by_extent is True. The defult is None. + remove_expired: bool, optional + Remove expired items (that contain a value for 'eindRegistratie') when True. The + default is True. + + Returns + ------- + gdf : dict of GeoPandas GeoDataFrame + A dict of GeoDataFrames containing all geometries and properties. + """ + zf = ZipFile(fname) + gdf_dic = {} + if files is None: + files = zf.namelist() + elif isinstance(files, str): + files = [files] + if extent is None: + cut_by_extent = False + else: + if isinstance(extent, Polygon): + polygon = extent + else: + polygon = extent_to_polygon(extent) + for file in files: + key = file[8:-4] + gdf_dic[key] = read_brt_gml(zf.open(file)) + + if remove_expired and gdf_dic[key] is not None and "eindRegistratie" in gdf_dic[key]: + # remove double features + # by removing features with an eindRegistratie + gdf_dic[key] = gdf_dic[key][gdf_dic[key]["eindRegistratie"].isna()] + + if make_valid and isinstance(gdf_dic[key], gpd.GeoDataFrame): + gdf_dic[key].geometry = gdf_dic[key].geometry.buffer(0.0) + + if cut_by_extent and isinstance(gdf_dic[key], gpd.GeoDataFrame): + gdf_dic[key].geometry = gdf_dic[key].intersection(polygon) + gdf_dic[key] = gdf_dic[key][~gdf_dic[key].is_empty] + + return gdf_dic + + + +def read_brt_gml(fname, crs="epsg:28992"): + def get_xy(text): + xy = [float(val) for val in text.split()] + xy = np.array(xy).reshape(int(len(xy) / 2), 2) + return xy + + def get_ring_xy(exterior): + assert len(exterior) == 1 + if exterior[0].tag.rpartition('}')[-1] == "LinearRing": + lr = exterior.find("gml:LinearRing",ns) + xy = get_xy(lr.find("gml:posList",ns).text) + else: + raise Exception(f"Unknown exterior type: {exterior[0].tag}") + return xy + + def read_polygon(polygon): + exterior = polygon.find("gml:exterior",ns) + shell = get_ring_xy(exterior) + holes = [] + for interior in polygon.findall("gml:interior",ns): + holes.append(get_ring_xy(interior)) + return shell, holes + + def read_linestring(linestring): + return get_xy(linestring.find("gml:posList",ns).text) + + ns = {'top10nl': "http://register.geostandaarden.nl/gmlapplicatieschema/top10nl/1.2.0", + 'brt': "http://register.geostandaarden.nl/gmlapplicatieschema/brt-algemeen/1.2.0", + 'gml': "http://www.opengis.net/gml/3.2"} + tree = ElementTree.parse(fname) + + data = [] + for com in tree.findall("top10nl:FeatureMember", ns): + assert len(com) == 1 + bp = com[0] + d = {} + for key, name in bp.attrib.items(): + d[key.rpartition('}')[-1]] = name + + for child in bp: + tag = child.tag.rpartition('}')[-1] + if tag == 'geometrie': + geom = child.find('brt:BRTVlakLijnOfPunt',ns) + assert len(geom) == 1 + for child in geom: + tag = child.tag.rpartition('}')[-1] + if tag =='lijnGeometrie': + d['geometry'] = LineString(read_linestring(child[0])) + elif tag =='vlakGeometrie': + d['geometry'] = Polygon(*read_polygon(child[0])) + else: + raise RuntimeError('unexpected geometry type') + elif tag == 'identificatie': + loc_id = child.find(f"brt:NEN3610ID",ns).find(f"brt:lokaalID",ns) + d['lokaalID'] = loc_id.text + else: + d[tag] = child.text + data.append(d) + + if len(data) > 0: + return gpd.GeoDataFrame(data, geometry='geometry', crs=crs) + else: + return None + +# endpoint = ["https://api.pdok.nl/brt/top10nl/download/v1_0/full/custom", +# "https://api.pdok.nl/brt/top10nl/ogc/v1"] \ No newline at end of file From 63b1bba3eb747db140707cfe3191dfc8482521b1 Mon Sep 17 00:00:00 2001 From: Onno Ebbens Date: Mon, 17 Feb 2025 13:42:47 +0100 Subject: [PATCH 11/17] Make sure all geometries can be read --- nlmod/read/brt.py | 479 ++++++++++++++++++++++++++++-------------- tests/test_030_brt.py | 4 +- 2 files changed, 320 insertions(+), 163 deletions(-) diff --git a/nlmod/read/brt.py b/nlmod/read/brt.py index 4fcfd0cc..7f02637f 100644 --- a/nlmod/read/brt.py +++ b/nlmod/read/brt.py @@ -8,7 +8,7 @@ import requests from io import BytesIO -from shapely.geometry import mapping, shape, Polygon, LineString +from shapely.geometry import Point, Polygon, LineString, MultiPolygon from xml.etree import ElementTree from zipfile import ZipFile @@ -16,87 +16,23 @@ logger = logging.getLogger(__name__) +NS = {'top10nl': "http://register.geostandaarden.nl/gmlapplicatieschema/top10nl/1.2.0", + 'brt': "http://register.geostandaarden.nl/gmlapplicatieschema/brt-algemeen/1.2.0", + 'gml': "http://www.opengis.net/gml/3.2"} -def get_brt(extent, layer="waterdeel_lijn", crs=28992, limit=1000, apif="json", timeout=1200): - """ - Get geometries within an extent from the Basis Registratie Topografie (BRT). - Some useful links: - https://geoforum.nl/t/pdok-lanceert-de-brt-top10nl-in-ogc-api-s-als-demo/9821/5 - https://api.pdok.nl/brt/top10nl/ogc/v1/api - - Parameters - ---------- - extent : list or tuple of int or float - The extent (xmin, xmax, ymin, ymax) for which shapes are requested. - layer : string, optional - The layer for which shapes are requested. The default is 'waterdeel_lijn'. - crs : int, optional - The coordinate reference system. The default is 28992. - limit : int, optional - The maximum number of features that is requested. The default is 1000. - apif : str, optional - The output format of the api. The default is 'json'. - timeout: int optional - The amount of time in seconds to wait for the server to send data before giving - up. The default is 1200 (20 minutes). - - Returns - ------- - gdf : GeoPandas GeoDataFrame - A GeoDataFrame containing all geometries and properties. - - """ - if isinstance(layer, (list, tuple, np.ndarray)): - # recursively call this function for all layers - gdfs = [ - get_brt(extent=extent, layer=lay, crs=crs, limit=limit, apif=apif, timeout=timeout) - for lay in layer - ] - return pd.concat(gdfs) - - api_url = "https://api.pdok.nl" - url = f"{api_url}/brt/top10nl/ogc/v1/collections/{layer}/items?" - - params = { - "bbox": f"{extent[0]},{extent[2]},{extent[1]},{extent[3]}", - "f": apif, - "limit": limit, - } - - if crs == 28992: - params["crs"] = "http://www.opengis.net/def/crs/EPSG/0/28992" - params["bbox-crs"] = "http://www.opengis.net/def/crs/EPSG/0/28992" - elif crs != 4326: - raise ValueError( - "invalid crs, please choose between 28992 (RD) or 4326 (WGS84)" - ) - - response = requests.get(url, params=params, timeout=timeout) - response.raise_for_status() - - gdf = gpd.GeoDataFrame.from_features(response.json()["features"]) - - if gdf.shape[0] == limit: - msg = f'the number of features in your extent is probably higher than {limit}, consider increasing the "limit" argument in "get_brt"' - logger.warning(msg) - - return gdf - - - -def get_brt_v2( +def get_brt( extent, layer="waterdeel", cut_by_extent=True, make_valid=False, fname=None, - remove_expired=True, - add_bronhouder_names=True, - timeout=1200, + geometry=None, + timeout=1200 ): """Get geometries within an extent or polygon from the Basis Registratie Topografie (BRT). Useful links: https://api.pdok.nl/brt/top10nl/download/v1_0/ui + Parameters ---------- extent : list or tuple of length 4 or shapely Polygon @@ -114,15 +50,16 @@ def get_brt_v2( fname : string, optional Save the zipfile that is received by the request to file. The default is None, which does not save anything to file. - remove_expired: bool, optional - Remove expired items (that contain a value for 'eindRegistratie') when True. The - default is True. - add_bronhouder_names: bool, optional - Add bronhouder in a column called 'bronhouder_name. names when True. The default - is True. + geometry: string, optional + When geometry is specified, this attribute is used as the geometry of the + resulting GeoDataFrame. Some layers have multiple geometry-attributes. An + example is the layer 'wegdeel', where each feature (point) has a hartGeometry and a + hoofdGeometry. When geometry is None, the last tag in the xml ending + with the word "geometrie" or "Geometrie" is used as the geometry. The default is None. timeout: int optional The amount of time in seconds to wait for the server to send data before giving up. The default is 1200 (20 minutes). + Returns ------- gdf : GeoPandas GeoDataFrame or dict of GeoPandas GeoDataFrame @@ -130,7 +67,7 @@ def get_brt_v2( containing all geometries and properties. """ - if layer == "all": + if layer.lower() == "all": layer = get_brt_layers() if isinstance(layer, str): layer = [layer] @@ -150,7 +87,7 @@ def get_brt_v2( response = requests.post( url, headers=headers, data=json.dumps(body), timeout=timeout - ) # 20 minutes + ) # check api-status, if completed, download if response.status_code in range(200, 300): @@ -179,16 +116,13 @@ def get_brt_v2( with open(fname, "wb") as file: file.write(response.content) - # raise NotImplementedError('this part is not yet implemented please use "read_brt"') - zipfile = BytesIO(response.content) gdf_dic = read_brt_zipfile( zipfile, + geometry=geometry, cut_by_extent=cut_by_extent, make_valid=make_valid, extent=polygon, - remove_expired=remove_expired, - add_bronhouder_names=add_bronhouder_names, ) if len(layer) == 1: @@ -205,12 +139,13 @@ def get_brt_layers(timeout=1200): timeout: int optional The amount of time in seconds to wait for the server to send data before giving up. The default is 1200 (20 minutes). + Returns ------- list A list with the layer names. """ - url = "https://api.pdok.nl/brt/top10nl/ogc/v1/collections/waterdeel_lijn" + url = "https://api.pdok.nl/brt/top10nl/download/v1_0/dataset" resp = requests.get(url, timeout=timeout) data = resp.json() return [x["featuretype"] for x in data["timeliness"]] @@ -218,18 +153,23 @@ def get_brt_layers(timeout=1200): def read_brt_zipfile( fname, + geometry=None, files=None, cut_by_extent=True, make_valid=False, - extent=None, - remove_expired=True, - add_bronhouder_names=True, + extent=None ): """Read data from a zipfile that was downloaded using get_bgt(). Parameters ---------- fname : string The filename of the zip-file containing the BGT-data. + geometry: string, optional + When geometry is specified, this attribute is used as the geometry of the + resulting GeoDataFrame. Some layers have multiple geometry-attributes. An + example is the layer 'wegdeel', where each feature (point) has a hartGeometry and a + hoofdGeometry. When geometry is None, the last tag in the xml ending + with the word "geometrie" of "Geometrie" is used as the geometry. The default is None. files : string of list of strings, optional The files to read from the zipfile. Read all files when files is None. The default is None. @@ -242,13 +182,10 @@ def read_brt_zipfile( extent : list or tuple of length 4 or shapely Polygon The extent (xmin, xmax, ymin, ymax) or polygon by which the geometries are clipped. Only used when cut_by_extent is True. The defult is None. - remove_expired: bool, optional - Remove expired items (that contain a value for 'eindRegistratie') when True. The - default is True. Returns ------- - gdf : dict of GeoPandas GeoDataFrame + gdf_dic : dict of GeoPandas GeoDataFrame A dict of GeoDataFrames containing all geometries and properties. """ zf = ZipFile(fname) @@ -265,88 +202,306 @@ def read_brt_zipfile( else: polygon = extent_to_polygon(extent) for file in files: - key = file[8:-4] - gdf_dic[key] = read_brt_gml(zf.open(file)) - - if remove_expired and gdf_dic[key] is not None and "eindRegistratie" in gdf_dic[key]: - # remove double features - # by removing features with an eindRegistratie - gdf_dic[key] = gdf_dic[key][gdf_dic[key]["eindRegistratie"].isna()] + lay = file[8:-4] + if lay == 'relief': + logger.warning('Cannot read relief data, not implemented yet') + continue + + logger.debug(f'reading brt file {file}') + gdf_dic[lay] = read_brt_gml(zf.open(file), lay=lay, geometry=geometry) - if make_valid and isinstance(gdf_dic[key], gpd.GeoDataFrame): - gdf_dic[key].geometry = gdf_dic[key].geometry.buffer(0.0) + if make_valid and isinstance(gdf_dic[lay], gpd.GeoDataFrame): + gdf_dic[lay].geometry = gdf_dic[lay].geometry.buffer(0.0) - if cut_by_extent and isinstance(gdf_dic[key], gpd.GeoDataFrame): - gdf_dic[key].geometry = gdf_dic[key].intersection(polygon) - gdf_dic[key] = gdf_dic[key][~gdf_dic[key].is_empty] + if cut_by_extent and isinstance(gdf_dic[lay], gpd.GeoDataFrame): + gdf_dic[lay].geometry = gdf_dic[lay].intersection(polygon) + gdf_dic[lay] = gdf_dic[lay][~gdf_dic[lay].is_empty] return gdf_dic -def read_brt_gml(fname, crs="epsg:28992"): - def get_xy(text): - xy = [float(val) for val in text.split()] - xy = np.array(xy).reshape(int(len(xy) / 2), 2) - return xy +def read_brt_gml(fname, lay='waterdeel', geometry=None, crs="epsg:28992"): + """read an xml file with features from the BRT - def get_ring_xy(exterior): - assert len(exterior) == 1 - if exterior[0].tag.rpartition('}')[-1] == "LinearRing": - lr = exterior.find("gml:LinearRing",ns) - xy = get_xy(lr.find("gml:posList",ns).text) - else: - raise Exception(f"Unknown exterior type: {exterior[0].tag}") - return xy - - def read_polygon(polygon): - exterior = polygon.find("gml:exterior",ns) - shell = get_ring_xy(exterior) - holes = [] - for interior in polygon.findall("gml:interior",ns): - holes.append(get_ring_xy(interior)) - return shell, holes - - def read_linestring(linestring): - return get_xy(linestring.find("gml:posList",ns).text) - - ns = {'top10nl': "http://register.geostandaarden.nl/gmlapplicatieschema/top10nl/1.2.0", - 'brt': "http://register.geostandaarden.nl/gmlapplicatieschema/brt-algemeen/1.2.0", - 'gml': "http://www.opengis.net/gml/3.2"} - tree = ElementTree.parse(fname) + Parameters + ---------- + fname : str + filename + lay : str, optional + The layer to read. The default is "waterdeel". + geometry: string, optional + When geometry is specified, this attribute is used as the geometry of the + resulting GeoDataFrame. Some layers have multiple geometry-attributes. An + example is the layer 'wegdeel', where each feature (point) has a hartGeometry and a + hoofdGeometry. When geometry is None, the last tag in the xml ending + with the word "geometrie" of "Geometrie" is used as the geometry. The default is None. + crs : str, optional + coordinate reference system, by default "epsg:28992" - data = [] - for com in tree.findall("top10nl:FeatureMember", ns): - assert len(com) == 1 - bp = com[0] - d = {} - for key, name in bp.attrib.items(): - d[key.rpartition('}')[-1]] = name - - for child in bp: - tag = child.tag.rpartition('}')[-1] - if tag == 'geometrie': - geom = child.find('brt:BRTVlakLijnOfPunt',ns) - assert len(geom) == 1 - for child in geom: - tag = child.tag.rpartition('}')[-1] - if tag =='lijnGeometrie': - d['geometry'] = LineString(read_linestring(child[0])) - elif tag =='vlakGeometrie': - d['geometry'] = Polygon(*read_polygon(child[0])) - else: - raise RuntimeError('unexpected geometry type') - elif tag == 'identificatie': - loc_id = child.find(f"brt:NEN3610ID",ns).find(f"brt:lokaalID",ns) - d['lokaalID'] = loc_id.text - else: - d[tag] = child.text - data.append(d) + Returns + ------- + GeoDataFrame or None + with BRT feature data + """ + + tree = ElementTree.parse(fname) + data = [_read_single_brt_feature(com, lay=lay, geometry=geometry) for com in tree.findall("top10nl:FeatureMember",NS)] + if len(data) > 0: return gpd.GeoDataFrame(data, geometry='geometry', crs=crs) else: return None -# endpoint = ["https://api.pdok.nl/brt/top10nl/download/v1_0/full/custom", -# "https://api.pdok.nl/brt/top10nl/ogc/v1"] \ No newline at end of file +def _read_single_brt_feature(com, lay='waterdeel', geometry=None): + """read a single feature from an xml with multiple features + + Parameters + ---------- + com : Element + xml reference to a feature of the BRT + lay : str, optional + The layer to read. The default is "waterdeel". + geometry: string, optional + When geometry is specified, this attribute is used as the geometry of the + resulting GeoDataFrame. Some layers have multiple geometry-attributes. An + example is the layer 'wegdeel', where each feature (point) has a hartGeometry and a + hoofdGeometry. When geometry is None, the last tag in the xml ending + with the word "geometrie" of "Geometrie" is used as the geometry. The default is None. + + + Returns + ------- + dict + dictionary with data from a single feature + + Raises + ------ + RuntimeError + if the feature is not a 'lijn' or 'vlak' geometry + """ + if geometry is None: + geometry='geometrie' + else: + geometry = geometry.lower() # make sure geometry is lower case + + assert len(com) == 1 + bp = com[0] + d = {} + for key, name in bp.attrib.items(): + d[key.rpartition('}')[-1]] = name + + for child in bp: + tag = child.tag.rpartition('}')[-1] + if tag.lower().endswith('geometrie'): + geom = child[0] + d['geometry'] = _read_geometry(geom) + elif tag == 'geometrieVlak' and lay=='terrein': + d['geometry'] = _read_geometry(child) + elif tag == 'identificatie': + loc_id = child.find("brt:NEN3610ID",NS).find("brt:lokaalID",NS) + d['lokaalID'] = loc_id.text + else: + d[tag] = child.text + + return d + +def _read_geometry(geom): + """read geometry from a geometry xml tag + + Parameters + ---------- + geom : Element + xml reference to a geometry tag in the BRT + + Returns + ------- + shapely.Geometry or None + """ + + assert len(geom) == 1 + feature_geom = geom[0] + + geom_tag = feature_geom.tag.rpartition('}')[-1] + if geom_tag =='lijnGeometrie': + return LineString(_read_linestring(feature_geom[0])) + elif geom_tag == 'puntGeometrie': + return Point(_read_point(feature_geom[0])) + elif geom_tag =='vlakGeometrie': + return Polygon(*_read_polygon(feature_geom[0])) + elif geom_tag =='hartGeometrie': + return Polygon(*_read_polygon(feature_geom[0])) + elif geom_tag =='Polygon': + return Polygon(*_read_polygon(feature_geom)) + elif geom_tag == 'multivlakGeometrie': + ms = feature_geom.find("gml:MultiSurface",NS) + polygons = [] + for sm in ms: + assert len(sm) == 1 + polygon = sm.find("gml:Polygon",NS) + polygons.append(_read_polygon(polygon)) + return MultiPolygon(polygons) + else: + logger.warning(f'cannot read geometry type {geom_tag}, skipping these features') + return None + +def _get_xy(text): + """get x and y coordinates from a tag text + + Parameters + ---------- + text : str + x and y values in str format + + Returns + ------- + np.ndarray + x and y coordinates + """ + xy = [float(val) for val in text.split()] + xy = np.array(xy).reshape(int(len(xy) / 2), 2) + return xy + +def _get_ring_xy(exterior): + """get x and y coordinates for a LinearRing + + Parameters + ---------- + exterior : Element + xml reference to an exterior tag of the BRT + + Returns + ------- + np.ndarray + x and y coordinates + + Raises + ------ + Exception + for unknown exterior types + """ + assert len(exterior) == 1 + if exterior[0].tag.rpartition('}')[-1] == "LinearRing": + lr = exterior.find("gml:LinearRing",NS) + xy = _get_xy(lr.find("gml:posList",NS).text) + else: + raise Exception(f"Unknown exterior type: {exterior[0].tag}") + return xy + +def _read_polygon(polygon): + """read polygon geometry from an xml tag + + Parameters + ---------- + polygon : Element + xml reference to a polygon tag. + + Returns + ------- + shell, holes + polygon definition + """ + exterior = polygon.find("gml:exterior",NS) + shell = _get_ring_xy(exterior) + holes = [] + for interior in polygon.findall("gml:interior",NS): + holes.append(_get_ring_xy(interior)) + return shell, holes + +def _read_linestring(linestring): + """read linestring geometry from an xml tag + + Parameters + ---------- + linestring : Element + xml reference to a linestring tag. + + Returns + ------- + np.ndarray + x and y coordinates + """ + return _get_xy(linestring.find("gml:posList",NS).text) + +def _read_point(point): + """read point geometry from an xml tag + + Parameters + ---------- + point : Element + xml reference to a point tag. + + Returns + ------- + list + x and y coordinates + """ + xy = [float(x) for x in point.find("gml:pos",NS).text.split()] + return xy + +def get_brt_ogc(extent, layer="waterdeel_lijn", crs=28992, limit=1000, + apif="json", timeout=1200): + """ + Get geometries within an extent from the Basis Registratie Topografie (BRT). + Some useful links: + https://geoforum.nl/t/pdok-lanceert-de-brt-top10nl-in-ogc-api-s-als-demo/9821/5 + https://api.pdok.nl/brt/top10nl/ogc/v1/api + + Parameters + ---------- + extent : list or tuple of int or float + The extent (xmin, xmax, ymin, ymax) for which shapes are requested. + layer : string, optional + The layer for which shapes are requested. The default is 'waterdeel_lijn'. + crs : int, optional + The coordinate reference system. The default is 28992. + limit : int, optional + The maximum number of features that is requested. The default is 1000. This api does not seem to work properly when there are more than 1000 features in your extent. + apif : str, optional + The output format of the api. The default is 'json'. + timeout: int optional + The amount of time in seconds to wait for the server to send data before giving + up. The default is 1200 (20 minutes). + + Returns + ------- + gdf : GeoPandas GeoDataFrame + A GeoDataFrame containing all geometries and properties. + + """ + if isinstance(layer, (list, tuple, np.ndarray)): + # recursively call this function for all layers + gdfs = [ + get_brt_ogc(extent=extent, layer=lay, crs=crs, limit=limit, apif=apif, timeout=timeout) + for lay in layer + ] + return pd.concat(gdfs) + + api_url = "https://api.pdok.nl" + url = f"{api_url}/brt/top10nl/ogc/v1/collections/{layer}/items?" + + params = { + "bbox": f"{extent[0]},{extent[2]},{extent[1]},{extent[3]}", + "f": apif, + "limit": limit, + } + + if crs == 28992: + params["crs"] = "http://www.opengis.net/def/crs/EPSG/0/28992" + params["bbox-crs"] = "http://www.opengis.net/def/crs/EPSG/0/28992" + elif crs != 4326: + raise ValueError( + "invalid crs, please choose between 28992 (RD) and 4326 (WGS84)" + ) + + response = requests.get(url, params=params, timeout=timeout) + response.raise_for_status() + + gdf = gpd.GeoDataFrame.from_features(response.json()["features"]) + + if gdf.shape[0] == limit: + msg = f'the number of features in your extent is probably higher than {limit}, consider increasing the "limit" argument in "get_brt"' + logger.error(msg) + + return gdf \ No newline at end of file diff --git a/tests/test_030_brt.py b/tests/test_030_brt.py index 3fc8400f..8f0916f4 100644 --- a/tests/test_030_brt.py +++ b/tests/test_030_brt.py @@ -2,7 +2,9 @@ import geopandas as gpd def test_brt(): + + nlmod.util.get_color_logger("DEBUG") extent = [119900, 120000, 440000, 440100] - brt = nlmod.read.brt.get_brt(extent) + brt = nlmod.read.brt.get_brt(extent, layer='waterdeel') assert isinstance(brt, gpd.GeoDataFrame) \ No newline at end of file From f603eed89ac1abe01ad4cc8ecf1760754c41a54d Mon Sep 17 00:00:00 2001 From: Onno Ebbens Date: Mon, 17 Feb 2025 13:52:04 +0100 Subject: [PATCH 12/17] ruff --- nlmod/read/brt.py | 208 ++++++++++++++++++++++++++-------------------- 1 file changed, 116 insertions(+), 92 deletions(-) diff --git a/nlmod/read/brt.py b/nlmod/read/brt.py index 7f02637f..cddfc8a9 100644 --- a/nlmod/read/brt.py +++ b/nlmod/read/brt.py @@ -1,24 +1,26 @@ import json import logging import time +from io import BytesIO +from xml.etree import ElementTree +from zipfile import ZipFile import geopandas as gpd import numpy as np import pandas as pd import requests - -from io import BytesIO -from shapely.geometry import Point, Polygon, LineString, MultiPolygon -from xml.etree import ElementTree -from zipfile import ZipFile +from shapely.geometry import LineString, MultiPolygon, Point, Polygon from ..util import extent_to_polygon logger = logging.getLogger(__name__) -NS = {'top10nl': "http://register.geostandaarden.nl/gmlapplicatieschema/top10nl/1.2.0", - 'brt': "http://register.geostandaarden.nl/gmlapplicatieschema/brt-algemeen/1.2.0", - 'gml': "http://www.opengis.net/gml/3.2"} +NS = { + "top10nl": "http://register.geostandaarden.nl/gmlapplicatieschema/top10nl/1.2.0", + "brt": "http://register.geostandaarden.nl/gmlapplicatieschema/brt-algemeen/1.2.0", + "gml": "http://www.opengis.net/gml/3.2", +} + def get_brt( extent, @@ -27,10 +29,11 @@ def get_brt( make_valid=False, fname=None, geometry=None, - timeout=1200 + timeout=1200, ): - """Get geometries within an extent or polygon from the Basis Registratie - Topografie (BRT). Useful links: + """Get geometries within an extent/polygon from the Basis Registratie Topografie. + + Useful links: https://api.pdok.nl/brt/top10nl/download/v1_0/ui Parameters @@ -53,9 +56,10 @@ def get_brt( geometry: string, optional When geometry is specified, this attribute is used as the geometry of the resulting GeoDataFrame. Some layers have multiple geometry-attributes. An - example is the layer 'wegdeel', where each feature (point) has a hartGeometry and a - hoofdGeometry. When geometry is None, the last tag in the xml ending - with the word "geometrie" or "Geometrie" is used as the geometry. The default is None. + example is the layer 'wegdeel', where each feature (point) has a hartGeometry + and a hoofdGeometry. When geometry is None, the last tag in the xml ending + with the word "geometrie" of "Geometrie" is used as the geometry. The default + is None. timeout: int optional The amount of time in seconds to wait for the server to send data before giving up. The default is 1200 (20 minutes). @@ -66,7 +70,6 @@ def get_brt( A GeoDataFrame (when only one layer is requested) or a dict of GeoDataFrames containing all geometries and properties. """ - if layer.lower() == "all": layer = get_brt_layers() if isinstance(layer, str): @@ -87,7 +90,7 @@ def get_brt( response = requests.post( url, headers=headers, data=json.dumps(body), timeout=timeout - ) + ) # check api-status, if completed, download if response.status_code in range(200, 300): @@ -133,7 +136,8 @@ def get_brt( def get_brt_layers(timeout=1200): """ - Get the layers in the Basis Registratie Topografie (BRT) + Get the layers in the Basis Registratie Topografie (BRT). + Parameters ---------- timeout: int optional @@ -152,14 +156,10 @@ def get_brt_layers(timeout=1200): def read_brt_zipfile( - fname, - geometry=None, - files=None, - cut_by_extent=True, - make_valid=False, - extent=None + fname, geometry=None, files=None, cut_by_extent=True, make_valid=False, extent=None ): """Read data from a zipfile that was downloaded using get_bgt(). + Parameters ---------- fname : string @@ -167,9 +167,10 @@ def read_brt_zipfile( geometry: string, optional When geometry is specified, this attribute is used as the geometry of the resulting GeoDataFrame. Some layers have multiple geometry-attributes. An - example is the layer 'wegdeel', where each feature (point) has a hartGeometry and a - hoofdGeometry. When geometry is None, the last tag in the xml ending - with the word "geometrie" of "Geometrie" is used as the geometry. The default is None. + example is the layer 'wegdeel', where each feature (point) has a hartGeometry + and a hoofdGeometry. When geometry is None, the last tag in the xml ending + with the word "geometrie" of "Geometrie" is used as the geometry. The default + is None. files : string of list of strings, optional The files to read from the zipfile. Read all files when files is None. The default is None. @@ -203,11 +204,11 @@ def read_brt_zipfile( polygon = extent_to_polygon(extent) for file in files: lay = file[8:-4] - if lay == 'relief': - logger.warning('Cannot read relief data, not implemented yet') + if lay == "relief": + logger.warning("Cannot read relief data, not implemented yet") continue - - logger.debug(f'reading brt file {file}') + + logger.debug(f"reading brt file {file}") gdf_dic[lay] = read_brt_gml(zf.open(file), lay=lay, geometry=geometry) if make_valid and isinstance(gdf_dic[lay], gpd.GeoDataFrame): @@ -220,9 +221,8 @@ def read_brt_zipfile( return gdf_dic - -def read_brt_gml(fname, lay='waterdeel', geometry=None, crs="epsg:28992"): - """read an xml file with features from the BRT +def read_brt_gml(fname, lay="waterdeel", geometry=None, crs="epsg:28992"): + """Read an xml file with features from the BRT. Parameters ---------- @@ -233,9 +233,10 @@ def read_brt_gml(fname, lay='waterdeel', geometry=None, crs="epsg:28992"): geometry: string, optional When geometry is specified, this attribute is used as the geometry of the resulting GeoDataFrame. Some layers have multiple geometry-attributes. An - example is the layer 'wegdeel', where each feature (point) has a hartGeometry and a - hoofdGeometry. When geometry is None, the last tag in the xml ending - with the word "geometrie" of "Geometrie" is used as the geometry. The default is None. + example is the layer 'wegdeel', where each feature (point) has a hartGeometry + and a hoofdGeometry. When geometry is None, the last tag in the xml ending + with the word "geometrie" of "Geometrie" is used as the geometry. The default + is None. crs : str, optional coordinate reference system, by default "epsg:28992" @@ -244,18 +245,21 @@ def read_brt_gml(fname, lay='waterdeel', geometry=None, crs="epsg:28992"): GeoDataFrame or None with BRT feature data """ - tree = ElementTree.parse(fname) - data = [_read_single_brt_feature(com, lay=lay, geometry=geometry) for com in tree.findall("top10nl:FeatureMember",NS)] - + data = [ + _read_single_brt_feature(com, lay=lay, geometry=geometry) + for com in tree.findall("top10nl:FeatureMember", NS) + ] + if len(data) > 0: - return gpd.GeoDataFrame(data, geometry='geometry', crs=crs) + return gpd.GeoDataFrame(data, geometry="geometry", crs=crs) else: return None -def _read_single_brt_feature(com, lay='waterdeel', geometry=None): - """read a single feature from an xml with multiple features + +def _read_single_brt_feature(com, lay="waterdeel", geometry=None): + """Read a single feature from an xml with multiple features. Parameters ---------- @@ -266,10 +270,11 @@ def _read_single_brt_feature(com, lay='waterdeel', geometry=None): geometry: string, optional When geometry is specified, this attribute is used as the geometry of the resulting GeoDataFrame. Some layers have multiple geometry-attributes. An - example is the layer 'wegdeel', where each feature (point) has a hartGeometry and a - hoofdGeometry. When geometry is None, the last tag in the xml ending - with the word "geometrie" of "Geometrie" is used as the geometry. The default is None. - + example is the layer 'wegdeel', where each feature (point) has a hartGeometry + and a hoofdGeometry. When geometry is None, the last tag in the xml ending + with the word "geometrie" of "Geometrie" is used as the geometry. The default + is None. + Returns ------- @@ -282,33 +287,34 @@ def _read_single_brt_feature(com, lay='waterdeel', geometry=None): if the feature is not a 'lijn' or 'vlak' geometry """ if geometry is None: - geometry='geometrie' + geometry = "geometrie" else: - geometry = geometry.lower() # make sure geometry is lower case - + geometry = geometry.lower() # make sure geometry is lower case + assert len(com) == 1 bp = com[0] d = {} for key, name in bp.attrib.items(): - d[key.rpartition('}')[-1]] = name + d[key.rpartition("}")[-1]] = name for child in bp: - tag = child.tag.rpartition('}')[-1] - if tag.lower().endswith('geometrie'): + tag = child.tag.rpartition("}")[-1] + if tag.lower().endswith("geometrie"): geom = child[0] - d['geometry'] = _read_geometry(geom) - elif tag == 'geometrieVlak' and lay=='terrein': - d['geometry'] = _read_geometry(child) - elif tag == 'identificatie': - loc_id = child.find("brt:NEN3610ID",NS).find("brt:lokaalID",NS) - d['lokaalID'] = loc_id.text + d["geometry"] = _read_geometry(geom) + elif tag == "geometrieVlak" and lay == "terrein": + d["geometry"] = _read_geometry(child) + elif tag == "identificatie": + loc_id = child.find("brt:NEN3610ID", NS).find("brt:lokaalID", NS) + d["lokaalID"] = loc_id.text else: d[tag] = child.text return d + def _read_geometry(geom): - """read geometry from a geometry xml tag + """Read geometry from a geometry xml tag. Parameters ---------- @@ -319,35 +325,35 @@ def _read_geometry(geom): ------- shapely.Geometry or None """ - assert len(geom) == 1 feature_geom = geom[0] - - geom_tag = feature_geom.tag.rpartition('}')[-1] - if geom_tag =='lijnGeometrie': + + geom_tag = feature_geom.tag.rpartition("}")[-1] + if geom_tag == "lijnGeometrie": return LineString(_read_linestring(feature_geom[0])) - elif geom_tag == 'puntGeometrie': + elif geom_tag == "puntGeometrie": return Point(_read_point(feature_geom[0])) - elif geom_tag =='vlakGeometrie': + elif geom_tag == "vlakGeometrie": return Polygon(*_read_polygon(feature_geom[0])) - elif geom_tag =='hartGeometrie': - return Polygon(*_read_polygon(feature_geom[0])) - elif geom_tag =='Polygon': + elif geom_tag == "hartGeometrie": + return Polygon(*_read_polygon(feature_geom[0])) + elif geom_tag == "Polygon": return Polygon(*_read_polygon(feature_geom)) - elif geom_tag == 'multivlakGeometrie': - ms = feature_geom.find("gml:MultiSurface",NS) + elif geom_tag == "multivlakGeometrie": + ms = feature_geom.find("gml:MultiSurface", NS) polygons = [] for sm in ms: assert len(sm) == 1 - polygon = sm.find("gml:Polygon",NS) + polygon = sm.find("gml:Polygon", NS) polygons.append(_read_polygon(polygon)) return MultiPolygon(polygons) else: - logger.warning(f'cannot read geometry type {geom_tag}, skipping these features') + logger.warning(f"cannot read geometry type {geom_tag}, skipping these features") return None + def _get_xy(text): - """get x and y coordinates from a tag text + """Get x and y coordinates from a tag text. Parameters ---------- @@ -363,8 +369,9 @@ def _get_xy(text): xy = np.array(xy).reshape(int(len(xy) / 2), 2) return xy + def _get_ring_xy(exterior): - """get x and y coordinates for a LinearRing + """Get x and y coordinates for a LinearRing. Parameters ---------- @@ -382,15 +389,16 @@ def _get_ring_xy(exterior): for unknown exterior types """ assert len(exterior) == 1 - if exterior[0].tag.rpartition('}')[-1] == "LinearRing": - lr = exterior.find("gml:LinearRing",NS) - xy = _get_xy(lr.find("gml:posList",NS).text) + if exterior[0].tag.rpartition("}")[-1] == "LinearRing": + lr = exterior.find("gml:LinearRing", NS) + xy = _get_xy(lr.find("gml:posList", NS).text) else: raise Exception(f"Unknown exterior type: {exterior[0].tag}") return xy + def _read_polygon(polygon): - """read polygon geometry from an xml tag + """Read polygon geometry from an xml tag. Parameters ---------- @@ -402,15 +410,16 @@ def _read_polygon(polygon): shell, holes polygon definition """ - exterior = polygon.find("gml:exterior",NS) + exterior = polygon.find("gml:exterior", NS) shell = _get_ring_xy(exterior) holes = [] - for interior in polygon.findall("gml:interior",NS): + for interior in polygon.findall("gml:interior", NS): holes.append(_get_ring_xy(interior)) return shell, holes + def _read_linestring(linestring): - """read linestring geometry from an xml tag + """Read linestring geometry from an xml tag. Parameters ---------- @@ -422,10 +431,11 @@ def _read_linestring(linestring): np.ndarray x and y coordinates """ - return _get_xy(linestring.find("gml:posList",NS).text) + return _get_xy(linestring.find("gml:posList", NS).text) + def _read_point(point): - """read point geometry from an xml tag + """Read point geometry from an xml tag. Parameters ---------- @@ -437,13 +447,15 @@ def _read_point(point): list x and y coordinates """ - xy = [float(x) for x in point.find("gml:pos",NS).text.split()] + xy = [float(x) for x in point.find("gml:pos", NS).text.split()] return xy -def get_brt_ogc(extent, layer="waterdeel_lijn", crs=28992, limit=1000, - apif="json", timeout=1200): - """ - Get geometries within an extent from the Basis Registratie Topografie (BRT). + +def get_brt_ogc( + extent, layer="waterdeel_lijn", crs=28992, limit=1000, apif="json", timeout=1200 +): + """Get geometries within an extent from the Basis Registratie Topografie (BRT). + Some useful links: https://geoforum.nl/t/pdok-lanceert-de-brt-top10nl-in-ogc-api-s-als-demo/9821/5 https://api.pdok.nl/brt/top10nl/ogc/v1/api @@ -457,7 +469,9 @@ def get_brt_ogc(extent, layer="waterdeel_lijn", crs=28992, limit=1000, crs : int, optional The coordinate reference system. The default is 28992. limit : int, optional - The maximum number of features that is requested. The default is 1000. This api does not seem to work properly when there are more than 1000 features in your extent. + The maximum number of features that is requested. The default is 1000. This api + does not seem to work properly when there are more than 1000 features in your + extent. apif : str, optional The output format of the api. The default is 'json'. timeout: int optional @@ -473,7 +487,14 @@ def get_brt_ogc(extent, layer="waterdeel_lijn", crs=28992, limit=1000, if isinstance(layer, (list, tuple, np.ndarray)): # recursively call this function for all layers gdfs = [ - get_brt_ogc(extent=extent, layer=lay, crs=crs, limit=limit, apif=apif, timeout=timeout) + get_brt_ogc( + extent=extent, + layer=lay, + crs=crs, + limit=limit, + apif=apif, + timeout=timeout, + ) for lay in layer ] return pd.concat(gdfs) @@ -501,7 +522,10 @@ def get_brt_ogc(extent, layer="waterdeel_lijn", crs=28992, limit=1000, gdf = gpd.GeoDataFrame.from_features(response.json()["features"]) if gdf.shape[0] == limit: - msg = f'the number of features in your extent is probably higher than {limit}, consider increasing the "limit" argument in "get_brt"' + msg = ( + f"the number of features in your extent is probably higher than {limit}," + ' consider increasing the "limit" argument in "get_brt"' + ) logger.error(msg) - return gdf \ No newline at end of file + return gdf From b5e360600ec88465f4f07ecba04797a97ae336a7 Mon Sep 17 00:00:00 2001 From: OnnoEbbens Date: Mon, 17 Feb 2025 14:54:03 +0100 Subject: [PATCH 13/17] =?UTF-8?q?fix=20comments=20Dav=C3=ADd?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- nlmod/read/brt.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/nlmod/read/brt.py b/nlmod/read/brt.py index cddfc8a9..7e45c13e 100644 --- a/nlmod/read/brt.py +++ b/nlmod/read/brt.py @@ -158,12 +158,12 @@ def get_brt_layers(timeout=1200): def read_brt_zipfile( fname, geometry=None, files=None, cut_by_extent=True, make_valid=False, extent=None ): - """Read data from a zipfile that was downloaded using get_bgt(). + """Read data from a zipfile that was downloaded using get_brt(). Parameters ---------- fname : string - The filename of the zip-file containing the BGT-data. + The filename of the zip-file containing the BRT-data. geometry: string, optional When geometry is specified, this attribute is used as the geometry of the resulting GeoDataFrame. Some layers have multiple geometry-attributes. An From 7b7e4f0b728fda5a08b517bc97ed20833f861902 Mon Sep 17 00:00:00 2001 From: OnnoEbbens Date: Mon, 17 Feb 2025 16:36:21 +0100 Subject: [PATCH 14/17] comments Dav'id --- nlmod/read/brt.py | 14 ++++++++++---- tests/test_030_brt.py | 1 - 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/nlmod/read/brt.py b/nlmod/read/brt.py index 7e45c13e..28ae7f0f 100644 --- a/nlmod/read/brt.py +++ b/nlmod/read/brt.py @@ -2,6 +2,7 @@ import logging import time from io import BytesIO +from tqdm import tqdm from xml.etree import ElementTree from zipfile import ZipFile @@ -88,6 +89,9 @@ def get_brt( headers = {"content-type": "application/json"} + msg = f"Downloading BRT data layers {layer} within {body['geofilter']}" + logger.info(msg) + response = requests.post( url, headers=headers, data=json.dumps(body), timeout=timeout ) @@ -149,6 +153,7 @@ def get_brt_layers(timeout=1200): list A list with the layer names. """ + url = "https://api.pdok.nl/brt/top10nl/download/v1_0/dataset" resp = requests.get(url, timeout=timeout) data = resp.json() @@ -178,7 +183,7 @@ def read_brt_zipfile( Cut the geoemetries by the supplied extent. When no extent is supplied, cut_by_extent is set to False. The default is True. make_valid : bool, optional - Make geometries valid by appying a buffer of 0 m when True. THe defaults is + Make geometries valid by appying a buffer of 0 m when True. The default is False. extent : list or tuple of length 4 or shapely Polygon The extent (xmin, xmax, ymin, ymax) or polygon by which the geometries are @@ -208,13 +213,14 @@ def read_brt_zipfile( logger.warning("Cannot read relief data, not implemented yet") continue - logger.debug(f"reading brt file {file}") + logger.debug(f"reading brt layer {file}") gdf_dic[lay] = read_brt_gml(zf.open(file), lay=lay, geometry=geometry) if make_valid and isinstance(gdf_dic[lay], gpd.GeoDataFrame): gdf_dic[lay].geometry = gdf_dic[lay].geometry.buffer(0.0) if cut_by_extent and isinstance(gdf_dic[lay], gpd.GeoDataFrame): + logger.debug('only keep features within the extent') gdf_dic[lay].geometry = gdf_dic[lay].intersection(polygon) gdf_dic[lay] = gdf_dic[lay][~gdf_dic[lay].is_empty] @@ -246,10 +252,10 @@ def read_brt_gml(fname, lay="waterdeel", geometry=None, crs="epsg:28992"): with BRT feature data """ tree = ElementTree.parse(fname) - + ft_members = tree.findall("top10nl:FeatureMember", NS) data = [ _read_single_brt_feature(com, lay=lay, geometry=geometry) - for com in tree.findall("top10nl:FeatureMember", NS) + for com in tqdm(ft_members, desc=f"Downloading features of layer {lay}") ] if len(data) > 0: diff --git a/tests/test_030_brt.py b/tests/test_030_brt.py index 8f0916f4..e5f7ace5 100644 --- a/tests/test_030_brt.py +++ b/tests/test_030_brt.py @@ -3,7 +3,6 @@ def test_brt(): - nlmod.util.get_color_logger("DEBUG") extent = [119900, 120000, 440000, 440100] brt = nlmod.read.brt.get_brt(extent, layer='waterdeel') From c34faa6885e0ee155986ac86c58a9e7fefc47a55 Mon Sep 17 00:00:00 2001 From: OnnoEbbens Date: Mon, 17 Feb 2025 16:41:35 +0100 Subject: [PATCH 15/17] add some logging for removed features --- nlmod/read/brt.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nlmod/read/brt.py b/nlmod/read/brt.py index 28ae7f0f..6ea27941 100644 --- a/nlmod/read/brt.py +++ b/nlmod/read/brt.py @@ -220,9 +220,11 @@ def read_brt_zipfile( gdf_dic[lay].geometry = gdf_dic[lay].geometry.buffer(0.0) if cut_by_extent and isinstance(gdf_dic[lay], gpd.GeoDataFrame): - logger.debug('only keep features within the extent') + no_ft = gdf_dic[lay].shape[0] gdf_dic[lay].geometry = gdf_dic[lay].intersection(polygon) gdf_dic[lay] = gdf_dic[lay][~gdf_dic[lay].is_empty] + rm_ft = no_ft - gdf_dic[lay].shape[0] + logger.info(f'removed {rm_ft} features that are outside the extent') return gdf_dic From 375fc0ec52ed3afd0395bab53d4988c4e51b7a8d Mon Sep 17 00:00:00 2001 From: OnnoEbbens Date: Mon, 17 Feb 2025 16:50:32 +0100 Subject: [PATCH 16/17] ruff --- nlmod/read/brt.py | 5 ++--- tests/test_030_brt.py | 9 +++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/nlmod/read/brt.py b/nlmod/read/brt.py index 6ea27941..30bd52a4 100644 --- a/nlmod/read/brt.py +++ b/nlmod/read/brt.py @@ -2,7 +2,6 @@ import logging import time from io import BytesIO -from tqdm import tqdm from xml.etree import ElementTree from zipfile import ZipFile @@ -11,6 +10,7 @@ import pandas as pd import requests from shapely.geometry import LineString, MultiPolygon, Point, Polygon +from tqdm import tqdm from ..util import extent_to_polygon @@ -153,7 +153,6 @@ def get_brt_layers(timeout=1200): list A list with the layer names. """ - url = "https://api.pdok.nl/brt/top10nl/download/v1_0/dataset" resp = requests.get(url, timeout=timeout) data = resp.json() @@ -224,7 +223,7 @@ def read_brt_zipfile( gdf_dic[lay].geometry = gdf_dic[lay].intersection(polygon) gdf_dic[lay] = gdf_dic[lay][~gdf_dic[lay].is_empty] rm_ft = no_ft - gdf_dic[lay].shape[0] - logger.info(f'removed {rm_ft} features that are outside the extent') + logger.info(f"removed {rm_ft} features that are outside the extent") return gdf_dic diff --git a/tests/test_030_brt.py b/tests/test_030_brt.py index e5f7ace5..0b1ee6d4 100644 --- a/tests/test_030_brt.py +++ b/tests/test_030_brt.py @@ -1,9 +1,10 @@ -import nlmod import geopandas as gpd -def test_brt(): +import nlmod + +def test_brt(): extent = [119900, 120000, 440000, 440100] - brt = nlmod.read.brt.get_brt(extent, layer='waterdeel') + brt = nlmod.read.brt.get_brt(extent, layer="waterdeel") - assert isinstance(brt, gpd.GeoDataFrame) \ No newline at end of file + assert isinstance(brt, gpd.GeoDataFrame) From 5b0f7c6ec213100e95e8fca0527478d30c195e3c Mon Sep 17 00:00:00 2001 From: OnnoEbbens Date: Mon, 17 Feb 2025 18:58:26 +0100 Subject: [PATCH 17/17] make exceptions more specific --- nlmod/read/brt.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/nlmod/read/brt.py b/nlmod/read/brt.py index 30bd52a4..be67e3f5 100644 --- a/nlmod/read/brt.py +++ b/nlmod/read/brt.py @@ -113,8 +113,7 @@ def get_brt( else: running = False else: - msg = f"Download of brt-data failed: {response.text}" - raise (Exception(msg)) + response.raise_for_status() href = response.json()["_links"]["download"]["href"] response = requests.get(f"{api_url}{href}", timeout=timeout) @@ -392,7 +391,7 @@ def _get_ring_xy(exterior): Raises ------ - Exception + NotImplementedError for unknown exterior types """ assert len(exterior) == 1 @@ -400,7 +399,7 @@ def _get_ring_xy(exterior): lr = exterior.find("gml:LinearRing", NS) xy = _get_xy(lr.find("gml:posList", NS).text) else: - raise Exception(f"Unknown exterior type: {exterior[0].tag}") + raise NotImplementedError(f"Unknown exterior type: {exterior[0].tag}") return xy