Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
164 changes: 0 additions & 164 deletions pynsee/geodata/_add_insee_dep.py

This file was deleted.

89 changes: 86 additions & 3 deletions pynsee/geodata/translate_and_zoom.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
import math
import re
from typing import Optional

import pandas as pd
Expand All @@ -10,12 +11,55 @@
from ._make_offshore_points import _make_offshore_points
from ._rescale_geom import _rescale_geom
from ._get_center import _get_center
from ._add_insee_dep import _add_insee_dep

from ._get_geodata_with_backup import _get_geodata_with_backup
from pynsee.utils.save_df import save_df

logger = logging.getLogger(__name__)


@save_df(cls=GeoDataFrame, day_lapse_max=30)
def _deps_with_valid_coverage() -> GeoDataFrame:
"""
Inner function used to create a geodataframe of french departments, safe to
use for a spatial join.

First (if overlaps detected) a valid coverage is enforced (meaning polygons
do not overlap and are sharing edges) with a simplification of 1 meter.
After that, a negative 10 meters buffer is applied to prevent duplicates
when running a spatial join with intersects predicate.
This function uses a 30 day cache storage.

Returns
-------
GeoDataFrame
Departments' geodataset
Only two columns : code_insee_du_departement, insee_dep_geometry

"""
dataset_id = "ADMINEXPRESS-COG-CARTO.LATEST:departement"
dep = _get_geodata_with_backup(dataset_id).to_crs("EPSG:3857")
dep = dep[["code_insee", "geometry"]].rename(
columns={
"code_insee": "code_insee_du_departement",
"geometry": "insee_dep_geometry",
}
)
dep = dep.set_geometry("insee_dep_geometry")

x = dep.sjoin(dep, how="left", predicate="overlaps").query(
"~code_insee_du_departement_right.isnull()"
)
if not x.empty:
# Geometries overlap:
# Force coverage validity (non-overlapping, edge-matched polygons)
# (This shouldn't be necessary with modern ADMINEXPRESS datasets and
# is here as a backup safe)
dep["insee_dep_geometry"] = dep.simplify_coverage(1)
dep["insee_dep_geometry"] = dep["insee_dep_geometry"].buffer(-1)

return dep


def transform_overseas(
gdf: GeoDataFrame,
departement: tuple[str, ...] = ("971", "972", "974", "973", "976", "NR"),
Expand Down Expand Up @@ -74,7 +118,46 @@ def transform_overseas(
gdf["code_insee_du_departement"] = gdf["code_insee"]
gdf["insee_dep_geometry"] = gdf["geometry"]
else:
gdf = _add_insee_dep(gdf.copy()).reset_index(drop=True)

# retrieve safe deps geometries
dep_cov = _deps_with_valid_coverage()

# detect INSEE department's codes
# (available in some of IGN's geodatasets, but with different patterns)
pattern = "codes?.*?insee.*?departement"
dep = gdf.columns[gdf.columns.str.match(pattern, flags=re.IGNORECASE)]
if not dep.empty:
dep = dep[0]
deps = gdf[dep].str.extractall("([0-9AB]{2,3})")
deps.index = deps.index.droplevel(-1)
deps = deps.loc[~deps.index.duplicated(keep="first")]
gdf["code_insee_du_departement"] = deps[0]

else:

# retrieve department's codes using a spatial join. The negative
# buffer on deps should be safe to be used without any duplication
# of gdf, except when there is a valid overlapping (ie regions
# covering multiple deps, interdep epcis...).
# In case of duplication (for instance, starting from REGION) keep
# only first dep geometry
gdf = gdf.sjoin(dep_cov, how="left")
gdf = gdf.loc[~gdf.index.duplicated(keep="first")]
gdf = gdf.drop("index_right", axis=1)

gdf["code_insee_du_departement"] = gdf[
"code_insee_du_departement"
].fillna("NR")

# Retrieve simplified geometries for deps. Note that it used a negative
# buffer (10 meters) which should not alter geographic transformations
# given it's range
gdf = gdf.merge(dep_cov, on="code_insee_du_departement", how="left")

# where dep geom is still missing, use initial geometry
gdf["insee_dep_geometry"] = gdf["insee_dep_geometry"].combine_first(
gdf.geometry
)

offshore_points = _make_offshore_points(
center=Point(center),
Expand Down
8 changes: 8 additions & 0 deletions pynsee/localdata/get_population.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# -*- coding: utf-8 -*-

from functools import lru_cache
import warnings

from pynsee.geodata.get_geodata import get_geodata

Expand All @@ -14,6 +15,13 @@ def get_population():
>>> pop = get_population()
"""

warnings.warn(
"get_population is deprecated. Please use "
"`from pynsee import get_geodata;get_geodata('ADMINEXPRESS-COG-CARTO.LATEST:commune')` "
"instead",
category=FutureWarning,
)

df = get_geodata("ADMINEXPRESS-COG-CARTO.LATEST:commune")

return df
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ keywords = [
"INSEE", "IGN", "statistics", "demography", "geospatial", "France", "GIS",
"statistique", "démographie", "géospatial", "SIG"
]
requires-python = ">=3.9"
requires-python = ">=3.10"
dynamic = ["dependencies", "optional-dependencies", "version"]
classifiers = [
"Development Status :: 5 - Production/Stable",
Expand Down
4 changes: 2 additions & 2 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
pandas >= 2.2.0, < 4.0
geopandas >= 1.0, < 2.0
geopandas >= 1.1.0, < 2.0
shapely >= 2.1
pyarrow >= 17.0
tqdm >= 4.56
platformdirs >= 4
unidecode >= 1.3
urllib3 >= 2.2
shapely >= 2
requests >= 2.32
requests-ratelimiter >= 0.7
openpyxl
6 changes: 0 additions & 6 deletions tests/localdata/test_pynsee_localdata.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
from pynsee.localdata.get_local_data import get_local_data
from pynsee.localdata.get_nivgeo_list import get_nivgeo_list
from pynsee.localdata.get_local_metadata import get_local_metadata
from pynsee.localdata.get_population import get_population
from pynsee.localdata.get_old_city import get_old_city
from pynsee.localdata.get_new_city import get_new_city
from pynsee.localdata.get_area_projection import get_area_projection
Expand All @@ -29,11 +28,6 @@

class TestFunction(TestCase):

def test_get_population(self):
df = get_population()
test = isinstance(df, pd.DataFrame)
self.assertTrue(test)

def test_get_insee_one_area_1(self):
def get_insee_one_area_test(area_type="derf", codearea="c"):
_get_insee_one_area(area_type=area_type, codearea=codearea)
Expand Down
Loading