InseeFrLab · tgrandje · Apr 6, 2026 · Apr 7, 2026 · Apr 7, 2026
diff --git a/pynsee/melodi/__init__.py b/pynsee/melodi/__init__.py
@@ -0,0 +1,5 @@
+# -*- coding: utf-8 -*-
+
+from .catalog import get_melodi_catalog
+
+__all__ = ["get_melodi_catalog"]
diff --git a/pynsee/melodi/catalog.py b/pynsee/melodi/catalog.py
@@ -0,0 +1,179 @@
+# -*- coding: utf-8 -*-
+"""
+Download MELODI's catalog.
+
+Only one endpoint is covered by pynsee, as "catalog/{id}" and "catalog/ids"
+are both covered by "catalog/all" (which is slower, but pynsee's cache
+mitigates that).
+
+Thus, the only endpoint NOT yet covered is "/catalog/dcat".
+"""
+
+import pandas as pd
+
+from pynsee.utils.requests_session import PynseeAPISession
+from pynsee.utils.save_df import save_df
+
+
+def _localized_labels(name: str, labels: list, language: str) -> dict:
+    """Return the "<name>_<lang>": content pairs kept by `language`."""
+    return {
+        f"{name}_{label['lang']}": label["content"]
+        for label in labels
+        if isinstance(label, dict) and "lang" in label and "content" in label
+        if language in {"all", label["lang"]}
+    }
+
+
+@save_df(day_lapse_max=30)
+def get_melodi_catalog(language: str = "all") -> pd.DataFrame:
+    r"""
+    Retrieve MELODI's full catalog.
+
+    Parameters
+    ----------
+    language : str, optional
+        Filter metadata to select a desired language, if available. The default
+        is "all", which won't filter anything. Covered values are "fr", "en".
+
+    Returns
+    -------
+    dataset : pd.DataFrame
+        One row per product with its dataset's metadata (empty if none).
+    Ex:
+                                                   accessURL  byteSize format  \
+        0  https://api.insee.fr/melodi/file/DD_CNA_AGREGA...   1895856    CSV
+        1  https://api.insee.fr/melodi/file/DD_CNA_BRANCH...   3551601    CSV
+
+                               id               issued language mediaType  \
+        0  DD_CNA_AGREGATS_CSV_FR  2024-07-18T15:46:53       FR  text/csv
+        1  DD_CNA_BRANCHES_CSV_FR  2024-11-22T13:11:59       FR  text/csv
+
+                      modified    packageFormat  \
+        0  2025-08-29T13:38:30  application/zip
+        1  2025-05-27T17:45:13  application/zip
+
+                                                       title dataset_identifier  \
+        0  Produit Intérieur Brut (PIB) et grands agrégat...    DD_CNA_AGREGATS
+        1                Activité des branches de l'économie    DD_CNA_BRANCHES
+
+                                                 abstract_fr  \
+        0  Données annuelles de Produit Intérieur Brut (P...
+        1  Données annuelles sur l'activité des branches ...
+
+                                                 abstract_en accessRights_fr  \
+        0  Annual data on Gross Domestic Product (GDP) an...           Libre
+        1  Annual data on the activity of branches within...           Libre
+
+          accrualPeriodicity_fr accrualPeriodicity_en confidentialityStatus_fr  \
+        0                Annuel                Annual                    Libre
+        1                Annuel                Annual                    Libre
+
+          creator                                     description_fr  \
+        0   Insee  Le produit intérieur brut (PIB) est le princip...
+        1   Insee  Données annuelles sur l'activité des branches ...
+
+                                              description_en  \
+        0  Gross domestic product (GDP) is the main aggre...
+        1  Annual data on the activity of industries as r...
+
+                                             ordreComposants processStep_fr  \
+        0  ACCOUNTING_ENTRY, ACTIVITY, COUNTERPART_AREA, ...       inseeApi
+        1  REF_SECTOR, COUNTERPART_AREA, ACCOUNTING_ENTRY...       inseeApi
+
+          processStep_en                                       publisher_fr  \
+        0       inseeApi  Institut national de la statistique et des etu...
+        1       inseeApi  Institut national de la statistique et des etu...
+
+                                                publisher_en  \
+        0  National Institute of Statistics and Economic ...
+        1  National Institute of Statistics and Economic ...
+
+                                                scopeNote_fr  \
+        0
+        1  La version du dataset mise en ligne le 8 octob...
+
+                                                scopeNote_en      spatial_fr  \
+        0                                                     France entière
+        1  The version of the dataset published online on...  France entière
+
+          spatial_en          dsd                            subtitle_fr  \
+        0     France  DSD_NA_MAIN  Comptes nationaux annuels - Base 2020
+        1     France  DSD_NA_MAIN  Comptes nationaux annuels - Base 2020
+
+                             subtitle_en            endPeriod          startPeriod  \
+        0     Base 2020 - Annual Results  2024-01-01T00:00:00  1949-01-01T00:00:00
+        1  National accounts - Base 2020  2024-01-01T00:00:00  1949-01-01T00:00:00
+
+                                                    title_fr  \
+        0  Produit Intérieur Brut (PIB) et grands agrégat...
+        1                Activité des branches de l'économie
+
+                                                    title_en           type_fr  \
+        0  Gross domestic product (GDP) and main economic...  Données agrégées
+        1    Production and generation of income by industry  Données agrégées
+
+                                                         uri  \
+        0  http://id.insee.fr/catalogues/jeuDeDonnees/8b0...
+        1  http://id.insee.fr/catalogues/jeuDeDonnees/a85...
+
+                                           uuid accessRights_en type_en  \
+        0  2ccb4960-fb78-4ef9-93f6-2edf14cb53c8            None    None
+        1  2017dbc2-177f-29d7-4f74-749965615961            None    None
+
+          confidentialityStatus_en spatialTemporal
+        0                     None            None
+        1                     None            None
+
+    Examples
+    --------
+    >>> get_melodi_catalog()
+    >>> get_melodi_catalog(language="fr")
+    """
+
+    url = "https://api.insee.fr/melodi/catalog/all"
+    with PynseeAPISession() as session:
+        r = session.request_insee(api_url=url, file_format="application/json")
+
+    list_data_dict = []
+    list_product_dict = []
+    for dset in r.json():
+        dico = {}
+        for metadata, description in dset.items():
+            if isinstance(description, str):
+                dico[metadata] = description
+            elif isinstance(description, list):
+                # plain list of strings: collapse into one comma-separated cell
+                if all(isinstance(j, str) for j in description):
+                    dico[metadata] = ", ".join(description)
+                dico.update(_localized_labels(metadata, description, language))
+            elif isinstance(description, dict):
+                if "label" in description:
+                    labels = description["label"]
+                    dico.update(_localized_labels(metadata, labels, language))
+                else:
+                    # no translations: keep the mapping's own keys as columns
+                    dico.update(description)
+        list_data_dict.append(dico)
+        dset_products = dset.get("product")
+        if isinstance(dset_products, list):
+            # tag each product with its dataset, without mutating the payload
+            list_product_dict.extend(
+                {**product, "identifier": dset["identifier"]}
+                for product in dset_products
+                if isinstance(product, dict)
+            )
+
+    meta = pd.DataFrame(list_data_dict)
+    products = pd.DataFrame(list_product_dict)
+    if products.empty:
+        # no product at all: there is no "identifier" column to merge on
+        return products
+    # on name clashes, the product-level column wins over the dataset one
+    list_col_dropped = [
+        c for c in products.columns if c in meta.columns and c != "identifier"
+    ]
+    meta2 = meta.drop(columns=list_col_dropped)
+    return products.merge(meta2, on="identifier", how="left").rename(
+        columns={"identifier": "dataset_identifier"}
+    )
diff --git a/pynsee/melodi/data.py b/pynsee/melodi/data.py
@@ -0,0 +1,144 @@
+# -*- coding: utf-8 -*-
+"""
+#TODO
+
+// Note: available functions in R package :
+    get_catalog
+    get_all_data
+    get_data
+    get_file
+    get_local_data
+    get_local_data_by_com
+    get_metadata
+    get_range
+    get_range_geo
+
+    https://github.com/InseeFrLab/melodi/tree/main/R
+
+"""
+
+import logging
+from urllib.parse import urlencode
+
+import pandas as pd
+import requests
+from tqdm import tqdm
+
+from pynsee.utils.requests_session import PynseeAPISession
+from pynsee.utils.save_df import save_df
+
+SIZE = 10_000  # rows assumed per page when computing the page count
+
+logger = logging.getLogger(__name__)
+
+
+def _parse_metadata(
+    response: requests.Response, language: str = "all"
+) -> dict:
+    """
+    Extract dataset-level metadata from a MELODI data response: "identifier",
+    "publisher_id" and the "title_<lang>" / "publisher_<lang>" entries kept
+    by `language` ("all" keeps every language).
+    """
+    payload = response.json()
+    parsed = {
+        f"title_{lang}": title
+        for lang, title in payload["title"].items()
+        if language in {"all", lang}
+    }
+    parsed["identifier"] = payload["identifier"]
+    parsed["publisher_id"] = payload["publisher"]["id"]
+    for label in payload["publisher"]["label"]:
+        # only {"lang": ..., "content": ...} mappings carry a translation
+        if isinstance(label, dict) and label.keys() >= {"lang", "content"}:
+            if language in {"all", label["lang"]}:
+                parsed["publisher_" + label["lang"]] = label["content"]
+    return parsed
+
+
+def _parse_dataset_observations(
+    response: requests.Response, language: str = "all"
+):
+    """
+    Flatten the "observations" of a MELODI data response into a DataFrame:
+    "dimensions" / "attributes" become columns and "measures" is reduced to
+    the "OBS_VALUE_NIVEAU" value. `language` is currently unused.
+    """
+    frame = pd.DataFrame(response.json()["observations"])
+    for nested in ("dimensions", "attributes"):
+        if nested not in frame.columns:
+            continue
+        expanded = pd.DataFrame(frame[nested].values.tolist())
+        frame = frame.drop(nested, axis=1).join(expanded)
+    if "measures" in frame.columns:
+        level = frame["measures"].str["OBS_VALUE_NIVEAU"].str["value"]
+        frame = frame.drop("measures", axis=1).assign(OBS_VALUE_NIVEAU=level)
+
+    return frame
+
+
+@save_df(day_lapse_max=90)
+def get_melodi_dataset(
+    id_dataset, language="all", page=1, **filters
+) -> pd.DataFrame:
+    """
+    Download all observations of a MELODI dataset, one page at a time.
+
+    Parameters
+    ----------
+    id_dataset : identifier appended to the "melodi/data/" endpoint.
+    language : language kept in the dataset metadata ("all" keeps them all).
+    page : page number sent with the requests. The default is 1.
+    **filters : extra query parameters, URL-encoded as-is.
+
+    Returns the observations as a pd.DataFrame, with the dataset metadata
+    repeated as columns (no row when the API announces no observation).
+    Raises ValueError when the pages received do not match the announced
+    paging, requests.exceptions.RequestException on a non-JSON page.
+    """
+    url = f"https://api.insee.fr/melodi/data/{id_dataset}"
+    params = {**filters, "totalCount": True, "maxResult": 0, "page": page}
+    # maxResult=0: the first query only fetches metadata and paging info
+    url_api_count = f"{url}?{urlencode(params)}"
+    del params["maxResult"]
+    url_api = f"{url}?{urlencode(params)}"
+
+    pages = []
+    with PynseeAPISession() as session:
+        r = session.request_insee(
+            url_api_count, file_format="application/json"
+        )
+        # parse metadata only once to reduce RAM consumption
+        metadata = _parse_metadata(r, language=language)
+        count = r.json()["paging"]["count"]
+        if count == 0:
+            # nothing to page through: pd.concat([]) would raise
+            return pd.DataFrame().assign(**metadata)
+        count_pages = -(-count // SIZE)  # ceiling division
+        data = {"paging": {"next": url_api}}
+        for _ in tqdm(range(count_pages), desc="Downloading"):
+            url = data["paging"]["next"]
+            r = session.request_insee(url, file_format="application/json")
+            try:
+                data = r.json()
+            except requests.exceptions.JSONDecodeError as e:
+                raise requests.exceptions.RequestException(
+                    f"an error occured on {url}"
+                ) from e
+            pages.append(_parse_dataset_observations(r))
+        if not data["paging"]["isLast"]:
+            raise ValueError(
+                "An unexpected error occured, please get in touch"
+            )
+
+    observations = pd.concat(pages, ignore_index=True).assign(**metadata)
+    if len(observations) != count:
+        raise ValueError("An unexpected error occured, please get in touch")
+    return observations
+
+
+if __name__ == "__main__":
+    # Manual smoke test: downloads one dataset through the API and prints it.
+    # test = get_melodi_dataset("DS_TICM_PRATIQUES")
+    test = get_melodi_dataset("DS_RP_POPULATION_PRINC")
+    print(test)
diff --git a/pynsee/utils/requests_session.py b/pynsee/utils/requests_session.py
@@ -18,7 +18,6 @@
 from pynsee.utils._create_insee_folder import _create_insee_folder
 from pynsee.constants import SIRENE_KEY, HTTPS_PROXY_KEY, HTTP_PROXY_KEY
 
-
 logger = logging.getLogger(__name__)
 
 
@@ -143,7 +142,6 @@ def __init__(
         self.headers.update(useragent)
 
         self.sirene_key = config[SIRENE_KEY]
-        self.headers["X-INSEE-Api-Key-Integration"] = self.sirene_key
 
     def _mount_adapters(self):
         """
@@ -295,6 +293,14 @@ def request(
                 "is not yet covered by pynsee."
             )
 
+        if "api-sirene" in url:
+            self.headers["X-INSEE-Api-Key-Integration"] = self.sirene_key
+        else:
+            # for now, melodi returns 401 if setting an API key based on a
+            # SIRENE subscription
+            if "X-INSEE-Api-Key-Integration" in self.headers:
+                del self.headers["X-INSEE-Api-Key-Integration"]
+
         logger.info(url)
         with warnings.catch_warnings():
             warnings.simplefilter(