
Commit b9dc5a4

Merge pull request #848 from KM3NeT/main
KM3NeT data extractors, example of use and detector classes
2 parents fd53394 + a088f9a commit b9dc5a4

File tree

17 files changed (+1182, −4 lines)

4.93 MB
Binary file not shown.

docs/source/installation/install.rst

Lines changed: 47 additions & 1 deletion
@@ -55,6 +55,52 @@ To achieve this, we recommend installing |graphnet|\ GraphNeT into a CVMFS with
Once installed, |graphnet|\ GraphNeT is available whenever you open the CVMFS locally.

Installation with km3io (KM3NeT)
-----------------------------------------------

This installation is only necessary if you want to process KM3NeT/ARCA or KM3NeT/ORCA files. Processing means converting them from the `.root` offline format into a format suitable for training with |graphnet|. If your KM3NeT data is already in `SQLite` or `parquet` format and you only want to train a model or perform inference on it, this specific installation is not needed.

Note that this installation adds `km3io`, ensuring it is built with compatible versions. The steps below assume a conda environment, created in the same way as above on this page, but feel free to choose a different environment setup.

As mentioned, it is highly recommended to perform the installation inside a conda environment so as not to break any dependencies. This can be done with the following commands:

.. code-block:: bash

    # Create an environment with Python 3.10
    conda create -p <full-path-to-env> --no-default-packages python=3.10 -y
    # Activate the environment. If using conda:
    conda activate <full-path-to-env>

GraphNeT is then installed by cloning the repository:

.. code-block:: bash

    git clone https://github.com/graphnet-team/graphnet.git
    cd graphnet

Choose the appropriate requirements file for your system. The example below uses PyTorch 2.5.1; check the matrix above for the full set of installable versions.

For CPU-only environments:

.. code-block:: bash

    pip3 install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 --index-url https://download.pytorch.org/whl/cpu
    pip3 install -e .[torch-25] -f https://data.pyg.org/whl/torch-2.5.1+cpu.html

For GPU environments with, for instance, CUDA 11.8 drivers:

.. code-block:: bash

    pip3 install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 --index-url https://download.pytorch.org/whl/cu118
    pip3 install -e .[torch-25] -f https://data.pyg.org/whl/torch-2.5.1+cu118.html

Finally, downgrade setuptools for compatibility between km3io and GraphNeT, and install km3io:

.. code-block:: bash

    pip3 install --force-reinstall setuptools==70.3.0
    pip3 install km3io==1.2.0
.. note::
    We recommend installing |graphnet|\ GraphNeT without GPU in clean metaprojects.
Lines changed: 104 additions & 0 deletions
@@ -0,0 +1,104 @@
"""Code to run the extraction of km3net data."""

import os
import warnings

from graphnet.constants import EXAMPLE_OUTPUT_DIR, TEST_DATA_DIR
from graphnet.data.readers import KM3NeTReader
from graphnet.data.writers import ParquetWriter, SQLiteWriter
from graphnet.data import DataConverter
from graphnet.data.extractors.km3net import (
    KM3NeTTruthExtractor,
    KM3NeTFullPulseExtractor,
    KM3NeTTriggPulseExtractor,
    KM3NeTHNLTruthExtractor,
    KM3NeTRegularRecoExtractor,
    KM3NeTHNLRecoExtractor,
)

from graphnet.utilities.argparse import ArgumentParser


def main(backend: str, triggered: str, HNL: str, OUTPUT_DIR: str) -> None:
    """Convert ROOT files from KM3NeT to `backend` format."""
    warnings.simplefilter(action="ignore", category=FutureWarning)

    input_dir = [f"{TEST_DATA_DIR}/km3net"]
    if OUTPUT_DIR != "None":
        outdir = f"{OUTPUT_DIR}/{backend}"
    else:
        outdir = f"{EXAMPLE_OUTPUT_DIR}/{backend}"
    os.makedirs(outdir, exist_ok=True)
    print(60 * "*")
    print(f"Saving to {outdir}")
    print(60 * "*")
    if backend == "parquet":
        save_method = ParquetWriter(truth_table="truth")
    elif backend == "sqlite":
        save_method = SQLiteWriter()  # type: ignore
    else:
        raise ValueError("Invalid backend choice")

    if HNL == "km3net-vars":
        truth_extractor = KM3NeTTruthExtractor(name="truth")
        reco_extractor = KM3NeTRegularRecoExtractor(name="reco")
    elif HNL == "hnl-vars":
        truth_extractor = KM3NeTHNLTruthExtractor(name="truth")  # type: ignore
        reco_extractor = KM3NeTHNLRecoExtractor(name="reco")  # type: ignore
    else:
        raise ValueError("Invalid HNL choice")

    if triggered == "Triggered":
        pulse_extractor = KM3NeTTriggPulseExtractor(name="pulse_map")
    elif triggered == "Snapshot":
        pulse_extractor = KM3NeTFullPulseExtractor(
            name="pulse_map"
        )  # type: ignore
    else:
        raise ValueError("Invalid triggered choice")

    converter = DataConverter(
        file_reader=KM3NeTReader(),
        save_method=save_method,
        extractors=[truth_extractor, pulse_extractor, reco_extractor],
        outdir=outdir,
        num_workers=1,
    )

    converter(input_dir=input_dir)


if __name__ == "__main__":

    # Parse command-line arguments
    parser = ArgumentParser(
        description="""
        Convert ROOT files from KM3NeT to an SQLite or Parquet format.
        """
    )

    parser.add_argument(
        "backend",
        choices=["sqlite", "parquet"],
        help="Choose the backend format",
    )
    parser.add_argument(
        "triggered",
        choices=["Triggered", "Snapshot"],
        help="Choose between triggered or snapshot pulse maps",
    )
    parser.add_argument(
        "HNL",
        choices=["km3net-vars", "hnl-vars"],
        help="KM3NeT truth or adding Heavy Neutral Lepton info",
    )
    parser.add_argument(
        "OUTPUT_DIR",
        default="None",
        help="Output directory (optional)",
    )

    args, unknown = parser.parse_known_args()

    # Run example script
    main(args.backend, args.triggered, args.HNL, args.OUTPUT_DIR)
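The script parses its arguments with `parse_known_args`, so unrecognized flags are collected rather than aborting the run. A minimal standalone illustration using the standard-library `argparse` (GraphNeT's `ArgumentParser` is assumed here to follow the stock behaviour):

```python
import argparse

parser = argparse.ArgumentParser(description="Demo of parse_known_args")
parser.add_argument("backend", choices=["sqlite", "parquet"])
parser.add_argument("OUTPUT_DIR", nargs="?", default="None")

# Known positionals are parsed; anything unrecognized lands in
# `unknown` instead of triggering an error.
args, unknown = parser.parse_known_args(
    ["sqlite", "/tmp/out", "--not-a-real-flag"]
)
print(args.backend)     # sqlite
print(args.OUTPUT_DIR)  # /tmp/out
print(unknown)          # ['--not-a-real-flag']
```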

examples/07_km3net/README.md

Lines changed: 87 additions & 0 deletions
@@ -0,0 +1,87 @@
# KM3NeT Data Conversion

This folder contains an example script for extracting information from ROOT files of KM3NeT offline data and converting it into intermediate file formats suitable for deep learning training or inference using GraphNeT. Supported output formats include SQLite and Parquet. After this conversion, training and inference on KM3NeT data can be performed efficiently.

## Example Usage

The following example demonstrates how to perform the conversion using a sample KM3NeT-like file containing a few events with random information:

```bash
python 01_convert_km3net.py <output_format> <pulse_option> <variable_set> [OUTPUT_DIR]
```

### Arguments:
- `<output_format>`: Specifies the output format, either `sqlite` or `parquet`.
- `<pulse_option>`: Determines whether to extract all pulses (`Snapshot`) or only the triggered ones (`Triggered`).
- `<variable_set>`: Defines the variables to include, such as `km3net-vars` for standard neutrino-related data or `hnl-vars` for additional quantities related to Heavy Neutral Lepton searches.
- `[OUTPUT_DIR]` (optional): Specifies the output directory. If not provided, the output will be stored in GraphNeT's default example output directory, which can be found using:

```python
from graphnet.constants import EXAMPLE_OUTPUT_DIR
print(EXAMPLE_OUTPUT_DIR)
```

The path to the ROOT file being converted can be found by running:
```python
from graphnet.constants import TEST_DATA_DIR
print(TEST_DATA_DIR)
```

### Output Structure

The generated SQLite or Parquet file contains:
- A **pulse table**, storing hit-by-hit information for each event, with a unique identifier linking pulses to their respective events.
- A **true Monte Carlo event table**, including ground-truth event information. If available and selected, it may also contain reconstructed information from likelihood-based methods.
- Unavailable variables (e.g., true Monte Carlo information in real data files) will be filled with unphysical placeholder values.

### Reading the Output Files

The output files can be read using Python.

- **If you chose to create a Parquet output**:
  You will find several `.parquet` files in the output folder, each corresponding to a different extracted table (e.g., a table with the true event information, a table with pulse information, etc.).
  To read one of these tables:

  ```python
  import pandas as pd

  df = pd.read_parquet("FILE_NAME.parquet")
  print(df.head())
  ```

- **If you chose to create an SQLite output**:
  In this case, you will find a single `.db` file per converted input, which contains all the tables inside.
  To list the table names and preview their contents:

  ```python
  import pandas as pd
  import sqlite3

  # Connect to the SQLite database
  conn = sqlite3.connect("FILE_NAME.db")
  cursor = conn.cursor()

  # Get the table names
  cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
  tables = [t[0] for t in cursor.fetchall()]
  print("The following tables are stored inside the file:", tables)

  # Preview the first 5 rows of each table
  for t in tables:
      print(f"\nTable: {t}")
      df = pd.read_sql_query(f"SELECT * FROM {t} LIMIT 5;", conn)
      print(df)
  ```
## Help

For more information on available options, use the help flag:

```bash
python 01_convert_km3net.py -h
```

or

```bash
python 01_convert_km3net.py --help
```

examples/README.md

Lines changed: 3 additions & 1 deletion
@@ -7,7 +7,9 @@ Examples are grouped into five numbered subfolders, roughly in order of how you
 2. **Data.** Reading in data in intermediate formats, plotting feature distributions, and converting data between intermediate file formats. These examples are entirely self-contained and can be run by anyone.
 3. **Weights.** Fitting per-event weights.
 4. **Training.** Training GNN models on various physics tasks.
-5**LiquidO.** Converting h5 files from the LiquidO experiment into intermediate formats suitable for deep learning.
+5. **LiquidO.** Converting h5 files from the LiquidO experiment into intermediate formats suitable for deep learning.
+6. **Prometheus.** Converting parquet files from the Prometheus simulation software into intermediate formats suitable for deep learning.
+7. **KM3NeT.** Converting root files from the KM3NeT experiment into intermediate formats suitable for deep learning.

 Each subfolder contains similarly numbered example scripts.
 Each example script comes with a simple command-line interface and help functionality, e.g.

src/graphnet/data/constants.py

Lines changed: 58 additions & 2 deletions
@@ -52,6 +52,18 @@ class FEATURES:
     ]
     KAGGLE = ["x", "y", "z", "time", "charge", "auxiliary"]
     LIQUIDO = ["sipm_x", "sipm_y", "sipm_z", "t"]
+    KM3NET = [
+        "t",
+        "pos_x",
+        "pos_y",
+        "pos_z",
+        "dir_x",
+        "dir_y",
+        "dir_z",
+        "tot",
+        "trig",
+    ]
+    KM3NET_HNL = KM3NET


 class TRUTH:
@@ -71,8 +83,6 @@ class TRUTH:
         "interaction_type",
         "interaction_time",  # Added for vertex reconstruction
         "inelasticity",
-        "visible_inelasticity",
-        "visible_energy",
         "stopped_muon",
     ]
     DEEPCORE = ICECUBE86
@@ -167,3 +177,49 @@ class TRUTH:
         "energy",
         "pid",
     ]
+    KM3NET = [
+        "true_pdgid",
+        "true_E",
+        "true_pos_x",
+        "true_pos_y",
+        "true_pos_z",
+        "true_dir_x",
+        "true_dir_y",
+        "true_dir_z",
+        "true_zenith",
+        "true_azimuth",
+        "run_id",
+        "evt_id",
+        "frame_index",
+        "trigger_counter",
+        "n_hits",
+        "event_no",
+        "is_cc_flag",
+        "tau_topology",
+    ]
+    KM3NET_HNL = [
+        "true_pdgid",
+        "true_E",
+        "true_pos_x",
+        "true_pos_y",
+        "true_pos_z",
+        "true_dir_x",
+        "true_dir_y",
+        "true_dir_z",
+        "true_zenith",
+        "run_id",
+        "evt_id",
+        "frame_index",
+        "trigger_counter",
+        "n_hits",
+        "event_no",
+        "is_cc_flag",
+        "tau_topology",
+        "zenith_hnl",
+        "azimuth_hnl",
+        "angle_between_showers",
+        "Energy_hnl",
+        "Energy_second_shower",
+        "Energy_imbalance",
+        "distance",
+    ]
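These class-level lists act as canonical column selections: downstream code indexes tables by name in a fixed order. A toy sketch of the pattern on a fake pulse record (stand-in data, not the GraphNeT classes themselves):

```python
class FEATURES:
    """Canonical input-feature names per experiment (KM3NeT excerpt)."""

    KM3NET = [
        "t", "pos_x", "pos_y", "pos_z",
        "dir_x", "dir_y", "dir_z", "tot", "trig",
    ]
    KM3NET_HNL = KM3NET  # HNL studies reuse the same pulse features


# A fake pulse record with an extra bookkeeping column mixed in.
pulse = {
    "t": 101.0, "pos_x": 1.0, "pos_y": 2.0, "pos_z": 3.0,
    "dir_x": 0.0, "dir_y": 0.0, "dir_z": 1.0,
    "tot": 22.0, "trig": 1, "internal_id": 42,
}

# Selecting by the constant list yields features in a fixed order,
# silently dropping anything not in the canonical set.
row = [pulse[name] for name in FEATURES.KM3NET]
print(row)  # [101.0, 1.0, 2.0, 3.0, 0.0, 0.0, 1.0, 22.0, 1]
```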

src/graphnet/data/dataconverter.py

Lines changed: 2 additions & 0 deletions
@@ -21,6 +21,7 @@
 from .extractors.liquido import H5Extractor
 from .extractors.internal import ParquetExtractor
 from .extractors.prometheus import PrometheusExtractor
+from .extractors.km3net import KM3NeTExtractor

 from .dataclasses import I3FileSet

@@ -51,6 +52,7 @@ def __init__(
             List[ParquetExtractor],
             List[H5Extractor],
             List[PrometheusExtractor],
+            List[KM3NeTExtractor],
         ],
         index_column: str = "event_no",
         num_workers: int = 1,
Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
"""Extractors for extracting pure-python data from KM3NeT-Offline files."""

from .km3netextractor import KM3NeTExtractor
from .km3netpulseextractor import (
    KM3NeTTriggPulseExtractor,
    KM3NeTFullPulseExtractor,
)
from .km3nettruthextractor import (
    KM3NeTTruthExtractor,
    KM3NeTHNLTruthExtractor,
    KM3NeTRegularRecoExtractor,
    KM3NeTHNLRecoExtractor,
)
Lines changed: 30 additions & 0 deletions
@@ -0,0 +1,30 @@
"""Base class for all KM3NeT ROOT extractors."""

from typing import Any
from abc import abstractmethod

from graphnet.data.extractors import Extractor

# This class gathers the specific extractors for the different data types
# so that they can all be called from the reader. It is the equivalent of
# the I3Extractor in the IceCube example.


class KM3NeTExtractor(Extractor):
    """Base class for all KM3NeT extractors."""

    def __init__(self, extractor_name: str):
        """Initialize KM3NeTExtractor.

        Args:
            extractor_name: Name of the `KM3NeTExtractor` instance.
                Used to keep track of the provenance of different data,
                and to name tables to which this data is saved.
        """
        super().__init__(extractor_name=extractor_name)

    @abstractmethod
    def __call__(self, file: Any) -> dict:
        """Extract information from file."""
        pass
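A concrete extractor only needs to implement `__call__` and return a dict of named columns. A minimal sketch using a stand-in base class (the real `Extractor` from `graphnet.data.extractors` carries more machinery, and `ToyHitTimeExtractor` is a hypothetical example, not part of the PR):

```python
from abc import ABC, abstractmethod
from typing import Any, Dict, List


class Extractor(ABC):
    """Stand-in for graphnet.data.extractors.Extractor."""

    def __init__(self, extractor_name: str):
        self._extractor_name = extractor_name  # names the output table


class KM3NeTExtractor(Extractor):
    """Base class for all KM3NeT extractors (mirrors the diff above)."""

    @abstractmethod
    def __call__(self, file: Any) -> dict:
        """Extract information from file."""


class ToyHitTimeExtractor(KM3NeTExtractor):
    """Hypothetical extractor pulling hit times out of a parsed event."""

    def __call__(self, file: Any) -> Dict[str, List[float]]:
        # `file` is assumed to expose a list of hits with a "t" field;
        # here we fake it with plain dicts instead of a ROOT file.
        return {"t": [hit["t"] for hit in file["hits"]]}


fake_event = {"hits": [{"t": 101.0}, {"t": 104.5}]}
extractor = ToyHitTimeExtractor(extractor_name="pulse_map")
print(extractor(fake_event))  # {'t': [101.0, 104.5]}
```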
