From 27d149867f55cf9246a5cf1fd5271e1d070ca5a6 Mon Sep 17 00:00:00 2001
From: elena <elena.gelzinyte@gmail.com>
Date: Mon, 23 Feb 2026 19:33:12 +0000
Subject: [PATCH 1/9] Initial BDE test on DFT geometries

---
 .../user_guide/benchmarks/molecular.rst       |  42 +++
 .../analysis/molecular/BDEs/analyse_BDEs.py   | 258 ++++++++++++++++++
 ml_peg/analysis/molecular/BDEs/metrics.yml    |   7 +
 ml_peg/app/molecular/BDEs/app_BDEs.py         |  73 +++++
 ml_peg/calcs/molecular/BDEs/calc_BDEs.py      |  67 +++++
 5 files changed, 447 insertions(+)
 create mode 100644 ml_peg/analysis/molecular/BDEs/analyse_BDEs.py
 create mode 100644 ml_peg/analysis/molecular/BDEs/metrics.yml
 create mode 100644 ml_peg/app/molecular/BDEs/app_BDEs.py
 create mode 100644 ml_peg/calcs/molecular/BDEs/calc_BDEs.py

diff --git a/docs/source/user_guide/benchmarks/molecular.rst b/docs/source/user_guide/benchmarks/molecular.rst
index 7b1857d9e..a6d3d7bdd 100644
--- a/docs/source/user_guide/benchmarks/molecular.rst
+++ b/docs/source/user_guide/benchmarks/molecular.rst
@@ -174,3 +174,45 @@ Input structures:
   Binary and Ternary Mixtures of Bis(2-hydroxyethyl)ammonium Acetate with Methanol,
   N,N-Dimethylformamide, and Water at Several Temperatures. J. Chem. Eng. Data 62,
   3958-3966 (2017). https://doi.org/10.1021/acs.jced.7b00654
+
+
+
+BDEs
+=========
+
+Summary
+-------
+
+Performance in [for now] predicting energies of 60 CYP substrates and derived radicals.
+Only [CHO] elements
+
+
+Metrics
+-------
+
+1. Energy RMSE
+
+Accuracy of energy prediction. Measured as binding energy per atom.
+
+2. Force RMSE
+
+
+Computational cost
+------------------
+
+Low: tests are likely to take less than a minute to run on CPU.
+
+
+Data availability
+-----------------
+
+Input structures:
+
+* TODO Input structures were evaluated as part of "Gelzinyte et al., Journal of Chemical Theory and Computation
+  2024 20 (1), 164-177, DOI: 10.1021/acs.jctc.3c00710" but I didn't include the xyzs there... Just the SMILES strings.
+
+
+Reference data:
+
+* (TODO ??Same as input data)
+* B3LYP-D3BJ/def2-SV(P)
diff --git a/ml_peg/analysis/molecular/BDEs/analyse_BDEs.py b/ml_peg/analysis/molecular/BDEs/analyse_BDEs.py
new file mode 100644
index 000000000..9006156b9
--- /dev/null
+++ b/ml_peg/analysis/molecular/BDEs/analyse_BDEs.py
@@ -0,0 +1,258 @@
+"""Analyse X23 benchmark."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from ase import units
+from ase.io import read
+import pytest
+
+from ml_peg.analysis.utils.decorators import build_table, plot_parity
+from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae
+from ml_peg.app import APP_ROOT
+from ml_peg.calcs import CALCS_ROOT
+from ml_peg.models.get_models import get_model_names
+from ml_peg.models.models import current_models
+
+MODELS = get_model_names(current_models)
+D3_MODEL_NAMES = build_d3_name_map(MODELS)
+CALC_PATH = CALCS_ROOT / "molecular" / "BDEs" / "outputs"
+OUT_PATH = APP_ROOT / "data" / "molecular" / "BDEs"
+
+METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml")
+DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config(
+    METRICS_CONFIG_PATH
+)
+
+# Unit conversion
+EV_TO_KJ_PER_MOL = units.mol / units.kJ
+
+
+def into_dict_of_labels(atoms, key):
+    """
+    Group atoms based on their atoms.info[key] labels.
+
+    Parameters
+    ----------
+    atoms : list[Atoms]
+        Atoms objects with info[key] labels to group.
+    key : str
+        Key in atoms.info to group by.
+
+    Returns
+    -------
+    dict[str, list[Atoms]]
+        Dictionary mapping each label value to a list of Atoms objects.
+    """
+    dict_out = {}
+    for at in atoms:
+        label = at.info[key]
+        if label not in dict_out:
+            dict_out[label] = []
+        dict_out[label].append(at)
+    return dict_out
+
+
+def get_bde(mol_energy, rad_energy, isolated_h_energy):
+    """
+    Compute bond dissociation energy.
+
+    Parameters
+    ----------
+    mol_energy : float
+        Energy of the intact molecule.
+    rad_energy : float
+        Energy of the molecule with the hydrogen atom removed.
+    isolated_h_energy : float
+        Energy of the isolated hydrogen atom.
+
+    Returns
+    -------
+    float
+        Bond dissociation energy.
+    """
+    return rad_energy + isolated_h_energy - mol_energy
+
+
+def get_hover_data_labels(key) -> list[str]:
+    """
+    Get labels based on the key.
+
+    Parameters
+    ----------
+    key : str
+        Key in atoms.info to retrieve labels from.
+
+    Returns
+    -------
+    list[str]
+        List of system names from atoms.info entries.
+    """
+    labels = []
+    for model_name in MODELS:
+        model_dir = CALC_PATH / model_name
+        if model_dir.exists():
+            xyz_files = sorted(model_dir.glob("*.xyz"))
+            if xyz_files:
+                for xyz_file in xyz_files:
+                    atoms = read(xyz_file, ":")
+                    labels += [at.info[key] for at in atoms]
+                break
+    return labels
+
+
+@pytest.fixture
+@plot_parity(
+    filename=OUT_PATH / "figure.CYP3A4.dft_opt_geometry.BDEs.json",
+    title="Bond Dissociation Energies on DFT Geometries",
+    x_label="Predicted BDE / kJ/mol",
+    y_label="Reference BDE / kJ/mol",
+    hoverdata={
+        "Compound": get_hover_data_labels("compound"),
+    },
+)
+def dft_geometry_bdes() -> dict[str, list]:
+    """
+    Compute BDEs for all sp3 H atoms using DFT optimised geometries.
+
+    Returns
+    -------
+    dict[str, list]
+        Dictionary of reference and predicted BDEs.
+    """
+    results = {"ref": []} | {mlip: [] for mlip in MODELS}
+    ref_stored = False
+
+    for model_name in MODELS:
+        model_dir = CALC_PATH / model_name
+
+        if not model_dir.exists():
+            continue
+
+        xyz_files = sorted(model_dir.glob("*xyz"))
+        if not xyz_files:
+            continue
+
+        for xyz_file in xyz_files:
+            all_atoms = read(xyz_file, index=":")
+            pred_bdes = process_bdes(all_atoms, "pred_")
+            results[model_name] += pred_bdes
+
+            if not ref_stored:
+                dft_bdes = process_bdes(all_atoms, "dft_")
+                results["ref"] += dft_bdes
+        ref_stored = True
+
+    return results
+
+
+def process_bdes(all_atoms, prefix):
+    """
+    Compute BDEs for given atoms and property prefix.
+
+    Parameters
+    ----------
+    all_atoms
+        List of Atoms with appropriate labels for radicals,
+        molecules and isolated atoms.
+    prefix
+        "dft_" or "pred_" to fetch the appropriate energy.
+
+    Returns
+    -------
+    list[float]
+        List of computed bond dissociation energies.
+    """
+    compound_dict = into_dict_of_labels(all_atoms, key="compound")
+
+    isolated_atoms = compound_dict.pop("isolated_atom")
+    isolated_atoms_dict = into_dict_of_labels(isolated_atoms, "element")
+    isolated_h_energy = isolated_atoms_dict["H"][0].info[f"{prefix}energy"]
+
+    all_bdes = []
+    for compound_atoms in compound_dict.values():
+        mol_rad_dict = into_dict_of_labels(compound_atoms, "mol_or_rad")
+        if "rad" not in mol_rad_dict:
+            continue
+
+        mol = mol_rad_dict["mol"]
+        assert len(mol) == 1
+        mol = mol[0]
+        mol_energy = mol.info[f"{prefix}energy"]
+
+        for rad in mol_rad_dict["rad"]:
+            rad_energy = rad.info[f"{prefix}energy"]
+            bde = get_bde(
+                mol_energy=mol_energy,
+                rad_energy=rad_energy,
+                isolated_h_energy=isolated_h_energy,
+            )
+            all_bdes.append(bde * EV_TO_KJ_PER_MOL)
+
+    return all_bdes
+
+
+@pytest.fixture
+def dft_geometry_bde_errors(dft_geometry_bdes) -> dict[str, float]:
+    """
+    Get mean absolute error for bond dissociation energies.
+
+    Parameters
+    ----------
+    dft_geometry_bdes
+        Dictionary of reference and predicted BDEs.
+
+    Returns
+    -------
+    dict[str, float]
+        Dictionary of predicted BDE errors for all models.
+    """
+    results = {}
+    for model_name in MODELS:
+        if dft_geometry_bdes[model_name]:
+            results[model_name] = mae(
+                dft_geometry_bdes["ref"],
+                dft_geometry_bdes[model_name],
+            )
+        else:
+            results[model_name] = None
+    return results
+
+
+@pytest.fixture
+@build_table(
+    filename=OUT_PATH / "metrics_table.CYP3A4.dft_opt_geometry.BDEs.json",
+    metric_tooltips=DEFAULT_TOOLTIPS,
+    thresholds=DEFAULT_THRESHOLDS,
+    mlip_name_map=D3_MODEL_NAMES,
+)
+def metrics(dft_geometry_bde_errors: dict[str, float]) -> dict[str, dict]:
+    """
+    Get all total rad energy metrics.
+
+    Parameters
+    ----------
+    dft_geometry_bde_errors
+        Mean absolute errors for all systems.
+
+    Returns
+    -------
+    dict[str, dict]
+        Metric names and values for all models.
+    """
+    return {
+        "MAE": dft_geometry_bde_errors,
+    }
+
+
+def test_bdes(metrics: dict[str, dict]) -> None:
+    """
+    Run BDEs test.
+
+    Parameters
+    ----------
+    metrics
+        All BDEs metrics.
+    """
+    return
diff --git a/ml_peg/analysis/molecular/BDEs/metrics.yml b/ml_peg/analysis/molecular/BDEs/metrics.yml
new file mode 100644
index 000000000..0c147b1b1
--- /dev/null
+++ b/ml_peg/analysis/molecular/BDEs/metrics.yml
@@ -0,0 +1,7 @@
+metrics:
+  MAE:
+      good: 0.0
+      bad: 100.0
+      unit: kJ/mol
+      tooltip: "Mean Absolute Error for all BDEs"
+      level of theory: B3LYP-D3BJ/def2-SV(P)
diff --git a/ml_peg/app/molecular/BDEs/app_BDEs.py b/ml_peg/app/molecular/BDEs/app_BDEs.py
new file mode 100644
index 000000000..3db90d030
--- /dev/null
+++ b/ml_peg/app/molecular/BDEs/app_BDEs.py
@@ -0,0 +1,73 @@
+"""Run BDEs app."""
+
+from __future__ import annotations
+
+from dash.html import Div
+
+from ml_peg.app import APP_ROOT
+from ml_peg.app.base_app import BaseApp
+from ml_peg.app.utils.build_callbacks import (
+    plot_from_table_column,
+    struct_from_scatter,
+)
+from ml_peg.app.utils.load import read_plot
+from ml_peg.models.get_models import get_model_names
+from ml_peg.models.models import current_models
+
+# Get all models
+MODELS = get_model_names(current_models)
+BENCHMARK_NAME = "Bond Dissociation Energies"
+DOCS_URL = "https://ddmms.github.io/ml-peg/user_guide/benchmarks/molecular.html#bdes"
+DATA_PATH = APP_ROOT / "data" / "molecular" / "BDEs"
+
+
+class BDEsApp(BaseApp):
+    """BDEs benchmark app layout and callbacks."""
+
+    def register_callbacks(self) -> None:
+        """Register callbacks to app."""
+        scatter = read_plot(
+            DATA_PATH / "figure.CYP3A4.dft_opt_geometry.BDEs.json",
+            id=f"{BENCHMARK_NAME}-figure",
+        )
+
+        # Assets dir will be parent directory - individual files for each system
+        structs_dir = DATA_PATH / MODELS[0]
+        structs = [
+            f"assets/molecular/BDEs/{MODELS[0]}/{struct_file.stem}.xyz"
+            for struct_file in sorted(structs_dir.glob("*.xyz"))
+        ]
+
+        plot_from_table_column(
+            table_id=self.table_id,
+            plot_id=f"{BENCHMARK_NAME}-figure-placeholder",
+            column_to_plot={"MAE": scatter},
+        )
+
+        struct_from_scatter(
+            scatter_id=f"{BENCHMARK_NAME}-figure",
+            struct_id=f"{BENCHMARK_NAME}-struct-placeholder",
+            structs=structs,
+            mode="struct",
+        )
+
+
+def get_app() -> BDEsApp:
+    """
+    Get bond dissociation energy benchmark app.
+
+    Returns
+    -------
+    BDEsApp
+        Benchmark layout and callback registration.
+    """
+    return BDEsApp(
+        name=BENCHMARK_NAME,
+        description="Bond Dissociation Energies",
+        docs_url=DOCS_URL,
+        table_path=DATA_PATH / "metrics_table.CYP3A4.dft_opt_geometry.BDEs.json",
+        extra_components=[
+            Div(id=f"{BENCHMARK_NAME}-figure-placeholder"),
+            Div(id=f"{BENCHMARK_NAME}-struct-placeholder"),
+        ],
+    )
diff --git a/ml_peg/calcs/molecular/BDEs/calc_BDEs.py b/ml_peg/calcs/molecular/BDEs/calc_BDEs.py
new file mode 100644
index 000000000..40bc93090
--- /dev/null
+++ b/ml_peg/calcs/molecular/BDEs/calc_BDEs.py
@@ -0,0 +1,67 @@
+"""
+Calculate bond dissociation energies of drug-like molecules.
+
+Journal of Chemical Theory and Computation 2024 20 (1), 164-177
+DOI: 10.1021/acs.jctc.3c00710
+"""
+
+from __future__ import annotations
+
+from copy import copy
+from pathlib import Path
+from typing import Any
+
+from ase import units
+from ase.io import read, write
+import pytest
+
+from ml_peg.calcs.utils.utils import download_s3_data
+from ml_peg.models.get_models import load_models
+from ml_peg.models.models import current_models
+
+MODELS = load_models(current_models)
+
+DATA_PATH = Path(__file__).parent / "data"
+OUT_PATH = Path(__file__).parent / "outputs"
+
+# Unit conversion
+EV_TO_KJ_PER_MOL = units.mol / units.kJ
+
+
+@pytest.mark.parametrize("mlip", MODELS.items())
+def test_bond_dissociation_energy(mlip: tuple[str, Any]) -> None:
+    """
+    Calculate C-H bond dissociation energy of drug-like molecules.
+
+    Parameters
+    ----------
+    mlip
+        Name of model use and model to get calculator.
+    """
+    model_name, model = mlip
+    calc = model.get_calculator()
+
+    # Add D3 calculator for this test (for models where applicable)
+    calc = model.add_d3_calculator(calc)
+
+    bde_dir = (
+        download_s3_data(
+            key="inputs/molecular/BDEs/BDEs.zip",
+            filename="BDEs.zip",
+        )
+        / "BDEs"
+    )
+
+    structures_filename = "cytochrome_p450_substrates.dft_opt.xyz"
+    mols_rads = read(Path(bde_dir) / structures_filename, ":")
+
+    ats_out = []
+    for at in mols_rads:
+        at.calc = copy(calc)
+        at.info["pred_energy"] = at.get_potential_energy()
+        at.arrays["pred_forces"] = at.get_forces()
+        ats_out.append(at)
+
+    write_dir = OUT_PATH / model_name
+    write_dir.mkdir(parents=True, exist_ok=True)
+    write(write_dir / structures_filename, ats_out)

From 866eeb10924fd0c371acadd5f8b798c3a63536a2 Mon Sep 17 00:00:00 2001
From: elena <elena.gelzinyte@gmail.com>
Date: Thu, 26 Feb 2026 18:17:20 +0000
Subject: [PATCH 2/9] correct units to kcal/mol

---
 ml_peg/analysis/molecular/BDEs/analyse_BDEs.py | 8 ++++----
 ml_peg/analysis/molecular/BDEs/metrics.yml     | 2 +-
 ml_peg/calcs/molecular/BDEs/calc_BDEs.py       | 4 ----
 3 files changed, 5 insertions(+), 9 deletions(-)

diff --git a/ml_peg/analysis/molecular/BDEs/analyse_BDEs.py b/ml_peg/analysis/molecular/BDEs/analyse_BDEs.py
index 9006156b9..cb5330c55 100644
--- a/ml_peg/analysis/molecular/BDEs/analyse_BDEs.py
+++ b/ml_peg/analysis/molecular/BDEs/analyse_BDEs.py
@@ -26,7 +26,7 @@
 )
 
 # Unit conversion
-EV_TO_KJ_PER_MOL = units.mol / units.kJ
+EV_TO_KCAL_PER_MOL = units.mol / units.kcal
 
 
 def into_dict_of_labels(atoms, key):
@@ -106,8 +106,8 @@ def get_hover_data_labels(key) -> list[str]:
 @plot_parity(
     filename=OUT_PATH / "figure.CYP3A4.dft_opt_geometry.BDEs.json",
     title="Bond Dissociation Energies on DFT Geometries",
-    x_label="Predicted BDE / kJ/mol",
-    y_label="Reference BDE / kJ/mol",
+    x_label="Predicted BDE / kcal/mol",
+    y_label="Reference BDE / kcal/mol",
     hoverdata={
         "Compound": get_hover_data_labels("compound"),
     },
@@ -188,7 +188,7 @@ def process_bdes(all_atoms, prefix):
                 rad_energy=rad_energy,
                 isolated_h_energy=isolated_h_energy,
             )
-            all_bdes.append(bde * EV_TO_KJ_PER_MOL)
+            all_bdes.append(bde * EV_TO_KCAL_PER_MOL)
 
     return all_bdes
 
diff --git a/ml_peg/analysis/molecular/BDEs/metrics.yml b/ml_peg/analysis/molecular/BDEs/metrics.yml
index 0c147b1b1..1f151995d 100644
--- a/ml_peg/analysis/molecular/BDEs/metrics.yml
+++ b/ml_peg/analysis/molecular/BDEs/metrics.yml
@@ -2,6 +2,6 @@ metrics:
   MAE:
       good: 0.0
       bad: 100.0
-      unit: kJ/mol
+      unit: kcal/mol
       tooltip: "Mean Absolute Error for all BDEs"
       level of theory: B3LYP-D3BJ/def2-SV(P)
diff --git a/ml_peg/calcs/molecular/BDEs/calc_BDEs.py b/ml_peg/calcs/molecular/BDEs/calc_BDEs.py
index 40bc93090..828dfcdb3 100644
--- a/ml_peg/calcs/molecular/BDEs/calc_BDEs.py
+++ b/ml_peg/calcs/molecular/BDEs/calc_BDEs.py
@@ -11,7 +11,6 @@
 from pathlib import Path
 from typing import Any
 
-from ase import units
 from ase.io import read, write
 import pytest
 
@@ -24,9 +23,6 @@
 DATA_PATH = Path(__file__).parent / "data"
 OUT_PATH = Path(__file__).parent / "outputs"
 
-# Unit conversion
-EV_TO_KJ_PER_MOL = units.mol / units.kJ
-
 
 @pytest.mark.parametrize("mlip", MODELS.items())
 def test_bond_dissociation_energy(mlip: tuple[str, Any]) -> None:

From e56aaeb201697169165522583634dd6c5183288a Mon Sep 17 00:00:00 2001
From: elena <elena.gelzinyte@gmail.com>
Date: Tue, 3 Mar 2026 21:52:07 +0000
Subject: [PATCH 3/9] initial bde rank plots

---
 .../analysis/molecular/BDEs/analyse_BDEs.py   | 186 ++++++++++++++++--
 ml_peg/analysis/molecular/BDEs/metrics.yml    |  18 +-
 ml_peg/app/molecular/BDEs/app_BDEs.py         |   8 +-
 3 files changed, 189 insertions(+), 23 deletions(-)

diff --git a/ml_peg/analysis/molecular/BDEs/analyse_BDEs.py b/ml_peg/analysis/molecular/BDEs/analyse_BDEs.py
index cb5330c55..662bfc177 100644
--- a/ml_peg/analysis/molecular/BDEs/analyse_BDEs.py
+++ b/ml_peg/analysis/molecular/BDEs/analyse_BDEs.py
@@ -1,4 +1,4 @@
-"""Analyse X23 benchmark."""
+"""Analyse bond dissociation energy benchmark."""
 
 from __future__ import annotations
 
@@ -6,7 +6,9 @@
 
 from ase import units
 from ase.io import read
+import numpy as np
 import pytest
+from scipy import stats
 
 from ml_peg.analysis.utils.decorators import build_table, plot_parity
 from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae
@@ -28,6 +30,10 @@
 # Unit conversion
 EV_TO_KCAL_PER_MOL = units.mol / units.kcal
 
+# Populated by dft_geometry_bdes; shared with downstream fixtures without
+# appearing in the returned dict (which would add it as a scatter trace).
+_COMPOUND_LABELS: list[str] = []
+
 
 def into_dict_of_labels(atoms, key):
     """
@@ -114,14 +120,17 @@ def get_hover_data_labels(key) -> list[str]:
 )
 def dft_geometry_bdes() -> dict[str, list]:
     """
-    Compute BDEs for all sp3 H atoms using DFT optimised geometries.
+    Compute BDEs for all sp3 H atoms on DFT optimised geometries.
 
     Returns
     -------
     dict[str, list]
-        Dictionary of reference and predicted BDEs.
+        Dictionary of reference and predicted BDEs and labels of the
+        compound for the corresponding BDE.
     """
+    global _COMPOUND_LABELS
     results = {"ref": []} | {mlip: [] for mlip in MODELS}
+    _COMPOUND_LABELS = []
     ref_stored = False
 
     for model_name in MODELS:
@@ -136,18 +145,88 @@ def dft_geometry_bdes() -> dict[str, list]:
 
         for xyz_file in xyz_files:
             all_atoms = read(xyz_file, index=":")
-            pred_bdes = process_bdes(all_atoms, "pred_")
-            results[model_name] += pred_bdes
+            pred_bdes_by_label = process_bdes_per_compound(all_atoms, "pred_")
+            for compound_label, pred_bdes in pred_bdes_by_label.items():
+                if not ref_stored:
+                    _COMPOUND_LABELS += [compound_label] * len(pred_bdes)
+                results[model_name] += pred_bdes
 
             if not ref_stored:
-                dft_bdes = process_bdes(all_atoms, "dft_")
-                results["ref"] += dft_bdes
+                dft_bdes_by_label = process_bdes_per_compound(all_atoms, "dft_")
+                for dft_bdes in dft_bdes_by_label.values():
+                    results["ref"] += dft_bdes
         ref_stored = True
 
     return results
 
 
-def process_bdes(all_atoms, prefix):
+@pytest.fixture
+@plot_parity(
+    filename=OUT_PATH / "figure.CYP3A4.dft_opt_geometry.BDE_ranks.json",
+    title="Bond Dissociation Energy ranks on DFT Geometries",
+    x_label="Predicted BDE rank",
+    y_label="Reference BDE rank",
+    hoverdata={
+        "Compound": get_hover_data_labels("compound"),
+    },
+)
+def dft_geometry_bde_ranks(dft_geometry_bdes) -> dict[str, list]:
+    """
+    Compute BDE ranks for all sp3 H atoms using DFT optimised geometries.
+
+    Parameters
+    ----------
+    dft_geometry_bdes : dict[str, list]
+        Dictionary of reference and predicted BDEs and labels of the
+        compound for the corresponding BDE.
+
+    Returns
+    -------
+    dict[str, list]
+        Dictionary of reference and predicted BDE ranks.
+    """
+    results = {"ref": []} | {mlip: [] for mlip in MODELS}
+    for model_name in results.keys():
+        bdes_by_compound = group_values_by_labels(
+            values=dft_geometry_bdes[model_name],
+            labels=_COMPOUND_LABELS,
+        )
+
+        for compound_bdes in bdes_by_compound.values():
+            ranks = np.argsort(np.argsort(compound_bdes))
+            results[model_name] += list(ranks)
+
+    return results
+
+
+def group_values_by_labels(
+    values: list[float], labels: list[str]
+) -> dict[str, list[float]]:
+    """
+    Group values by their corresponding string labels.
+
+    Parameters
+    ----------
+    values : list[float]
+        Values to group.
+    labels : list[str]
+        Labels corresponding to each value.
+
+    Returns
+    -------
+    dict[str, list[float]]
+        Dictionary mapping each label to a list of its values.
+    """
+    results = {}
+    for label, value in zip(labels, values, strict=False):
+        if label not in results:
+            results[label] = []
+        results[label].append(value)
+
+    return results
+
+
+def process_bdes_per_compound(all_atoms, prefix):
     """
     Compute BDEs for given atoms and property prefix.
 
@@ -161,8 +240,8 @@ def process_bdes(all_atoms, prefix):
 
     Returns
     -------
-    list[float]
-        List of computed bond dissociation energies.
+    dict[str: list[float]]
+        List of computed bond dissociation energies, for each compound.
     """
     compound_dict = into_dict_of_labels(all_atoms, key="compound")
 
@@ -170,12 +249,14 @@ def process_bdes(all_atoms, prefix):
     isolated_atoms_dict = into_dict_of_labels(isolated_atoms, "element")
     isolated_h_energy = isolated_atoms_dict["H"][0].info[f"{prefix}energy"]
 
-    all_bdes = []
-    for compound_atoms in compound_dict.values():
+    all_bdes = {}
+    for compound_name, compound_atoms in compound_dict.items():
         mol_rad_dict = into_dict_of_labels(compound_atoms, "mol_or_rad")
         if "rad" not in mol_rad_dict:
             continue
 
+        all_bdes[compound_name] = []
+
         mol = mol_rad_dict["mol"]
         assert len(mol) == 1
         mol = mol[0]
@@ -188,7 +269,7 @@ def process_bdes(all_atoms, prefix):
                 rad_energy=rad_energy,
                 isolated_h_energy=isolated_h_energy,
             )
-            all_bdes.append(bde * EV_TO_KCAL_PER_MOL)
+            all_bdes[compound_name].append(bde * EV_TO_KCAL_PER_MOL)
 
     return all_bdes
 
@@ -220,6 +301,74 @@ def dft_geometry_bde_errors(dft_geometry_bdes) -> dict[str, float]:
     return results
 
 
+@pytest.fixture
+def dft_geometry_bde_rank_correlations(dft_geometry_bde_ranks) -> dict[str, float]:
+    """
+    Compute mean Kendall's tau rank correlation across all molecules.
+
+    For each molecule, sp3 C-H bond strengths are ranked from lowest to
+    highest by both DFT and MLIP, and the correlation is measured by
+    Kendall's tau. The final metric is the average across all molecules.
+
+    Parameters
+    ----------
+    dft_geometry_bde_ranks : dict[str, list]
+        Dictionary of reference and predicted BDE ranks.
+
+    Returns
+    -------
+    dict[str, float]
+        Dictionary of predicted BDE rank correlation for all models.
+    """
+    ref_ranks_by_compound = group_values_by_labels(
+        values=dft_geometry_bde_ranks["ref"],
+        labels=_COMPOUND_LABELS,
+    )
+
+    results = {}
+    for model_name in MODELS:
+        if dft_geometry_bde_ranks[model_name]:
+            pred_ranks_by_compound = group_values_by_labels(
+                values=dft_geometry_bde_ranks[model_name],
+                labels=_COMPOUND_LABELS,
+            )
+
+            # equivalent to mae, rmse
+            results[model_name] = mean_kendalls_tau(
+                ref_by_label=ref_ranks_by_compound,
+                pred_by_label=pred_ranks_by_compound,
+            )
+
+        else:
+            results[model_name] = None
+    return results
+
+
+def mean_kendalls_tau(ref_by_label, pred_by_label):
+    """
+    Compute mean Kendall's tau across labels.
+
+    Parameters
+    ----------
+    ref_by_label : dict[str, list]
+        Reference ranks grouped by compound label.
+    pred_by_label : dict[str, list]
+        Predicted ranks grouped by compound label.
+
+    Returns
+    -------
+    float
+        Mean Kendall's tau correlation across all labels.
+    """
+    kendalls_taus = []
+    for label in ref_by_label.keys():
+        ref = ref_by_label[label]
+        pred = pred_by_label[label]
+        correlation = stats.kendalltau(ref, pred).correlation
+        kendalls_taus.append(correlation)
+    return np.mean(kendalls_taus)
+
+
 @pytest.fixture
 @build_table(
     filename=OUT_PATH / "metrics_table.CYP3A4.dft_opt_geometry.BDEs.json",
@@ -227,7 +376,10 @@ def dft_geometry_bde_errors(dft_geometry_bdes) -> dict[str, float]:
     thresholds=DEFAULT_THRESHOLDS,
     mlip_name_map=D3_MODEL_NAMES,
 )
-def metrics(dft_geometry_bde_errors: dict[str, float]) -> dict[str, dict]:
+def metrics(
+    dft_geometry_bde_errors: dict[str, float],
+    dft_geometry_bde_rank_correlations: dict[str, float],
+) -> dict[str, dict]:
     """
     Get all total rad energy metrics.
 
@@ -235,6 +387,9 @@ def metrics(dft_geometry_bde_errors: dict[str, float]) -> dict[str, dict]:
     ----------
     dft_geometry_bde_errors
         Mean absolute errors for all systems.
+    dft_geometry_bde_rank_correlations
+        Mean Kendal's tau across predicted and reference
+        BDE ranks within a given molecule.
 
     Returns
     -------
@@ -242,7 +397,8 @@ def metrics(dft_geometry_bde_errors: dict[str, float]) -> dict[str, dict]:
         Metric names and values for all models.
     """
     return {
-        "MAE": dft_geometry_bde_errors,
+        "Direct BDE": dft_geometry_bde_errors,
+        "BDE rank": dft_geometry_bde_rank_correlations,
     }
 
 
diff --git a/ml_peg/analysis/molecular/BDEs/metrics.yml b/ml_peg/analysis/molecular/BDEs/metrics.yml
index 1f151995d..3b6469219 100644
--- a/ml_peg/analysis/molecular/BDEs/metrics.yml
+++ b/ml_peg/analysis/molecular/BDEs/metrics.yml
@@ -1,7 +1,13 @@
 metrics:
-  MAE:
-      good: 0.0
-      bad: 100.0
-      unit: kcal/mol
-      tooltip: "Mean Absolute Error for all BDEs"
-      level of theory: B3LYP-D3BJ/def2-SV(P)
+  Direct BDE:
+    good: 0.0
+    bad: 50.0
+    unit: kcal/mol
+    tooltip: "Mean Absolute Error on DFT-relaxed structures"
+    level of theory: B3LYP-D3BJ/def2-SV(P)
+  BDE rank:
+    good: 1.0
+    bad: 0.0
+    unit: null
+    tooltip: "Mean Kendal rank correlation coefficient"
+    level of theory: B3LYP-D3BJ/def2-SV(P)
diff --git a/ml_peg/app/molecular/BDEs/app_BDEs.py b/ml_peg/app/molecular/BDEs/app_BDEs.py
index 3db90d030..b97fcbfc4 100644
--- a/ml_peg/app/molecular/BDEs/app_BDEs.py
+++ b/ml_peg/app/molecular/BDEs/app_BDEs.py
@@ -26,10 +26,14 @@ class BDEsApp(BaseApp):
 
     def register_callbacks(self) -> None:
         """Register callbacks to app."""
-        scatter = read_plot(
+        scatter_bdes = read_plot(
             DATA_PATH / "figure.CYP3A4.dft_opt_geometry.BDEs.json",
             id=f"{BENCHMARK_NAME}-figure",
         )
+        scatter_ranks = read_plot(
+            DATA_PATH / "figure.CYP3A4.dft_opt_geometry.BDE_ranks.json",
+            id=f"{BENCHMARK_NAME}-ranks-figure",
+        )
 
         # Assets dir will be parent directory - individual files for each system
         structs_dir = DATA_PATH / MODELS[0]
@@ -41,7 +45,7 @@ def register_callbacks(self) -> None:
         plot_from_table_column(
             table_id=self.table_id,
             plot_id=f"{BENCHMARK_NAME}-figure-placeholder",
-            column_to_plot={"MAE": scatter},
+            column_to_plot={"Direct BDE": scatter_bdes, "BDE rank": scatter_ranks},
         )
 
         struct_from_scatter(

From e34e40b72e5ad148a1f22e70b93458078f4ad340 Mon Sep 17 00:00:00 2001
From: elena <elena.gelzinyte@gmail.com>
Date: Tue, 3 Mar 2026 22:43:48 +0000
Subject: [PATCH 4/9] Tests with geometry optimisation

---
 .../analysis/molecular/BDEs/analyse_BDEs.py   | 451 ++++++++++++------
 ml_peg/analysis/molecular/BDEs/metrics.yml    |  16 +-
 ml_peg/calcs/molecular/BDEs/calc_BDEs.py      |  73 ++-
 .../molecular/BDEs/calc_BDEs_mlff_opt.py      |  55 +++
 4 files changed, 439 insertions(+), 156 deletions(-)
 create mode 100644 ml_peg/calcs/molecular/BDEs/calc_BDEs_mlff_opt.py

diff --git a/ml_peg/analysis/molecular/BDEs/analyse_BDEs.py b/ml_peg/analysis/molecular/BDEs/analyse_BDEs.py
index 662bfc177..e3ae8d65e 100644
--- a/ml_peg/analysis/molecular/BDEs/analyse_BDEs.py
+++ b/ml_peg/analysis/molecular/BDEs/analyse_BDEs.py
@@ -30,9 +30,10 @@
 # Unit conversion
 EV_TO_KCAL_PER_MOL = units.mol / units.kcal
 
-# Populated by dft_geometry_bdes; shared with downstream fixtures without
-# appearing in the returned dict (which would add it as a scatter trace).
+# Populated by dft_geometry_bdes / mlff_geometry_bdes; shared with downstream
+# fixtures without appearing in the returned dict (which would add a scatter trace).
 _COMPOUND_LABELS: list[str] = []
+_MLFF_COMPOUND_LABELS: list[str] = []
 
 
 def into_dict_of_labels(atoms, key):
@@ -108,29 +109,130 @@ def get_hover_data_labels(key) -> list[str]:
     return labels
 
 
-@pytest.fixture
-@plot_parity(
-    filename=OUT_PATH / "figure.CYP3A4.dft_opt_geometry.BDEs.json",
-    title="Bond Dissociation Energies on DFT Geometries",
-    x_label="Predicted BDE / kcal/mol",
-    y_label="Reference BDE / kcal/mol",
-    hoverdata={
-        "Compound": get_hover_data_labels("compound"),
-    },
-)
-def dft_geometry_bdes() -> dict[str, list]:
+def group_values_by_labels(
+    values: list[float], labels: list[str]
+) -> dict[str, list[float]]:
     """
-    Compute BDEs for all sp3 H atoms on DFT optimised geometries.
+    Group values by their corresponding string labels.
+
+    Parameters
+    ----------
+    values : list[float]
+        Values to group.
+    labels : list[str]
+        Labels corresponding to each value.
+
+    Returns
+    -------
+    dict[str, list[float]]
+        Dictionary mapping each label to a list of its values.
+    """
+    results = {}
+    for label, value in zip(labels, values, strict=False):
+        if label not in results:
+            results[label] = []
+        results[label].append(value)
+
+    return results
+
+
+def process_bdes_per_compound(all_atoms, prefix):
+    """
+    Compute BDEs for given atoms and property prefix.
+
+    Parameters
+    ----------
+    all_atoms
+        List of Atoms with appropriate labels for radicals,
+        molecules and isolated atoms.
+    prefix
+        "dft_" or "pred_" to fetch the appropriate energy.
+
+    Returns
+    -------
+    dict[str: list[float]]
+        List of computed bond dissociation energies, for each compound.
+    """
+    compound_dict = into_dict_of_labels(all_atoms, key="compound")
+
+    isolated_atoms = compound_dict.pop("isolated_atom")
+    isolated_atoms_dict = into_dict_of_labels(isolated_atoms, "element")
+    isolated_h_energy = isolated_atoms_dict["H"][0].info[f"{prefix}energy"]
+
+    all_bdes = {}
+    for compound_name, compound_atoms in compound_dict.items():
+        mol_rad_dict = into_dict_of_labels(compound_atoms, "mol_or_rad")
+        if "rad" not in mol_rad_dict:
+            continue
+
+        all_bdes[compound_name] = []
+
+        mol = mol_rad_dict["mol"]
+        assert len(mol) == 1
+        mol = mol[0]
+        mol_energy = mol.info[f"{prefix}energy"]
+
+        for rad in mol_rad_dict["rad"]:
+            rad_energy = rad.info[f"{prefix}energy"]
+            bde = get_bde(
+                mol_energy=mol_energy,
+                rad_energy=rad_energy,
+                isolated_h_energy=isolated_h_energy,
+            )
+            all_bdes[compound_name].append(bde * EV_TO_KCAL_PER_MOL)
+
+    return all_bdes
+
+
+def mean_kendalls_tau(ref_by_label, pred_by_label):
+    """
+    Compute mean Kendall's tau across labels.
+
+    Parameters
+    ----------
+    ref_by_label : dict[str, list]
+        Reference ranks grouped by compound label.
+    pred_by_label : dict[str, list]
+        Predicted ranks grouped by compound label.
+
+    Returns
+    -------
+    float
+        Mean Kendall's tau correlation across all labels.
+    """
+    kendalls_taus = []
+    for label in ref_by_label.keys():
+        ref = ref_by_label[label]
+        pred = pred_by_label[label]
+        correlation = stats.kendalltau(ref, pred).correlation
+        kendalls_taus.append(correlation)
+    return np.mean(kendalls_taus)
+
+
+# ---------------------------------------------------------------------------
+# Shared helpers
+# ---------------------------------------------------------------------------
+
+
+def _load_bdes(xyz_suffix: str, labels_out: list) -> dict[str, list]:
+    """
+    Load BDEs from output xyz files for all models.
+
+    Parameters
+    ----------
+    xyz_suffix
+        Suffix of the xyz filename, e.g. "dft_opt" or "mlff_opt".
+    labels_out
+        List to populate in-place with compound labels for each BDE entry.
 
     Returns
     -------
     dict[str, list]
-        Dictionary of reference and predicted BDEs and labels of the
-        compound for the corresponding BDE.
+        Dictionary of reference and predicted BDEs keyed by model name and "ref".
     """
-    global _COMPOUND_LABELS
+    xyz_filename = f"cytochrome_p450_substrates.{xyz_suffix}.xyz"
     results = {"ref": []} | {mlip: [] for mlip in MODELS}
-    _COMPOUND_LABELS = []
+    labels_out.clear()
     ref_stored = False
 
     for model_name in MODELS:
@@ -139,7 +241,7 @@ def dft_geometry_bdes() -> dict[str, list]:
         if not model_dir.exists():
             continue
 
-        xyz_files = sorted(model_dir.glob("*xyz"))
+        xyz_files = sorted(model_dir.glob(xyz_filename))
         if not xyz_files:
             continue
 
@@ -148,7 +250,7 @@ def dft_geometry_bdes() -> dict[str, list]:
             pred_bdes_by_label = process_bdes_per_compound(all_atoms, "pred_")
             for compound_label, pred_bdes in pred_bdes_by_label.items():
                 if not ref_stored:
-                    _COMPOUND_LABELS += [compound_label] * len(pred_bdes)
+                    labels_out += [compound_label] * len(pred_bdes)
                 results[model_name] += pred_bdes
 
             if not ref_stored:
@@ -160,25 +262,16 @@ def dft_geometry_bdes() -> dict[str, list]:
     return results
 
 
-@pytest.fixture
-@plot_parity(
-    filename=OUT_PATH / "figure.CYP3A4.dft_opt_geometry.BDE_ranks.json",
-    title="Bond Dissociation Energy ranks on DFT Geometries",
-    x_label="Predicted BDE rank",
-    y_label="Reference BDE rank",
-    hoverdata={
-        "Compound": get_hover_data_labels("compound"),
-    },
-)
-def dft_geometry_bde_ranks(dft_geometry_bdes) -> dict[str, list]:
+def _compute_ranks(bdes: dict, labels: list) -> dict[str, list]:
     """
-    Compute BDE ranks for all sp3 H atoms using DFT optimised geometries.
+    Compute BDE ranks per compound for all models and reference.
 
     Parameters
     ----------
-    dft_geometry_bdes : dict[str, list]
-        Dictionary of reference and predicted BDEs and labels of the
-        compound for the corresponding BDE.
+    bdes
+        Dictionary of reference and predicted BDEs keyed by model name and "ref".
+    labels
+        Compound label for each BDE entry.
 
     Returns
     -------
@@ -188,90 +281,124 @@ def dft_geometry_bde_ranks(dft_geometry_bdes) -> dict[str, list]:
     results = {"ref": []} | {mlip: [] for mlip in MODELS}
     for model_name in results.keys():
         bdes_by_compound = group_values_by_labels(
-            values=dft_geometry_bdes[model_name],
-            labels=_COMPOUND_LABELS,
+            values=bdes[model_name],
+            labels=labels,
         )
-
         for compound_bdes in bdes_by_compound.values():
             ranks = np.argsort(np.argsort(compound_bdes))
             results[model_name] += list(ranks)
-
     return results
 
 
-def group_values_by_labels(
-    values: list[float], labels: list[str]
-) -> dict[str, list[float]]:
+def _compute_bde_errors(bdes: dict) -> dict[str, float]:
     """
-    Group values by their corresponding string labels.
+    Compute mean absolute error of predicted BDEs against reference.
 
     Parameters
     ----------
-    values : list[float]
-        Values to group.
-    labels : list[str]
-        Labels corresponding to each value.
+    bdes
+        Dictionary of reference and predicted BDEs keyed by model name and "ref".
 
     Returns
     -------
-    dict[str, list[float]]
-        Dictionary mapping each label to a list of its values.
+    dict[str, float]
+        MAE for each model.
     """
-    results = {}
-    for label, value in zip(labels, values, strict=False):
-        if label not in results:
-            results[label] = []
-        results[label].append(value)
-
-    return results
+    return {
+        model_name: mae(bdes["ref"], bdes[model_name]) if bdes[model_name] else None
+        for model_name in MODELS
+    }
 
 
-def process_bdes_per_compound(all_atoms, prefix):
+def _compute_rank_correlations(ranks: dict, labels: list) -> dict[str, float]:
     """
-    Compute BDEs for given atoms and property prefix.
+    Compute mean Kendall's tau rank correlation against reference ranks.
 
     Parameters
     ----------
-    all_atoms
-        List of Atoms with appropriate labels for radicals,
-        molecules and isolated atoms.
-    prefix
-        "dft_" or "pred_" to fetch the appropriate energy.
+    ranks
+        Dictionary of reference and predicted BDE ranks keyed by model name and "ref".
+    labels
+        Compound label for each rank entry.
 
     Returns
     -------
-    dict[str: list[float]]
-        List of computed bond dissociation energies, for each compound.
+    dict[str, float]
+        Mean Kendall's tau for each model.
     """
-    compound_dict = into_dict_of_labels(all_atoms, key="compound")
+    ref_ranks_by_compound = group_values_by_labels(
+        values=ranks["ref"],
+        labels=labels,
+    )
+    results = {}
+    for model_name in MODELS:
+        if ranks[model_name]:
+            pred_ranks_by_compound = group_values_by_labels(
+                values=ranks[model_name],
+                labels=labels,
+            )
+            results[model_name] = mean_kendalls_tau(
+                ref_by_label=ref_ranks_by_compound,
+                pred_by_label=pred_ranks_by_compound,
+            )
+        else:
+            results[model_name] = None
+    return results
 
-    isolated_atoms = compound_dict.pop("isolated_atom")
-    isolated_atoms_dict = into_dict_of_labels(isolated_atoms, "element")
-    isolated_h_energy = isolated_atoms_dict["H"][0].info[f"{prefix}energy"]
 
-    all_bdes = {}
-    for compound_name, compound_atoms in compound_dict.items():
-        mol_rad_dict = into_dict_of_labels(compound_atoms, "mol_or_rad")
-        if "rad" not in mol_rad_dict:
-            continue
+# ---------------------------------------------------------------------------
+# DFT-geometry fixtures
+# ---------------------------------------------------------------------------
 
-        all_bdes[compound_name] = []
 
-        mol = mol_rad_dict["mol"]
-        assert len(mol) == 1
-        mol = mol[0]
-        mol_energy = mol.info[f"{prefix}energy"]
+@pytest.fixture
+@plot_parity(
+    filename=OUT_PATH / "figure.CYP3A4.dft_opt_geometry.BDEs.json",
+    title="Bond Dissociation Energies on DFT Geometries",
+    x_label="Predicted BDE / kcal/mol",
+    y_label="Reference BDE / kcal/mol",
+    hoverdata={
+        "Compound": get_hover_data_labels("compound"),
+    },
+)
+def dft_geometry_bdes() -> dict[str, list]:
+    """
+    Compute BDEs for all sp3 H atoms on DFT optimised geometries.
 
-        for rad in mol_rad_dict["rad"]:
-            rad_energy = rad.info[f"{prefix}energy"]
-            bde = get_bde(
-                mol_energy=mol_energy,
-                rad_energy=rad_energy,
-                isolated_h_energy=isolated_h_energy,
-            )
-            all_bdes[compound_name].append(bde * EV_TO_KCAL_PER_MOL)
+    Returns
+    -------
+    dict[str, list]
+        Dictionary of reference and predicted BDEs and labels of the
+        compound for the corresponding BDE.
+    """
+    return _load_bdes("dft_opt", _COMPOUND_LABELS)
 
-    return all_bdes
+
+@pytest.fixture
+@plot_parity(
+    filename=OUT_PATH / "figure.CYP3A4.dft_opt_geometry.BDE_ranks.json",
+    title="Bond Dissociation Energy ranks on DFT Geometries",
+    x_label="Predicted BDE rank",
+    y_label="Reference BDE rank",
+    hoverdata={
+        "Compound": get_hover_data_labels("compound"),
+    },
+)
+def dft_geometry_bde_ranks(dft_geometry_bdes) -> dict[str, list]:
+    """
+    Compute BDE ranks for all sp3 H atoms using DFT optimised geometries.
+
+    Parameters
+    ----------
+    dft_geometry_bdes : dict[str, list]
+        Dictionary of reference and predicted BDEs.
+
+    Returns
+    -------
+    dict[str, list]
+        Dictionary of reference and predicted BDE ranks.
+    """
+    return _compute_ranks(dft_geometry_bdes, _COMPOUND_LABELS)
 
 
 @pytest.fixture
@@ -289,16 +416,7 @@ def dft_geometry_bde_errors(dft_geometry_bdes) -> dict[str, float]:
     dict[str, float]
         Dictionary of predicted BDE errors for all models.
     """
-    results = {}
-    for model_name in MODELS:
-        if dft_geometry_bdes[model_name]:
-            results[model_name] = mae(
-                dft_geometry_bdes["ref"],
-                dft_geometry_bdes[model_name],
-            )
-        else:
-            results[model_name] = None
-    return results
+    return _compute_bde_errors(dft_geometry_bdes)
 
 
 @pytest.fixture
@@ -320,53 +438,107 @@ def dft_geometry_bde_rank_correlations(dft_geometry_bde_ranks) -> dict[str, floa
     dict[str, float]
         Dictionary of predicted BDE rank correlation for all models.
     """
-    ref_ranks_by_compound = group_values_by_labels(
-        values=dft_geometry_bde_ranks["ref"],
-        labels=_COMPOUND_LABELS,
-    )
+    return _compute_rank_correlations(dft_geometry_bde_ranks, _COMPOUND_LABELS)
 
-    results = {}
-    for model_name in MODELS:
-        if dft_geometry_bde_ranks[model_name]:
-            pred_ranks_by_compound = group_values_by_labels(
-                values=dft_geometry_bde_ranks[model_name],
-                labels=_COMPOUND_LABELS,
-            )
 
-            # equivalent to mae, rmse
-            results[model_name] = mean_kendalls_tau(
-                ref_by_label=ref_ranks_by_compound,
-                pred_by_label=pred_ranks_by_compound,
-            )
+# ---------------------------------------------------------------------------
+# MLFF-geometry fixtures
+# ---------------------------------------------------------------------------
 
-        else:
-            results[model_name] = None
-    return results
 
+@pytest.fixture
+@plot_parity(
+    filename=OUT_PATH / "figure.CYP3A4.mlff_opt_geometry.BDEs.json",
+    title="Bond Dissociation Energies on MLFF Geometries",
+    x_label="Predicted BDE / kcal/mol",
+    y_label="Reference BDE / kcal/mol",
+    hoverdata={
+        "Compound": get_hover_data_labels("compound"),
+    },
+)
+def mlff_geometry_bdes() -> dict[str, list]:
+    """
+    Compute BDEs for all sp3 H atoms on MLFF optimised geometries.
 
-def mean_kendalls_tau(ref_by_label, pred_by_label):
+    Returns
+    -------
+    dict[str, list]
+        Dictionary of reference and predicted BDEs and labels of the
+        compound for the corresponding BDE.
     """
-    Compute mean Kendall's tau across labels.
+    return _load_bdes("mlff_opt", _MLFF_COMPOUND_LABELS)
+
+
+@pytest.fixture
+@plot_parity(
+    filename=OUT_PATH / "figure.CYP3A4.mlff_opt_geometry.BDE_ranks.json",
+    title="Bond Dissociation Energy ranks on MLFF Geometries",
+    x_label="Predicted BDE rank",
+    y_label="Reference BDE rank",
+    hoverdata={
+        "Compound": get_hover_data_labels("compound"),
+    },
+)
+def mlff_geometry_bde_ranks(mlff_geometry_bdes) -> dict[str, list]:
+    """
+    Compute BDE ranks for all sp3 H atoms using MLFF optimised geometries.
 
     Parameters
     ----------
-    ref_by_label : dict[str, list]
-        Reference ranks grouped by compound label.
-    pred_by_label : dict[str, list]
-        Predicted ranks grouped by compound label.
+    mlff_geometry_bdes : dict[str, list]
+        Dictionary of reference and predicted BDEs.
 
     Returns
     -------
-    float
-        Mean Kendall's tau correlation across all labels.
+    dict[str, list]
+        Dictionary of reference and predicted BDE ranks.
     """
-    kendalls_taus = []
-    for label in ref_by_label.keys():
-        ref = ref_by_label[label]
-        pred = pred_by_label[label]
-        correlation = stats.kendalltau(ref, pred).correlation
-        kendalls_taus.append(correlation)
-    return np.mean(kendalls_taus)
+    return _compute_ranks(mlff_geometry_bdes, _MLFF_COMPOUND_LABELS)
+
+
+@pytest.fixture
+def mlff_geometry_bde_errors(mlff_geometry_bdes) -> dict[str, float]:
+    """
+    Get mean absolute error for bond dissociation energies on MLFF geometries.
+
+    Parameters
+    ----------
+    mlff_geometry_bdes
+        Dictionary of reference and predicted BDEs.
+
+    Returns
+    -------
+    dict[str, float]
+        Dictionary of predicted BDE errors for all models.
+    """
+    return _compute_bde_errors(mlff_geometry_bdes)
+
+
+@pytest.fixture
+def mlff_geometry_bde_rank_correlations(mlff_geometry_bde_ranks) -> dict[str, float]:
+    """
+    Compute mean Kendall's tau rank correlation across all molecules on MLFF geometries.
+
+    For each molecule, sp3 C-H bond strengths are ranked from lowest to
+    highest by both DFT and MLIP, and the correlation is measured by
+    Kendall's tau. The final metric is the average across all molecules.
+
+    Parameters
+    ----------
+    mlff_geometry_bde_ranks : dict[str, list]
+        Dictionary of reference and predicted BDE ranks.
+
+    Returns
+    -------
+    dict[str, float]
+        Dictionary of predicted BDE rank correlation for all models.
+    """
+    return _compute_rank_correlations(mlff_geometry_bde_ranks, _MLFF_COMPOUND_LABELS)
+
+
+# ---------------------------------------------------------------------------
+# Metrics table
+# ---------------------------------------------------------------------------
 
 
 @pytest.fixture
@@ -379,17 +551,22 @@ def mean_kendalls_tau(ref_by_label, pred_by_label):
 def metrics(
     dft_geometry_bde_errors: dict[str, float],
     dft_geometry_bde_rank_correlations: dict[str, float],
+    mlff_geometry_bde_errors: dict[str, float],
+    mlff_geometry_bde_rank_correlations: dict[str, float],
 ) -> dict[str, dict]:
     """
-    Get all total rad energy metrics.
+    Get all BDE metrics.
 
     Parameters
     ----------
     dft_geometry_bde_errors
-        Mean absolute errors for all systems.
+        Mean absolute errors on DFT-relaxed structures.
     dft_geometry_bde_rank_correlations
-        Mean Kendal's tau across predicted and reference
-        BDE ranks within a given molecule.
+        Mean Kendall's tau across predicted and reference BDE ranks on DFT geometries.
+    mlff_geometry_bde_errors
+        Mean absolute errors on MLFF-relaxed structures.
+    mlff_geometry_bde_rank_correlations
+        Mean Kendall's tau across predicted and reference BDE ranks on MLFF geometries.
 
     Returns
     -------
@@ -399,6 +576,8 @@ def metrics(
     return {
         "Direct BDE": dft_geometry_bde_errors,
         "BDE rank": dft_geometry_bde_rank_correlations,
+        "Direct BDE (MLFF opt)": mlff_geometry_bde_errors,
+        "BDE rank (MLFF opt)": mlff_geometry_bde_rank_correlations,
     }
 
 
diff --git a/ml_peg/analysis/molecular/BDEs/metrics.yml b/ml_peg/analysis/molecular/BDEs/metrics.yml
index 3b6469219..3d6af0b25 100644
--- a/ml_peg/analysis/molecular/BDEs/metrics.yml
+++ b/ml_peg/analysis/molecular/BDEs/metrics.yml
@@ -1,13 +1,25 @@
 metrics:
   Direct BDE:
-    good: 0.0
+    good: 0.5
     bad: 50.0
     unit: kcal/mol
     tooltip: "Mean Absolute Error on DFT-relaxed structures"
     level of theory: B3LYP-D3BJ/def2-SV(P)
+  Direct BDE (MLFF opt):
+    good: 0.5
+    bad: 50.0
+    unit: kcal/mol
+    tooltip: "Mean Absolute Error on MLFF-relaxed structures"
+    level of theory: B3LYP-D3BJ/def2-SV(P)
   BDE rank:
     good: 1.0
-    bad: 0.0
+    bad: 0.5
     unit: null
     tooltip: "Mean Kendal rank correlation coefficient"
     level of theory: B3LYP-D3BJ/def2-SV(P)
+  BDE rank (MLFF opt):
+    good: 1.0
+    bad: 0.5
+    unit: null
+    tooltip: "Mean Kendall rank correlation coefficient on MLFF-relaxed structures"
+    level of theory: B3LYP-D3BJ/def2-SV(P)
diff --git a/ml_peg/calcs/molecular/BDEs/calc_BDEs.py b/ml_peg/calcs/molecular/BDEs/calc_BDEs.py
index 828dfcdb3..ed0ea1456 100644
--- a/ml_peg/calcs/molecular/BDEs/calc_BDEs.py
+++ b/ml_peg/calcs/molecular/BDEs/calc_BDEs.py
@@ -12,6 +12,7 @@
 from typing import Any
 
 from ase.io import read, write
+from janus_core.calculations.geom_opt import GeomOpt
 import pytest
 
 from ml_peg.calcs.utils.utils import download_s3_data
@@ -20,9 +21,55 @@
 
 MODELS = load_models(current_models)
 
-DATA_PATH = Path(__file__).parent / "data"
 OUT_PATH = Path(__file__).parent / "outputs"
 
+DFT_OPT_FILENAME = "cytochrome_p450_substrates.dft_opt.xyz"
+
+
+def evaluate_bde_structures(
+    model_name: str,
+    model: Any,
+    bde_dir: Path,
+    out_filename: str,
+    geom_opt: bool = False,
+) -> None:
+    """
+    Evaluate MLFF energies on BDE structures, optionally after geometry optimisation.
+
+    Parameters
+    ----------
+    model_name
+        Name of the model, used to determine the output directory.
+    model
+        Model object providing get_calculator() and add_d3_calculator().
+    bde_dir
+        Directory containing the input xyz file.
+    out_filename
+        Name of the output xyz file.
+    geom_opt
+        If True, geometry-optimise each structure with the MLFF before
+        evaluating energies and forces.
+    """
+    calc = model.get_calculator()
+    calc = model.add_d3_calculator(calc)
+
+    mols_rads = read(bde_dir / DFT_OPT_FILENAME, ":")
+
+    ats_out = []
+    for at in mols_rads:
+        at.calc = copy(calc)
+        if geom_opt:
+            geomopt = GeomOpt(struct=at, filter_class=None, write_results=False)
+            geomopt.run()
+            at = geomopt.struct
+        at.info["pred_energy"] = at.get_potential_energy()
+        at.arrays["pred_forces"] = at.get_forces()
+        ats_out.append(at)
+
+    write_dir = OUT_PATH / model_name
+    write_dir.mkdir(parents=True, exist_ok=True)
+    write(write_dir / out_filename, ats_out)
+
 
 @pytest.mark.parametrize("mlip", MODELS.items())
 def test_bond_dissociation_energy(mlip: tuple[str, Any]) -> None:
@@ -35,10 +82,6 @@ def test_bond_dissociation_energy(mlip: tuple[str, Any]) -> None:
         Name of model use and model to get calculator.
     """
     model_name, model = mlip
-    calc = model.get_calculator()
-
-    # Add D3 calculator for this test (for models where applicable)
-    calc = model.add_d3_calculator(calc)
 
     bde_dir = (
         download_s3_data(
@@ -48,16 +91,10 @@ def test_bond_dissociation_energy(mlip: tuple[str, Any]) -> None:
         / "BDEs"
     )
 
-    structures_filename = "cytochrome_p450_substrates.dft_opt.xyz"
-    mols_rads = read(Path(bde_dir) / structures_filename, ":")
-
-    ats_out = []
-    for at in mols_rads:
-        at.calc = copy(calc)
-        at.info["pred_energy"] = at.get_potential_energy()
-        at.arrays["pred_forces"] = at.get_forces()
-        ats_out.append(at)
-
-    write_dir = OUT_PATH / model_name
-    write_dir.mkdir(parents=True, exist_ok=True)
-    write(write_dir / structures_filename, ats_out)
+    evaluate_bde_structures(
+        model_name=model_name,
+        model=model,
+        bde_dir=bde_dir,
+        out_filename=DFT_OPT_FILENAME,
+        geom_opt=False,
+    )
diff --git a/ml_peg/calcs/molecular/BDEs/calc_BDEs_mlff_opt.py b/ml_peg/calcs/molecular/BDEs/calc_BDEs_mlff_opt.py
new file mode 100644
index 000000000..6f68638c2
--- /dev/null
+++ b/ml_peg/calcs/molecular/BDEs/calc_BDEs_mlff_opt.py
@@ -0,0 +1,55 @@
+"""
+Calculate BDEs of drug-like molecules on MLFF-optimised geometries.
+
+Journal of Chemical Theory and Computation 2024 20 (1), 164-177
+DOI: 10.1021/acs.jctc.3c00710
+"""
+
+from __future__ import annotations
+
+from typing import Any
+
+import pytest
+
+from ml_peg.calcs.molecular.BDEs.calc_BDEs import evaluate_bde_structures
+from ml_peg.calcs.utils.utils import download_s3_data
+from ml_peg.models.get_models import load_models
+from ml_peg.models.models import current_models
+
+MODELS = load_models(current_models)
+
+MLFF_OPT_FILENAME = "cytochrome_p450_substrates.mlff_opt.xyz"
+
+
+@pytest.mark.parametrize("mlip", MODELS.items())
+def test_bond_dissociation_energy_mlff_opt(mlip: tuple[str, Any]) -> None:
+    """
+    Calculate C-H BDEs of drug-like molecules on MLFF-optimised geometries.
+
+    Geometry-optimises each structure (molecule, radical, isolated atom) using the
+    MLFF before evaluating energies and forces. DFT reference energies are preserved
+    from the input structures. Reference BDEs are computed from DFT energies at DFT
+    geometries.
+
+    Parameters
+    ----------
+    mlip
+        Name of model use and model to get calculator.
+    """
+    model_name, model = mlip
+
+    bde_dir = (
+        download_s3_data(
+            key="inputs/molecular/BDEs/BDEs.zip",
+            filename="BDEs.zip",
+        )
+        / "BDEs"
+    )
+
+    evaluate_bde_structures(
+        model_name=model_name,
+        model=model,
+        bde_dir=bde_dir,
+        out_filename=MLFF_OPT_FILENAME,
+        geom_opt=True,
+    )

From 6f41ef8f187a3ac7e03aaec542410ce34c83e982 Mon Sep 17 00:00:00 2001
From: elena <elena.gelzinyte@gmail.com>
Date: Wed, 11 Mar 2026 20:42:49 +0000
Subject: [PATCH 5/9] fix interactie visualisation

---
 .../analysis/molecular/BDEs/analyse_BDEs.py   |  50 +++++-
 ml_peg/app/molecular/BDEs/app_BDEs.py         | 168 +++++++++++++++++-
 2 files changed, 206 insertions(+), 12 deletions(-)

diff --git a/ml_peg/analysis/molecular/BDEs/analyse_BDEs.py b/ml_peg/analysis/molecular/BDEs/analyse_BDEs.py
index e3ae8d65e..dbd0ed195 100644
--- a/ml_peg/analysis/molecular/BDEs/analyse_BDEs.py
+++ b/ml_peg/analysis/molecular/BDEs/analyse_BDEs.py
@@ -5,7 +5,7 @@
 from pathlib import Path
 
 from ase import units
-from ase.io import read
+from ase.io import read, write
 import numpy as np
 import pytest
 from scipy import stats
@@ -310,6 +310,46 @@ def _compute_bde_errors(bdes: dict) -> dict[str, float]:
     }
 
 
+def _save_struct_files(xyz_suffix: str) -> None:
+    """
+    Save individual radical XYZ files for app visualisation.
+
+    Writes one file per BDE data point (one per radical) in the same order
+    as ``_load_bdes``, so that scatter-point indices map to structure files.
+    Saves for every model that has the relevant output file.
+
+    Parameters
+    ----------
+    xyz_suffix
+        Suffix of the xyz filename, e.g. ``"dft_opt"`` or ``"mlff_opt"``.
+    """
+    xyz_filename = f"cytochrome_p450_substrates.{xyz_suffix}.xyz"
+    for model_name in MODELS:
+        model_dir = CALC_PATH / model_name
+        if not model_dir.exists():
+            continue
+        xyz_files = sorted(model_dir.glob(xyz_filename))
+        if not xyz_files:
+            continue
+
+        struct_dir = OUT_PATH / model_name / xyz_suffix
+        struct_dir.mkdir(parents=True, exist_ok=True)
+
+        for xyz_file in xyz_files:
+            all_atoms = read(xyz_file, index=":")
+            compound_dict = into_dict_of_labels(all_atoms, key="compound")
+            compound_dict.pop("isolated_atom", None)
+
+            idx = 0
+            for compound_atoms in compound_dict.values():
+                mol_rad_dict = into_dict_of_labels(compound_atoms, "mol_or_rad")
+                if "rad" not in mol_rad_dict:
+                    continue
+                for rad in mol_rad_dict["rad"]:
+                    write(struct_dir / f"{idx}.xyz", rad)
+                    idx += 1
+
+
 def _compute_rank_correlations(ranks: dict, labels: list) -> dict[str, float]:
     """
     Compute mean Kendall's tau rank correlation against reference ranks.
@@ -371,7 +411,9 @@ def dft_geometry_bdes() -> dict[str, list]:
         Dictionary of reference and predicted BDEs and labels of the
         compound for the corresponding BDE.
     """
-    return _load_bdes("dft_opt", _COMPOUND_LABELS)
+    result = _load_bdes("dft_opt", _COMPOUND_LABELS)
+    _save_struct_files("dft_opt")
+    return result
 
 
 @pytest.fixture
@@ -466,7 +508,9 @@ def mlff_geometry_bdes() -> dict[str, list]:
         Dictionary of reference and predicted BDEs and labels of the
         compound for the corresponding BDE.
     """
-    return _load_bdes("mlff_opt", _MLFF_COMPOUND_LABELS)
+    result = _load_bdes("mlff_opt", _MLFF_COMPOUND_LABELS)
+    _save_struct_files("mlff_opt")
+    return result
 
 
 @pytest.fixture
diff --git a/ml_peg/app/molecular/BDEs/app_BDEs.py b/ml_peg/app/molecular/BDEs/app_BDEs.py
index b97fcbfc4..3a3115d1b 100644
--- a/ml_peg/app/molecular/BDEs/app_BDEs.py
+++ b/ml_peg/app/molecular/BDEs/app_BDEs.py
@@ -2,7 +2,8 @@
 
 from __future__ import annotations
 
-from dash.html import Div
+from dash import Input, Output, callback
+from dash.html import Div, Iframe
 
 from ml_peg.app import APP_ROOT
 from ml_peg.app.base_app import BaseApp
@@ -11,6 +12,7 @@
     struct_from_scatter,
 )
 from ml_peg.app.utils.load import read_plot
+from ml_peg.app.utils.weas import generate_weas_html
 from ml_peg.models.get_models import get_model_names
 from ml_peg.models.models import current_models
 
@@ -34,27 +36,175 @@ def register_callbacks(self) -> None:
             DATA_PATH / "figure.CYP3A4.dft_opt_geometry.BDE_ranks.json",
             id=f"{BENCHMARK_NAME}-ranks-figure",
         )
+        scatter_mlff_bdes = read_plot(
+            DATA_PATH / "figure.CYP3A4.mlff_opt_geometry.BDEs.json",
+            id=f"{BENCHMARK_NAME}-mlff-figure",
+        )
+        scatter_mlff_ranks = read_plot(
+            DATA_PATH / "figure.CYP3A4.mlff_opt_geometry.BDE_ranks.json",
+            id=f"{BENCHMARK_NAME}-mlff-ranks-figure",
+        )
+
+        def _get_structs(suffix: str) -> list[str]:
+            """
+            Return asset paths for the first model with saved structure files.
+
+            Parameters
+            ----------
+            suffix
+                Geometry suffix, e.g. ``"dft_opt"`` or ``"mlff_opt"``.
+
+            Returns
+            -------
+            list[str]
+                Asset URL paths for each structure file, or empty list if none found.
+            """
+            for model_name in MODELS:
+                structs_dir = DATA_PATH / model_name / suffix
+                xyz_files = sorted(structs_dir.glob("*.xyz"), key=lambda p: int(p.stem))
+                if xyz_files:
+                    return [
+                        f"assets/molecular/BDEs/{model_name}/{suffix}/{f.stem}.xyz"
+                        for f in xyz_files
+                    ]
+            return []
+
+        def _get_structs_per_model(suffix: str) -> dict[str, list[str]]:
+            """
+            Return asset paths for each model that has saved structure files.
 
-        # Assets dir will be parent directory - individual files for each system
-        structs_dir = DATA_PATH / MODELS[0]
-        structs = [
-            f"assets/molecular/BDEs/{MODELS[0]}/{struct_file.stem}.xyz"
-            for struct_file in sorted(structs_dir.glob("*.xyz"))
-        ]
+            Parameters
+            ----------
+            suffix
+                Geometry suffix, e.g. ``"dft_opt"`` or ``"mlff_opt"``.
+
+            Returns
+            -------
+            dict[str, list[str]]
+                Mapping of model name to asset URL paths for its structure files.
+            """
+            result = {}
+            for model_name in MODELS:
+                structs_dir = DATA_PATH / model_name / suffix
+                xyz_files = sorted(structs_dir.glob("*.xyz"), key=lambda p: int(p.stem))
+                if xyz_files:
+                    result[model_name] = [
+                        f"assets/molecular/BDEs/{model_name}/{suffix}/{f.stem}.xyz"
+                        for f in xyz_files
+                    ]
+            return result
+
+        structs_dft = _get_structs("dft_opt")
+        structs_mlff_by_model = _get_structs_per_model("mlff_opt")
 
         plot_from_table_column(
             table_id=self.table_id,
             plot_id=f"{BENCHMARK_NAME}-figure-placeholder",
-            column_to_plot={"Direct BDE": scatter_bdes, "BDE rank": scatter_ranks},
+            column_to_plot={
+                "Direct BDE": scatter_bdes,
+                "BDE rank": scatter_ranks,
+                "Direct BDE (MLFF opt)": scatter_mlff_bdes,
+                "BDE rank (MLFF opt)": scatter_mlff_ranks,
+            },
         )
 
         struct_from_scatter(
             scatter_id=f"{BENCHMARK_NAME}-figure",
             struct_id=f"{BENCHMARK_NAME}-struct-placeholder",
-            structs=structs,
+            structs=structs_dft,
+            mode="struct",
+        )
+
+        struct_from_scatter(
+            scatter_id=f"{BENCHMARK_NAME}-ranks-figure",
+            struct_id=f"{BENCHMARK_NAME}-struct-placeholder",
+            structs=structs_dft,
             mode="struct",
         )
 
+        def _show_mlff_struct_from_click(click_data):
+            """
+            Return structure viewer for the clicked MLFF scatter point.
+
+            Parameters
+            ----------
+            click_data
+                Clicked data point in scatter plot.
+
+            Returns
+            -------
+            Div
+                Visualised structure on plot click.
+            """
+            if not click_data:
+                return Div("Click on a metric to view the structure.")
+            point = click_data["points"][0]
+            idx = point["pointNumber"]
+            curve_num = point["curveNumber"]
+            if curve_num >= len(MODELS):
+                return Div("No structures available for this model.")
+            model_name = MODELS[curve_num]
+            structs = structs_mlff_by_model.get(model_name, [])
+            if not structs:
+                return Div("No structures available for this model.")
+            return Div(
+                Iframe(
+                    srcDoc=generate_weas_html(structs[idx], "struct", 0),
+                    style={
+                        "height": "550px",
+                        "width": "100%",
+                        "border": "1px solid #ddd",
+                        "borderRadius": "5px",
+                    },
+                )
+            )
+
+        @callback(
+            Output(
+                f"{BENCHMARK_NAME}-struct-placeholder", "children", allow_duplicate=True
+            ),
+            Input(f"{BENCHMARK_NAME}-mlff-figure", "clickData"),
+            prevent_initial_call="initial_duplicate",
+        )
+        def show_mlff_struct(click_data):
+            """
+            Show structure for the clicked MLFF BDE scatter point.
+
+            Parameters
+            ----------
+            click_data
+                Clicked data point in scatter plot.
+
+            Returns
+            -------
+            Div
+                Visualised structure on plot click.
+            """
+            return _show_mlff_struct_from_click(click_data)
+
+        @callback(
+            Output(
+                f"{BENCHMARK_NAME}-struct-placeholder", "children", allow_duplicate=True
+            ),
+            Input(f"{BENCHMARK_NAME}-mlff-ranks-figure", "clickData"),
+            prevent_initial_call="initial_duplicate",
+        )
+        def show_mlff_rank_struct(click_data):
+            """
+            Show structure for the clicked MLFF BDE rank scatter point.
+
+            Parameters
+            ----------
+            click_data
+                Clicked data point in scatter plot.
+
+            Returns
+            -------
+            Div
+                Visualised structure on plot click.
+            """
+            return _show_mlff_struct_from_click(click_data)
+
 
 def get_app() -> BDEsApp:
     """

From b87fc886f4d311de348f37b87db17937ad91406c Mon Sep 17 00:00:00 2001
From: elena <elena.gelzinyte@gmail.com>
Date: Wed, 11 Mar 2026 21:41:07 +0000
Subject: [PATCH 6/9] float64 for BDE tests

---
 ml_peg/calcs/molecular/BDEs/calc_BDEs.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/ml_peg/calcs/molecular/BDEs/calc_BDEs.py b/ml_peg/calcs/molecular/BDEs/calc_BDEs.py
index ed0ea1456..e10f540f7 100644
--- a/ml_peg/calcs/molecular/BDEs/calc_BDEs.py
+++ b/ml_peg/calcs/molecular/BDEs/calc_BDEs.py
@@ -50,6 +50,8 @@ def evaluate_bde_structures(
         If True, geometry-optimise each structure with the MLFF before
         evaluating energies and forces.
     """
+    model.default_dtype = "float64"
+    model.d3_kwargs["dtype"] = "float64"
     calc = model.get_calculator()
     calc = model.add_d3_calculator(calc)
 

From 616793ad8764ed39573ea945c559683a628347a2 Mon Sep 17 00:00:00 2001
From: elena <elena.gelzinyte@gmail.com>
Date: Wed, 11 Mar 2026 21:53:27 +0000
Subject: [PATCH 7/9] update docs

---
 .../user_guide/benchmarks/molecular.rst       | 37 +++++++++++++------
 1 file changed, 26 insertions(+), 11 deletions(-)

diff --git a/docs/source/user_guide/benchmarks/molecular.rst b/docs/source/user_guide/benchmarks/molecular.rst
index a6d3d7bdd..ab3e4937c 100644
--- a/docs/source/user_guide/benchmarks/molecular.rst
+++ b/docs/source/user_guide/benchmarks/molecular.rst
@@ -178,29 +178,44 @@ Input structures:
 
 
 BDEs
-=========
+====
 
 Summary
 -------
 
-Performance in [for now] predicting energies of 60 CYP substrates and derived radicals.
-Only [CHO] elements
+Performance in predicting C-H bond dissociation energies (BDEs) for 60 CYP3A4 drug-like
+substrates (CHO elements only), comprising 1117 sp3 C-H bonds across all molecules.
 
 
 Metrics
 -------
 
-1. Energy RMSE
+1. Direct BDE
+
+Mean absolute error (MAE) of predicted BDEs against DFT reference values, in kcal/mol.
+
+For each molecule, BDEs are computed as: BDE = E(radical) + E(H) − E(molecule), where
+energies are evaluated on DFT-optimised geometries.
+
+2. BDE rank
+
+Mean Kendall's τ rank correlation of predicted BDE rankings against reference rankings,
+evaluated per molecule and averaged across all 60 compounds.
 
-Accuracy of energy prediction. Measured as binding energy per atom.
+3. Direct BDE (MLFF opt)
 
-2. Force RMSE
+Same as (1), but geometries are first relaxed using the MLFF before evaluating energies.
+
+4. BDE rank (MLFF opt)
+
+Same as (2), but using MLFF-optimised geometries.
 
 
 Computational cost
 ------------------
 
-Low: tests are likely to take less than a minute to run on CPU.
+Medium: the DFT geometry tests are fast, but the MLFF geometry optimisation tests may
+take several minutes per model on CPU.
 
 
 Data availability
@@ -208,11 +223,11 @@ Data availability
 
 Input structures:
 
-* TODO Input structures were evaluated as part of "Gelzinyte et al., Journal of Chemical Theory and Computation
-  2024 20 (1), 164-177, DOI: 10.1021/acs.jctc.3c00710" but I didn't include the xyzs there... Just the SMILES strings.
-
+* Gelzinyte, E. et al. Transferable Machine Learning Interatomic Potential for Bond
+  Dissociation Energy Estimation. J. Chem. Theory Comput. 20, 164-177 (2024).
+  DOI: 10.1021/acs.jctc.3c00710
 
 Reference data:
 
-* (TODO ??Same as input data)
+* Same as input data
 * B3LYP-D3BJ/def2-SV(P)

From 8d3175e0b9da47427d27a0fac95989ec57ec8f67 Mon Sep 17 00:00:00 2001
From: elena <elena.gelzinyte@gmail.com>
Date: Wed, 11 Mar 2026 21:58:17 +0000
Subject: [PATCH 8/9] make standalone app

---
 ml_peg/app/molecular/BDEs/app_BDEs.py | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/ml_peg/app/molecular/BDEs/app_BDEs.py b/ml_peg/app/molecular/BDEs/app_BDEs.py
index 3a3115d1b..9ba1716c6 100644
--- a/ml_peg/app/molecular/BDEs/app_BDEs.py
+++ b/ml_peg/app/molecular/BDEs/app_BDEs.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from dash import Input, Output, callback
+from dash import Dash, Input, Output, callback
 from dash.html import Div, Iframe
 
 from ml_peg.app import APP_ROOT
@@ -225,3 +225,16 @@ def get_app() -> BDEsApp:
             Div(id=f"{BENCHMARK_NAME}-struct-placeholder"),
         ],
     )
+
+
+if __name__ == "__main__":
+    # Create Dash app
+    full_app = Dash(__name__, assets_folder=DATA_PATH.parent.parent)
+
+    # Construct layout and register callbacks
+    bdes_app = get_app()
+    full_app.layout = bdes_app.layout
+    bdes_app.register_callbacks()
+
+    # Run app
+    full_app.run(port=8055, debug=True)

From 8262e91ffdb82a1cff862b8ea1789883c72e17e4 Mon Sep 17 00:00:00 2001
From: elena <elena.gelzinyte@gmail.com>
Date: Thu, 12 Mar 2026 21:51:43 +0000
Subject: [PATCH 9/9] changes to work with main

---
 ml_peg/analysis/molecular/BDEs/analyse_BDEs.py | 8 ++++++--
 ml_peg/calcs/molecular/BDEs/calc_BDEs.py       | 2 +-
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/ml_peg/analysis/molecular/BDEs/analyse_BDEs.py b/ml_peg/analysis/molecular/BDEs/analyse_BDEs.py
index dbd0ed195..44ca3315c 100644
--- a/ml_peg/analysis/molecular/BDEs/analyse_BDEs.py
+++ b/ml_peg/analysis/molecular/BDEs/analyse_BDEs.py
@@ -11,14 +11,18 @@
 from scipy import stats
 
 from ml_peg.analysis.utils.decorators import build_table, plot_parity
-from ml_peg.analysis.utils.utils import build_d3_name_map, load_metrics_config, mae
+from ml_peg.analysis.utils.utils import (
+    build_dispersion_name_map,
+    load_metrics_config,
+    mae,
+)
 from ml_peg.app import APP_ROOT
 from ml_peg.calcs import CALCS_ROOT
 from ml_peg.models.get_models import get_model_names
 from ml_peg.models.models import current_models
 
 MODELS = get_model_names(current_models)
-D3_MODEL_NAMES = build_d3_name_map(MODELS)
+D3_MODEL_NAMES = build_dispersion_name_map(MODELS)
 CALC_PATH = CALCS_ROOT / "molecular" / "BDEs" / "outputs"
 OUT_PATH = APP_ROOT / "data" / "molecular" / "BDEs"
 
diff --git a/ml_peg/calcs/molecular/BDEs/calc_BDEs.py b/ml_peg/calcs/molecular/BDEs/calc_BDEs.py
index e10f540f7..e8b06f633 100644
--- a/ml_peg/calcs/molecular/BDEs/calc_BDEs.py
+++ b/ml_peg/calcs/molecular/BDEs/calc_BDEs.py
@@ -51,7 +51,7 @@ def evaluate_bde_structures(
         evaluating energies and forces.
     """
     model.default_dtype = "float64"
-    model.d3_kwargs["dtype"] = "float64"
+    model.dispersion_kwargs["dtype"] = "float64"
     calc = model.get_calculator()
     calc = model.add_d3_calculator(calc)