ddmms · lwehrhan · Mar 10, 2026 · Mar 10, 2026 · Mar 13, 2026 · Mar 18, 2026
diff --git a/ml_peg/analysis/conformers/Folmsbee/analyse_Folmsbee.py b/ml_peg/analysis/conformers/Folmsbee/analyse_Folmsbee.py
@@ -0,0 +1,134 @@
+"""Analyse Folmsbee benchmark."""
+
+from __future__ import annotations
+
+from pathlib import Path
+
+from ase import units
+from ase.io import read, write
+import pytest
+
+from ml_peg.analysis.utils.decorators import build_table, plot_parity
+from ml_peg.analysis.utils.utils import (
+    build_dispersion_name_map,
+    load_metrics_config,
+    mae,
+)
+from ml_peg.app import APP_ROOT
+from ml_peg.calcs import CALCS_ROOT
+from ml_peg.models.get_models import load_models
+from ml_peg.models.models import current_models
+
+MODELS = load_models(current_models)
+DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS)
+
+EV_TO_KCAL = units.mol / units.kcal
+CALC_PATH = CALCS_ROOT / "conformers" / "Folmsbee" / "outputs"
+OUT_PATH = APP_ROOT / "data" / "conformers" / "Folmsbee"
+
+METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml")
+DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config(
+    METRICS_CONFIG_PATH
+)
+
+
+def labels() -> list:
+    """
+    Get list of system names.
+
+    Returns
+    -------
+    list
+        List of all system names.
+    """
+    for model_name in MODELS:
+        labels_list = [path.stem for path in sorted((CALC_PATH / model_name).glob("*"))]
+        break
+    return labels_list
+
+
+@pytest.fixture
+@plot_parity(
+    filename=OUT_PATH / "figure_folmsbee.json",
+    title="Energies",
+    x_label="Predicted energy / kcal/mol",
+    y_label="Reference energy / kcal/mol",
+    hoverdata={
+        "Labels": labels(),
+    },
+)
+def conformer_energies() -> dict[str, list]:
+    """
+    Get conformer energies for all systems.
+
+    Returns
+    -------
+    dict[str, list]
+        Dictionary of all reference and predicted barrier heights.
+    """
+    results = {"ref": []} | {mlip: [] for mlip in MODELS}
+    ref_stored = False
+
+    for model_name in MODELS:
+        for label in labels():
+            atoms = read(CALC_PATH / model_name / f"{label}.xyz")
+
+            results[model_name].append(atoms.info["model_rel_energy"] * EV_TO_KCAL)
+            if not ref_stored:
+                results["ref"].append(atoms.info["ref_energy"] * EV_TO_KCAL)
+
+            # Write structures for app
+            structs_dir = OUT_PATH / model_name
+            structs_dir.mkdir(parents=True, exist_ok=True)
+            write(structs_dir / f"{label}.xyz", atoms)
+        ref_stored = True
+    return results
+
+
+@pytest.fixture
+def get_mae(conformer_energies) -> dict[str, float]:
+    """
+    Get mean absolute error for conformer energies.
+
+    Parameters
+    ----------
+    conformer_energies
+        Dictionary of reference and predicted conformer energies.
+
+    Returns
+    -------
+    dict[str, float]
+        Dictionary of predicted conformer energies errors for all models.
+    """
+    results = {}
+    for model_name in MODELS:
+        results[model_name] = mae(
+            conformer_energies["ref"], conformer_energies[model_name]
+        )
+    return results
+
+
+@pytest.fixture
+@build_table(
+    filename=OUT_PATH / "folmsbee_metrics_table.json",
+    metric_tooltips=DEFAULT_TOOLTIPS,
+    thresholds=DEFAULT_THRESHOLDS,
+    mlip_name_map=DISPERSION_NAME_MAP,
+)
+def metrics(get_mae: dict[str, float]) -> dict[str, dict]:
+    """
+    Get all metrics.
+
+    Parameters
+    ----------
+    get_mae
+        Mean absolute errors for all models.
+
+    Returns
+    -------
+    dict[str, dict]
+        Metric names and values for all models.
+    """
+    return {
+        "MAE": get_mae,
+    }
diff --git a/ml_peg/calcs/conformers/Folmsbee/calc_Folmsbee.py b/ml_peg/calcs/conformers/Folmsbee/calc_Folmsbee.py
@@ -0,0 +1,101 @@
+"""
+Compute the Folmsbee dataset of molecular conformers.
+
+Assessing conformer energies using electronic structure and
+machine learning methods
+
+Dakota Folmsbee, Geoffrey Hutchinson
+International Journal of Quantum Chemistry 2020 121 (1) e26381
+DOI: 10.1002/qua.26381
+"""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+from typing import Any
+
+from ase import Atoms, units
+from ase.io import write
+import pytest
+from tqdm import tqdm
+
+from ml_peg.models.get_models import load_models
-from ml_peg.models.get_models import load_models
+from ml_peg.calcs.utils.utils import download_s3_data
+from ml_peg.models.get_models import load_models
-from ml_peg.models.get_models import load_models
+from ml_peg.calcs.utils.utils import download_s3_data
+from ml_peg.models.get_models import load_models
+from ml_peg.models.models import current_models
+
+MODELS = load_models(current_models)
+
+KCAL_TO_EV = units.kcal / units.mol
+
+OUT_PATH = Path(__file__).parent / "outputs"
+
+
+def get_relative_energies(energies: list[float], ref_idx: int) -> list[float]:
+    """
+    Get energies relative to reference.
+
+    Parameters
+    ----------
+    energies
+        List of energy values.
+    ref_idx
+        Index of reference energy.
+
+    Returns
+    -------
+    list[float]
+        Energies relative to the reference conformer.
+    """
+    return [x - energies[ref_idx] for x in energies]
+
+
+@pytest.mark.parametrize("mlip", MODELS.items())
+def test_folmsbee(mlip: tuple[str, Any]) -> None:
+    """
+    Benchmark the Folmsbee dataset.
+
+    Parameters
+    ----------
+    mlip
+        Name of model use and model to get calculator.
+    """
+    model_name, model = mlip
+    # Use double precision
+    model.default_dtype = "float64"
+    calc = model.get_calculator()
+    # Add D3 calculator for this test
+    calc = model.add_d3_calculator(calc)
+
+    data_path = Path(__file__).parent / "data" / "folmsbee_dataset.json"
-    data_path = Path(__file__).parent / "data" / "folmsbee_dataset.json"
+    data_path = (
+        download_s3_data(
+            filename="Folmsbee.zip",
+            key="inputs/conformers/Folmsbee/Folmsbee.zip",
+        )
+        / "Folmsbee"
+    )
-    data_path = Path(__file__).parent / "data" / "folmsbee_dataset.json"
+    data_path = (
+        download_s3_data(
+            filename="Folmsbee.zip",
+            key="inputs/conformers/Folmsbee/Folmsbee.zip",
+        )
+        / "Folmsbee"
+    )
+    out_path = OUT_PATH / model_name
+
+    with open(data_path) as f:
+        data = json.load(f)
+    progress = tqdm(total=len(data))
+    for structure_data in data:
+        structure_name = structure_data["molecule_name"]
+        conformers = []
+        model_energies = []
+        raw_energies = structure_data["dft_energy_profile"]
+        ref_min_conformer_idx = raw_energies.index(min(raw_energies))
+        ref_energies = get_relative_energies(raw_energies, ref_min_conformer_idx)
+        ref_energies *= KCAL_TO_EV
+
+        for i, conf_positions in enumerate(structure_data["conformer_coordinates"]):
+            conf_atoms = Atoms(
+                positions=conf_positions, symbols=structure_data["atom_symbols"]
+            )
+            conf_atoms.calc = calc
+            conf_atoms.info.update({"charge": 0, "spin": 1})
+            conf_atoms.info["ref_rel_energy"] = ref_energies[i]
+
+            conformers.append(conf_atoms)
+            model_energies.append(conf_atoms.get_potential_energy())
+
+        model_energies = get_relative_energies(model_energies, ref_min_conformer_idx)
+        out_path.mkdir(parents=True, exist_ok=True)
+        for i, conf_atoms in enumerate(conformers):
+            conf_atoms.info["model_rel_energy"] = model_energies[i]
+            write(out_path / f"{structure_name}_conf{i}.xyz", conf_atoms)
+
+        progress.update()