Skip to content
Open
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
134 changes: 134 additions & 0 deletions ml_peg/analysis/conformers/Folmsbee/analyse_Folmsbee.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
"""Analyse Folmsbee benchmark."""

from __future__ import annotations

from pathlib import Path

from ase import units
from ase.io import read, write
import pytest

from ml_peg.analysis.utils.decorators import build_table, plot_parity
from ml_peg.analysis.utils.utils import (
build_dispersion_name_map,
load_metrics_config,
mae,
)
from ml_peg.app import APP_ROOT
from ml_peg.calcs import CALCS_ROOT
from ml_peg.models.get_models import load_models
from ml_peg.models.models import current_models

MODELS = load_models(current_models)
DISPERSION_NAME_MAP = build_dispersion_name_map(MODELS)

EV_TO_KCAL = units.mol / units.kcal
CALC_PATH = CALCS_ROOT / "conformers" / "Folmsbee" / "outputs"
OUT_PATH = APP_ROOT / "data" / "conformers" / "Folmsbee"

METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml")
DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config(
METRICS_CONFIG_PATH
)


def labels() -> list:
"""
Get list of system names.

Returns
-------
list
List of all system names.
"""
for model_name in MODELS:
labels_list = [path.stem for path in sorted((CALC_PATH / model_name).glob("*"))]
break
return labels_list


@pytest.fixture
@plot_parity(
filename=OUT_PATH / "figure_folmsbee.json",
title="Energies",
x_label="Predicted energy / kcal/mol",
y_label="Reference energy / kcal/mol",
hoverdata={
"Labels": labels(),
},
)
def conformer_energies() -> dict[str, list]:
"""
Get conformer energies for all systems.

Returns
-------
dict[str, list]
Dictionary of all reference and predicted barrier heights.
"""
results = {"ref": []} | {mlip: [] for mlip in MODELS}
ref_stored = False

for model_name in MODELS:
for label in labels():
atoms = read(CALC_PATH / model_name / f"{label}.xyz")

results[model_name].append(atoms.info["model_rel_energy"] * EV_TO_KCAL)
if not ref_stored:
results["ref"].append(atoms.info["ref_energy"] * EV_TO_KCAL)

# Write structures for app
structs_dir = OUT_PATH / model_name
structs_dir.mkdir(parents=True, exist_ok=True)
write(structs_dir / f"{label}.xyz", atoms)
ref_stored = True
return results


@pytest.fixture
def get_mae(conformer_energies) -> dict[str, float]:
"""
Get mean absolute error for conformer energies.

Parameters
----------
conformer_energies
Dictionary of reference and predicted conformer energies.

Returns
-------
dict[str, float]
Dictionary of predicted conformer energies errors for all models.
"""
results = {}
for model_name in MODELS:
results[model_name] = mae(
conformer_energies["ref"], conformer_energies[model_name]
)
return results


@pytest.fixture
@build_table(
filename=OUT_PATH / "folmsbee_metrics_table.json",
metric_tooltips=DEFAULT_TOOLTIPS,
thresholds=DEFAULT_THRESHOLDS,
mlip_name_map=DISPERSION_NAME_MAP,
)
def metrics(get_mae: dict[str, float]) -> dict[str, dict]:
"""
Get all metrics.

Parameters
----------
get_mae
Mean absolute errors for all models.

Returns
-------
dict[str, dict]
Metric names and values for all models.
"""
return {
"MAE": get_mae,
}
101 changes: 101 additions & 0 deletions ml_peg/calcs/conformers/Folmsbee/calc_Folmsbee.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
"""
Compute the Folmsbee dataset of molecular conformers.

Assessing conformer energies using electronic structure and
machine learning methods

Dakota Folmsbee, Geoffrey Hutchinson
International Journal of Quantum Chemistry 2020 121 (1) e26381
DOI: 10.1002/qua.26381
"""

from __future__ import annotations

import json
from pathlib import Path
from typing import Any

from ase import Atoms, units
from ase.io import write
import pytest
from tqdm import tqdm

from ml_peg.models.get_models import load_models
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
from ml_peg.models.get_models import load_models
from ml_peg.calcs.utils.utils import download_s3_data
from ml_peg.models.get_models import load_models

from ml_peg.models.models import current_models

MODELS = load_models(current_models)

KCAL_TO_EV = units.kcal / units.mol

OUT_PATH = Path(__file__).parent / "outputs"


def get_relative_energies(energies: list[float], ref_idx: int) -> list[float]:
"""
Get energies relative to reference.

Parameters
----------
energies
List of energy values.
ref_idx
Index of reference energy.

Returns
-------
list[float]
Energies relative to the reference conformer.
"""
return [x - energies[ref_idx] for x in energies]


@pytest.mark.parametrize("mlip", MODELS.items())
def test_folmsbee(mlip: tuple[str, Any]) -> None:
"""
Benchmark the Folmsbee dataset.

Parameters
----------
mlip
Name of model use and model to get calculator.
"""
model_name, model = mlip
# Use double precision
model.default_dtype = "float64"
calc = model.get_calculator()
# Add D3 calculator for this test
calc = model.add_d3_calculator(calc)

data_path = Path(__file__).parent / "data" / "folmsbee_dataset.json"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
data_path = Path(__file__).parent / "data" / "folmsbee_dataset.json"
data_path = (
download_s3_data(
filename="Folmsbee.zip",
key="inputs/conformers/Folmsbee/Folmsbee.zip",
)
/ "Folmsbee"
)

out_path = OUT_PATH / model_name

with open(data_path) as f:
data = json.load(f)
progress = tqdm(total=len(data))
for structure_data in data:
structure_name = structure_data["molecule_name"]
conformers = []
model_energies = []
raw_energies = structure_data["dft_energy_profile"]
ref_min_conformer_idx = raw_energies.index(min(raw_energies))
ref_energies = get_relative_energies(raw_energies, ref_min_conformer_idx)
ref_energies *= KCAL_TO_EV

for i, conf_positions in enumerate(structure_data["conformer_coordinates"]):
conf_atoms = Atoms(
positions=conf_positions, symbols=structure_data["atom_symbols"]
)
conf_atoms.calc = calc
conf_atoms.info.update({"charge": 0, "spin": 1})
conf_atoms.info["ref_rel_energy"] = ref_energies[i]

conformers.append(conf_atoms)
model_energies.append(conf_atoms.get_potential_energy())

model_energies = get_relative_energies(model_energies, ref_min_conformer_idx)
out_path.mkdir(parents=True, exist_ok=True)
for i, conf_atoms in enumerate(conformers):
conf_atoms.info["model_rel_energy"] = model_energies[i]
write(out_path / f"{structure_name}_conf{i}.xyz", conf_atoms)

progress.update()
Loading