ddmms · vue1999 · Mar 4, 2026 · joehart2001 · Mar 20, 2026
diff --git a/docs/source/user_guide/benchmarks/surfaces.rst b/docs/source/user_guide/benchmarks/surfaces.rst
@@ -145,3 +145,56 @@ Reference data:
   * S. P. Ong, W. D. Richards, A. Jain, G. Hautier, M. Kocher, S. Cholia, D. Gunter, V. Chevrier, K. A. Persson, G. Ceder, "Python Materials Genomics (pymatgen): A Robust, Open-Source Python Library for Materials Analysis," Comput. Mater. Sci., 2013, 68, 314–319. https://doi.org/10.1016/j.commatsci.2012.10.028
 
 * Tran et al. relaxed the slabs using spin-polarized PBE calculations performed in VASP, with a cutoff energy of 400 eV.
+
+Cleavage Energy
+===============
+
+Summary
+-------
+
+Performance in predicting cleavage energies for 36,718 surface configurations
+across a wide range of materials and Miller indices.
+
+Metrics
+-------
+
+1. Cleavage energy MAE
+
+Accuracy of cleavage energy predictions compared to DFT reference values.
+
+For each surface, the cleavage energy is calculated as
+``(E_slab - thickness_ratio * E_bulk) / (2 * A)``, where ``E_slab`` and
+``E_bulk`` are single-point energies of the slab and the lattice-matched bulk
+unit cell, ``thickness_ratio`` is the number of bulk unit cells in the slab
+thickness, and ``A`` is the surface area. Results are reported in meV/A^2.
+The mean absolute error is computed over all 36,718 surfaces.
+
+2. Cleavage energy RMSE
+
+Root mean squared error of cleavage energy predictions across all surfaces.
+
+Computational cost
+------------------
+
+Medium: benchmark involves only single-point calculations, but for 36,718 slab-bulk pairs.
+
+Data availability
+-----------------
+
+Input data:
+
+* Surface configurations were obtained from the Materials Project, covering
+  3,699 unique bulk materials with multiple Miller indices and terminations
+  per material. The original unfiltered data source is available at
+  Zenodo (DOI: 10.5281/zenodo.10381505).
+
+Reference data:
+
+* DFT cleavage energies calculated using PBE functional.
+
+Publication:
+
+* A. Mehdizadeh and P. Schindler, "Surface stability modeling with universal
+  machine learning interatomic potentials: a comprehensive cleavage energy
+  benchmarking study," Mach. Learn.: Sci. Technol., 2025.
+  https://iopscience.iop.org/article/10.1088/3050-287X/ae1408
diff --git a/ml_peg/analysis/surfaces/cleavage_energy/analyse_cleavage_energy.py b/ml_peg/analysis/surfaces/cleavage_energy/analyse_cleavage_energy.py
@@ -0,0 +1,238 @@
+"""Analyse cleavage energy benchmark."""
+
+from __future__ import annotations
+
+import json
+from pathlib import Path
+
+import numpy as np
+import pytest
+
+from ml_peg.analysis.utils.decorators import build_table, plot_parity
+from ml_peg.analysis.utils.utils import load_metrics_config, mae
+from ml_peg.app import APP_ROOT
+from ml_peg.calcs import CALCS_ROOT
+from ml_peg.models.get_models import get_model_names
+from ml_peg.models.models import current_models
+
+MODELS = get_model_names(current_models)
+CALC_PATH = CALCS_ROOT / "surfaces" / "cleavage_energy" / "outputs"
+OUT_PATH = APP_ROOT / "data" / "surfaces" / "cleavage_energy"
+
+METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml")
+DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config(
+    METRICS_CONFIG_PATH
+)
+
+EV_TO_MEV = 1000.0
+
+
+def _load_model_results(model_name: str) -> dict | None:
+    """
+    Load the JSON energy results for a model, or None if absent.
+
+    Parameters
+    ----------
+    model_name
+        Name of the model whose results file to load.
+
+    Returns
+    -------
+    dict | None
+        Parsed JSON results dictionary, or None if the file does not exist.
+    """
+    result_file = CALC_PATH / f"{model_name}.json"
+    if not result_file.exists():
+        return None
+    with open(result_file, encoding="utf-8") as f:
+        return json.load(f)
+
+
+def system_names() -> list[str]:
+    """
+    Get list of system identifiers from calc outputs.
+
+    Returns
+    -------
+    list[str]
+        Sorted list of unique_id values from the first model with results.
+    """
+    for model_name in MODELS:
+        data = _load_model_results(model_name)
+        if data is not None:
+            return sorted(data.keys())
+    return []
+
+
+def compute_cleavage_energy(
+    slab_energy: float,
+    bulk_energy: float,
+    thickness_ratio: float,
+    area_slab: float,
+) -> float:
+    """
+    Compute cleavage energy from slab and bulk energies.
+
+    Parameters
+    ----------
+    slab_energy
+        Total energy of the slab.
+    bulk_energy
+        Total energy of the lattice-matched bulk unit cell.
+    thickness_ratio
+        Number of bulk unit cells in the slab thickness.
+    area_slab
+        Surface area of the slab in Angstrom^2.
+
+    Returns
+    -------
+    float
+        Cleavage energy in eV/Angstrom^2.
+    """
+    return (slab_energy - thickness_ratio * bulk_energy) / (2 * area_slab)
+
+
+@pytest.fixture
+@plot_parity(
+    filename=OUT_PATH / "figure_cleavage_energies.json",
+    title="Cleavage Energies",
+    x_label="Predicted cleavage energy / meV/\u00c5\u00b2",
+    y_label="Reference cleavage energy / meV/\u00c5\u00b2",
+    hoverdata={
+        "System": system_names(),
+    },
+)
+def cleavage_energies() -> dict[str, list]:
+    """
+    Get cleavage energies for all systems in meV/A^2.
+
+    Also saves per-system errors for the distribution plot.
+
+    Returns
+    -------
+    dict[str, list]
+        Dictionary of reference and predicted cleavage energies in meV/A^2.
+    """
+    results = {"ref": []} | {mlip: [] for mlip in MODELS}
+    canonical_ids = None
+
+    for model_name in MODELS:
+        data = _load_model_results(model_name)
+        if data is None:
+            continue
+
+        if canonical_ids is None:
+            canonical_ids = sorted(data.keys())
+            for uid in canonical_ids:
+                entry = data[uid]
+                results["ref"].append(entry["ref_cleavage_energy"] * EV_TO_MEV)
+
+        for uid in canonical_ids:
+            entry = data[uid]
+            pred_ce = (
+                compute_cleavage_energy(
+                    entry["slab_energy"],
+                    entry["bulk_energy"],
+                    entry["thickness_ratio"],
+                    entry["area_slab"],
+                )
+                * EV_TO_MEV
+            )
+            results[model_name].append(pred_ce)
+
+    return results
+
+
+@pytest.fixture
+def cleavage_mae(cleavage_energies: dict[str, list]) -> dict[str, float]:
+    """
+    Get mean absolute error for cleavage energies.
+
+    Parameters
+    ----------
+    cleavage_energies
+        Dictionary of reference and predicted cleavage energies.
+
+    Returns
+    -------
+    dict[str, float]
+        MAE for each model.
+    """
+    results = {}
+    for model_name in MODELS:
+        if cleavage_energies[model_name]:
+            results[model_name] = mae(
+                cleavage_energies["ref"], cleavage_energies[model_name]
+            )
+        else:
+            results[model_name] = None
+    return results
+
+
+@pytest.fixture
+def cleavage_rmse(cleavage_energies: dict[str, list]) -> dict[str, float]:
+    """
+    Get root mean squared error for cleavage energies.
+
+    Parameters
+    ----------
+    cleavage_energies
+        Dictionary of reference and predicted cleavage energies.
+
+    Returns
+    -------
+    dict[str, float]
+        RMSE for each model.
+    """
+    results = {}
+    for model_name in MODELS:
+        if cleavage_energies[model_name]:
+            ref = np.array(cleavage_energies["ref"])
+            pred = np.array(cleavage_energies[model_name])
+            results[model_name] = float(np.sqrt(np.mean((pred - ref) ** 2)))
+        else:
+            results[model_name] = None
+    return results
+
+
+@pytest.fixture
+@build_table(
+    filename=OUT_PATH / "cleavage_energy_metrics_table.json",
+    metric_tooltips=DEFAULT_TOOLTIPS,
+    thresholds=DEFAULT_THRESHOLDS,
+)
+def metrics(
+    cleavage_mae: dict[str, float],
+    cleavage_rmse: dict[str, float],
+) -> dict[str, dict]:
+    """
+    Get all cleavage energy metrics.
+
+    Parameters
+    ----------
+    cleavage_mae
+        Mean absolute errors for all models.
+    cleavage_rmse
+        Root mean squared errors for all models.
+
+    Returns
+    -------
+    dict[str, dict]
+        Metric names and values for all models.
+    """
+    return {
+        "MAE": cleavage_mae,
+        "RMSE": cleavage_rmse,
+    }
+
+
+def test_cleavage_energy(metrics: dict[str, dict]) -> None:
+    """
+    Run cleavage energy analysis.
+
+    Parameters
+    ----------
+    metrics
+        All cleavage energy metrics.
+    """
+    return
diff --git a/ml_peg/analysis/surfaces/cleavage_energy/metrics.yml b/ml_peg/analysis/surfaces/cleavage_energy/metrics.yml
@@ -0,0 +1,13 @@
+metrics:
+  MAE:
+    good: 0.0
+    bad: 10.0
+    unit: meV/A^2
+    tooltip: Mean Absolute Error of cleavage energies
+    level_of_theory: PBE
+  RMSE:
+    good: 0.0
+    bad: 10.0
+    unit: meV/A^2
+    tooltip: Root Mean Squared Error of cleavage energies
+    level_of_theory: PBE
diff --git a/ml_peg/app/surfaces/cleavage_energy/app_cleavage_energy.py b/ml_peg/app/surfaces/cleavage_energy/app_cleavage_energy.py
@@ -0,0 +1,68 @@
+"""Run cleavage energy app."""
+
+from __future__ import annotations
+
+from dash import Dash
+from dash.html import Div
+
+from ml_peg.app import APP_ROOT
+from ml_peg.app.base_app import BaseApp
+from ml_peg.app.utils.load import read_plot
+from ml_peg.models.get_models import get_model_names
+from ml_peg.models.models import current_models
+
+MODELS = get_model_names(current_models)
+BENCHMARK_NAME = "Cleavage Energy"
+DOCS_URL = (
+    "https://ddmms.github.io/ml-peg/user_guide/benchmarks/surfaces.html#cleavage-energy"
+)
+DATA_PATH = APP_ROOT / "data" / "surfaces" / "cleavage_energy"
+
+
+class CleavageEnergyApp(BaseApp):
+    """Cleavage energy benchmark app layout and callbacks."""
+
+    def register_callbacks(self) -> None:
+        """No interactive callbacks needed."""
+
+
+def get_app() -> CleavageEnergyApp:
+    """
+    Get cleavage energy benchmark app layout and callback registration.
+
+    Returns
+    -------
+    CleavageEnergyApp
+        Benchmark layout and callback registration.
+    """
+    scatter = read_plot(
+        DATA_PATH / "figure_cleavage_energies.json",
+        id=f"{BENCHMARK_NAME}-figure",
+    )
+
+    return CleavageEnergyApp(
+        name=BENCHMARK_NAME,
+        description=(
+            "Performance in predicting cleavage energies for "
+            "36,718 surface configurations."
+        ),
+        docs_url=DOCS_URL,
+        table_path=DATA_PATH / "cleavage_energy_metrics_table.json",
+        extra_components=[
+            Div(scatter, style={"marginTop": "20px"}),
+        ],
+    )
+
+
+if __name__ == "__main__":
+    full_app = Dash(
+        __name__,
+        assets_folder=DATA_PATH.parent.parent,
+        suppress_callback_exceptions=True,
+    )
+
+    cleavage_energy_app = get_app()
+    full_app.layout = cleavage_energy_app.layout
+    cleavage_energy_app.register_callbacks()
+
+    full_app.run(port=8056, debug=True)