ddmms · JonathanSchmidt1 · Feb 3, 2026 · Feb 4, 2026 · Feb 4, 2026 · Feb 4, 2026
diff --git a/.gitignore b/.gitignore
@@ -1,4 +1,5 @@
 *.json
+!ml_peg/calcs/bulk_crystal/qha_lattice_constants/data/*.json
 *.html
 *.dat
 *.lock

diff --git a/docs/source/user_guide/benchmarks/bulk_crystal.rst b/docs/source/user_guide/benchmarks/bulk_crystal.rst
@@ -115,3 +115,95 @@ Reference data:
 
 * Same as input data
 * PBE
+
+
+Quasiharmonic Approximation
+===========================
+
+Summary
+-------
+
+Performance in evaluating temperature-dependent thermodynamic properties using the
+quasiharmonic approximation (QHA). Each data point is a (structure, temperature, pressure)
+triple with experimental reference values.
+
+The QHA extends the harmonic approximation by accounting for volume-dependent phonon
+frequencies, enabling prediction of thermal expansion and bulk properties at temperature.
+
+
+Metrics
+-------
+
+(1) Lattice constant MAE (Experimental)
+
+Mean absolute error of equilibrium lattice constants at target temperature and pressure
+conditions.
+
+(2) Volume per atom MAE (Experimental)
+
+Mean absolute error of equilibrium volume per atom at target (structure, T, P) conditions.
+
+(3) Thermal expansion MAE (Experimental)
+
+Mean absolute error of thermal expansion coefficient (in 10⁻⁶ K⁻¹) at target conditions.
+
+(4) Bulk modulus MAE (Experimental)
+
+Mean absolute error of isothermal bulk modulus (in GPa) at target temperature and pressure.
+
+(5) Heat capacity MAE (Experimental)
+
+Mean absolute error of heat capacity at constant pressure (in J/mol·K) at target conditions.
+
+
+Methodology
+-----------
+
+The benchmark uses atomate2's ``ForceFieldQhaMaker`` workflow, which performs:
+
+1. **Initial relaxation**: The input structure is fully relaxed (cell + positions) using
+   the MLIP. We use an fmax of 0.0005 eV/Å.
+
+2. **Equation of state (EOS) sampling**: Multiple strained structures are generated around
+   the equilibrium volume (default: ±5% strain, 10 volumes).
+
+3. **Phonon calculations**: For each strained volume, phonon frequencies are computed using
+   the finite displacement method with a supercell approach using the default strategy in atomate2.
+
+4. **Free energy fitting**: The Helmholtz free energy F(V,T) is computed at each volume and
+   temperature using phonopy by combining:
+
+   - Electronic energy E(V) from the EOS
+   - Vibrational free energy F_vib(V,T) from phonon calculations
+
+5. **QHA analysis**: Temperature-dependent equilibrium properties are extracted by minimizing
+   F(V,T) at each temperature:
+
+   - V(T): equilibrium volume vs temperature
+   - α(T): thermal expansion coefficient
+   - B(T): isothermal bulk modulus
+   - C_p(T): heat capacity at constant pressure
+
+The workflow uses the Vinet equation of state for fitting and supports pressure-dependent
+calculations.
+
+
+Computational cost
+------------------
+
+High: Each QHA calculation requires multiple phonon calculations (one per volume point).
+The cost depends strongly on the size and symmetry of the structure. As a rough guide, Silicon
+in the diamond structure takes around a minute on gpu.
+
+
+Data availability
+-----------------
+
+Input structures:
+
+* CIF files for bulk crystals (e.g., Si in diamond structure)
+
+Reference data:
+
+* Experimental thermophysical properties from TPRC Data Series (Thermophysical properties
+  of matter, Vol. 12-13)
diff --git a/ml_peg/analysis/bulk_crystal/quasiharmonic/analyse_quasiharmonic.py b/ml_peg/analysis/bulk_crystal/quasiharmonic/analyse_quasiharmonic.py
@@ -0,0 +1,233 @@
+"""Analyse quasiharmonic benchmark results."""
+
+from __future__ import annotations
+
+from pathlib import Path
+from typing import Any
+
+import pandas as pd
+import pytest
+
+from ml_peg.analysis.utils.decorators import build_table, plot_parity
+from ml_peg.analysis.utils.utils import load_metrics_config, mae
+from ml_peg.app import APP_ROOT
+from ml_peg.calcs import CALCS_ROOT
+from ml_peg.models.get_models import get_model_names
+from ml_peg.models.models import current_models
+
+MODELS = get_model_names(current_models)
+CALC_PATH = CALCS_ROOT / "bulk_crystal" / "quasiharmonic" / "outputs"
+OUT_PATH = APP_ROOT / "data" / "bulk_crystal" / "quasiharmonic"
+
+METRICS_CONFIG_PATH = Path(__file__).with_name("metrics.yml")
+DEFAULT_THRESHOLDS, DEFAULT_TOOLTIPS, DEFAULT_WEIGHTS = load_metrics_config(
+    METRICS_CONFIG_PATH
+)
+
+
+def _make_hoverdata() -> dict[str, list]:
+    """
+    Create empty hoverdata dictionary for QHA plots.
+
+    Returns
+    -------
+    dict[str, list]
+        Empty hoverdata with Material, Temperature, and Pressure keys.
+    """
+    return {
+        "Material": [],
+        "Temperature (K)": [],
+        "Pressure (GPa)": [],
+    }
+
+
+# Hover data for interactive plots (populated during fixture execution)
+HOVERDATA_VOLUME = _make_hoverdata()
+
+
+def _load_results(model_name: str) -> pd.DataFrame:
+    """
+    Load results for a given model.
+
+    Parameters
+    ----------
+    model_name
+        Name of the model.
+
+    Returns
+    -------
+    pd.DataFrame
+        Results dataframe or empty dataframe if missing.
+    """
+    path = CALC_PATH / model_name / "results.csv"
+    if not path.exists():
+        return pd.DataFrame()
+    df = pd.read_csv(path)
+    # Filter to only successful calculations
+    if "status" in df.columns:
+        df = df[df["status"] == "success"]
+    return df.sort_values(["material", "temperature_K", "pressure_GPa"]).reset_index(
+        drop=True
+    )
+
+
+def _gather_parity_data(
+    ref_col: str,
+    pred_col: str,
+    hoverdata: dict[str, list],
+) -> dict[str, list]:
+    """
+    Gather reference and predicted values for parity plotting.
+
+    Parameters
+    ----------
+    ref_col
+        Column name for reference values.
+    pred_col
+        Column name for predicted values.
+    hoverdata
+        Hoverdata dictionary to populate (mutated in place).
+
+    Returns
+    -------
+    dict[str, list]
+        Dictionary of reference and predicted values per model.
+    """
+    OUT_PATH.mkdir(parents=True, exist_ok=True)
+
+    results: dict[str, list] = {"ref": []} | {mlip: [] for mlip in MODELS}
+    ref_stored = False
+
+    for model_name in MODELS:
+        df = _load_results(model_name)
+        if df.empty:
+            continue
+
+        if ref_col not in df.columns or pred_col not in df.columns:
+            continue
+
+        valid_df = df.dropna(subset=[pred_col, ref_col])
+        results[model_name] = valid_df[pred_col].tolist()
+
+        if not ref_stored and not valid_df.empty:
+            results["ref"] = valid_df[ref_col].tolist()
+            hoverdata["Material"] = valid_df["material"].tolist()
+            hoverdata["Temperature (K)"] = valid_df["temperature_K"].tolist()
+            hoverdata["Pressure (GPa)"] = valid_df["pressure_GPa"].tolist()
+            ref_stored = True
+
+    return results
+
+
+def _calculate_mae(parity_data: dict[str, list]) -> dict[str, float | None]:
+    """
+    Calculate MAE for each model from parity data.
+
+    Parameters
+    ----------
+    parity_data
+        Dictionary with 'ref' key and model name keys containing lists.
+
+    Returns
+    -------
+    dict[str, float | None]
+        MAE values for each model, None if data is missing or mismatched.
+    """
+    results: dict[str, float | None] = {}
+    ref = parity_data.get("ref", [])
+    for model_name in MODELS:
+        pred = parity_data.get(model_name, [])
+        if not ref or not pred or len(ref) != len(pred):
+            results[model_name] = None
+        else:
+            results[model_name] = mae(ref, pred)
+    return results
+
+
+@pytest.fixture
+@plot_parity(
+    filename=OUT_PATH / "figure_qha_volume_per_atom.json",
+    title="QHA Volume per Atom",
+    x_label="Predicted volume per atom / Å³/atom",
+    y_label="Reference volume per atom / Å³/atom",
+    hoverdata=HOVERDATA_VOLUME,
+)
+def qha_volume_per_atom() -> dict[str, list]:
+    """
+    Gather reference and predicted equilibrium volume per atom.
+
+    Returns
+    -------
+    dict[str, list]
+        Dictionary of reference and predicted values per model.
+    """
+    return _gather_parity_data(
+        "ref_volume_per_atom", "pred_volume_per_atom", HOVERDATA_VOLUME
+    )
+
+
+@pytest.fixture
+def volume_per_atom_mae(
+    qha_volume_per_atom: dict[str, list],
+) -> dict[str, float | None]:
+    """
+    Mean absolute error for equilibrium volume per atom.
+
+    Parameters
+    ----------
+    qha_volume_per_atom
+        Reference and predicted volumes per atom.
+
+    Returns
+    -------
+    dict[str, float | None]
+        MAE values for each model.
+    """
+    return _calculate_mae(qha_volume_per_atom)
+
+
+@pytest.fixture
+@build_table(
+    filename=OUT_PATH / "quasiharmonic_metrics_table.json",
+    metric_tooltips=DEFAULT_TOOLTIPS,
+    thresholds=DEFAULT_THRESHOLDS,
+    weights=DEFAULT_WEIGHTS,
+)
+def metrics(
+    volume_per_atom_mae: dict[str, float | None],
+) -> dict[str, dict[str, float | None]]:
+    """
+    Build metrics dictionary for quasiharmonic benchmark.
+
+    Each metric is computed over (structure, temperature, pressure) data points.
+
+    Parameters
+    ----------
+    volume_per_atom_mae
+        Volume per atom MAE per model.
+
+    Returns
+    -------
+    dict[str, dict[str, float | None]]
+        Metrics dictionary.
+    """
+    return {
+        "Volume per atom MAE": volume_per_atom_mae,
+    }
+
+
+def test_quasiharmonic(
+    metrics: dict[str, dict[str, Any]],
+    qha_volume_per_atom: dict[str, list],
+) -> None:
+    """
+    Run quasiharmonic analysis test.
+
+    Parameters
+    ----------
+    metrics
+        Metrics dictionary.
+    qha_volume_per_atom
+        Volume per atom parity data.
+    """
+    return
diff --git a/ml_peg/analysis/bulk_crystal/quasiharmonic/metrics.yml b/ml_peg/analysis/bulk_crystal/quasiharmonic/metrics.yml
@@ -0,0 +1,8 @@
+metrics:
+  Volume per atom MAE:
+    good: 0.02
+    bad: 0.1
+    unit: "Å³/atom"
+    tooltip: "Mean Absolute Error of equilibrium volume per atom at target (structure, T, P) conditions"
+    level_of_theory: Experimental
+    weight: 1.0
-Original file line number
+Diff line change
@@ -1,4 +1,5 @@
     *.json
+    !ml_peg/calcs/bulk_crystal/qha_lattice_constants/data/*.json
     *.html
     *.dat
     *.lock
@@ Expand Down @@