diff --git a/.gitignore b/.gitignore index 72e0aedc..3536b5c7 100644 --- a/.gitignore +++ b/.gitignore @@ -86,6 +86,9 @@ ipython_config.py .DS_Store **/.DS_Store +# Model Card Unit Tests +mlruns/ + # pyenv # For a library or package, you might want to ignore these files since the code is # intended to run in multiple environments; otherwise, check them in: diff --git a/src/edvise/reporting/model_card/base.py b/src/edvise/reporting/model_card/base.py index 8c8d134a..c2524688 100644 --- a/src/edvise/reporting/model_card/base.py +++ b/src/edvise/reporting/model_card/base.py @@ -56,6 +56,7 @@ def __init__( self.context: dict[str, t.Any] = {} self.assets_folder = assets_path or self.DEFAULT_ASSETS_FOLDER + self.run_id = self._resolve_run_id() self.output_path = self._build_output_path() self.template_path = self._resolve( "edvise.reporting.template", self.TEMPLATE_FILENAME @@ -348,13 +349,21 @@ def export_to_pdf(self): filename=self.pdf_path, catalog=self.catalog, institution_id=self.cfg.institution_id, + run_id=self.run_id, ) def _build_output_path(self) -> str: """ Builds the output path for the model card. """ - out_dir = os.path.join(tempfile.gettempdir(), "model_cards") + run_id = getattr(self, "run_id", None) + + if not run_id: + raise RuntimeError( + "ModelCard.run_id must be set before building output path" + ) + + out_dir = os.path.join(tempfile.gettempdir(), "model_cards", run_id) os.makedirs(out_dir, exist_ok=True) filename = f"model-card-{self.model_name}.md" return os.path.join(out_dir, filename) @@ -371,3 +380,14 @@ def _resolve(self, package: str, filename: str) -> Traversable: the file exists within the SST package itself. """ return files(package).joinpath(filename) + + def _resolve_run_id(self) -> str: + model_cfg = getattr(self.cfg, "model", None) + run_id = getattr(model_cfg, "run_id", None) if model_cfg is not None else None + + if not run_id: + raise ValueError( + f"config.model.run_id is required for ModelCard '{self.model_name}', " + "but it was missing (config.model is None or run_id is empty)." + ) + return str(run_id) diff --git a/src/edvise/reporting/utils/utils.py b/src/edvise/reporting/utils/utils.py index bd28a565..bd9573c8 100644 --- a/src/edvise/reporting/utils/utils.py +++ b/src/edvise/reporting/utils/utils.py @@ -136,7 +136,9 @@ def download_static_asset( return dst_path -def save_card_to_gold_volume(filename: str, catalog: str, institution_id: str) -> None: +def save_card_to_gold_volume( + filename: str, catalog: str, institution_id: str, run_id: str +) -> None: """ Saves the model card PDF to a subdirectory of "model_cards" in a Unity Catalog-backed gold volume. @@ -150,7 +152,7 @@ def save_card_to_gold_volume(filename: str, catalog: str, institution_id: str) - schema = f"{institution_id}_gold" file_volume = "gold_volume" volume_dir = f"/Volumes/{catalog}/{schema}/{file_volume}" - model_card_dir = os.path.join(volume_dir, "model_cards") + model_card_dir = os.path.join(volume_dir, "model_cards", run_id) dst_path = os.path.join(model_card_dir, os.path.basename(filename)) # Check if the volume exists diff --git a/tests/reporting/template/test_custom_template_context.py b/tests/reporting/template/test_custom_template_context.py index 2455b19e..d52c8db5 100644 --- a/tests/reporting/template/test_custom_template_context.py +++ b/tests/reporting/template/test_custom_template_context.py @@ -89,6 +89,7 @@ def make_custom_project_config(): return CustomProjectConfig( institution_id="custom_inst_id", institution_name="Custom Institution", + model={"experiment_id": "exp123", "run_id": "abc"}, student_id_col="student_id", target_col="target", split_col="split", diff --git a/tests/reporting/template/test_h2o_custom_template_context.py b/tests/reporting/template/test_h2o_custom_template_context.py index fc801b40..19ebc747 100644 --- a/tests/reporting/template/test_h2o_custom_template_context.py +++ b/tests/reporting/template/test_h2o_custom_template_context.py @@ -89,6 +89,7 @@ def make_custom_project_config(): return CustomProjectConfig( institution_id="custom_inst_id", institution_name="Custom Institution", + model={"experiment_id": "exp123", "run_id": "abc"}, student_id_col="student_id", target_col="target", split_col="split", diff --git a/tests/reporting/template/test_h2o_pdp_template_context.py b/tests/reporting/template/test_h2o_pdp_template_context.py index 0fb44d0f..99e91b60 100644 --- a/tests/reporting/template/test_h2o_pdp_template_context.py +++ b/tests/reporting/template/test_h2o_pdp_template_context.py @@ -1,6 +1,7 @@ import pytest import pandas as pd import re +from types import SimpleNamespace from unittest.mock import patch from edvise.reporting.model_card.h2o_pdp import H2OPDPModelCard from edvise.configs.pdp import PDPProjectConfig @@ -63,13 +64,20 @@ def __init__(self): self.features = DummyFeaturesConfig() -# Dummy config for base ModelCard class DummyConfig: def __init__(self): - self.institution_id = "test_uni" - self.institution_name = "Test University" - self.modeling = DummyModelingConfig() - self.preprocessing = DummyPreprocessingConfig() + self.institution_id = "test_inst" + self.institution_name = "Test Institution" + self.split_col = None + + self.model = SimpleNamespace( + run_id="dummy_run_id", + experiment_id="dummy_experiment_id", + mlflow_model_uri="models:/dummy/Production", + framework="sklearn", + ) + + self.modeling = None # Valid PDPProjectConfig diff --git a/tests/reporting/template/test_pdp_template_context.py b/tests/reporting/template/test_pdp_template_context.py index 13d5523b..4872dd4a 100644 --- a/tests/reporting/template/test_pdp_template_context.py +++ b/tests/reporting/template/test_pdp_template_context.py @@ -2,6 +2,7 @@ import pandas as pd import re from unittest.mock import patch +from types import SimpleNamespace from edvise.reporting.model_card.base import ModelCard from edvise.reporting.model_card.pdp import PDPModelCard from edvise.configs.pdp import PDPProjectConfig @@ -67,10 +68,18 @@ def __init__(self): # Dummy config for base ModelCard class DummyConfig: def __init__(self): - self.institution_id = "test_uni" - self.institution_name = "Test University" - self.modeling = DummyModelingConfig() - self.preprocessing = DummyPreprocessingConfig() + self.institution_id = "test_inst" + self.institution_name = "Test Institution" + self.split_col = None + + self.model = SimpleNamespace( + run_id="dummy_run_id", + experiment_id="dummy_experiment_id", + mlflow_model_uri="models:/dummy/Production", + framework="sklearn", + ) + + self.modeling = None # Valid PDPProjectConfig @@ -78,7 +87,7 @@ def make_pdp_config() -> PDPProjectConfig: return PDPProjectConfig( institution_id="inst_id", institution_name="Inst Name", - model={"experiment_id": "exp123", "run_id": "abc", "framework": "sklearn"}, + model={"experiment_id": "exp123", "run_id": "abc"}, datasets={ "raw_course": "dummy.csv", "raw_cohort": "dummy.csv",