Giskard-AI · andreybavt · Aug 30, 2023 · Aug 2, 2023 · Aug 2, 2023 · Aug 2, 2023
diff --git a/python-client/docs/assets/integrations/wandb/wandb-categorical-chart.png b/python-client/docs/assets/integrations/wandb/wandb-categorical-chart.png
diff --git a/python-client/docs/assets/integrations/wandb/wandb-dataset.png b/python-client/docs/assets/integrations/wandb/wandb-dataset.png
diff --git a/python-client/docs/assets/integrations/wandb/wandb-global-chart.png b/python-client/docs/assets/integrations/wandb/wandb-global-chart.png
diff --git a/python-client/docs/assets/integrations/wandb/wandb-logo-yellow-dots-black-wb.png b/python-client/docs/assets/integrations/wandb/wandb-logo-yellow-dots-black-wb.png
diff --git a/python-client/docs/assets/integrations/wandb/wandb-numerical-chart.png b/python-client/docs/assets/integrations/wandb/wandb-numerical-chart.png
diff --git a/python-client/docs/assets/integrations/wandb/wandb-scanning-result.png b/python-client/docs/assets/integrations/wandb/wandb-scanning-result.png
diff --git a/python-client/docs/assets/integrations/wandb/wandb-test-suite-result.png b/python-client/docs/assets/integrations/wandb/wandb-test-suite-result.png
diff --git a/python-client/docs/integrations/index.md b/python-client/docs/integrations/index.md
@@ -7,6 +7,7 @@
 :hidden:
 
 mlflow/index
+wandb/index
 ```
 
 ::::::{grid} 1 1 2 2
@@ -18,4 +19,11 @@ mlflow/index
 &nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<img src="../assets/integrations/mlflow/MLflow-logo-final-white-TM.png" alt="mlflow" width="82%">
 :::
 :::::
+
+:::::{grid-item}
+:::{card}
+:link: wandb/index.md
+&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;<img src="../assets/integrations/wandb/wandb-logo-yellow-dots-black-wb.png" alt="wandb">
+:::
+:::::
 ::::::
diff --git a/python-client/docs/integrations/wandb/index.md b/python-client/docs/integrations/wandb/index.md
@@ -0,0 +1,67 @@
+# Weights and Biases
+
+Giskard can log SHAP plots, scan reports and test suites into Weights & Biases:
+- **Understand feature importance**: Giskard generates plots to highlight feature importance using the SHAP library.
+- **Scan your model to find dozens of hidden vulnerabilities**: The Giskard scan automatically detects vulnerability issues such as performance bias, data leakage, unrobustness, spurious correlation, overconfidence, underconfidence, unethical issues, etc.
+- **Instantaneously generate domain-specific tests**: Giskard automatically generates relevant tests based on the vulnerabilities detected by the scan. You can easily customize the tests depending on your use case by defining domain-specific data slicers and transformers as fixtures of your test suites.
+
+## Setup
+To use Giskard with Weights & Biases, you need to follow these steps:
+
+1. Set up Weights & Biases:
+   - sign up for a Weights & Biases account [here](https://wandb.ai/site).
+   - install and open your docker app.
+   - install the `wandb` python package and server:
+     ```shell
+     pip install wandb
+     wandb login --relogin # input the API key you get from the website
+     wandb server start --upgrade # this will download the docker images if they're not already downloaded
+     ```
+
+2. Set up Giskard:
+   - install the giskard library by following these [instructions](https://docs.giskard.ai/en/latest/guides/installation_library/index.html).
+
+## Logging from Giskard to Weights & Biases
+In order to get the most out of this integration, you need to follow these three steps to diagnose your ML model:
+- wrap your dataset by following this [guide](https://docs.giskard.ai/en/latest/guides/wrap_dataset/index.html).
+- wrap your ML model by following this [guide](https://docs.giskard.ai/en/latest/guides/wrap_model/index.html).
+- scan your ML model for vulnerabilities by following this [guide](https://docs.giskard.ai/en/latest/guides/scan/index.html).
+
+Once the above steps are done, you can now log the results into Weights & Biases by doing the following:
+```python
+import giskard, wandb
+# [...] wrap model and dataset with giskard
+scan_results = giskard.scan(giskard_model, giskard_dataset)
+test_suite_results = scan_results.generate_test_suite().run()
+
+wandb.login()
+giskard_dataset.to_wandb() # log your dataset as a table
+scan_results.to_wandb() # log scan results as an HTML report
+test_suite_results.to_wandb() # log test suite results as a table
+# TODO: log SHAP plots
+```
+
+```{eval-rst}
+.. note:: You can pass to :code:`to_wandb()` all the arguments you can pass to :code:`wandb.init()` (see `here <https://docs.wandb.ai/ref/python/init>`_)
+```
+
+
+## Notebook examples
+::::::{grid} 1 1 2 2
+:gutter: 1
+
+:::::{grid-item}
+:::{card} <br><h3><center>📊 Tabular</center></h3>
+:link: wandb-tabular-example.ipynb
+:::
+:::::
+::::::
+
+```{toctree}
+:caption: Table of Contents
+:name: mastertoc
+:maxdepth: 2
+:hidden:
+
+wandb-tabular-example
+```
diff --git a/python-client/docs/integrations/wandb/wandb-tabular-example.ipynb b/python-client/docs/integrations/wandb/wandb-tabular-example.ipynb
@@ -0,0 +1,118 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "source": [
+    "# Notebook Example - Tabular"
+   ],
+   "metadata": {
+    "collapsed": false
+   }
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "## Detecting tabular ML models vulnerabilities in W&B with Giskard\n",
+    "This example demonstrates how to efficiently scan two tabular ML models for hidden vulnerabilities using Giskard, log the results and interpret them within the W&B framework in just a few lines of code. We will use the following two tabular ML models:\n",
+    "\n",
+    "| Model    | Description                                                            | Training data   |\n",
+    "|----------|------------------------------------------------------------------------|-----------------|\n",
+    "| `model1` | A simple sklearn `LogisticRegression` model limited to 5 solver iterations (`max_iter=5`). | Titanic dataset |\n",
+    "| `model2` | A simple sklearn `LogisticRegression` model limited to 100 solver iterations (`max_iter=100`). | Titanic dataset |"
+   ],
+   "metadata": {
+    "collapsed": false
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "import wandb\n",
+    "\n",
+    "from giskard import Model, Dataset, demo, explain_with_shap, scan\n",
+    "\n",
+    "model1, df = demo.titanic(max_iter=5)\n",
+    "model2, __ = demo.titanic(max_iter=100)  # Datasets are identical.\n",
+    "models = {\"titanic-max_iter=5\": model1, \"titanic-max_iter=100\": model2}\n",
+    "\n",
+    "wrapped_data = Dataset(df=df, \n",
+    "                       target=\"Survived\",\n",
+    "                       cat_columns=['Pclass', 'Sex', \"SibSp\", \"Parch\", \"Embarked\"])\n",
+    "\n",
+    "for model_name, model in models.items():\n",
+    "    wrapped_model = Model(model=model.predict_proba,\n",
+    "                          model_type=\"classification\",\n",
+    "                          feature_names=['PassengerId', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked'], \n",
+    "                          classification_labels=model.classes_)\n",
+    "    \n",
+    "    # Log results to the new W&B run.\n",
+    "    wrapped_data.to_wandb(name=model_name)\n",
+    "    \n",
+    "    shap_explanation_result = explain_with_shap(wrapped_model, wrapped_data)\n",
+    "    shap_explanation_result.to_wandb()\n",
+    "    \n",
+    "    scan_results = scan(wrapped_model, wrapped_data)\n",
+    "    scan_results.to_wandb()\n",
+    "    \n",
+    "    test_suite = scan_results.generate_test_suite()\n",
+    "    test_suite.run().to_wandb()\n",
+    "\n",
+    "    # Finish a current run.\n",
+    "    wandb.finish()"
+   ],
+   "metadata": {
+    "collapsed": false
+   }
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "After logging the results, you can visualise them in the W&B user interface: run `wandb server start` and open <http://localhost:8080>. You will be able to visualise the following:\n",
+    "  \n",
+    "### The dataset\n",
+    "<img src=\"../../assets/integrations/wandb/wandb-dataset.png\">\n",
+    "\n",
+    "### The SHAP bar plots for categorical features\n",
+    "<img src=\"../../assets/integrations/wandb/wandb-categorical-chart.png\">\n",
+    "\n",
+    "### The SHAP scatter plots for numerical features\n",
+    "<img src=\"../../assets/integrations/wandb/wandb-numerical-chart.png\">\n",
+    "\n",
+    "### The SHAP global feature importance plot\n",
+    "<img src=\"../../assets/integrations/wandb/wandb-global-chart.png\">\n",
+    "\n",
+    "### The Giskard scan results\n",
+    "<img src=\"../../assets/integrations/wandb/wandb-scanning-result.png\">\n",
+    "\n",
+    "### The Giskard test-suite results\n",
+    "<img src=\"../../assets/integrations/wandb/wandb-test-suite-result.png\">"
+   ],
+   "metadata": {
+    "collapsed": false
+   }
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 2
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython2",
+   "version": "2.7.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/python-client/giskard/__init__.py b/python-client/giskard/__init__.py
@@ -15,6 +15,7 @@
 from giskard.ml_worker.testing.test_result import TestResult
 from giskard.ml_worker.utils.logging import configure_logging
 from giskard.models.automodel import Model
+from giskard.models.model_explanation import explain_with_shap
 from . import demo
 from .ml_worker.utils.network import check_latest_giskard_version
 from .scanner import scan
@@ -51,6 +52,7 @@ def get_version() -> str:
     "SuiteInput",
     "SlicingFunction",
     "scan",
+    "explain_with_shap",
     "TestResult",
     "GiskardTest",
     "demo",

diff --git a/python-client/giskard/core/suite.py b/python-client/giskard/core/suite.py
@@ -28,6 +28,7 @@
 )
 from giskard.models.base import BaseModel
 
+
 logger = logging.getLogger(__name__)
 
 suite_input_types: List[type] = [
@@ -74,6 +75,48 @@ def to_mlflow(self, mlflow_client: MlflowClient = None, mlflow_run_id: str = Non
 
         return metrics
 
+    def to_wandb(self, **kwargs) -> None:
+        """Log the test-suite result to the WandB run.
+
+        Log the current test-suite result in a table format to the active WandB run.
+
+        Parameters
+        ----------
+        **kwargs :
+            Additional keyword arguments
+            (see https://docs.wandb.ai/ref/python/init) to be added to the active WandB run.
+        """
+        from giskard.integrations.wandb.wandb_utils import wandb_run, _parse_test_name
+        import wandb
+        from ..utils.analytics_collector import analytics
+
+        with wandb_run(**kwargs) as run:
+            # Log just a test description and a metric.
+            columns = ["Metric name", "Data slice", "Metric value", "Passed"]
+            try:
+                data = [[*_parse_test_name(result[0]), result[1].metric, result[1].passed] for result in self.results]
+                analytics.track(
+                    "wandb_integration:test_suite",
+                    {
+                        "wandb_run_id": run.id,
+                        "tests_cnt": len(data),
+                    },
+                )
+            except Exception as e:
+                analytics.track(
+                    "wandb_integration:test_suite:error:unknown",
+                    {
+                        "wandb_run_id": wandb.run.id,
+                        "error": str(e),
+                    },
+                )
+                raise ValueError(
+                    "An error occurred while logging the test suite into wandb. "
+                    "Please submit the traceback as a GitHub issue in the following "
+                    "repository for further assistance: https://github.com/Giskard-AI/giskard."
+                ) from e
+            run.log({"Test suite results/Test-Suite Results": wandb.Table(columns=columns, data=data)})
+
 
 class SuiteInput:
     """