Giskard-AI · andreybavt · Aug 30, 2022 · Aug 2, 2022 · Aug 2, 2022 · Aug 2, 2022
diff --git a/giskard-ml-worker/ml_worker/core/giskard_dataset.py b/giskard-ml-worker/ml_worker/core/giskard_dataset.py
@@ -16,6 +16,11 @@ def __init__(self, df: pd.DataFrame, target: Optional[str], feature_types: Dict[
         self.feature_types = feature_types
         self.column_types = column_types
 
+
+    @property
+    def columns(self):
+        return self.df.columns
+
     def slice(self, slice_fn: Callable):
         if slice_fn is None:
             return self

diff --git a/giskard-ml-worker/ml_worker/testing/drift_tests.py b/giskard-ml-worker/ml_worker/testing/drift_tests.py
diff --git a/giskard-ml-worker/ml_worker/testing/heuristic_tests.py b/giskard-ml-worker/ml_worker/testing/heuristic_tests.py
@@ -22,32 +22,31 @@ def test_right_label(self,
         of people with high-salaries are classified as “non default”
 
         Args:
-            actual_slice:
-                slice of the actual dataset
-            model:
-                uploaded model
-            classification_label:
-                classification label you want to test
-            threshold:
-                threshold for the percentage of passed rows
+          actual_slice(GiskardDataset):
+              Slice of the actual dataset
+          model(GiskardModel):
+              Model used to compute the test
+          classification_label(str):
+              Classification label you want to test
+          threshold(float):
+              Threshold for the percentage of passed rows
 
         Returns:
-            slice_nb_rows:
-                length of actual_slice tested
-            metrics:
-                the ratio of raws with the right classification label over the total of raws in the slice
-            passed:
-                TRUE if passed_ratio > threshold
-
+          actual_slices_size:
+              Length of actual_slice tested
+          metric:
+              The ratio of rows with the right classification label over the total of rows in the slice
+          passed:
+              TRUE if passed_ratio > threshold
         """
-
+        actual_slice.df.reset_index(drop=True, inplace=True)
         prediction_results = model.run_predict(actual_slice).prediction
         assert classification_label in model.classification_labels, \
             f'"{classification_label}" is not part of model labels: {",".join(model.classification_labels)}'
 
-        passed_slice = actual_slice.df.loc[prediction_results == classification_label]
+        passed_idx = actual_slice.df.loc[prediction_results == classification_label].index.values
 
-        passed_ratio = len(passed_slice) / len(actual_slice)
+        passed_ratio = len(passed_idx) / len(actual_slice)
         return self.save_results(SingleTestResult(
             actual_slices_size=[len(actual_slice)],
             metric=passed_ratio,
@@ -78,52 +77,49 @@ def test_output_in_range(self,
 
         For Regression : The predicted Sale Price of a house in the city falls in a particular range
         Args:
-            actual_slice:
-                slice of the actual dataset
-            model:
-                uploaded model
-            classification_label:
-                classification label you want to test
-            min_range:
-                minimum probability of occurrence of classification label
-            max_range:
-                maximum probability of occurrence of classification label
-            threshold:
-                threshold for the percentage of passed rows
+            actual_slice(GiskardDataset):
+                Slice of the actual dataset
+            model(GiskardModel):
+                Model used to compute the test
+            classification_label(str):
+                Optional. Classification label you want to test
+            min_range(float):
+                Minimum probability of occurrence of classification label
+            max_range(float):
+                Maximum probability of occurrence of classification label
+            threshold(float):
+                Threshold for the percentage of passed rows
 
         Returns:
-            slice_nb_rows:
-                length of actual_slice tested
-
+            actual_slices_size:
+                Length of actual_slice tested
             metrics:
-                the proportion of rows in the right range inside the slice
+                The proportion of rows in the right range inside the slice
             passed:
                 TRUE if metric > threshold
-
         """
         results_df = pd.DataFrame()
+        actual_slice.df.reset_index(drop=True, inplace=True)
 
         prediction_results = model.run_predict(actual_slice)
 
         if model.model_type == "regression":
             results_df["output"] = prediction_results.raw_prediction
 
         elif model.model_type == "classification":
-            results_df["output"] = prediction_results.all_predictions[classification_label]
             assert classification_label in model.classification_labels, \
                 f'"{classification_label}" is not part of model labels: {",".join(model.classification_labels)}'
+            results_df["output"] = prediction_results.all_predictions[classification_label]
 
         else:
             raise ValueError(
                 f"Prediction task is not supported: {model.model_type}"
             )
 
-        matching_prediction_mask = \
-            (results_df["output"] <= max_range) & \
-            (results_df["output"] >= min_range)
+        passed_idx = actual_slice.df.loc[(results_df["output"] <= max_range) & (results_df["output"] >= min_range)].index.values
+
+        passed_ratio = len(passed_idx) / len(actual_slice)
 
-        expected = actual_slice.df[matching_prediction_mask]
-        passed_ratio = len(expected) / len(actual_slice)
         return self.save_results(SingleTestResult(
             actual_slices_size=[len(actual_slice)],
             metric=passed_ratio,

diff --git a/giskard-ml-worker/ml_worker/testing/metamorphic_tests.py b/giskard-ml-worker/ml_worker/testing/metamorphic_tests.py
@@ -1,11 +1,11 @@
 import pandas as pd
 
-from generated.ml_worker_pb2 import SingleTestResult
+from generated.ml_worker_pb2 import SingleTestResult, TestMessage, TestMessageType
+
 from ml_worker.core.giskard_dataset import GiskardDataset
 from ml_worker.core.model import GiskardModel
 from ml_worker.testing.abstract_test_collection import AbstractTestCollection
 from ml_worker.testing.utils import apply_perturbation_inplace
-from ml_worker.testing.utils import save_df, compress
 
 
 class MetamorphicTests(AbstractTestCollection):
@@ -72,6 +72,8 @@ def _test_metamorphic(self,
                           output_sensitivity=None,
                           output_proba=True
                           ) -> SingleTestResult:
+        actual_slice.df.reset_index(drop=True, inplace=True)
+
         results_df, modified_rows_count = self._perturb_and_predict(actual_slice,
                                                                     model,
                                                                     perturbation_dict,
@@ -82,23 +84,24 @@ def _test_metamorphic(self,
                                                           model.model_type,
                                                           output_sensitivity,
                                                           flag)
-        failed_df = actual_slice.df.loc[failed_idx]
         passed_ratio = len(passed_idx) / modified_rows_count if modified_rows_count != 0 else 1
 
-        output_df_sample = compress(save_df(failed_df))
+        messages = [TestMessage(
+            type=TestMessageType.INFO,
+            text=f"{modified_rows_count} number of rows were perturbed"
+        )]
 
         return self.save_results(SingleTestResult(
             actual_slices_size=[len(actual_slice)],
-            number_of_perturbed_rows=modified_rows_count,
             metric=passed_ratio,
             passed=passed_ratio > threshold,
-            output_df=output_df_sample))
+            messages=messages))
 
     def test_metamorphic_invariance(self,
                                     df: GiskardDataset,
                                     model,
                                     perturbation_dict,
-                                    threshold=1,
+                                    threshold=0.5,
                                     output_sensitivity=None) -> SingleTestResult:
         """
         Summary: Tests if the model prediction is invariant when the feature values are perturbed
@@ -116,30 +119,28 @@ def test_metamorphic_invariance(self,
 
         Args:
             df(GiskardDataset):
-                Dataset used to compute the test
+              Dataset used to compute the test
             model(GiskardModel):
-                Model used to compute the test
+              Model used to compute the test
             perturbation_dict(dict):
-                Dictionary of the perturbations. It provides the perturbed features as key and a perturbation lambda function as value
+              Dictionary of the perturbations. It provides the perturbed features as key
+              and a perturbation lambda function as value
             threshold(float):
-                Threshold of the ratio of invariant rows
+              Threshold of the ratio of invariant rows
             output_sensitivity(float):
-                the threshold for ratio between the difference between perturbed prediction and actual prediction over
+                Optional. The threshold for ratio between the difference between perturbed prediction and actual prediction over
                 the actual prediction for a regression model. We consider there is a prediction difference for
                 regression if the ratio is above the output_sensitivity of 0.1
 
         Returns:
             actual_slices_size:
-                total number of rows of dataframe
-            number_of_perturbed_rows:
-                number of perturbed rows
+              Length of the dataset (df) tested
+            messages:
+              Test result message
             metric:
-                the ratio of invariant rows over the perturbed rows
+              The ratio of unchanged rows over the perturbed rows
             passed:
-                TRUE if passed_ratio > threshold
-            output_df:
-                dataframe containing the non-invariant rows
-
+              TRUE if metric > threshold
         """
 
         return self._test_metamorphic(flag='Invariance',
@@ -155,7 +156,7 @@ def test_metamorphic_increasing(self,
                                     df: GiskardDataset,
                                     model,
                                     perturbation_dict,
-                                    threshold=1,
+                                    threshold=0.5,
                                     classification_label=None):
         """
         Summary: Tests if the model probability increases when the feature values are perturbed
@@ -173,29 +174,26 @@ def test_metamorphic_increasing(self,
 
         Args:
             df(GiskardDataset):
-                Dataset used to compute the test
+              Dataset used to compute the test
             model(GiskardModel):
-                Model used to compute the test
+              Model used to compute the test
             perturbation_dict(dict):
-                Dictionary of the perturbations. It provides the perturbed features as key
-                and a perturbation lambda function as value
+              Dictionary of the perturbations. It provides the perturbed features as key
+              and a perturbation lambda function as value
             threshold(float):
-                Threshold of the ratio of increasing rows
+              Threshold of the ratio of increasing rows
             classification_label(str):
-                one specific label value from the target column
+              Optional. One specific label value from the target column
 
         Returns:
             actual_slices_size:
-                total number of rows of dataframe
-            number_of_perturbed_rows:
-                number of perturbed rows
+              Length of the dataset (df) tested
+            messages:
+              Test result message
             metric:
-                the ratio of increasing rows over the perturbed rows
+              The ratio of increasing rows over the perturbed rows
             passed:
-                TRUE if passed_ratio > threshold
-            output_df:
-                dataframe containing the rows whose probability doesn't increase after perturbation
-
+              TRUE if metric > threshold
         """
         assert model.model_type != "classification" or str(classification_label) in model.classification_labels, \
             f'"{classification_label}" is not part of model labels: {",".join(model.classification_labels)}'
@@ -211,7 +209,7 @@ def test_metamorphic_decreasing(self,
                                     df: GiskardDataset,
                                     model,
                                     perturbation_dict,
-                                    threshold=1,
+                                    threshold=0.5,
                                     classification_label=None
                                     ):
         """
@@ -225,34 +223,31 @@ def test_metamorphic_decreasing(self,
 
         The test is passed when the percentage of rows that are decreasing is higher than the threshold
 
-        Example : For a credit scoring model, the test is passed when a increase of wage by 10%,
+        Example : For a credit scoring model, the test is passed when, after an increase of wage by 10%,
          default probability is decreasing for more than 50% of people in the dataset
 
         Args:
             df(GiskardDataset):
-                Dataset used to compute the test
+              Dataset used to compute the test
             model(GiskardModel):
-                Model used to compute the test
+              Model used to compute the test
             perturbation_dict(dict):
-                Dictionary of the perturbations. It provides the perturbed features as key
-                and a perturbation lambda function as value
+              Dictionary of the perturbations. It provides the perturbed features as key
+              and a perturbation lambda function as value
             threshold(float):
-                Threshold of the ratio of decreasing rows
+              Threshold of the ratio of decreasing rows
             classification_label(str):
-                one specific label value from the target column
+              Optional. One specific label value from the target column
 
         Returns:
             actual_slices_size:
-                total number of rows of dataframe
-            number_of_perturbed_rows:
-                number of perturbed rows
+              Length of the dataset (df) tested
+            messages:
+              Test result message
             metric:
-                the ratio of decreasing rows over the perturbed rows
+              The ratio of decreasing rows over the perturbed rows
             passed:
-                TRUE if passed_ratio > threshold
-            output_df:
-                dataframe containing the rows whose probability doesn't decrease after perturbation
-
+              TRUE if metric > threshold
         """
 
         assert model.model_type != "classification" or classification_label in model.classification_labels, \