Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions giskard/client/giskard_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,14 @@ def load_model_meta(self, project_key: str, uuid: str) -> ModelMetaInfo:
def load_dataset_meta(self, project_key: str, uuid: str) -> DatasetMeta:
res = self._session.get(f"project/{project_key}/datasets/{uuid}").json()
info = DatasetMetaInfo.parse_obj(res) # Used for validation, and to avoid extra fields and typos
analytics.track("hub:dataset:download", {
"project": anonymize(project_key),
"name": anonymize(info.name),
"target": anonymize(info.target),
"columnTypes": anonymize(info.columnTypes),
"columnDtypes": anonymize(info.columnDtypes),
"nb_rows": info.numberOfRows,
})
return DatasetMeta(
name=info.name,
target=info.target,
Expand Down
2 changes: 2 additions & 0 deletions giskard/commands/cli_hub.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,7 +192,9 @@ def _pull_image(version):
if not _check_downloaded(version):
logger.info(f"Downloading image for version {version}")
try:
analytics.track('giskard-server:install:start', {'version': version})
create_docker_client().images.pull(IMAGE_NAME, tag=version)
analytics.track('giskard-server:install:success', {'version': version})
except NotFound:
logger.error(
f"Image {get_image_name(version)} not found. Use a valid `--version` argument or check the content of $GSK_HOME/server-settings.yml"
Expand Down
4 changes: 3 additions & 1 deletion giskard/core/suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@
from giskard.ml_worker.testing.test_result import TestMessage, TestMessageLevel, TestResult
from giskard.models.base import BaseModel

from ..utils.analytics_collector import analytics

logger = logging.getLogger(__name__)

suite_input_types: List[type] = [
Expand Down Expand Up @@ -122,7 +124,6 @@ def to_wandb(self, run: Optional["wandb.wandb_sdk.wandb_run.Run"] = None) -> Non
except ImportError as e:
raise GiskardImportError("wandb") from e
from ..integrations.wandb.wandb_utils import _parse_test_name, get_wandb_run
from ..utils.analytics_collector import analytics

run = get_wandb_run(run)
# Log just a test description and a metric.
Expand Down Expand Up @@ -438,6 +439,7 @@ def upload(self, client: GiskardClient, project_key: str):
self.id = client.save_test_suite(self.to_dto(client, project_key, uploaded_uuids))
project_id = client.get_project(project_key).project_id
print(f"Test suite has been saved: {client.host_url}/main/projects/{project_id}/test-suite/{self.id}/overview")
analytics.track("hub:test_suite:uploaded")
return self

def to_dto(self, client: GiskardClient, project_key: str, uploaded_uuids: Optional[List[str]] = None):
Expand Down
5 changes: 2 additions & 3 deletions giskard/datasets/base/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,14 +31,14 @@
from giskard.settings import settings

from ...ml_worker.utils.file_utils import get_file_name
from ...utils.analytics_collector import analytics
from ..metadata.indexing import ColumnMetadataMixin

try:
import wandb # noqa
except ImportError:
pass


SAMPLE_SIZE = 1000

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -230,7 +230,7 @@ def __init__(
}

self.data_processor = DataProcessor()

analytics.track("wrap:dataset:success", {"nb_rows": self.number_of_rows})
logger.info("Your 'pandas.DataFrame' is successfully wrapped by Giskard's 'Dataset' wrapper class.")

@property
Expand Down Expand Up @@ -743,7 +743,6 @@ def to_wandb(self, run: Optional["wandb.wandb_sdk.wandb_run.Run"] = None) -> Non
except ImportError as e:
raise GiskardImportError("wandb") from e
from ...integrations.wandb.wandb_utils import get_wandb_run
from ...utils.analytics_collector import analytics

run = get_wandb_run(run)

Expand Down
15 changes: 12 additions & 3 deletions giskard/ml_worker/testing/registry/decorators.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,21 @@
from typing import Callable, List, Optional, Type, TypeVar, Union

import functools
import inspect
import sys
from typing import Callable, Optional, List, Union, Type, TypeVar

from giskard.core.core import TestFunctionMeta
from giskard.ml_worker.testing.registry.decorators_utils import make_all_optional_or_suite_input, set_return_type
from giskard.ml_worker.testing.registry.giskard_test import GiskardTestMethod, GiskardTest
from giskard.ml_worker.testing.registry.decorators_utils import (
make_all_optional_or_suite_input,
set_return_type,
)
from giskard.ml_worker.testing.registry.giskard_test import GiskardTest, GiskardTestMethod

from ....utils.analytics_collector import analytics


# TODO: Consider moving this decorator into giskard_test.py.
# For slicing_function and transformation_function, the decorator lives in the same file as the class.
def test(
_fn=None,
name=None,
Expand All @@ -34,6 +42,7 @@ def inner(
TestFunctionMeta(original, name=name, tags=tags, debug_description=debug_description, type="TEST")
)

analytics.track("custom:test:created", {"name": original.__name__})
if inspect.isclass(original) and issubclass(original, GiskardTest):
return original

Expand Down
4 changes: 3 additions & 1 deletion giskard/ml_worker/testing/registry/slicing_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
)
from giskard.ml_worker.testing.registry.registry import get_object_uuid, tests_registry

from ....utils.analytics_collector import analytics

SlicingFunctionType = Callable[..., bool]

default_tags = ["filter"]
Expand Down Expand Up @@ -157,6 +159,7 @@ def inner(func: Union[SlicingFunctionType, Type[SlicingFunction]]) -> SlicingFun
cell_level=cell_level,
)
)
analytics.track("custom:slicing_function:created", {"name": func.__name__})
if inspect.isclass(func) and issubclass(func, SlicingFunction):
return func

Expand All @@ -178,5 +181,4 @@ def _wrap_slicing_function(original: Callable, row_level: bool, cell_level: bool

make_all_optional_or_suite_input(slicing_fn)
set_return_type(slicing_fn, SlicingFunction)

return slicing_fn()
3 changes: 3 additions & 0 deletions giskard/ml_worker/testing/registry/transformation_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
)
from giskard.ml_worker.testing.registry.registry import get_object_uuid, tests_registry

from ....utils.analytics_collector import analytics

TransformationFunctionType = Callable[..., Union[pd.Series, pd.DataFrame]]

default_tags = ["transformation"]
Expand Down Expand Up @@ -128,6 +130,7 @@ def inner(func: Union[TransformationFunctionType, Type[TransformationFunction]])
)
)

analytics.track("custom:transformation_function:created", {"name": func.__name__})
if inspect.isclass(func) and issubclass(func, TransformationFunction):
return func
return _wrap_transformation_function(func, row_level, cell_level)()
Expand Down
21 changes: 21 additions & 0 deletions giskard/models/automodel.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import pandas as pd

from ..core.core import ModelType, SupportedModelTypes
from ..utils.analytics_collector import analytics
from .base.serialization import CloudpickleSerializableModel
from .function import PredictionFunctionModel

Expand Down Expand Up @@ -115,6 +116,12 @@ def __new__(
"""

if not model:
analytics.track(
"wrap:model:fail",
{
"reason": "no_model",
},
)
raise ValueError(
"The 'Model' class cannot be initiated without a `model` argument. "
"\n`model` can be either a model object (classifier, regressor, etc.) or a prediction function."
Expand Down Expand Up @@ -144,6 +151,12 @@ def __new__(
)
possibly_overriden_cls = giskard_cls
else: # possibly_overriden_cls = CloudpickleSerializableModel
analytics.track(
"wrap:model:fail",
{
"reason": "model_library_not_supported",
},
)
raise NotImplementedError(
"We could not infer your model library. You have two options:"
"\n- Pass a prediction_function to the Model class "
Expand Down Expand Up @@ -176,4 +189,12 @@ def __new__(
obj.meta.loader_class = possibly_overriden_cls.__name__
obj.meta.loader_module = possibly_overriden_cls.__module__

analytics.track(
"wrap:model:success",
{
"type": model_type,
"features": len(feature_names if feature_names is not None else []),
},
)

return obj
11 changes: 6 additions & 5 deletions giskard/models/base/model.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Iterable, List, Optional, Tuple, Type, Union

import builtins
import importlib
import logging
Expand All @@ -13,12 +15,9 @@
import numpy as np
import pandas as pd
import yaml
from typing import Iterable, List, Optional, Tuple, Type, Union

from giskard.client.dtos import ModelMetaInfo
from .model_prediction import ModelPredictionResults
from ..cache import get_cache_enabled
from ..utils import np_types_to_native

from ...client.giskard_client import GiskardClient
from ...core.core import ModelMeta, ModelType, SupportedModelTypes
from ...core.validation import configured_validate_arguments
Expand All @@ -28,6 +27,9 @@
from ...models.cache import ModelCache
from ...path_utils import get_size
from ...settings import settings
from ..cache import get_cache_enabled
from ..utils import np_types_to_native
from .model_prediction import ModelPredictionResults

META_FILENAME = "giskard-model-meta.yaml"

Expand Down Expand Up @@ -427,7 +429,6 @@ def upload(self, client: GiskardClient, project_key, validate_ds=None, *_args, *
if client is not None:
client.log_artifacts(f, posixpath.join(project_key, "models", str(self.id)))
client.save_model_meta(project_key, self.id, self.meta, platform.python_version(), get_size(f))

return str(self.id)

@classmethod
Expand Down
1 change: 0 additions & 1 deletion giskard/scanner/report.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,6 @@ def to_wandb(self, run: Optional["wandb.wandb_sdk.wandb_run.Run"] = None) -> Non
except ImportError as e:
raise GiskardImportError("wandb") from e
from ..integrations.wandb.wandb_utils import get_wandb_run
from ..utils.analytics_collector import analytics

run = get_wandb_run(run)
try:
Expand Down